2016-09-21 6 views
9

私はPython 2.7.5 Linux CentOS 7マシンで作業しています。 確率的ニューラルネットワーク(PNN)私のデータセットを適用して、バイナリ分類問題を解決しようとしています。Python、私の確率論的ニューラルネットワーク(PNN)はいつもゼロを予測しているのですか?

私は次のPythonパッケージを使用しています:numpy、sklearn、neupy.algorithms。 私はthis example used for the iris datasetをフォローしようとしています。

問題は私のPNNは常にゼロが(ゼロに分類要素)を値、と私は理由を理解することはできません予測ということです...

は、ここに私のデータセット(「dataset_file.csv」)です。私は取得していますなぜ

import numpy as np 
from sklearn import datasets 
from sklearn.metrics import matthews_corrcoef 
from sklearn.cross_validation import StratifiedKFold 

from neupy.algorithms import PNN 

fileName="dataset_file.csv" 
TARGET_COLUMN=35 

from numpy import genfromtxt 
input_dataset_data = genfromtxt(fileName, delimiter=',', skip_header=0, usecols=(range(0, TARGET_COLUMN-1))) 
#print(input_dataset_data) 
input_dataset_target = genfromtxt(fileName, delimiter=',', skip_header=0, usecols=(TARGET_COLUMN-1)) 
#print(input_dataset_target) 

kfold_number = 2 
skfold = StratifiedKFold(input_dataset_target, kfold_number, shuffle=True) 
avarage_result = 0 

print("> Start classify input_dataset dataset") 

for i, (train, test) in enumerate(skfold, start=1): 

    pnn_network = PNN(std=0.1, step=0.2, verbose=True) 

    pnn_network.train(input_dataset_data[train], input_dataset_target[train]) 
    predictions = pnn_network.predict(input_dataset_data[test]) 

    print(predictions) 
    #print(input_dataset_target[test]) 

    mcc = matthews_corrcoef(input_dataset_target[test], predictions) 
    print "The Matthews correlation coefficient is %f" % mcc 

    print("kfold #{:<2}: Guessed {} out of {}".format(
     i, np.sum(predictions == input_dataset_target[test]), test.size 
    )) 

誰もがを知っています:34個の特徴と1つのラベルターゲットがあります(最後の列は、それが0になるか、または1):

47,1,0,1,0,20,1,0,1,24,1,1,0,2,1,8050,9,1,274,60,258,65,7,3.2,105,289,0,0,79,1,0,0,0,34,0 
55,1,0,1,0,45,1,0,0,1,1,1,1,3,0,11200,7,0,615,86,531,97,5.4,2.6,96,7541,1.6,0.8,6,1,1,1,1,42,0 
29,1,1,1,0,23,0,1,0,1,0,0,0,2,1,5300,12,1,189,30,203,72,7,3.5,93,480,0,0,90,1,0,0,0,43,1 
39,1,0,1,0,10,1,0,0,3,0,1,1,0,1,7910,14,1,462,28,197,50,8,4.5,93,459,5,2.8,45,1,1,0,0,21,0 
47,1,0,1,0,10,1,1,1,1.5,1,1,0,3,1,9120,4,0,530,71,181,60,6.2,3.8,83,213,3.6,1.95,53,1,1,0,0,11,0 
57,1,0,1,0,50,0,1,0,24,1,0,1,3,1,16000,9,0,330,78,172,74,5.9,2.9,112,332,4.1,2.1,82,1,1,0,0,23,1 
44,1,0,1,0,15,1,1,0,0.5,1,1,1,2,0,5800,14,0,275,44,155,105,7.2,3.5,84,360,3.44,1.6,55,1,1,0,0,24,0 
49,1,3,1,0,25,1,1,1,1,0,1,0,3,1,8200,12,1,441,74,237,111,6.2,3.6,79,211,0,0,91,1,0,0,0,43,0 
56,1,0,1,0,5,1,0,0,3,1,0,0,3,1,5100,7,1,188,58,185,62,7.8,3.9,112,472,0,0,83,1,0,0,0,34,0 
33,1,4,1,0,20,1,0,1,3,0,0,0,3,1,7300,10,1,329,40,139,80,6.9,3.7,89,122,3.4,1.2,75,1,1,0,0,33,0 
22,0,0,1,0,15,1,0,0,1,1,1,1,0,1,3700,8,0,617,53,267,128,6.2,3.8,91,3060,3.1,1.9,63,1,1,0,0,54,0 
82,0,5,1,0,60,1,0,1,3,1,1,1,0,0,8900,11,1,275,83,255,93,5.9,3.1,95,455,4.8,1.9,68,1,1,0,0,55,0 
49,0,2,1,0,20,1,0,1,2,1,0,0,0,1,8500,6,1,292,84,241,79,6.8,3.9,100,158,3.4,1.25,75,1,1,1,0,65,0 
51,1,4,1,0,51,1,1,1,2,1,0,1,0,1,18300,14,1,522,91,268,105,6.1,3.1,98,758,4.2,2.5,19,1,1,1,1,67,0 
61,1,2,1,0,20,1,0,0,3,1,0,0,3,1,6600,9,1,563,101,268,78,6.4,3.7,115,694,5.2,3,29,1,1,1,1,77,0 
48,0,1,1,0,28,1,0,0,12,1,0,0,3,1,9100,22,0,114,18,165,63,7.2,3.6,103,429,0,0,84,1,0,0,0,34,0 
57,0,0,1,0,40,1,0,1,1,0,0,0,3,1,8100,8,0,264,15,120,69,6.8,3.4,91,390,0,0,91,1,0,0,0,23,0 
57,0,0,1,0,25,1,0,0,12,0,1,0,0,0,6900,16,0,847,111,289,78,5.3,2.4,105,162,3.1,1.9,68,1,1,1,0,78,0 
47,1,4,1,0,40,0,1,1,6,1,1,1,2,1,21500,10,0,632,121,219,108,7.5,2.8,149,1158,3.17,1.77,8,1,1,1,0,58,1 
52,0,0,1,0,30,1,1,1,2,1,0,1,0,0,14600,5,1,405,88,280,140,5.8,3.1,121,983,3.9,1.8,17,1,1,1,1,76,0 
50,1,2,1,0,16,1,1,0,1,1,1,1,0,1,12200,9,1,280,7,176,71,7.4,4.2,105,293,4.5,2.7,68,1,1,0,0,67,0 
63,1,4,1,0,18,1,0,1,0.5,1,1,1,3,0,16400,8,0,479,93,140,64,5.8,3.7,226,1286,6.22,3.6,18,1,1,0,0,19,0 
54,0,0,1,0,20,0,0,1,8,0,0,1,0,1,7200,10,0,366,71,284,73,6.4,3.7,114,384,4.1,2.8,65,1,1,0,0,24,1 
31,0,3,1,0,10,0,1,0,1,1,1,1,1,1,3800,8,0,568,102,236,59,6.4,3.7,99,387,0,0,78,1,0,0,0,45,1 
44,0,6,1,0,10,1,1,0,2,1,1,1,0,1,7700,15,1,274,44,139,62,6.7,4.1,93,129,0,0,76,1,0,0,0,24,0 
50,0,6,1,0,20,1,0,0,3,1,1,0,0,1,5200,6,0,403,90,224,79,6.3,3.1,109,151,3.1,1.4,79,1,0,0,0,34,0 
61,1,3,1,0,30,0,1,0,3,1,0,1,2,1,11500,7,0,668,88,178,65,6.7,3.08,104,680,4.1,2.5,22,1,1,0,0,23,1 

そして、ここでは私のPythonのコードです0の値を持つ予測のみ? この問題を解決するにはの提案を教えてください。

ありがとうございます!

EDIT:

0.55,1,0,1,0,0.29,1,0,1,0.46,1,1,0,0.67,1,0.37,0.41,1,0.08,0.47,0.23,0.13,0.82,0.46,0.25,0.04,0,0,0.52,1,0,0,0,0.33,0 
0.65,1,0,1,0,0.64,1,0,0,0.02,1,1,1,1,0,0.52,0.32,0,0.18,0.67,0.47,0.2,0.64,0.38,0.23,1,0.24,0.18,0.04,1,1,1,1,0.41,0 
0.34,1,0.13,1,0,0.33,0,0.5,0,0.02,0,0,0,0.67,1,0.25,0.55,1,0.06,0.23,0.18,0.15,0.82,0.51,0.22,0.06,0,0,0.6,1,0,0,0,0.42,1 
0.46,1,0,1,0,0.14,1,0,0,0.06,0,1,1,0,1,0.37,0.64,1,0.14,0.22,0.17,0.1,0.94,0.65,0.22,0.06,0.75,0.64,0.3,1,1,0,0,0.2,0 
0.55,1,0,1,0,0.14,1,0.5,1,0.03,1,1,0,1,1,0.42,0.18,0,0.16,0.55,0.16,0.12,0.73,0.55,0.2,0.03,0.54,0.44,0.35,1,1,0,0,0.11,0 
0.67,1,0,1,0,0.71,0,0.5,0,0.46,1,0,1,1,1,0.74,0.41,0,0.1,0.6,0.15,0.15,0.69,0.42,0.27,0.04,0.61,0.48,0.54,1,1,0,0,0.22,1 
0.52,1,0,1,0,0.21,1,0.5,0,0.01,1,1,1,0.67,0,0.27,0.64,0,0.08,0.34,0.14,0.21,0.85,0.51,0.2,0.05,0.51,0.36,0.36,1,1,0,0,0.23,0 
0.58,1,0.38,1,0,0.36,1,0.5,1,0.02,0,1,0,1,1,0.38,0.55,1,0.13,0.57,0.21,0.23,0.73,0.52,0.19,0.03,0,0,0.6,1,0,0,0,0.42,0 
0.66,1,0,1,0,0.07,1,0,0,0.06,1,0,0,1,1,0.24,0.32,1,0.06,0.45,0.16,0.13,0.92,0.57,0.27,0.06,0,0,0.55,1,0,0,0,0.33,0 
0.39,1,0.5,1,0,0.29,1,0,1,0.06,0,0,0,1,1,0.34,0.45,1,0.1,0.31,0.12,0.16,0.81,0.54,0.21,0.02,0.51,0.27,0.5,1,1,0,0,0.32,0 
0.26,0,0,1,0,0.21,1,0,0,0.02,1,1,1,0,1,0.17,0.36,0,0.19,0.41,0.24,0.26,0.73,0.55,0.22,0.41,0.46,0.43,0.42,1,1,0,0,0.52,0 
0.96,0,0.63,1,0,0.86,1,0,1,0.06,1,1,1,0,0,0.41,0.5,1,0.08,0.64,0.23,0.19,0.69,0.45,0.23,0.06,0.72,0.43,0.45,1,1,0,0,0.53,0 
0.58,0,0.25,1,0,0.29,1,0,1,0.04,1,0,0,0,1,0.4,0.27,1,0.09,0.65,0.21,0.16,0.8,0.57,0.24,0.02,0.51,0.28,0.5,1,1,1,0,0.63,0 
0.6,1,0.5,1,0,0.73,1,0.5,1,0.04,1,0,1,0,1,0.85,0.64,1,0.16,0.71,0.24,0.21,0.72,0.45,0.23,0.1,0.63,0.57,0.13,1,1,1,1,0.65,0 
0.72,1,0.25,1,0,0.29,1,0,0,0.06,1,0,0,1,1,0.31,0.41,1,0.17,0.78,0.24,0.16,0.75,0.54,0.27,0.09,0.78,0.68,0.19,1,1,1,1,0.75,0 
0.56,0,0.13,1,0,0.4,1,0,0,0.23,1,0,0,1,1,0.42,1,0,0.03,0.14,0.15,0.13,0.85,0.52,0.24,0.06,0,0,0.56,1,0,0,0,0.33,0 
0.67,0,0,1,0,0.57,1,0,1,0.02,0,0,0,1,1,0.38,0.36,0,0.08,0.12,0.11,0.14,0.8,0.49,0.22,0.05,0,0,0.6,1,0,0,0,0.22,0 
0.67,0,0,1,0,0.36,1,0,0,0.23,0,1,0,0,0,0.32,0.73,0,0.25,0.86,0.26,0.16,0.62,0.35,0.25,0.02,0.46,0.43,0.45,1,1,1,0,0.76,0 
0.55,1,0.5,1,0,0.57,0,0.5,1,0.12,1,1,1,0.67,1,1,0.45,0,0.19,0.94,0.19,0.22,0.88,0.41,0.35,0.15,0.47,0.4,0.05,1,1,1,0,0.56,1 
0.61,0,0,1,0,0.43,1,0.5,1,0.04,1,0,1,0,0,0.68,0.23,1,0.12,0.68,0.25,0.29,0.68,0.45,0.29,0.13,0.58,0.41,0.11,1,1,1,1,0.74,0 
0.59,1,0.25,1,0,0.23,1,0.5,0,0.02,1,1,1,0,1,0.57,0.41,1,0.08,0.05,0.16,0.15,0.87,0.61,0.25,0.04,0.67,0.61,0.45,1,1,0,0,0.65,0 
0.74,1,0.5,1,0,0.26,1,0,1,0.01,1,1,1,1,0,0.76,0.36,0,0.14,0.72,0.12,0.13,0.68,0.54,0.54,0.17,0.93,0.82,0.12,1,1,0,0,0.18,0 
0.64,0,0,1,0,0.29,0,0,1,0.15,0,0,1,0,1,0.33,0.45,0,0.11,0.55,0.25,0.15,0.75,0.54,0.27,0.05,0.61,0.64,0.43,1,1,0,0,0.23,1 
0.36,0,0.38,1,0,0.14,0,0.5,0,0.02,1,1,1,0.33,1,0.18,0.36,0,0.17,0.79,0.21,0.12,0.75,0.54,0.24,0.05,0,0,0.52,1,0,0,0,0.44,1 
0.52,0,0.75,1,0,0.14,1,0.5,0,0.04,1,1,1,0,1,0.36,0.68,1,0.08,0.34,0.12,0.13,0.79,0.59,0.22,0.02,0,0,0.5,1,0,0,0,0.23,0 
0.59,0,0.75,1,0,0.29,1,0,0,0.06,1,1,0,0,1,0.24,0.27,0,0.12,0.7,0.2,0.16,0.74,0.45,0.26,0.02,0.46,0.32,0.52,1,0,0,0,0.33,0 
0.72,1,0.38,1,0,0.43,0,0.5,0,0.06,1,0,1,0.67,1,0.53,0.32,0,0.2,0.68,0.16,0.13,0.79,0.45,0.25,0.09,0.61,0.57,0.15,1,1,0,0,0.22,1 
+3

** normalize **あなたのデータ。使用しているネットワークは本質的に極端にローカルなので、一部の機能が他の機能より100倍大きい場合はうまく機能しません。ランダムなこと - 彼らがここで "PNN"と呼んでいるものは単なるナイーブなカーネル密度推定器です。そこには "ニューラルネット"はありません。 – lejlot

+0

ご意見ありがとうございます。私はデータセットを正規化しようとしましたが、同じ結果が得られます... –

+1

結果は正常です。 Python 2と3の両方でテストされています。K-Foldを生成するときに 'shuffle = True'を使うので、そういう気持ちがあります。証明が必要な場合は、ここに[imgur image link](http://imgur.com/a/Z1R9g) – Jeon

答えて

5

私はあなたのコードを試してみました、あなたのネットワークが両方のクラスを予測するように私には思える:

import numpy as np 
from sklearn.cross_validation import StratifiedKFold 
from neupy.algorithms import PNN 

fileName="dataset_file.csv" 
TARGET_COLUMN=35 

from numpy import genfromtxt 
input_dataset_data = genfromtxt(fileName, delimiter=',', skip_header=0, usecols=(range(0, TARGET_COLUMN-1))) 
input_dataset_target = genfromtxt(fileName, delimiter=',', skip_header=0, usecols=(TARGET_COLUMN-1)) 

kfold_number = 5 
skfold = StratifiedKFold(input_dataset_target, kfold_number, shuffle=True) 

print("> Start classify input_dataset dataset") 
for std in [0.2, 0.4, 0.6, 0.8, 1]: 
    average_results = [] 
    for i, (train, test) in enumerate(skfold, start=1): 
      pnn_network = PNN(std=std, step=0.2, verbose=False, batch_size=2) 
      pnn_network.train(input_dataset_data[train], input_dataset_target[train]) 
      predictions = pnn_network.predict(input_dataset_data[test]) 
      print("Positive in predictions:", 1 in predictions) 
      average_result.append(np.sum(predictions == input_dataset_target[test]) /float(len(predictions))) 
    print std, np.average(average_result) 

出力例はここ(列によって正規化)正規化されたデータセットがありますチューニング中に:

1 0.881558441558 
('Positive in predictions:', True) 
('Positive in predictions:', True) 
('Positive in predictions:', True) 
('Positive in predictions:', True) 
('Positive in predictions:', True) 

これは、std=1の場合、倍率の平均精度sは〜0.88であり、層別交差検定のラウンドのそれぞれにおいて1が予測される。私はあなたの編集の正規化されたデータを使用しました。

関連する問題