2017-12-30 40 views
0
import csv 
import random 
import math 

def loadCsv(filename): 
    lines = csv.reader(open(filename, "rb")) 
    dataset = list(lines) 
    for i in range(len(dataset)): 
     dataset[i] = [float(x) for x in dataset[i]] 
    return dataset 

def splitDataset(dataset, splitRatio): 
    trainSize = int(len(dataset) * splitRatio) 
    trainSet = [] 
    copy = list(dataset) 
    while len(trainSet) < trainSize: 
     index = random.randrange(len(copy)) 
     trainSet.append(copy.pop(index)) 
    return [trainSet, copy] 

def separateByClass(dataset): 
    separated = {} 
    for i in range(len(dataset)): 
     vector = dataset[i] 
     if (vector[-1] not in separated): 
      separated[vector[-1]] = [] 
     separated[vector[-1]].append(vector) 
    return separated 

def mean(numbers): 
    return sum(numbers)/float(len(numbers)) 

def stdev(numbers): 
    avg = mean(numbers) 
    variance = sum([pow(x-avg,2) for x in numbers])/float(len(numbers)-1) 
    return math.sqrt(variance) 

def summarize(dataset): 
    summaries = [(mean(attribute), stdev(attribute)) for attribute in 
    zip(*dataset)] 
    del summaries[-1] 
    return summaries 

def summarizeByClass(dataset): 
    separated = separateByClass(dataset) 
    summaries = {} 
    for classValue, instances in separated.iteritems(): 
     summaries[classValue] = summarize(instances) 
    return summaries 

def calculateProbability(x, mean, stdev): 
    exponent = math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2)))) 
    return (1/(math.sqrt(2*math.pi) * stdev)) * exponent 

def calculateClassProbabilities(summaries, inputVector): 
    probabilities = {} 
    for classValue, classSummaries in summaries.iteritems(): 
     probabilities[classValue] = 1 
     for i in range(len(classSummaries)): 
      mean, stdev = classSummaries[i] 
      x = inputVector[i] 
      probabilities[classValue] *= calculateProbability(x, mean, stdev) 
    return probabilities 

def predict(summaries, inputVector): 
    probabilities = calculateClassProbabilities(summaries, inputVector) 
    bestLabel, bestProb = None, -1 
    for classValue, probability in probabilities.iteritems(): 
     if bestLabel is None or probability > bestProb: 
      bestProb = probability 
      bestLabel = classValue 
    return bestLabel 

def getPredictions(summaries, testSet): 
    predictions = [] 
    for i in range(len(testSet)): 
     result = predict(summaries, testSet[i]) 
     predictions.append(result) 
    return predictions 

def getAccuracy(testSet, predictions): 
    correct = 0 
    for i in range(len(testSet)): 
     if testSet[i][-1] == predictions[i]: 
      correct += 1 
    return (correct/float(len(testSet))) * 100.0 

def main(): 
    filename = 'processed.cleveland.data.csv' 
    splitRatio = 0.67 
    dataset = loadCsv(filename) 
    trainingSet, testSet = splitDataset(dataset, splitRatio) 
    print('Split {0} rows into train={1} and test={2} rows').format(len(dataset), len(trainingSet), len(testSet)) 
    summaries = summarizeByClass(trainingSet) 
    predictions = getPredictions(summaries, testSet) 
    accuracy = getAccuracy(testSet, predictions) 
    print('Accuracy: {0}%').format(accuracy) 

main() 

上記のコードは、Pythonスクリプトを学習する純粋なベイズマシンです。私はprocessed.cleveland.data.csvに格納されているデータセットでコードを使用しようとしています。しかし、私は次のエラーを得続ける:端末で実行しているときにPythonコードでエラーが発生する

Traceback (most recent call last): 
File "./naivebayespython.py", line 101, in <module> 
main() 
File "./naivebayespython.py", line 91, in main 
dataset = loadCsv(filename) 
File "./naivebayespython.py", line 10, in loadCsv 
dataset[i] = [float(x) for x in dataset[i]] 
ValueError: could not convert string to float: ? 

誰かが私が間違っているのを教えてこの問題を解決する方法を提案してくださいもらえますか?私はPythonには比較的新しいので、説明も役立ちます。ありがとう!

+3

のどうやら1をキャプチャしますあなたの 'dataset'の要素は' '? ''であり、数値ではありません。 – melpomene

+0

[ValueError:stringをfloatに変換できませんでした:id](https://stackoverflow.com/questions/8420143/valueerror-could-not-convert-string-to-float-id) – wwii

+0

@melpomene私は行を '?'で取り除いた後でさえも動作しません。 –

答えて

1

あなたがtry:except: で変換エラーをキャッチが、浮きconversable何であるかを認識することができます - 完全なリストについては、この答えを参照してください。https://stackoverflow.com/a/20929881/7505395

変更が間違って変換

def checkIfFloatable(something): # change the name ;) 
    try: 
     if float(something): 
      return True 
    except: 
     return False 

def loadCsv(filename): 
    lines = csv.reader(open(filename, "rb")) 
    dataset = list(lines) 
    for i in range(len(dataset)): 
     dataset[i] = [float(x) for x in dataset[i] if checkIfFloatable(x)] # else None 
    return dataset 
関連する問題