
I coded a neural network in Python to solve the MNIST task. However, the error rate barely changes after an epoch (only in the sixth digit after the decimal point), and the network has hardly learned anything even after 10000 epochs... Can someone tell me what I am doing wrong with MNIST? I set the learning rate eta to 0.05.

import numpy as np 
import pickle 
import time 

class FeedForwardNetwork():

    def __init__(self, input_dim, hidden_dim, output_dim):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.input_layer = np.array([])
        self.hidden_layer = np.array([])
        self.output_layer = np.array([])
        # Weights are drawn from [-1, 1) and scaled down to [-0.001, 0.001)
        self.weights_input_hidden = (2 * np.random.random((input_dim, hidden_dim)) - 1) / 1000
        self.weights_hidden_output = (2 * np.random.random((hidden_dim, output_dim)) - 1) / 1000

        self.validation_data = np.array([])
        self.validation_data_solution = np.array([])

    def _tanh(self, x, deriv=False):
        if not deriv:
            return np.tanh(x)
        return 1 - np.tanh(x)**2

    def _softmax(self, x):
        # Note: defined but never used in forward_propagate
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    def set_training_data(self, training_data_input, training_data_target):
        """Splits the data up into training and validation data with a ratio of 0.75/0.25 and sets the data for training."""
        if len(training_data_input) != len(training_data_target):
            raise Exception("Number of training examples and training targets does not match!")
        len_training_data = int(len(training_data_input) / 100 * 75)
        self.input_layer = training_data_input[:len_training_data]
        self.output_layer = training_data_target[:len_training_data]
        self.validation_data = np.array([training_data_input[len_training_data:]])
        self.validation_data_solution = np.array([training_data_target[len_training_data:]])

    def save(self, filename):
        """Saves the weights into a pickle file."""
        with open(filename, "wb") as network_file:
            pickle.dump(self.weights_input_hidden, network_file)
            pickle.dump(self.weights_hidden_output, network_file)

    def load(self, filename):
        """Loads network weights from a pickle file."""
        with open(filename, "rb") as network_file:
            weights_input_hidden = pickle.load(network_file)
            weights_hidden_output = pickle.load(network_file)

        if len(weights_input_hidden) != len(self.weights_input_hidden):
            raise Exception("File contains weights that do not match the current network's size!")
        if len(weights_hidden_output) != len(self.weights_hidden_output):
            raise Exception("File contains weights that do not match the current network's size!")

        self.weights_input_hidden = weights_input_hidden
        self.weights_hidden_output = weights_hidden_output

    def measure_error(self, input_data, output_data):
        return 1/2 * np.sum((output_data - self.activate(input_data))**2)

    def forward_propagate(self, input_data):
        """Propagates the input data from the input neurons to the output neurons and returns the output layer."""
        input_layer = input_data
        self.hidden_layer = self._tanh(np.dot(input_layer, self.weights_input_hidden))
        output_layer = self._tanh(np.dot(self.hidden_layer, self.weights_hidden_output))
        return output_layer

    def activate(self, input_data):
        """Sends the given input through the net and returns the net's prediction."""
        return self.forward_propagate(input_data)

    def back_propagate(self, input_data, output_data, eta):
        """Calculates the difference between target output and actual output and adjusts the weights to fit the target output better.
           The parameter eta is the learning rate."""
        num_of_samples = len(input_data)
        output_layer = self.forward_propagate(input_data)
        output_layer_error = output_data - output_layer
        # Note: output_layer already holds tanh activations, so deriv=True
        # computes 1 - tanh(tanh(x))**2 here rather than 1 - tanh(x)**2
        output_layer_delta = output_layer_error * self._tanh(output_layer, deriv=True)
        # How much did each hidden neuron contribute to the output error?
        # Multiplies the delta term with the weights
        hidden_layer_error = output_layer_delta.dot(self.weights_hidden_output.T)

        # If the prediction is good, the second term will be small and the change will be small
        # Ex: target: 1 -> slope will be 1, so the second term will be big
        hidden_layer_delta = hidden_layer_error * self._tanh(self.hidden_layer, deriv=True)
        # Both lines return a matrix. A row stands for all weights connected to one neuron.
        # E.g. [1, 2, 3] -> weights to neuron A
        #      [4, 5, 6] -> weights to neuron B
        hidden_weights_change = input_data.T.dot(hidden_layer_delta) / num_of_samples
        output_weights_change = self.hidden_layer.T.dot(output_layer_delta) / num_of_samples

        # Note: the changes above were already divided by num_of_samples,
        # so the update is divided by the number of samples a second time here
        self.weights_hidden_output += (output_weights_change * eta) / num_of_samples
        self.weights_input_hidden += (hidden_weights_change * eta) / num_of_samples

    def batch_train(self, epochs, eta, patience=10):
        """Trains the network in batch mode, i.e. the weights are updated after showing all training examples.
           Eta is the learning rate and patience is the number of epochs that the validation error is allowed to increase before aborting."""
        validation_error = self.measure_error(self.validation_data, self.validation_data_solution)
        for epoch in range(epochs):
            self.back_propagate(self.input_layer, self.output_layer, eta)
            validation_error_new = self.measure_error(self.validation_data, self.validation_data_solution)
            if validation_error_new < validation_error:
                validation_error = validation_error_new
            else:
                patience -= 1
                if patience == 0:
                    print("Abort Training. Overfitting has started! Epoch: {0}. Error: {1}".format(epoch, validation_error_new))
                    return
            print("Epoch: {0}, Error: {1}".format(epoch, validation_error))
            self.save("Network_Mnist.net")

Thanks!

Epoch: 1813, Error: 7499.944371111551
Epoch: 1814, Error: 7499.944368765047

Answer


You might want to add a softmax layer with cross-entropy error. Tanh outputs negative values when its input is negative, which is undesirable for the output layer, because probabilities need to lie in the range [0, 1].
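
For illustration, here is a minimal sketch of that suggestion (not from the original answer; the helper names softmax and cross_entropy and the example values are mine):

import numpy as np

def softmax(x):
    # Row-wise softmax; subtracting the row max keeps np.exp from overflowing
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)

def cross_entropy(predictions, targets):
    # Mean cross-entropy over the batch; the small constant avoids log(0)
    return -np.mean(np.sum(targets * np.log(predictions + 1e-12), axis=1))

scores = np.array([[2.0, 1.0, 0.1]])   # raw output-layer scores for one sample
target = np.array([[1.0, 0.0, 0.0]])   # one-hot target
probs = softmax(scores)                # ~[[0.66, 0.24, 0.10]], all in [0, 1]
loss = cross_entropy(probs, target)
# With softmax + cross-entropy and one-hot targets, the output-layer delta
# simplifies to (prediction - target), replacing the tanh derivative term.
output_delta = probs - target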

This is a toy feed-forward NN; it might help you.
