2016-10-20 3 views

質問:トレーニングセットとテストセットを分けて使うランダムフォレストの例はありますか? Accord-Net MLのテストプロジェクトで私が見つけた例は、トレーニングとテストでまったく同じデータを使用しています。Accord.Netでトレーニング用とテスト用のコードブックを同期する方法


int[] trainOutputs = trainCodebook.Translate("Output", trainLabels); 

And the test labels similarly: 

int[] testOutputs = testCodebook.Translate("Output", testLabels); 

Finally I train with the train data and test with the test data: 

var forest = teacher.Learn(trainVectors, trainOutputs); 

int[] predicted = forest.Decide(testVectors); 



new Codification("-1","0","1"); 

残念ながら、これは指定されたキーが辞書に存在しないことを示す実行時エラーを発生させます。2つの別々のコードブックでキーの生成を同期させる方法があるはずだと思っています。3つのキーをすべて含むトレーニングデータの3行をテストデータの先頭に追加すれば、以下のコードで動作させることができます。ただし、これは私の好みの解決策ではありません ;=)


/// <summary>
/// Trains a random forest on the training resource and checks its
/// zero-one loss on the separate test resource.
/// </summary>
/// <remarks>
/// Both label columns are encoded with the one codebook that is built from
/// the training labels, so a given integer code refers to the same class in
/// the training outputs, the test outputs and the forest's predictions.
/// Building a second codebook from the test labels can assign different
/// integers to the same label, which makes the computed loss meaningless.
/// </remarks>
public void test_learn()
{
    // Fixed seed for the Accord random generator.
    Accord.Math.Random.Generator.Seed = 1;

    /////////// TRAINING SET ///////////
    // First, let's load the TRAINING set into an array of text that we can process
    string[][] text = Resources.train.Split(new[] { "\r\n" },
        StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

    // Every column except the first one (index 0, the label) is a feature.
    int length = text[0].Length;
    List<int> columns = new List<int>();
    for (int i = 1; i < length; i++)
    {
        columns.Add(i);
    }
    int[] featureColumns = columns.ToArray();

    double[][] trainVectors = text.GetColumns(featureColumns).To<double[][]>();

    // The first column contains the expected ternary category (i.e. -1, 0, or 1)
    string[] trainLabels = text.GetColumn(0);
    var trainCodebook = new Codification("Output", trainLabels);
    int[] trainOutputs = trainCodebook.Translate("Output", trainLabels);

    ////////// TEST SET ////////////

    text = Resources.test.Split(new[] { "\r\n" },
        StringSplitOptions.RemoveEmptyEntries).Apply(x => x.Split(','));

    double[][] testVectors = text.GetColumns(featureColumns).To<double[][]>();
    string[] testLabels = text.GetColumn(0);

    // Reuse the TRAINING codebook here instead of creating a second one:
    // the label -> integer mapping must be identical for both sets.
    // NOTE(review): a test label that never appears in the training labels
    // presumably has no entry in this codebook and will fail to translate;
    // confirm that the training set covers every category.
    int[] testOutputs = trainCodebook.Translate("Output", testLabels);

    var teacher = new RandomForestLearning()
    {
        NumberOfTrees = 10,
    };

    var forest = teacher.Learn(trainVectors, trainOutputs);
    int[] predicted = forest.Decide(testVectors);

    // Print each prediction translated back to its original label.
    int lineNum = 1;
    foreach (int prediction in predicted)
    {
        Console.WriteLine("Prediction " + lineNum + ": "
            + trainCodebook.Translate("Output", prediction));
        lineNum++;
    }

    // I'm using the test vectors to calculate the error rate
    // (the predictions computed above are reused rather than recomputed).
    double error = new ZeroOneLoss(testOutputs).Loss(predicted);

    Console.WriteLine("Error term is " + error);

    Assert.IsTrue(error < 0.20); // humble expectations ;-)
}

トレーニングセットから** **コードブックが実際に作成されているはずです。トレーニング*と*テストセット。 – Cesar





namespace Accord.MachineLearning.DecisionTrees
{
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.Data;
    using System.Runtime.Serialization;
    using System.Runtime.Serialization.Formatters.Binary;
    using System.IO;
    using Accord.Statistics.Filters;
    using Accord.Math;
    using AForge;
    using Accord.Statistics;
    using System.Threading;

    /// <summary>
    /// Random Forest.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Represents a random forest of <see cref="DecisionTree"/>s. For
    /// sample usage and example of learning, please see the documentation
    /// page for <see cref="RandomForestLearning"/>.</para>
    /// </remarks>
    /// <seealso cref="DecisionTree"/>
    /// <seealso cref="RandomForestLearning"/>
    public class RandomForest : MulticlassClassifierBase, IParallel
    {
        private DecisionTree[] trees;
        private ParallelOptions parallelOptions;

        /// <summary>
        /// Gets the trees in the random forest.
        /// </summary>
        public DecisionTree[] Trees
        {
            get { return trees; }
        }

        /// <summary>
        /// Gets the number of classes that can be recognized
        /// by this random forest.
        /// </summary>
        [Obsolete("Please use NumberOfOutputs instead.")]
        public int Classes { get { return NumberOfOutputs; } }

        /// <summary>
        /// Gets or sets the parallelization options for this algorithm.
        /// </summary>
        public ParallelOptions ParallelOptions
        {
            get { return parallelOptions; }
            set { parallelOptions = value; }
        }

        /// <summary>
        /// Gets or sets a cancellation token that can be used
        /// to cancel the algorithm while it is running.
        /// </summary>
        /// <remarks>
        /// The token is stored in, and read from, <see cref="ParallelOptions"/>.
        /// </remarks>
        public CancellationToken Token
        {
            get { return ParallelOptions.CancellationToken; }
            set { ParallelOptions.CancellationToken = value; }
        }

        /// <summary>
        /// Creates a new random forest.
        /// </summary>
        /// <param name="trees">The number of trees in the forest.</param>
        /// <param name="classes">The number of classes in the classification problem.</param>
        public RandomForest(int trees, int classes)
        {
            // Only the array is allocated here; its slots start out empty.
            this.trees = new DecisionTree[trees];
            this.NumberOfOutputs = classes;
            this.ParallelOptions = new ParallelOptions();
        }

        /// <summary>
        /// Computes the decision output for a given input vector.
        /// </summary>
        /// <param name="data">The input vector.</param>
        /// <returns>The forest decision for the given vector.</returns>
        [Obsolete("Please use Decide() instead.")]
        public int Compute(double[] data)
        {
            return Decide(data);
        }

        /// <summary>
        /// Computes a class-label decision for a given <paramref name="input" />.
        /// </summary>
        /// <param name="input">The input vector that should be classified into
        /// one of the <see cref="ITransform.NumberOfOutputs" /> possible classes.</param>
        /// <returns>A class-label that best described <paramref name="input" /> according
        /// to this classifier.</returns>
        public override int Decide(double[] input)
        {
            // One vote counter per class; every tree casts a single vote.
            int[] responses = new int[NumberOfOutputs];
            Parallel.For(0, trees.Length, ParallelOptions, i =>
            {
                int j = trees[i].Decide(input);

                // Trees run concurrently, so the shared counter is bumped atomically.
                Interlocked.Increment(ref responses[j]);
            });

            // The class with the most votes wins.
            return responses.ArgMax();
        }

        // Re-creates the parallel options object.
        // NOTE(review): no [OnDeserializing] attribute is visible on this method,
        // so it is not clear from here how it gets invoked; confirm against callers.
        internal void OnDeserializingMethod(StreamingContext context)
        {
            this.ParallelOptions = new ParallelOptions();
        }
    }
}