
Example 71 with IrisDataSetIterator

Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.

The class TestComputationGraphNetwork, method testOptimizationAlgorithmsSearchBasic.

@Test
public void testOptimizationAlgorithmsSearchBasic() {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);
    OptimizationAlgorithm[] oas = new OptimizationAlgorithm[] { OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT, OptimizationAlgorithm.LINE_GRADIENT_DESCENT, OptimizationAlgorithm.CONJUGATE_GRADIENT, OptimizationAlgorithm.LBFGS };
    for (OptimizationAlgorithm oa : oas) {
        System.out.println(oa);
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(oa)
                .iterations(1)
                .graphBuilder()
                .addInputs("input")
                .addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
                .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).build(), "first")
                .setOutputs("output")
                .pretrain(false).backprop(true)
                .build();
        ComputationGraph net = new ComputationGraph(conf);
        net.init();
        net.fit(iter.next());
    }
}
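
Note that the test asserts nothing explicitly: it passes as long as each of the four optimization algorithms completes one fit call on a single Iris example without throwing.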
Also used : OptimizationAlgorithm(org.deeplearning4j.nn.api.OptimizationAlgorithm) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) RecordReaderMultiDataSetIterator(org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator) MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) Test(org.junit.Test)

Example 72 with IrisDataSetIterator

Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.

The class BackPropMLPTest, method testMLP.

@Test
public void testMLP() {
    //Simple mini-batch test with multiple hidden layers
    MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[] { 5, 4, 3 }, Activation.SIGMOID);
    System.out.println(conf);
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    DataSetIterator iter = new IrisDataSetIterator(10, 100);
    while (iter.hasNext()) {
        network.fit(iter.next());
    }
}
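
Again there are no explicit assertions; the test builds a sigmoid MLP with hidden layers of sizes 5, 4, and 3, fits it on 100 Iris examples in mini-batches of 10, and passes if training completes. The configuration helper it calls is sketched below.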
Also used : MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test)
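
The helper getIrisMLPSimpleConfig, used here and in Examples 73 and 74, is defined elsewhere in BackPropMLPTest and does not appear on this page. The sketch below is a plausible reconstruction, not the test's exact code: the SGD updater and 0.1 learning rate are inferred from Example 73, whose manual weight updates multiply gradients by 0.1f, while the seed value and MCXENT loss function are assumptions. It additionally relies on org.nd4j.linalg.lossfunctions.LossFunctions.

private static MultiLayerConfiguration getIrisMLPSimpleConfig(int[] hiddenLayerSizes, Activation activationFunction) {
    //Plausible reconstruction; the real helper lives elsewhere in BackPropMLPTest
    NeuralNetConfiguration.ListBuilder builder = new NeuralNetConfiguration.Builder()
            .updater(org.deeplearning4j.nn.conf.Updater.SGD)
            //Inferred from the 0.1f factor in Example 73's manual updates
            .learningRate(0.1)
            //Assumed seed, for reproducible weight initialization
            .seed(12345L)
            .list();
    for (int i = 0; i < hiddenLayerSizes.length; i++) {
        //Iris has 4 input features; each hidden layer feeds the next one
        int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
        builder.layer(i, new DenseLayer.Builder().nIn(nIn).nOut(hiddenLayerSizes[i])
                .activation(activationFunction).build());
    }
    //3 Iris classes: softmax output with multi-class cross-entropy (assumed loss)
    builder.layer(hiddenLayerSizes.length, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .nIn(hiddenLayerSizes[hiddenLayerSizes.length - 1]).nOut(3)
            .activation(Activation.SOFTMAX).build());
    return builder.backprop(true).pretrain(false).build();
}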

Example 73 with IrisDataSetIterator

Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.

The class BackPropMLPTest, method testSingleExampleWeightUpdates.

@Test
public void testSingleExampleWeightUpdates() {
    //Simplest possible case: 1 hidden layer, 1 hidden neuron, batch size of 1.
    //Manually calculate weight updates (entirely outside of DL4J and ND4J)
    // and compare expected and actual weights after backprop
    DataSetIterator iris = new IrisDataSetIterator(1, 10);
    MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(new int[] { 1 }, Activation.SIGMOID));
    network.init();
    Layer[] layers = network.getLayers();
    final boolean printCalculations = true;
    while (iris.hasNext()) {
        DataSet data = iris.next();
        INDArray x = data.getFeatureMatrix();
        INDArray y = data.getLabels();
        float[] xFloat = asFloat(x);
        float[] yFloat = asFloat(y);
        //Do forward pass:
        //Hidden layer
        INDArray l1Weights = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
        //Output layer
        INDArray l2Weights = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
        INDArray l1Bias = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        INDArray l2Bias = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        float[] l1WeightsFloat = asFloat(l1Weights);
        float[] l2WeightsFloat = asFloat(l2Weights);
        float l1BiasFloat = l1Bias.getFloat(0);
        float[] l2BiasFloatArray = asFloat(l2Bias);
        //z=w*x+b
        float hiddenUnitPreSigmoid = dotProduct(l1WeightsFloat, xFloat) + l1BiasFloat;
        //a=sigma(z)
        float hiddenUnitPostSigmoid = sigmoid(hiddenUnitPreSigmoid);
        float[] outputPreSoftmax = new float[3];
        //Normally a matrix multiplication here, but only one hidden unit in this trivial example
        for (int i = 0; i < 3; i++) {
            outputPreSoftmax[i] = hiddenUnitPostSigmoid * l2WeightsFloat[i] + l2BiasFloatArray[i];
        }
        float[] outputPostSoftmax = softmax(outputPreSoftmax);
        //Do backward pass:
        //out-labels
        float[] deltaOut = vectorDifference(outputPostSoftmax, yFloat);
        //deltaHidden = sigmaPrime(hiddenUnitZ) * sum_k (w_jk * \delta_k); here, only one j
        float deltaHidden = 0.0f;
        for (int i = 0; i < 3; i++) deltaHidden += l2WeightsFloat[i] * deltaOut[i];
        deltaHidden *= derivOfSigmoid(hiddenUnitPreSigmoid);
        //Calculate weight/bias updates:
        //dL/dw = delta * (activation of prev. layer)
        //dL/db = delta
        float[] dLdwOut = new float[3];
        for (int i = 0; i < dLdwOut.length; i++) dLdwOut[i] = deltaOut[i] * hiddenUnitPostSigmoid;
        float[] dLdwHidden = new float[4];
        for (int i = 0; i < dLdwHidden.length; i++) dLdwHidden[i] = deltaHidden * xFloat[i];
        float[] dLdbOut = deltaOut;
        float dLdbHidden = deltaHidden;
        if (printCalculations) {
            System.out.println("deltaOut = " + Arrays.toString(deltaOut));
            System.out.println("deltaHidden = " + deltaHidden);
            System.out.println("dLdwOut = " + Arrays.toString(dLdwOut));
            System.out.println("dLdbOut = " + Arrays.toString(dLdbOut));
            System.out.println("dLdwHidden = " + Arrays.toString(dLdwHidden));
            System.out.println("dLdbHidden = " + dLdbHidden);
        }
        //Calculate new parameters:
        //w_i = w_i - (learningRate)/(batchSize) * sum_j (dL_j/dw_i)
        //b_i = b_i - (learningRate)/(batchSize) * sum_j (dL_j/db_i)
        //Which for batch size of one (here) is simply:
        //w_i = w_i - learningRate * dL/dw
        //b_i = b_i - learningRate * dL/db
        float[] expectedL1WeightsAfter = new float[4];
        float[] expectedL2WeightsAfter = new float[3];
        float expectedL1BiasAfter = l1BiasFloat - 0.1f * dLdbHidden;
        float[] expectedL2BiasAfter = new float[3];
        for (int i = 0; i < 4; i++) expectedL1WeightsAfter[i] = l1WeightsFloat[i] - 0.1f * dLdwHidden[i];
        for (int i = 0; i < 3; i++) expectedL2WeightsAfter[i] = l2WeightsFloat[i] - 0.1f * dLdwOut[i];
        for (int i = 0; i < 3; i++) expectedL2BiasAfter[i] = l2BiasFloatArray[i] - 0.1f * dLdbOut[i];
        //Finally, do back-prop on network, and compare parameters vs. expected parameters
        network.fit(data);
        /*  INDArray l1WeightsAfter = layers[0].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();	//Hidden layer
            INDArray l2WeightsAfter = layers[1].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();	//Output layer
            INDArray l1BiasAfter = layers[0].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            INDArray l2BiasAfter = layers[1].getParam(DefaultParamInitializer.BIAS_KEY).dup();
            float[] l1WeightsFloatAfter = asFloat(l1WeightsAfter);
            float[] l2WeightsFloatAfter = asFloat(l2WeightsAfter);
            float l1BiasFloatAfter = l1BiasAfter.getFloat(0);
            float[] l2BiasFloatAfter = asFloat(l2BiasAfter);
            
            if( printCalculations) {
                System.out.println("Expected L1 weights = " + Arrays.toString(expectedL1WeightsAfter));
                System.out.println("Actual L1 weights = " + Arrays.toString(asFloat(l1WeightsAfter)));
                System.out.println("Expected L2 weights = " + Arrays.toString(expectedL2WeightsAfter));
                System.out.println("Actual L2 weights = " + Arrays.toString(asFloat(l2WeightsAfter)));
                System.out.println("Expected L1 bias = " + expectedL1BiasAfter);
                System.out.println("Actual L1 bias = " + Arrays.toString(asFloat(l1BiasAfter)));
                System.out.println("Expected L2 bias = " + Arrays.toString(expectedL2BiasAfter));
                System.out.println("Actual L2 bias = " + Arrays.toString(asFloat(l2BiasAfter)));
            }
            
            
            float eps = 1e-4f;
            assertArrayEquals(l1WeightsFloatAfter,expectedL1WeightsAfter,eps);
            assertArrayEquals(l2WeightsFloatAfter,expectedL2WeightsAfter,eps);
            assertEquals(l1BiasFloatAfter,expectedL1BiasAfter,eps);
            assertArrayEquals(l2BiasFloatAfter,expectedL2BiasAfter,eps);
            */
        System.out.println("\n\n--------------");
    }
}
Also used : IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) Test(org.junit.Test)
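
Example 73 relies on several small scalar helpers (asFloat, dotProduct, sigmoid, derivOfSigmoid, softmax, vectorDifference) that are defined elsewhere in BackPropMLPTest. For readers following the arithmetic by hand, here is a minimal sketch of what they plausibly look like; treat these as illustrative reconstructions rather than the test's exact code.

public static float[] asFloat(INDArray arr) {
    //Flatten an INDArray into a plain float[] for manual arithmetic
    int len = (int) arr.length();
    float[] out = new float[len];
    for (int i = 0; i < len; i++) out[i] = arr.getFloat(i);
    return out;
}

public static float dotProduct(float[] x, float[] y) {
    float sum = 0.0f;
    for (int i = 0; i < x.length; i++) sum += x[i] * y[i];
    return sum;
}

public static float sigmoid(float in) {
    return (float) (1.0 / (1.0 + Math.exp(-in)));
}

public static float derivOfSigmoid(float in) {
    //sigma'(z) = sigma(z) * (1 - sigma(z))
    float s = sigmoid(in);
    return s * (1.0f - s);
}

public static float[] softmax(float[] in) {
    //Subtract the max before exponentiating, for numerical stability
    float max = Float.NEGATIVE_INFINITY;
    for (float v : in) max = Math.max(max, v);
    float[] out = new float[in.length];
    float sum = 0.0f;
    for (int i = 0; i < in.length; i++) {
        out[i] = (float) Math.exp(in[i] - max);
        sum += out[i];
    }
    for (int i = 0; i < in.length; i++) out[i] /= sum;
    return out;
}

public static float[] vectorDifference(float[] x, float[] y) {
    //Element-wise x - y, used here for (output - labels)
    float[] out = new float[x.length];
    for (int i = 0; i < x.length; i++) out[i] = x[i] - y[i];
    return out;
}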

Example 74 with IrisDataSetIterator

Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.

The class BackPropMLPTest, method testIrisMiniBatchGradients.

private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLayerSizes, Activation activationFunction) {
    int totalExamples = 10 * miniBatchSize;
    if (totalExamples > 150) {
        totalExamples = miniBatchSize * (150 / miniBatchSize);
    }
    if (miniBatchSize > 150) {
        fail();
    }
    DataSetIterator iris = new IrisDataSetIterator(miniBatchSize, totalExamples);
    MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(hiddenLayerSizes, Activation.SIGMOID));
    network.init();
    Layer[] layers = network.getLayers();
    int nLayers = layers.length;
    while (iris.hasNext()) {
        DataSet data = iris.next();
        INDArray x = data.getFeatureMatrix();
        INDArray y = data.getLabels();
        //Do forward pass:
        INDArray[] layerWeights = new INDArray[nLayers];
        INDArray[] layerBiases = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            layerWeights[i] = layers[i].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
            layerBiases[i] = layers[i].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        }
        INDArray[] layerZs = new INDArray[nLayers];
        INDArray[] layerActivations = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray layerInput = (i == 0 ? x : layerActivations[i - 1]);
            layerZs[i] = layerInput.mmul(layerWeights[i]).addiRowVector(layerBiases[i]);
            layerActivations[i] = (i == nLayers - 1 ? doSoftmax(layerZs[i].dup()) : doSigmoid(layerZs[i].dup()));
        }
        //Do backward pass:
        INDArray[] deltas = new INDArray[nLayers];
        //Out - labels; shape=[miniBatchSize,nOut];
        deltas[nLayers - 1] = layerActivations[nLayers - 1].sub(y);
        assertArrayEquals(deltas[nLayers - 1].shape(), new int[] { miniBatchSize, 3 });
        for (int i = nLayers - 2; i >= 0; i--) {
            INDArray sigmaPrimeOfZ;
            sigmaPrimeOfZ = doSigmoidDerivative(layerZs[i]);
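            //epsilon = delta_{i+1} * W_{i+1}^T, computed as (W_{i+1} * delta_{i+1}^T)^T; shape [miniBatchSize, hiddenLayerSizes[i]]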
            INDArray epsilon = layerWeights[i + 1].mmul(deltas[i + 1].transpose()).transpose();
            deltas[i] = epsilon.mul(sigmaPrimeOfZ);
            assertArrayEquals(deltas[i].shape(), new int[] { miniBatchSize, hiddenLayerSizes[i] });
        }
        INDArray[] dLdw = new INDArray[nLayers];
        INDArray[] dLdb = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray prevActivations = (i == 0 ? x : layerActivations[i - 1]);
            //Raw gradients, so not yet divided by mini-batch size (division is done in BaseUpdater)
            //Shape: [nIn, nOut]
            dLdw[i] = deltas[i].transpose().mmul(prevActivations).transpose();
            //Shape: [1,nOut]
            dLdb[i] = deltas[i].sum(0);
            int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
            int nOut = (i < nLayers - 1 ? hiddenLayerSizes[i] : 3);
            assertArrayEquals(dLdw[i].shape(), new int[] { nIn, nOut });
            assertArrayEquals(dLdb[i].shape(), new int[] { 1, nOut });
        }
        //Calculate and get gradient, compare to expected
        network.setInput(x);
        network.setLabels(y);
        network.computeGradientAndScore();
        Gradient gradient = network.gradientAndScore().getFirst();
        float eps = 1e-4f;
        for (int i = 0; i < hiddenLayerSizes.length; i++) {
            String wKey = i + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = i + "_" + DefaultParamInitializer.BIAS_KEY;
            INDArray wGrad = gradient.getGradientFor(wKey);
            INDArray bGrad = gradient.getGradientFor(bKey);
            float[] wGradf = asFloat(wGrad);
            float[] bGradf = asFloat(bGrad);
            float[] expWGradf = asFloat(dLdw[i]);
            float[] expBGradf = asFloat(dLdb[i]);
            assertArrayEquals(wGradf, expWGradf, eps);
            assertArrayEquals(bGradf, expBGradf, eps);
        }
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSet(org.nd4j.linalg.dataset.DataSet) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator)
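
The INDArray activation helpers referenced above (doSigmoid, doSoftmax, doSigmoidDerivative) are likewise defined elsewhere in BackPropMLPTest. Below is a hedged sketch using ND4J's org.nd4j.linalg.ops.transforms.Transforms ops, assuming the sigmoid/softmax overloads that take a dup flag; it should be behaviorally equivalent to what the test uses, but it is a reconstruction, not the test's code.

public static INDArray doSigmoid(INDArray z) {
    return Transforms.sigmoid(z, true); //dup = true: operate on a copy, leave z unmodified
}

public static INDArray doSoftmax(INDArray z) {
    return Transforms.softmax(z, true); //row-wise softmax on a copy
}

public static INDArray doSigmoidDerivative(INDArray z) {
    INDArray s = Transforms.sigmoid(z, true);
    return s.mul(s.rsub(1.0)); //sigma(z) * (1 - sigma(z))
}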

Example 75 with IrisDataSetIterator

Use of org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator in project deeplearning4j by deeplearning4j.

The class MultiLayerTest, method testGradientUpdate.

@Test
public void testGradientUpdate() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);
    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("0_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("0_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("1_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3));
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(org.deeplearning4j.nn.conf.Updater.SGD)
            .learningRate(1)
            .activation(Activation.RELU)
            .weightInit(WeightInit.XAVIER)
            .list()
            .layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build())
            .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3)
                    .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build())
            .backprop(true).pretrain(false)
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.fit(iter.next());
    // TODO validate actual layer gradientView - issue getting var out of BaseLayer w/o adding MLN getter that gets confused with local gradient vars
    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    net.update(expectedGradient);
    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    // Update params with set
    net.setParam("0_W", Nd4j.ones(4, 5));
    net.setParam("0_b", Nd4j.ones(1, 5));
    net.setParam("1_W", Nd4j.ones(5, 3));
    net.setParam("1_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();
    // Confirm params
    assertEquals(expectedGradient.gradient(), actualParams);
    net.update(expectedGradient);
    actualParams = net.params();
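    //Total parameter count: 0_W (4*5) + 0_b (5) + 1_W (5*3) + 1_b (3) = 43;
    //every param was just set to 1, and update() added the all-ones gradient, so each entry is now 2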
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) Test(org.junit.Test)

Aggregations

IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 96
Test (org.junit.Test): 91
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 75
DataSet (org.nd4j.linalg.dataset.DataSet): 48
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 47
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 41
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 41
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 37
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener): 35
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 21
InMemoryModelSaver (org.deeplearning4j.earlystopping.saver.InMemoryModelSaver): 18
MaxEpochsTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxEpochsTerminationCondition): 18
BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest): 16
MaxTimeIterationTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxTimeIterationTerminationCondition): 15
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 15
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 15
RecordReaderMultiDataSetIterator (org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator): 13
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 13
MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator): 13
IEarlyStoppingTrainer (org.deeplearning4j.earlystopping.trainer.IEarlyStoppingTrainer): 12