
Example 61 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class BackPropMLPTest method testIrisMiniBatchGradients.

private static void testIrisMiniBatchGradients(int miniBatchSize, int[] hiddenLayerSizes, Activation activationFunction) {
    int totalExamples = 10 * miniBatchSize;
    if (totalExamples > 150) {
        totalExamples = miniBatchSize * (150 / miniBatchSize);
    }
    if (miniBatchSize > 150) {
        fail();
    }
    DataSetIterator iris = new IrisDataSetIterator(miniBatchSize, totalExamples);
    MultiLayerNetwork network = new MultiLayerNetwork(getIrisMLPSimpleConfig(hiddenLayerSizes, Activation.SIGMOID));
    network.init();
    Layer[] layers = network.getLayers();
    int nLayers = layers.length;
    while (iris.hasNext()) {
        DataSet data = iris.next();
        INDArray x = data.getFeatureMatrix();
        INDArray y = data.getLabels();
        //Do forward pass:
        INDArray[] layerWeights = new INDArray[nLayers];
        INDArray[] layerBiases = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            layerWeights[i] = layers[i].getParam(DefaultParamInitializer.WEIGHT_KEY).dup();
            layerBiases[i] = layers[i].getParam(DefaultParamInitializer.BIAS_KEY).dup();
        }
        INDArray[] layerZs = new INDArray[nLayers];
        INDArray[] layerActivations = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray layerInput = (i == 0 ? x : layerActivations[i - 1]);
            layerZs[i] = layerInput.mmul(layerWeights[i]).addiRowVector(layerBiases[i]);
            layerActivations[i] = (i == nLayers - 1 ? doSoftmax(layerZs[i].dup()) : doSigmoid(layerZs[i].dup()));
        }
        //Do backward pass:
        INDArray[] deltas = new INDArray[nLayers];
        //Out - labels; shape=[miniBatchSize,nOut];
        deltas[nLayers - 1] = layerActivations[nLayers - 1].sub(y);
        assertArrayEquals(deltas[nLayers - 1].shape(), new int[] { miniBatchSize, 3 });
        for (int i = nLayers - 2; i >= 0; i--) {
            INDArray sigmaPrimeOfZ = doSigmoidDerivative(layerZs[i]);
            INDArray epsilon = layerWeights[i + 1].mmul(deltas[i + 1].transpose()).transpose();
            deltas[i] = epsilon.mul(sigmaPrimeOfZ);
            assertArrayEquals(deltas[i].shape(), new int[] { miniBatchSize, hiddenLayerSizes[i] });
        }
        INDArray[] dLdw = new INDArray[nLayers];
        INDArray[] dLdb = new INDArray[nLayers];
        for (int i = 0; i < nLayers; i++) {
            INDArray prevActivations = (i == 0 ? x : layerActivations[i - 1]);
            //Raw gradients, so not yet divided by mini-batch size (division is done in BaseUpdater)
            //Shape: [nIn, nOut]
            dLdw[i] = deltas[i].transpose().mmul(prevActivations).transpose();
            //Shape: [1,nOut]
            dLdb[i] = deltas[i].sum(0);
            int nIn = (i == 0 ? 4 : hiddenLayerSizes[i - 1]);
            int nOut = (i < nLayers - 1 ? hiddenLayerSizes[i] : 3);
            assertArrayEquals(dLdw[i].shape(), new int[] { nIn, nOut });
            assertArrayEquals(dLdb[i].shape(), new int[] { 1, nOut });
        }
        //Calculate and get gradient, compare to expected
        network.setInput(x);
        network.setLabels(y);
        network.computeGradientAndScore();
        Gradient gradient = network.gradientAndScore().getFirst();
        float eps = 1e-4f;
        for (int i = 0; i < hiddenLayerSizes.length; i++) {
            String wKey = i + "_" + DefaultParamInitializer.WEIGHT_KEY;
            String bKey = i + "_" + DefaultParamInitializer.BIAS_KEY;
            INDArray wGrad = gradient.getGradientFor(wKey);
            INDArray bGrad = gradient.getGradientFor(bKey);
            float[] wGradf = asFloat(wGrad);
            float[] bGradf = asFloat(bGrad);
            float[] expWGradf = asFloat(dLdw[i]);
            float[] expBGradf = asFloat(dLdb[i]);
            assertArrayEquals(wGradf, expWGradf, eps);
            assertArrayEquals(bGradf, expBGradf, eps);
        }
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSet(org.nd4j.linalg.dataset.DataSet) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator)
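
For reference, the hand-computed backward pass above implements the standard MLP backprop equations. As a sketch in LaTeX (row-major activations a^{(l)} of shape [miniBatchSize, n_l], sigmoid \sigma, and the output delta computed as activations minus labels, the usual softmax-plus-cross-entropy simplification):

\delta^{(L)} = a^{(L)} - y
\delta^{(l)} = \bigl( \delta^{(l+1)} (W^{(l+1)})^{\top} \bigr) \odot \sigma'(z^{(l)})
\partial L / \partial W^{(l)} = (a^{(l-1)})^{\top} \delta^{(l)}
\partial L / \partial b^{(l)} = \mathbf{1}^{\top} \delta^{(l)}

These are raw sums over the mini-batch; as the in-code comment notes, division by the mini-batch size is deferred to BaseUpdater, which is why the test compares against the unscaled gradients.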

Example 62 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class MultiLayerTest method testGradientUpdate.

@Test
public void testGradientUpdate() throws Exception {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);
    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("0_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("0_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("1_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("1_b", Nd4j.ones(1, 3));
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(1)
                    .activation(Activation.RELU).weightInit(WeightInit.XAVIER).list()
                    .layer(0, new DenseLayer.Builder().name("dnn1").nIn(4).nOut(5).build())
                    .layer(1, new OutputLayer.Builder().name("output").nIn(5).nOut(3)
                                    .activation(Activation.SOFTMAX).weightInit(WeightInit.XAVIER).build())
                    .backprop(true).pretrain(false).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.fit(iter.next());
    // TODO validate actual layer gradientView - issue getting var out of BaseLayer w/o adding MLN getter that gets confused with local gradient vars
    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    net.update(expectedGradient);
    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("0_W"), actualGradient.getGradientFor("0_W"));
    // Update params with set
    net.setParam("0_W", Nd4j.ones(4, 5));
    net.setParam("0_b", Nd4j.ones(1, 5));
    net.setParam("1_W", Nd4j.ones(5, 3));
    net.setParam("1_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();
    // Confirm params
    assertEquals(expectedGradient.gradient(), actualParams);
    net.update(expectedGradient);
    actualParams = net.params();
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) Test(org.junit.Test)
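
A quick check on the final assertions: the network has 4*5 = 20 weights plus 5 biases in the dense layer and 5*3 = 15 weights plus 3 biases in the output layer, i.e.

4*5 + 5 + 5*3 + 3 = 43

parameters in total, so net.params() is a flattened [1, 43] row vector. After every parameter is set to 1, the flattened parameters equal expectedGradient.gradient() (also all ones); and since the earlier assertEquals shows update(Gradient) applying the raw gradient values additively in this version, the second update leaves every parameter at 2, i.e. Nd4j.ones(1, 43).addi(1).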

Example 63 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestVAE method testParamGradientOrderAndViews.

@Test
public void testParamGradientOrderAndViews() {
    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
                                    .nIn(10).nOut(5).encoderLayerSizes(12, 13).decoderLayerSizes(14, 15).build())
                    .build();
    NeuralNetConfiguration c = mlc.getConf(0);
    org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae = (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();
    net.initGradientsView();
    org.deeplearning4j.nn.layers.variational.VariationalAutoencoder layer = (org.deeplearning4j.nn.layers.variational.VariationalAutoencoder) net.getLayer(0);
    Map<String, INDArray> layerParams = layer.paramTable();
    Map<String, INDArray> layerGradViews = layer.getGradientViews();
    layer.setInput(Nd4j.rand(3, 10));
    layer.computeGradientAndScore();
    Gradient g = layer.gradient();
    Map<String, INDArray> grads = g.gradientForVariable();
    assertEquals(layerParams.size(), layerGradViews.size());
    assertEquals(layerParams.size(), grads.size());
    //Iteration order should be consistent due to linked hashmaps
    Iterator<String> pIter = layerParams.keySet().iterator();
    Iterator<String> gvIter = layerGradViews.keySet().iterator();
    Iterator<String> gIter = grads.keySet().iterator();
    while (pIter.hasNext()) {
        String p = pIter.next();
        String gv = gvIter.next();
        String gr = gIter.next();
        //            System.out.println(p + "\t" + gv + "\t" + gr);
        assertEquals(p, gv);
        assertEquals(p, gr);
        INDArray pArr = layerParams.get(p);
        INDArray gvArr = layerGradViews.get(p);
        INDArray gArr = grads.get(p);
        assertArrayEquals(pArr.shape(), gvArr.shape());
        //Should be the exact same object due to view mechanics
        assertTrue(gvArr == gArr);
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) VariationalAutoencoder(org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)
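
The assertTrue(gvArr == gArr) at the end is a reference-equality check: initGradientsView() allocates one flat gradient array, each entry of getGradientViews() is a subarray view of it, and the Gradient returned by layer.gradient() holds those same view objects. A minimal sketch of the view mechanics this relies on (illustrative shapes only; NDArrayIndex is org.nd4j.linalg.indexing.NDArrayIndex, and in the real layer the carving is done by the param initializer):

INDArray flatGradient = Nd4j.zeros(1, 20);
//Two parameter gradients carved out of the same backing array:
INDArray gradW = flatGradient.get(NDArrayIndex.point(0), NDArrayIndex.interval(0, 15));
INDArray gradB = flatGradient.get(NDArrayIndex.point(0), NDArrayIndex.interval(15, 20));
//Writing through a view mutates the flat array too:
gradW.addi(1.0);

Because the layer stores both its params and its gradient views in LinkedHashMaps, the three iterators above also walk the keys in the same order, which is what the key-equality assertions verify.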

Example 64 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class GravesLSTMTest method testGravesBackwardBasicHelper.

private static void testGravesBackwardBasicHelper(int nIn, int nOut, int lstmNHiddenUnits, int miniBatchSize, int timeSeriesLength) {
    INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength);
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(lstmNHiddenUnits).weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1)).activation(Activation.TANH).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    GravesLSTM lstm = (GravesLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
    lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getLayer().initializer().numParams(conf)));
    //Set input, do a forward pass:
    lstm.activate(inputData);
    assertNotNull(lstm.input());
    INDArray epsilon = Nd4j.ones(miniBatchSize, lstmNHiddenUnits, timeSeriesLength);
    Pair<Gradient, INDArray> out = lstm.backpropGradient(epsilon);
    Gradient outGradient = out.getFirst();
    INDArray nextEpsilon = out.getSecond();
    INDArray biasGradient = outGradient.getGradientFor(GravesLSTMParamInitializer.BIAS_KEY);
    INDArray inWeightGradient = outGradient.getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    INDArray recurrentWeightGradient = outGradient.getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    assertNotNull(biasGradient);
    assertNotNull(inWeightGradient);
    assertNotNull(recurrentWeightGradient);
    assertArrayEquals(biasGradient.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
    assertArrayEquals(inWeightGradient.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
    assertArrayEquals(recurrentWeightGradient.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });
    assertNotNull(nextEpsilon);
    assertArrayEquals(nextEpsilon.shape(), new int[] { miniBatchSize, nIn, timeSeriesLength });
    //Check update:
    for (String s : outGradient.gradientForVariable().keySet()) {
        lstm.update(outGradient.getGradientFor(s), s);
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration)
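
The factor of 4 in the shape assertions reflects the four LSTM gates (input, forget, output, and the block-input/cell candidate), whose parameters are packed side by side along the columns. Writing n = lstmNHiddenUnits:

biasGradient.shape()            == [1, 4n]
inWeightGradient.shape()        == [nIn, 4n]
recurrentWeightGradient.shape() == [n, 4n + 3]

The three extra recurrent columns hold the Graves-formulation peephole weights, one column per gate that reads the cell state.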

Example 65 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class RBMTests method testComputeGradientAndScore.

@Test
public void testComputeGradientAndScore() {
    INDArray input = Nd4j.linspace(1, 10, 10);
    INDArray params = getStandardParams(10, 5);
    RBM rbm = getRBMLayer(10, 5, HiddenUnit.BINARY, VisibleUnit.BINARY, params, true, false, 1, LossFunctions.LossFunction.MSE);
    rbm.setInput(input);
    rbm.computeGradientAndScore();
    Pair<Gradient, Double> pair = rbm.gradientAndScore();
    INDArray hprob = sigmoid(input.mmul(rbm.getParam(PretrainParamInitializer.WEIGHT_KEY)).addiRowVector(rbm.getParam(PretrainParamInitializer.BIAS_KEY)));
    INDArray vprob = sigmoid(hprob.mmul(rbm.getParam(PretrainParamInitializer.WEIGHT_KEY).transpose()).addiRowVector(rbm.getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY)));
    Distribution dist = Nd4j.getDistributions().createBinomial(1, vprob);
    dist.reseedRandomGenerator(42);
    INDArray vSample = dist.sample(vprob.shape());
    //double expectedScore = LossFunctions.LossFunction.MSE.getILossFunction().computeScore(input, vSample, "sigmoid", null, false);
    double expectedScore = LossFunctions.LossFunction.MSE.getILossFunction().computeScore(input, vSample, new ActivationSigmoid(), null, false);
    assertEquals(expectedScore, pair.getSecond(), 1e-8);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Distribution(org.nd4j.linalg.api.rng.distribution.Distribution) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ActivationSigmoid(org.nd4j.linalg.activations.impl.ActivationSigmoid) Test(org.junit.Test)
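
The expected score is recomputed by hand as a single reconstruction step of the RBM. In LaTeX form (W the weight matrix, b_{h} and b_{v} the hidden and visible biases, \sigma the sigmoid):

h = \sigma(x W + b_{h}), \qquad v' = \sigma(h W^{\top} + b_{v})

A Bernoulli(1, v') sample (vSample, drawn with a fixed seed of 42) is then scored against the input under LossFunction.MSE with an ActivationSigmoid, matching what computeGradientAndScore() reports; the commented-out line above shows the older String-based activation API this call replaced.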

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient): 105
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 100
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 72
Test (org.junit.Test): 52
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 35
Pair (org.deeplearning4j.berkeley.Pair): 28
Layer (org.deeplearning4j.nn.api.Layer): 28
Updater (org.deeplearning4j.nn.api.Updater): 25
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 24
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 21
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 9
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 8
IActivation (org.nd4j.linalg.activations.IActivation): 6
HashMap (java.util.HashMap): 5
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 5
ArrayList (java.util.ArrayList): 4
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 4
DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException): 4
IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer): 4
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 4