Search in sources :

Example 46 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class ComputationGraphTestRNN method testTruncatedBPTTVsBPTT.

@Test
public void testTruncatedBPTTVsBPTT() {
    //Under some (limited) circumstances, we expect BPTT and truncated BPTT to be identical
    //Specifically TBPTT over entire data vector
    int timeSeriesLength = 12;
    int miniBatchSize = 7;
    int nIn = 5;
    int nOut = 4;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder().addInputs("in").addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "in").addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "0").addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.DISTRIBUTION).nIn(8).nOut(nOut).activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "1").setOutputs("out").backprop(true).build();
    assertEquals(BackpropType.Standard, conf.getBackpropType());
    ComputationGraphConfiguration confTBPTT = new NeuralNetConfiguration.Builder().seed(12345).graphBuilder().addInputs("in").addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "in").addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8).activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "0").addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.DISTRIBUTION).nIn(8).nOut(nOut).activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5)).build(), "1").setOutputs("out").backprop(true).backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(timeSeriesLength).tBPTTBackwardLength(timeSeriesLength).build();
    assertEquals(BackpropType.TruncatedBPTT, confTBPTT.getBackpropType());
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graphTBPTT = new ComputationGraph(confTBPTT);
    graphTBPTT.init();
    assertTrue(graphTBPTT.getConfiguration().getBackpropType() == BackpropType.TruncatedBPTT);
    assertTrue(graphTBPTT.getConfiguration().getTbpttFwdLength() == timeSeriesLength);
    assertTrue(graphTBPTT.getConfiguration().getTbpttBackLength() == timeSeriesLength);
    INDArray inputData = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    INDArray labels = Nd4j.rand(new int[] { miniBatchSize, nOut, timeSeriesLength });
    graph.setInput(0, inputData);
    graph.setLabel(0, labels);
    graphTBPTT.setInput(0, inputData);
    graphTBPTT.setLabel(0, labels);
    graph.computeGradientAndScore();
    graphTBPTT.computeGradientAndScore();
    Pair<Gradient, Double> graphPair = graph.gradientAndScore();
    Pair<Gradient, Double> graphTbpttPair = graphTBPTT.gradientAndScore();
    assertEquals(graphPair.getFirst().gradientForVariable(), graphTbpttPair.getFirst().gradientForVariable());
    assertEquals(graphPair.getSecond(), graphTbpttPair.getSecond());
    //Check states: expect stateMap to be empty but tBpttStateMap to not be
    Map<String, INDArray> l0StateMLN = graph.rnnGetPreviousState(0);
    Map<String, INDArray> l0StateTBPTT = graphTBPTT.rnnGetPreviousState(0);
    Map<String, INDArray> l1StateMLN = graph.rnnGetPreviousState(0);
    Map<String, INDArray> l1StateTBPTT = graphTBPTT.rnnGetPreviousState(0);
    Map<String, INDArray> l0TBPTTState = ((BaseRecurrentLayer<?>) graph.getLayer(0)).rnnGetTBPTTState();
    Map<String, INDArray> l0TBPTTStateTBPTT = ((BaseRecurrentLayer<?>) graphTBPTT.getLayer(0)).rnnGetTBPTTState();
    Map<String, INDArray> l1TBPTTState = ((BaseRecurrentLayer<?>) graph.getLayer(1)).rnnGetTBPTTState();
    Map<String, INDArray> l1TBPTTStateTBPTT = ((BaseRecurrentLayer<?>) graphTBPTT.getLayer(1)).rnnGetTBPTTState();
    assertTrue(l0StateMLN.isEmpty());
    assertTrue(l0StateTBPTT.isEmpty());
    assertTrue(l1StateMLN.isEmpty());
    assertTrue(l1StateTBPTT.isEmpty());
    assertTrue(l0TBPTTState.isEmpty());
    assertTrue(l0TBPTTStateTBPTT.size() == 2);
    assertTrue(l1TBPTTState.isEmpty());
    assertTrue(l1TBPTTStateTBPTT.size() == 2);
    INDArray tbpttActL0 = l0TBPTTStateTBPTT.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    INDArray tbpttActL1 = l1TBPTTStateTBPTT.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    Map<String, INDArray> activations = graph.feedForward(inputData, true);
    INDArray l0Act = activations.get("0");
    INDArray l1Act = activations.get("1");
    INDArray expL0Act = l0Act.tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    INDArray expL1Act = l1Act.tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    assertEquals(tbpttActL0, expL0Act);
    assertEquals(tbpttActL1, expL1Act);
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Gradient(org.deeplearning4j.nn.gradient.Gradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) BaseRecurrentLayer(org.deeplearning4j.nn.layers.recurrent.BaseRecurrentLayer) Test(org.junit.Test)

Example 47 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class AutoEncoderTest method testBackProp.

@Test
public void testBackProp() throws Exception {
    MnistDataFetcher fetcher = new MnistDataFetcher(true);
    //        LayerFactory layerFactory = LayerFactories.getFactory(new org.deeplearning4j.nn.conf.layers.AutoEncoder());
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().momentum(0.9f).optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT).iterations(100).learningRate(1e-1f).layer(new org.deeplearning4j.nn.conf.layers.AutoEncoder.Builder().nIn(784).nOut(600).corruptionLevel(0.6).lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build()).build();
    fetcher.fetch(100);
    DataSet d2 = fetcher.next();
    INDArray input = d2.getFeatureMatrix();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    AutoEncoder da = (AutoEncoder) conf.getLayer().instantiate(conf, null, 0, params, true);
    Gradient g = new DefaultGradient();
    g.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, da.decode(da.activate(input)).sub(input));
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) MnistDataFetcher(org.deeplearning4j.datasets.fetchers.MnistDataFetcher) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Test(org.junit.Test)

Example 48 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestGraphNodes method testL2Node.

@Test
public void testL2Node() {
    Nd4j.getRandom().setSeed(12345);
    GraphVertex l2 = new L2Vertex(null, "", -1, 1e-8);
    INDArray in1 = Nd4j.rand(5, 2);
    INDArray in2 = Nd4j.rand(5, 2);
    l2.setInputs(in1, in2);
    INDArray out = l2.doForward(false);
    INDArray expOut = Nd4j.create(5, 1);
    for (int i = 0; i < 5; i++) {
        double d2 = 0.0;
        for (int j = 0; j < in1.size(1); j++) {
            double temp = (in1.getDouble(i, j) - in2.getDouble(i, j));
            d2 += temp * temp;
        }
        d2 = Math.sqrt(d2);
        expOut.putScalar(i, 0, d2);
    }
    assertEquals(expOut, out);
    //dL/dlambda
    INDArray epsilon = Nd4j.rand(5, 1);
    INDArray diff = in1.sub(in2);
    //Out == sqrt(s) = s^1/2. Therefore: s^(-1/2) = 1/out
    INDArray sNegHalf = out.rdiv(1.0);
    INDArray dLda = diff.mulColumnVector(epsilon.mul(sNegHalf));
    INDArray dLdb = diff.mulColumnVector(epsilon.mul(sNegHalf)).neg();
    l2.setEpsilon(epsilon);
    Pair<Gradient, INDArray[]> p = l2.doBackward(false);
    assertEquals(dLda, p.getSecond()[0]);
    assertEquals(dLdb, p.getSecond()[1]);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)

Example 49 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestGraphNodes method testLastTimeStepVertex.

@Test
public void testLastTimeStepVertex() {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in").addVertex("lastTS", new LastTimeStepVertex("in"), "in").addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).build(), "lastTS").setOutputs("out").build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    //First: test without input mask array
    Nd4j.getRandom().setSeed(12345);
    INDArray in = Nd4j.rand(new int[] { 3, 5, 6 });
    INDArray expOut = in.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5));
    GraphVertex gv = graph.getVertex("lastTS");
    gv.setInputs(in);
    //Forward pass:
    INDArray outFwd = gv.doForward(true);
    assertEquals(expOut, outFwd);
    //Backward pass:
    gv.setEpsilon(expOut);
    Pair<Gradient, INDArray[]> pair = gv.doBackward(false);
    INDArray eps = pair.getSecond()[0];
    assertArrayEquals(in.shape(), eps.shape());
    assertEquals(Nd4j.zeros(3, 5, 5), eps.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4, true)));
    assertEquals(expOut, eps.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5)));
    //Second: test with input mask array
    INDArray inMask = Nd4j.zeros(3, 6);
    inMask.putRow(0, Nd4j.create(new double[] { 1, 1, 1, 0, 0, 0 }));
    inMask.putRow(1, Nd4j.create(new double[] { 1, 1, 1, 1, 0, 0 }));
    inMask.putRow(2, Nd4j.create(new double[] { 1, 1, 1, 1, 1, 0 }));
    graph.setLayerMaskArrays(new INDArray[] { inMask }, null);
    expOut = Nd4j.zeros(3, 5);
    expOut.putRow(0, in.get(NDArrayIndex.point(0), NDArrayIndex.all(), NDArrayIndex.point(2)));
    expOut.putRow(1, in.get(NDArrayIndex.point(1), NDArrayIndex.all(), NDArrayIndex.point(3)));
    expOut.putRow(2, in.get(NDArrayIndex.point(2), NDArrayIndex.all(), NDArrayIndex.point(4)));
    gv.setInputs(in);
    outFwd = gv.doForward(true);
    assertEquals(expOut, outFwd);
    String json = conf.toJson();
    ComputationGraphConfiguration conf2 = ComputationGraphConfiguration.fromJson(json);
    assertEquals(conf, conf2);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) LastTimeStepVertex(org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)

Example 50 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestComputationGraphNetwork method testExternalErrors.

@Test
public void testExternalErrors() {
    //Simple test: same network, but in one case: one less layer (the OutputLayer), where the epsilons are passed in externally
    // instead. Should get identical results
    Nd4j.getRandom().setSeed(12345);
    INDArray inData = Nd4j.rand(3, 10);
    INDArray outData = Nd4j.rand(3, 10);
    Nd4j.getRandom().setSeed(12345);
    ComputationGraphConfiguration standard = new NeuralNetConfiguration.Builder().learningRate(0.1).updater(Updater.SGD).seed(12345).graphBuilder().addInputs("in").addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("out", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build(), "l0").setOutputs("out").pretrain(false).backprop(true).build();
    ComputationGraph s = new ComputationGraph(standard);
    s.init();
    Nd4j.getRandom().setSeed(12345);
    ComputationGraphConfiguration external = new NeuralNetConfiguration.Builder().learningRate(0.1).updater(Updater.SGD).seed(12345).graphBuilder().addInputs("in").addLayer("l0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("l0").pretrain(false).backprop(true).build();
    ComputationGraph e = new ComputationGraph(external);
    e.init();
    s.setInputs(inData);
    s.setLabels(outData);
    s.computeGradientAndScore();
    Gradient sGrad = s.gradient();
    org.deeplearning4j.nn.layers.OutputLayer ol = (org.deeplearning4j.nn.layers.OutputLayer) s.getLayer(1);
    Pair<Gradient, INDArray> olPairStd = ol.backpropGradient(null);
    INDArray olEpsilon = olPairStd.getSecond();
    e.feedForward(inData, true);
    Gradient extErrorGrad = e.backpropGradient(olEpsilon);
    int nParamsDense = 10 * 10 + 10;
    assertEquals(sGrad.gradient().get(NDArrayIndex.point(0), NDArrayIndex.interval(0, nParamsDense)), extErrorGrad.gradient());
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) org.deeplearning4j.nn.conf.layers(org.deeplearning4j.nn.conf.layers) Test(org.junit.Test)

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient)105 INDArray (org.nd4j.linalg.api.ndarray.INDArray)100 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)72 Test (org.junit.Test)52 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)35 Pair (org.deeplearning4j.berkeley.Pair)28 Layer (org.deeplearning4j.nn.api.Layer)28 Updater (org.deeplearning4j.nn.api.Updater)25 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)24 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)21 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)9 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)8 IActivation (org.nd4j.linalg.activations.IActivation)6 HashMap (java.util.HashMap)5 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)5 ArrayList (java.util.ArrayList)4 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)4 DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException)4 IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)4 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)4