Search in sources:

Example 1 with LastTimeStepVertex

use of org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex in project deeplearning4j by deeplearning4j.

Class GradientCheckTestsComputationGraph, method testLSTMWithLastTimeStepVertex.

/**
 * Gradient check for a graph wired as "input" -> GravesLSTM ("lstm1") -> LastTimeStepVertex
 * ("lastTS") -> OutputLayer ("out").
 *
 * The check is executed twice on the same network, input and labels: once with no mask array
 * set, and once after an input mask (rows with 3, 4 and 5 leading ones) has been set on the graph.
 */
@Test
public void testLSTMWithLastTimeStepVertex() {
    final String msg = "testLSTMWithLastTimeStepVertex()";

    Nd4j.getRandom().setSeed(12345);
    ComputationGraphConfiguration graphConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new NormalDistribution(0, 1))
            .updater(Updater.NONE)
            .learningRate(1.0)
            .graphBuilder()
            .addInputs("input")
            .setOutputs("out")
            .addLayer("lstm1",
                    new GravesLSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(),
                    "input")
            .addVertex("lastTS", new LastTimeStepVertex("input"), "lstm1")
            .addLayer("out",
                    new OutputLayer.Builder().nIn(4).nOut(3).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build(),
                    "lastTS")
            .pretrain(false)
            .backprop(true)
            .build();

    ComputationGraph net = new ComputationGraph(graphConf);
    net.init();

    Random rng = new Random(12345);
    INDArray features = Nd4j.rand(new int[] { 3, 3, 5 });

    // Labels are 2d here (due to LastTimeStepVertex): one randomly chosen column per row is set to 1.0
    INDArray oneHot = Nd4j.zeros(3, 3);
    for (int row = 0; row < 3; row++) {
        int hotColumn = rng.nextInt(3);
        oneHot.putScalar(new int[] { row, hotColumn }, 1.0);
    }

    if (PRINT_RESULTS) {
        System.out.println(msg);
        for (int layerIdx = 0; layerIdx < net.getNumLayers(); layerIdx++) {
            System.out.println("Layer " + layerIdx + " # params: " + net.getLayer(layerIdx).numParams());
        }
    }

    // Pass 1: no input mask array
    boolean okWithoutMask = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
            new INDArray[] { features }, new INDArray[] { oneHot });
    assertTrue(msg, okWithoutMask);

    // Pass 2: same check with an input mask array set on the graph
    INDArray featureMask = Nd4j.zeros(3, 5);
    featureMask.putRow(0, Nd4j.create(new double[] { 1, 1, 1, 0, 0 }));
    featureMask.putRow(1, Nd4j.create(new double[] { 1, 1, 1, 1, 0 }));
    featureMask.putRow(2, Nd4j.create(new double[] { 1, 1, 1, 1, 1 }));
    net.setLayerMaskArrays(new INDArray[] { featureMask }, null);

    boolean okWithMask = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
            new INDArray[] { features }, new INDArray[] { oneHot });
    assertTrue(msg, okWithMask);
}
Also used : NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) LastTimeStepVertex(org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)

Example 2 with LastTimeStepVertex

use of org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex in project deeplearning4j by deeplearning4j.

Class GradientCheckTestsComputationGraph, method testLSTMWithDuplicateToTimeSeries.

/**
 * Gradient check for a two-input graph: "input1" feeds GravesLSTM "lstm1"; "input2" feeds
 * GravesLSTM "lstm2", whose output passes through a LastTimeStepVertex and then a
 * DuplicateToTimeSeriesVertex. "lstm1" and the duplicated output both feed the RnnOutputLayer
 * "out" (nIn = 5 + 4).
 */
@Test
public void testLSTMWithDuplicateToTimeSeries() {
    final String msg = "testLSTMWithDuplicateToTimeSeries()";

    Nd4j.getRandom().setSeed(12345);
    ComputationGraphConfiguration graphConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new NormalDistribution(0, 1))
            .updater(Updater.NONE)
            .learningRate(1.0)
            .graphBuilder()
            .addInputs("input1", "input2")
            .setOutputs("out")
            .addLayer("lstm1",
                    new GravesLSTM.Builder().nIn(3).nOut(4).activation(Activation.TANH).build(),
                    "input1")
            .addLayer("lstm2",
                    new GravesLSTM.Builder().nIn(4).nOut(5).activation(Activation.SOFTSIGN).build(),
                    "input2")
            .addVertex("lastTS", new LastTimeStepVertex("input2"), "lstm2")
            .addVertex("duplicate", new DuplicateToTimeSeriesVertex("input2"), "lastTS")
            .addLayer("out",
                    new RnnOutputLayer.Builder().nIn(5 + 4).nOut(3).activation(Activation.SOFTMAX)
                            .lossFunction(LossFunctions.LossFunction.MCXENT).build(),
                    "lstm1", "duplicate")
            .pretrain(false)
            .backprop(true)
            .build();

    ComputationGraph net = new ComputationGraph(graphConf);
    net.init();

    Random rng = new Random(12345);
    INDArray firstInput = Nd4j.rand(new int[] { 3, 3, 5 });
    INDArray secondInput = Nd4j.rand(new int[] { 3, 4, 5 });

    // 3d labels: for every (first index, last index) pair, one random middle index is set to 1.0.
    // NOTE(review): presumably laid out as [example, class, time step] - confirm.
    INDArray oneHot = Nd4j.zeros(3, 3, 5);
    for (int outer = 0; outer < 3; outer++) {
        for (int inner = 0; inner < 5; inner++) {
            int hotIndex = rng.nextInt(3);
            oneHot.putScalar(new int[] { outer, hotIndex, inner }, 1.0);
        }
    }

    if (PRINT_RESULTS) {
        System.out.println(msg);
        for (int layerIdx = 0; layerIdx < net.getNumLayers(); layerIdx++) {
            System.out.println("Layer " + layerIdx + " # params: " + net.getLayer(layerIdx).numParams());
        }
    }

    boolean gradientsMatch = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
            new INDArray[] { firstInput, secondInput }, new INDArray[] { oneHot });
    assertTrue(msg, gradientsMatch);
}
Also used : NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) DuplicateToTimeSeriesVertex(org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) LastTimeStepVertex(org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)

Example 3 with LastTimeStepVertex

use of org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex in project deeplearning4j by deeplearning4j.

Class TestGraphNodes, method testLastTimeStepVertex.

/**
 * Exercises the "lastTS" vertex of a small graph directly, via its GraphVertex handle.
 *
 * Without a mask: the forward output must equal the input slice at last-dimension index 5, and the
 * backward epsilon must have the input's shape, be zero over interval(0, 4, true) of the last
 * dimension, and equal the supplied epsilon at index 5.
 * With a mask set on the graph: the forward output rows must equal the input slices at
 * last-dimension indices 2, 3 and 4 for rows 0, 1 and 2 respectively.
 * Finally the configuration must survive a JSON round trip unchanged.
 */
@Test
public void testLastTimeStepVertex() {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .graphBuilder()
            .addInputs("in")
            .addVertex("lastTS", new LastTimeStepVertex("in"), "in")
            .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).build(), "lastTS")
            .setOutputs("out")
            .build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();

    // --- Part 1: no input mask array ---
    Nd4j.getRandom().setSeed(12345);
    INDArray features = Nd4j.rand(new int[] { 3, 5, 6 });
    INDArray expectedNoMask = features.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5));

    GraphVertex vertex = net.getVertex("lastTS");
    vertex.setInputs(features);

    // Forward pass
    INDArray forwardOut = vertex.doForward(true);
    assertEquals(expectedNoMask, forwardOut);

    // Backward pass, feeding the expected output back in as the epsilon
    vertex.setEpsilon(expectedNoMask);
    Pair<Gradient, INDArray[]> backward = vertex.doBackward(false);
    INDArray epsOut = backward.getSecond()[0];
    assertArrayEquals(features.shape(), epsOut.shape());

    INDArray epsEarlierSteps = epsOut.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4, true));
    assertEquals(Nd4j.zeros(3, 5, 5), epsEarlierSteps);
    INDArray epsFinalStep = epsOut.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(5));
    assertEquals(expectedNoMask, epsFinalStep);

    // --- Part 2: with an input mask array ---
    double[][] maskRows = {
            { 1, 1, 1, 0, 0, 0 },
            { 1, 1, 1, 1, 0, 0 },
            { 1, 1, 1, 1, 1, 0 } };
    INDArray featureMask = Nd4j.zeros(3, 6);
    for (int row = 0; row < maskRows.length; row++) {
        featureMask.putRow(row, Nd4j.create(maskRows[row]));
    }
    net.setLayerMaskArrays(new INDArray[] { featureMask }, null);

    // Index of the last 1 in each mask row above
    int[] lastUnmasked = { 2, 3, 4 };
    INDArray expectedMasked = Nd4j.zeros(3, 5);
    for (int row = 0; row < lastUnmasked.length; row++) {
        expectedMasked.putRow(row,
                features.get(NDArrayIndex.point(row), NDArrayIndex.all(), NDArrayIndex.point(lastUnmasked[row])));
    }

    vertex.setInputs(features);
    forwardOut = vertex.doForward(true);
    assertEquals(expectedMasked, forwardOut);

    // --- Part 3: config -> JSON -> config round trip ---
    String serialized = conf.toJson();
    ComputationGraphConfiguration restored = ComputationGraphConfiguration.fromJson(serialized);
    assertEquals(conf, restored);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) GraphVertex(org.deeplearning4j.nn.graph.vertex.GraphVertex) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) LastTimeStepVertex(org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)

Example 4 with LastTimeStepVertex

use of org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex in project deeplearning4j by deeplearning4j.

Class TestGraphNodes, method testJSON.

/**
 * Round-trips a graph configuration containing one of each listed vertex type through JSON and
 * asserts that the deserialized configuration equals the original.
 */
@Test
public void testJSON() {
    // The graph below is nonsensical as a network; it only needs to serialize and deserialize.
    ComputationGraphConfiguration original = new NeuralNetConfiguration.Builder()
            .graphBuilder()
            .addInputs("in")
            .addVertex("v1", new ElementWiseVertex(ElementWiseVertex.Op.Add), "in")
            .addVertex("v2", new org.deeplearning4j.nn.conf.graph.MergeVertex(), "in", "in")
            .addVertex("v3", new PreprocessorVertex(new CnnToFeedForwardPreProcessor(1, 2, 1)), "in")
            .addVertex("v4", new org.deeplearning4j.nn.conf.graph.SubsetVertex(0, 1), "in")
            .addVertex("v5", new DuplicateToTimeSeriesVertex("in"), "in")
            .addVertex("v6", new LastTimeStepVertex("in"), "in")
            .addVertex("v7", new org.deeplearning4j.nn.conf.graph.StackVertex(), "in")
            .addVertex("v8", new org.deeplearning4j.nn.conf.graph.UnstackVertex(0, 1), "in")
            .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in")
            .setOutputs("out")
            .build();

    String serialized = original.toJson();
    ComputationGraphConfiguration restored = ComputationGraphConfiguration.fromJson(serialized);
    assertEquals(original, restored);
}
Also used : PreprocessorVertex(org.deeplearning4j.nn.conf.graph.PreprocessorVertex) CnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor) ElementWiseVertex(org.deeplearning4j.nn.conf.graph.ElementWiseVertex) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) DuplicateToTimeSeriesVertex(org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) LastTimeStepVertex(org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex) Test(org.junit.Test)

Example 5 with LastTimeStepVertex

use of org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex in project deeplearning4j by deeplearning4j.

Class FlowIterationListenerTest, method setUp.

/**
 * Builds the two fixtures used by this test class, each only if it has not been created yet
 * ({@code graph} and {@code network} are referenced but not declared in this method; presumably
 * they are fields of the enclosing class).
 *
 * <ul>
 *   <li>{@code graph}: a two-input ComputationGraph ("inEn", "inFr") with embedding layers, a
 *       GravesLSTM encoder whose output goes through LastTimeStepVertex and
 *       DuplicateToTimeSeriesVertex, a GravesLSTM decoder and an RnnOutputLayer.</li>
 *   <li>{@code network}: a MultiLayerNetwork of four convolution layers, three max-pooling layers,
 *       one dense layer and an output layer.</li>
 * </ul>
 *
 * Both models are initialised and handed an all-zeros input array.
 *
 * @throws Exception declared by the original signature; nothing in this body throws a checked
 *                   exception explicitly
 */
@Before
public void setUp() throws Exception {
    if (graph == null) {
        int VOCAB_SIZE = 1000;
        ComputationGraphConfiguration configuration = new NeuralNetConfiguration.Builder()
                .regularization(true)
                .l2(0.0001)
                .weightInit(WeightInit.XAVIER)
                .learningRate(0.01)
                .updater(Updater.RMSPROP)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1)
                .graphBuilder()
                .addInputs("inEn", "inFr")
                .setInputTypes(InputType.recurrent(VOCAB_SIZE + 1), InputType.recurrent(VOCAB_SIZE + 1))
                .addLayer("embeddingEn",
                        new EmbeddingLayer.Builder().nIn(VOCAB_SIZE + 1).nOut(128)
                                .activation(Activation.IDENTITY).build(),
                        "inEn")
                .addLayer("encoder",
                        new GravesLSTM.Builder().nIn(128).nOut(256).activation(Activation.SOFTSIGN).build(),
                        "embeddingEn")
                .addVertex("lastTimeStep", new LastTimeStepVertex("inEn"), "encoder")
                .addVertex("duplicateTimeStep", new DuplicateToTimeSeriesVertex("inFr"), "lastTimeStep")
                .addLayer("embeddingFr",
                        new EmbeddingLayer.Builder().nIn(VOCAB_SIZE + 1).nOut(128)
                                .activation(Activation.IDENTITY).build(),
                        "inFr")
                .addVertex("embeddingFrSeq", new PreprocessorVertex(new FeedForwardToRnnPreProcessor()),
                        "embeddingFr")
                // Decoder input is the merge of the two sources below, hence nIn = 128 + 256
                .addLayer("decoder",
                        new GravesLSTM.Builder().nIn(128 + 256).nOut(256).activation(Activation.SOFTSIGN).build(),
                        "embeddingFrSeq", "duplicateTimeStep")
                .addLayer("output",
                        new RnnOutputLayer.Builder().nIn(256).nOut(VOCAB_SIZE + 1)
                                .activation(Activation.SOFTMAX).build(),
                        "decoder")
                .setOutputs("output")
                .pretrain(false)
                .backprop(true)
                .build();
        graph = new ComputationGraph(configuration);
        graph.init();
        // The same zero array is supplied for both graph inputs
        INDArray input = Nd4j.zeros(10, VOCAB_SIZE, 20);
        graph.setInputs(input, input);
    }
    if (network == null) {
        final int numRows = 40;
        final int numColumns = 40;
        int nChannels = 3;
        int outputNum = LFWLoader.NUM_LABELS;
        int iterations = 5;
        int seed = 123;
        MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                .seed(seed)
                .iterations(iterations)
                .activation(Activation.RELU)
                .weightInit(WeightInit.XAVIER)
                .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .learningRate(0.01)
                .momentum(0.9)
                .regularization(true)
                .updater(Updater.ADAGRAD)
                .useDropConnect(true)
                .list()
                .layer(0, new ConvolutionLayer.Builder(4, 4).name("cnn1").nIn(nChannels).stride(1, 1).nOut(20).build())
                .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }).name("pool1").build())
                .layer(2, new ConvolutionLayer.Builder(3, 3).name("cnn2").stride(1, 1).nOut(40).build())
                .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }).name("pool2").build())
                .layer(4, new ConvolutionLayer.Builder(3, 3).name("cnn3").stride(1, 1).nOut(60).build())
                .layer(5, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 }).name("pool3").build())
                .layer(6, new ConvolutionLayer.Builder(2, 2).name("cnn4").stride(1, 1).nOut(80).build())
                .layer(7, new DenseLayer.Builder().name("ffn1").nOut(160).dropOut(0.5).build())
                .layer(8, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(outputNum)
                        .activation(Activation.SOFTMAX).build())
                .backprop(true)
                .pretrain(false);
        // The constructed object is discarded; NOTE(review): presumably the constructor configures
        // `builder` in place for the given input dimensions - confirm against ConvolutionLayerSetup.
        new ConvolutionLayerSetup(builder, numRows, numColumns, nChannels);
        network = new MultiLayerNetwork(builder.build());
        network.init();
        INDArray input = Nd4j.zeros(10, nChannels, numRows, numColumns);
        network.setInput(input);
    }
}
Also used : PreprocessorVertex(org.deeplearning4j.nn.conf.graph.PreprocessorVertex) DataSet(org.nd4j.linalg.dataset.DataSet) ArrayList(java.util.ArrayList) DuplicateToTimeSeriesVertex(org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) SplitTestAndTrain(org.nd4j.linalg.dataset.SplitTestAndTrain) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ConvolutionLayerSetup(org.deeplearning4j.nn.conf.layers.setup.ConvolutionLayerSetup) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) LastTimeStepVertex(org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex) Before(org.junit.Before)

Aggregations

LastTimeStepVertex (org.deeplearning4j.nn.conf.graph.rnn.LastTimeStepVertex): 5; ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 4; ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 4; Test (org.junit.Test): 4; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 4; NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 3; DuplicateToTimeSeriesVertex (org.deeplearning4j.nn.conf.graph.rnn.DuplicateToTimeSeriesVertex): 3; Random (java.util.Random): 2; NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 2; PreprocessorVertex (org.deeplearning4j.nn.conf.graph.PreprocessorVertex): 2; ArrayList (java.util.ArrayList): 1; ElementWiseVertex (org.deeplearning4j.nn.conf.graph.ElementWiseVertex): 1; ConvolutionLayerSetup (org.deeplearning4j.nn.conf.layers.setup.ConvolutionLayerSetup): 1; CnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor): 1; FeedForwardToRnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor): 1; Gradient (org.deeplearning4j.nn.gradient.Gradient): 1; GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex): 1; MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 1; Before (org.junit.Before): 1; DataSet (org.nd4j.linalg.dataset.DataSet): 1