Search in sources :

Example 6 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class MultiLayerTest method testSummary.

/**
 * Builds a six-layer network (convolution, max pooling, convolution, dense, GravesLSTM, RNN output)
 * trained with truncated BPTT, then prints {@code summary()} for that network and for a
 * {@code TransferLearning} copy of it created with {@code setFeatureExtractor(2)}.
 * The test makes no assertions; it only checks that building both models and summarising them does not throw.
 */
@Test
public void testSummary() {
    final int frameWidth = 130;
    final int frameHeight = 130;
    final int frameCount = 150;
    // Forward and backward truncated-BPTT lengths are the same fraction of the frame count.
    final int tbpttLength = frameCount / 5;

    MultiLayerConfiguration architectureConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            // l2 regularization on all layers
            .regularization(true)
            .l2(0.001)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .learningRate(0.4)
            .list()
            .layer(0, new ConvolutionLayer.Builder(10, 10)
                    // 3 channels: RGB
                    .nIn(3)
                    .nOut(30)
                    .stride(4, 4)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(3, 3)
                    .stride(2, 2)
                    .build())
            .layer(2, new ConvolutionLayer.Builder(3, 3)
                    .nIn(30)
                    .nOut(10)
                    .stride(2, 2)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            .layer(3, new DenseLayer.Builder()
                    .activation(Activation.RELU)
                    // 490 = 7 * 7 * 10, cf. CnnToFeedForwardPreProcessor(7, 7, 10) below
                    .nIn(490)
                    .nOut(50)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.5)
                    .build())
            .layer(4, new GravesLSTM.Builder()
                    .activation(Activation.SOFTSIGN)
                    .nIn(50)
                    .nOut(50)
                    .weightInit(WeightInit.XAVIER)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.6)
                    .build())
            .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(50)
                    // 4 possible shapes: circle, square, arc, line
                    .nOut(4)
                    .updater(Updater.ADAGRAD)
                    .weightInit(WeightInit.XAVIER)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .build())
            .inputPreProcessor(0, new RnnToCnnPreProcessor(frameHeight, frameWidth, 3))
            .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
            .inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(tbpttLength)
            .tBPTTBackwardLength(tbpttLength)
            .build();

    MultiLayerNetwork baseModel = new MultiLayerNetwork(architectureConf);
    baseModel.init();

    MultiLayerNetwork transferredModel = new TransferLearning.Builder(baseModel)
            .setFeatureExtractor(2)
            .build();

    System.out.println(baseModel.summary());
    System.out.println(transferredModel.summary());
}
Also used : RnnToCnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor) CnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Example 7 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class MultiLayerTestRNN method testRnnTimeStepWithPreprocessor.

/**
 * Smoke test: calls {@code rnnTimeStep} on a network whose first layer has a
 * {@link FeedForwardToRnnPreProcessor} attached, using a random input created with {@code Nd4j.rand(1, 10)}.
 * There are no assertions; the test passes as long as no exception is thrown.
 */
@Test
public void testRnnTimeStepWithPreprocessor() {
    MultiLayerConfiguration configuration = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .list()
            // The configuration-side GravesLSTM is spelled out in full on purpose; the original code does the same.
            .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(10)
                    .nOut(10)
                    .activation(Activation.TANH)
                    .build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(10)
                    .nOut(10)
                    .activation(Activation.TANH)
                    .build())
            .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(10)
                    .nOut(10)
                    .build())
            .inputPreProcessor(0, new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .build();

    MultiLayerNetwork network = new MultiLayerNetwork(configuration);
    network.init();

    INDArray singleStepInput = Nd4j.rand(1, 10);
    network.rnnTimeStep(singleStepInput);
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Example 8 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class GradientCheckTestsComputationGraph method testLSTMWithMerging.

/**
 * Gradient check for a computation graph in which the output of "lstm1" feeds two branches
 * ("lstm2" directly, and "dense1" followed by "lstm3") that are joined by a {@code MergeVertex}
 * before the RNN output layer. Labels are one-hot along dimension 1, chosen with a fixed-seed {@link Random}.
 */
@Test
public void testLSTMWithMerging() {
    Nd4j.getRandom().setSeed(12345);

    ComputationGraphConfiguration graphConf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(0.2, 0.6))
            .updater(Updater.NONE)
            .learningRate(1.0)
            .graphBuilder()
            .addInputs("input")
            .setOutputs("out")
            .addLayer("lstm1", new GravesLSTM.Builder()
                    .nIn(3)
                    .nOut(4)
                    .activation(Activation.TANH)
                    .build(), "input")
            .addLayer("lstm2", new GravesLSTM.Builder()
                    .nIn(4)
                    .nOut(4)
                    .activation(Activation.TANH)
                    .build(), "lstm1")
            .addLayer("dense1", new DenseLayer.Builder()
                    .nIn(4)
                    .nOut(4)
                    .activation(Activation.SIGMOID)
                    .build(), "lstm1")
            .addLayer("lstm3", new GravesLSTM.Builder()
                    .nIn(4)
                    .nOut(4)
                    .activation(Activation.TANH)
                    .build(), "dense1")
            .addVertex("merge", new MergeVertex(), "lstm2", "lstm3")
            // nIn(8): the merge joins two branches that each have nOut(4).
            .addLayer("out", new RnnOutputLayer.Builder()
                    .nIn(8)
                    .nOut(3)
                    .activation(Activation.SOFTMAX)
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .build(), "merge")
            .inputPreProcessor("dense1", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("lstm3", new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .build();

    ComputationGraph network = new ComputationGraph(graphConf);
    network.init();

    Random rng = new Random(12345);
    INDArray features = Nd4j.rand(new int[] { 3, 3, 5 });
    INDArray oneHotLabels = Nd4j.zeros(3, 3, 5);
    // For every (dimension-0 index, dimension-2 index) pair, set a single randomly chosen entry along dimension 1.
    for (int row = 0; row < 3; row++) {
        for (int step = 0; step < 5; step++) {
            oneHotLabels.putScalar(new int[] { row, rng.nextInt(3), step }, 1.0);
        }
    }

    if (PRINT_RESULTS) {
        System.out.println("testLSTMWithMerging()");
        for (int layerIdx = 0; layerIdx < network.getNumLayers(); layerIdx++) {
            System.out.println("Layer " + layerIdx + " # params: " + network.getLayer(layerIdx).numParams());
        }
    }

    boolean gradientsMatch = GradientCheckUtil.checkGradients(network, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
            new INDArray[] { features }, new INDArray[] { oneHotLabels });

    assertTrue("testLSTMWithMerging()", gradientsMatch);
}
Also used : UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)

Example 9 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class ComputationGraphTestRNN method testRnnTimeStepGravesLSTM.

/**
 * Checks that {@code ComputationGraph.rnnTimeStep} gives the same output as a single full forward pass
 * when the time series is fed in consecutive chunks of 1, 2, 3, 4, 6 and 12 steps.
 * After every chunk it also compares the stored previous-activation state of layers "0" and "1"
 * with the corresponding time step of the full forward-pass activations.
 */
@Test
public void testRnnTimeStepGravesLSTM() {
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;

    //4 layer network: 2 GravesLSTM + DenseLayer + RnnOutputLayer. Hence also tests preprocessors.
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(5)
                    .nOut(7)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7)
                    .nOut(8)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "0")
            .addLayer("2", new DenseLayer.Builder()
                    .nIn(8)
                    .nOut(9)
                    .activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "1")
            // The original set weightInit(WeightInit.DISTRIBUTION) twice on this builder; once is enough.
            .addLayer("3", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(9)
                    .nOut(4)
                    .activation(Activation.SOFTMAX)
                    .weightInit(WeightInit.DISTRIBUTION)
                    .dist(new NormalDistribution(0, 0.5))
                    .build(), "2")
            .setOutputs("3")
            .inputPreProcessor("2", new RnnToFeedForwardPreProcessor())
            .inputPreProcessor("3", new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    // Reference result: one forward pass over the whole time series.
    INDArray input = Nd4j.rand(new int[] { 3, 5, timeSeriesLength });
    Map<String, INDArray> allOutputActivations = graph.feedForward(input, true);
    INDArray fullOutL0 = allOutputActivations.get("0");
    INDArray fullOutL1 = allOutputActivations.get("1");
    INDArray fullOutL3 = allOutputActivations.get("3");
    assertArrayEquals(new int[] { 3, 7, timeSeriesLength }, fullOutL0.shape());
    assertArrayEquals(new int[] { 3, 8, timeSeriesLength }, fullOutL1.shape());
    assertArrayEquals(new int[] { 3, 4, timeSeriesLength }, fullOutL3.shape());

    // Every chunk length divides timeSeriesLength evenly.
    int[] inputLengths = { 1, 2, 3, 4, 6, 12 };
    //Should get the same result regardless of step size; should be identical to standard forward pass
    for (int inLength : inputLengths) {
        // Number of consecutive chunks, each of length inLength
        int nSteps = timeSeriesLength / inLength;
        // Start every chunk-size run from a clean recurrent state.
        graph.rnnClearPreviousState();
        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;
            INDArray inputSubset = input.get(NDArrayIndex.all(), NDArrayIndex.all(),
                    NDArrayIndex.interval(startTimeRange, endTimeRange));
            // NOTE(review): the size check is skipped for single-step chunks, presumably because a
            // length-1 interval may not come back as a rank-3 array - confirm against the ND4J version in use.
            if (inLength > 1) {
                // assertEquals (rather than assertTrue on ==) reports both values on failure.
                assertEquals(inLength, inputSubset.size(2));
            }

            INDArray[] outArr = graph.rnnTimeStep(inputSubset);
            assertEquals(1, outArr.length);
            INDArray out = outArr[0];

            INDArray expOutSubset;
            if (inLength == 1) {
                // Build an explicit [size(0), size(1), 1] array and copy the single time step into it.
                int[] sizes = new int[] { fullOutL3.size(0), fullOutL3.size(1), 1 };
                expOutSubset = Nd4j.create(sizes);
                expOutSubset.tensorAlongDimension(0, 1, 0).assign(
                        fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset = fullOutL3.get(NDArrayIndex.all(), NDArrayIndex.all(),
                        NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            assertEquals(expOutSubset, out);

            // Stored state after the chunk is compared with the full-pass activations at the chunk's last step.
            Map<String, INDArray> currL0State = graph.rnnGetPreviousState("0");
            Map<String, INDArray> currL1State = graph.rnnGetPreviousState("1");
            INDArray lastActL0 = currL0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currL1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullOutL0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullOutL1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
Also used : NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Example 10 with FeedForwardToRnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor in project deeplearning4j by deeplearning4j.

the class EmbeddingLayerTest method testEmbeddingLayerWithMasking.

/**
 * Compares a network whose first layer is an {@code EmbeddingLayer} (fed class indices) with an
 * otherwise identical network whose first layer is a {@code DenseLayer} (fed the equivalent one-hot
 * input), with a random input mask applied to both. Activations, score and every parameter gradient
 * are expected to match, for mini-batch sizes 1, 2 and 5.
 */
@Test
public void testEmbeddingLayerWithMasking() {
    //Idea: have masking on the input with an embedding and dense layers on input
    //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked
    int[] miniBatchSizes = { 1, 2, 5 };
    Random r = new Random(12345);
    int numInputClasses = 10;
    int timeSeriesLength = 5;
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);

        // Network under test: embedding layer first.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1)
                .updater(Updater.SGD)
                .learningRate(0.1)
                .seed(12345)
                .list()
                .layer(0, new EmbeddingLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5).build())
                .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build())
                .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build())
                .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3).nOut(4).build())
                .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
                .inputPreProcessor(2, new FeedForwardToRnnPreProcessor())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Reference network: identical except that layer 0 is a DenseLayer.
        MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1)
                .updater(Updater.SGD)
                .learningRate(0.1)
                .seed(12345)
                .list()
                .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5).build())
                .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build())
                .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build())
                .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3).nOut(4).build())
                .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
                .inputPreProcessor(2, new FeedForwardToRnnPreProcessor())
                .build();
        MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
        net2.init();

        // Give both networks the same parameter values.
        net2.setParams(net.params().dup());

        INDArray inEmbedding = Nd4j.zeros(nExamples, 1, timeSeriesLength);
        INDArray inDense = Nd4j.zeros(nExamples, numInputClasses, timeSeriesLength);
        INDArray labels = Nd4j.zeros(nExamples, 4, timeSeriesLength);
        for (int i = 0; i < nExamples; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                // The same class is written as an index for the embedding net and as a one-hot entry for the dense net.
                int inIdx = r.nextInt(numInputClasses);
                inEmbedding.putScalar(new int[] { i, 0, j }, inIdx);
                inDense.putScalar(new int[] { i, inIdx, j }, 1.0);

                int outIdx = r.nextInt(4);
                labels.putScalar(new int[] { i, outIdx, j }, 1.0);
            }
        }

        // Random 0/1 mask entry per (example, time step).
        INDArray inputMask = Nd4j.zeros(nExamples, timeSeriesLength);
        for (int i = 0; i < nExamples; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                inputMask.putScalar(new int[] { i, j }, (r.nextBoolean() ? 1.0 : 0.0));
            }
        }
        net.setLayerMaskArrays(inputMask, null);
        net2.setLayerMaskArrays(inputMask, null);

        List<INDArray> actEmbedding = net.feedForward(inEmbedding, false);
        List<INDArray> actDense = net2.feedForward(inDense, false);
        // NOTE(review): the comparison starts at index 1, presumably because element 0 of each list is the
        // network input, which differs by construction (indices vs. one-hot) - confirm against feedForward's contract.
        for (int i = 1; i < actEmbedding.size(); i++) {
            assertEquals(actDense.get(i), actEmbedding.get(i));
        }

        net.setLabels(labels);
        net2.setLabels(labels);
        net.computeGradientAndScore();
        net2.computeGradientAndScore();

        System.out.println(net.score() + "\t" + net2.score());
        assertEquals(net2.score(), net.score(), 1e-5);

        Map<String, INDArray> gradients = net.gradient().gradientForVariable();
        Map<String, INDArray> gradients2 = net2.gradient().gradientForVariable();
        assertEquals(gradients.keySet(), gradients2.keySet());
        for (String s : gradients.keySet()) {
            assertEquals(gradients2.get(s), gradients.get(s));
        }
    }
}
Also used : NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) EmbeddingLayer(org.deeplearning4j.nn.conf.layers.EmbeddingLayer) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Aggregations

FeedForwardToRnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor): 15; Test (org.junit.Test): 14; INDArray (org.nd4j.linalg.api.ndarray.INDArray): 12; RnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor): 10; NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 8; MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 6; Random (java.util.Random): 5; ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 5; GravesLSTM (org.deeplearning4j.nn.layers.recurrent.GravesLSTM): 5; MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 5; NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 4; RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer): 4; CnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor): 3; ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 3; DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 2; EmbeddingLayer (org.deeplearning4j.nn.conf.layers.EmbeddingLayer): 2; RnnToCnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor): 2; Gradient (org.deeplearning4j.nn.gradient.Gradient): 2; ArrayList (java.util.ArrayList): 1; UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution): 1