Search in sources :

Example 71 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class TestVariableLengthTSCG method testInputMasking.

@Test
public void testInputMasking() {
    //Idea: apply masking to the input, with 2 dense layers between the input and the RNN
    //Ensure that the parameter gradients do not depend on the input values at the masked time steps
    int[] miniBatchSizes = { 1, 2, 5 };
    int nIn = 2;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345)
                .graphBuilder()
                .addInputs("in")
                .addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in")
                .addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "0")
                .addLayer("2", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "1")
                .addLayer("3", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build(), "2")
                .setOutputs("3")
                .inputPreProcessor("0", new RnnToFeedForwardPreProcessor())
                .inputPreProcessor("2", new FeedForwardToRnnPreProcessor())
                .build();
        ComputationGraph net = new ComputationGraph(conf);
        net.init();
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] { j, 4 }, 0);
        }
        net.setInput(0, in1);
        net.setLabel(0, labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map = g1.gradientForVariable();
        for (String s : map.keySet()) {
            //Gradients are views; need to dup otherwise they will be modified by next computeGradientAndScore
            map.put(s, map.get(s).dup());
        }
        net.setInput(0, in2);
        net.setLabel(0, labels2);
        net.setLayerMaskArrays(new INDArray[] { inputMask }, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        Map<String, INDArray> activations2 = net.feedForward();
        //Scores should differ here: we mask the input, not the output, so the loss is
        //computed over 4 time step outputs in one case and 5 in the other
        assertNotEquals(score1, score2, 0.01);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            assertNotEquals(s, g1s, g2s);
        }
        //Modify the values at the masked time step, and check that the score, gradients and activations do not change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] { j, k, 4 }, r.nextDouble());
            }
            net.setInput(0, in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }
            Map<String, INDArray> activations2a = net.feedForward();
            for (String s : activations2.keySet()) {
                assertEquals(activations2.get(s), activations2a.get(s));
            }
        }
        //Finally: check that the activations for the first two (dense) layers are zero at the appropriate time step
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get("0");
        INDArray l1Before = activations2.get("1");
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Gradient(org.deeplearning4j.nn.gradient.Gradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) RnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor) Random(java.util.Random) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)
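
For reference, the input-masking pattern exercised by this test can be lifted out of the test harness. Below is a minimal, hypothetical sketch (class and variable names are illustrative; it assumes the same 0.x-era DL4J API as the tests on this page) that builds a feature mask of shape [miniBatch, timeSeriesLength], zeroes the final time step of every example, and attaches it before a forward pass:

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class InputMaskingSketch {

    public static void main(String[] args) {
        int miniBatch = 3, nIn = 2, tsLength = 5;

        //Minimal graph: one LSTM feeding an RNN output layer
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .graphBuilder()
                .addInputs("in")
                .addLayer("lstm", new GravesLSTM.Builder().activation(Activation.TANH)
                        .nIn(nIn).nOut(2).build(), "in")
                .addLayer("out", new RnnOutputLayer.Builder()
                        .lossFunction(LossFunctions.LossFunction.MSE)
                        .nIn(2).nOut(1).build(), "lstm")
                .setOutputs("out")
                .build();
        ComputationGraph net = new ComputationGraph(conf);
        net.init();

        INDArray features = Nd4j.rand(new int[] { miniBatch, nIn, tsLength });

        //Feature mask: shape [miniBatch, tsLength]; 1.0 = time step present, 0.0 = masked.
        //Here the final time step of every example is marked absent.
        INDArray featureMask = Nd4j.ones(miniBatch, tsLength);
        for (int i = 0; i < miniBatch; i++) {
            featureMask.putScalar(new int[] { i, tsLength - 1 }, 0.0);
        }

        //First array: one feature mask per graph input; second: label masks (none here)
        net.setLayerMaskArrays(new INDArray[] { featureMask }, null);
        INDArray out = net.output(features)[0];
        System.out.println("Output shape: " + java.util.Arrays.toString(out.shape()));
    }
}

The first argument to setLayerMaskArrays carries one feature mask per graph input; the second carries label masks, which the next example uses.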

Example 72 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class TestVariableLengthTSCG method testOutputMasking.

@Test
public void testOutputMasking() {
    //If a label is masked, we want zero output for that time step.
    int nIn = 3;
    int[] timeSeriesLengths = { 3, 10 };
    int[] outputSizes = { 1, 2, 5 };
    int[] miniBatchSizes = { 1, 4 };
    Random r = new Random(12345);
    for (int tsLength : timeSeriesLengths) {
        for (int nOut : outputSizes) {
            for (int miniBatch : miniBatchSizes) {
                for (int nToMask = 0; nToMask < tsLength - 1; nToMask++) {
                    INDArray labelMaskArray = Nd4j.ones(miniBatch, tsLength);
                    for (int i = 0; i < miniBatch; i++) {
                        //For each example: select which outputs to mask...
                        int nMasked = 0;
                        while (nMasked < nToMask) {
                            int tryIdx = r.nextInt(tsLength);
                            if (labelMaskArray.getDouble(i, tryIdx) == 0.0)
                                continue;
                            labelMaskArray.putScalar(new int[] { i, tryIdx }, 0.0);
                            nMasked++;
                        }
                    }
                    INDArray input = Nd4j.rand(new int[] { miniBatch, nIn, tsLength });
                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                            .regularization(false).seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                    .updater(Updater.NONE).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE)
                                    .activation(Activation.IDENTITY).nIn(5).nOut(nOut)
                                    .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build(), "0")
                            .setOutputs("1").pretrain(false).backprop(true)
                            .build();
                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();
                    ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                            .regularization(false).seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                    .updater(Updater.NONE).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(5).nOut(nOut)
                                    .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build(), "0")
                            .setOutputs("1").pretrain(false).backprop(true)
                            .build();
                    ComputationGraph net2 = new ComputationGraph(conf2);
                    net2.init();
                    net.setLayerMaskArrays(null, new INDArray[] { labelMaskArray });
                    net2.setLayerMaskArrays(null, new INDArray[] { labelMaskArray });
                    INDArray out = net.output(input)[0];
                    INDArray out2 = net2.output(input)[0];
                    for (int i = 0; i < miniBatch; i++) {
                        for (int j = 0; j < tsLength; j++) {
                            double m = labelMaskArray.getDouble(i, j);
                            if (m == 0.0) {
                                //Expect outputs to be exactly 0.0
                                INDArray outRow = out.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(j));
                                INDArray outRow2 = out2.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(j));
                                for (int k = 0; k < nOut; k++) {
                                    assertEquals(outRow.getDouble(k), 0.0, 0.0);
                                    assertEquals(outRow2.getDouble(k), 0.0, 0.0);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
Also used : RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
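
The mask-construction loop in this test stands on its own: it randomly marks nToMask distinct time steps per example as absent. A minimal sketch of just that step (illustrative names; the commented line shows where the mask would be attached to a graph such as net above):

import java.util.Random;

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class LabelMaskSketch {

    public static void main(String[] args) {
        int miniBatch = 4, tsLength = 10, nToMask = 3;
        Random r = new Random(12345);

        //Start with all time steps present, then knock out nToMask distinct steps per example
        INDArray labelMask = Nd4j.ones(miniBatch, tsLength);
        for (int i = 0; i < miniBatch; i++) {
            int nMasked = 0;
            while (nMasked < nToMask) {
                int tryIdx = r.nextInt(tsLength);
                if (labelMask.getDouble(i, tryIdx) == 0.0)
                    //Already masked: pick again
                    continue;
                labelMask.putScalar(new int[] { i, tryIdx }, 0.0);
                nMasked++;
            }
        }
        System.out.println(labelMask);

        //Attached to a ComputationGraph as a label mask (second argument):
        //net.setLayerMaskArrays(null, new INDArray[] { labelMask });
    }
}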

Example 73 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class TestCompGraphCNN method testCNNComputationGraphSingleOutFeatureMap.

@Test
public void testCNNComputationGraphSingleOutFeatureMap() {
    int imageWidth = 23;
    int imageHeight = 23;
    int nChannels = 1;
    int classes = 2;
    int numSamples = 200;
    int kernelHeight = 3;
    int kernelWidth = 3;
    DataSet trainInput;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1).seed(123)
            .graphBuilder()
            .addInputs("input")
            .setInputTypes(InputType.convolutional(imageHeight, imageWidth, nChannels))
            .addLayer("conv1", new ConvolutionLayer.Builder()
                    .kernelSize(kernelHeight, kernelWidth).stride(1, 1)
                    .nIn(nChannels)
                    //check if it can take nOut = 1 only
                    .nOut(1)
                    .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build(), "input")
            .addLayer("pool1", new SubsamplingLayer.Builder()
                    .poolingType(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(imageHeight - kernelHeight, 1).stride(1, 1).build(), "conv1")
            .addLayer("output", new OutputLayer.Builder().nOut(classes).build(), "pool1")
            .setOutputs("output")
            .backprop(true).pretrain(false)
            .build();
    ComputationGraph model = new ComputationGraph(conf);
    model.init();
    INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels);
    INDArray emptyLabels = Nd4j.zeros(numSamples, classes);
    trainInput = new DataSet(emptyFeatures, emptyLabels);
    model.fit(trainInput);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) DataSet(org.nd4j.linalg.dataset.DataSet) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Test(org.junit.Test)
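
Note that the test never sets nIn on its output layer: setInputTypes(InputType.convolutional(...)) inserts the CNN preprocessors and infers the missing nIn values. A minimal standalone sketch of the same pattern (hypothetical class name, same 0.x-era API as above), ending with a forward pass on flattened features:

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CnnGraphSketch {

    public static void main(String[] args) {
        int height = 23, width = 23, channels = 1, classes = 2;

        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(123)
                .graphBuilder()
                .addInputs("input")
                //Adds the CNN preprocessors and infers nIn for downstream layers,
                //so the output layer below needs no explicit nIn
                .setInputTypes(InputType.convolutional(height, width, channels))
                .addLayer("conv", new ConvolutionLayer.Builder()
                        .kernelSize(3, 3).stride(1, 1)
                        .nIn(channels).nOut(1).build(), "input")
                .addLayer("output", new OutputLayer.Builder().nOut(classes).build(), "conv")
                .setOutputs("output")
                .build();
        ComputationGraph model = new ComputationGraph(conf);
        model.init();

        //Features arrive flattened, exactly as in the test above; the preprocessor
        //reshapes them to [miniBatch, channels, height, width]
        INDArray features = Nd4j.rand(5, height * width * channels);
        INDArray out = model.output(features)[0];
        //Expected shape: [5, classes]
        System.out.println(java.util.Arrays.toString(out.shape()));
    }
}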

Example 74 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class TestConvolutionModes method testGlobalLocalConfigCompGraph.

@Test
public void testGlobalLocalConfigCompGraph() {
    for (ConvolutionMode cm : new ConvolutionMode[] { ConvolutionMode.Strict, ConvolutionMode.Truncate, ConvolutionMode.Same }) {
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .weightInit(WeightInit.XAVIER).convolutionMode(cm)
                .graphBuilder()
                .addInputs("in")
                .addLayer("0", new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "in")
                .addLayer("1", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Strict).kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "0")
                .addLayer("2", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Truncate).kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "1")
                .addLayer("3", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Same).kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "2")
                .addLayer("4", new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "3")
                .addLayer("5", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Strict).kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "4")
                .addLayer("6", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Truncate).kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "5")
                .addLayer("7", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Same).kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "6")
                .addLayer("8", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT).nOut(3).build(), "7")
                .setOutputs("8")
                .build();
        assertEquals(cm, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Strict, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Truncate, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Same, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("3")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(cm, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("4")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Strict, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("5")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Truncate, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("6")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Same, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("7")).getLayerConf().getLayer()).getConvolutionMode());
    }
}
Also used : ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ConvolutionMode(org.deeplearning4j.nn.conf.ConvolutionMode) ConvolutionLayer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer) Test(org.junit.Test)
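
The precedence rule this test asserts is simple: convolutionMode(...) on the NeuralNetConfiguration.Builder sets a global default that every convolution and subsampling layer inherits, and a mode set on an individual layer builder overrides it for that layer only. A minimal sketch of one inherit/override pair (illustrative names), reading the modes back the same way the test does:

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.graph.LayerVertex;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class ConvolutionModeSketch {

    public static void main(String[] args) {
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                //Global default, inherited by every conv/subsampling layer...
                .convolutionMode(ConvolutionMode.Truncate)
                .graphBuilder()
                .addInputs("in")
                .addLayer("inherits", new ConvolutionLayer.Builder()
                        .kernelSize(3, 3).stride(1, 1).nIn(3).nOut(3).build(), "in")
                //...unless the layer builder sets its own mode, which wins
                .addLayer("overrides", new ConvolutionLayer.Builder()
                        .convolutionMode(ConvolutionMode.Same)
                        .kernelSize(3, 3).stride(1, 1).nIn(3).nOut(3).build(), "inherits")
                .addLayer("out", new OutputLayer.Builder()
                        .lossFunction(LossFunctions.LossFunction.MCXENT)
                        .nIn(3).nOut(3).build(), "overrides")
                .setOutputs("out")
                .build();

        ConvolutionLayer inherits = (ConvolutionLayer) ((LayerVertex) conf.getVertices().get("inherits")).getLayerConf().getLayer();
        ConvolutionLayer overrides = (ConvolutionLayer) ((LayerVertex) conf.getVertices().get("overrides")).getLayerConf().getLayer();
        //Prints Truncate (inherited), then Same (per-layer override)
        System.out.println(inherits.getConvolutionMode());
        System.out.println(overrides.getConvolutionMode());
    }
}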

Example 75 with ComputationGraphConfiguration

use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.

the class MultiLayerTestRNN method testRnnTimeStepWithPreprocessorGraph.

@Test
public void testRnnTimeStepWithPreprocessorGraph() {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10).activation(Activation.TANH).build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10).activation(Activation.TANH).build(), "0")
            .addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "1")
            .setOutputs("2")
            .inputPreProcessor("0", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true)
            .build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    INDArray in = Nd4j.rand(1, 10);
    net.rnnTimeStep(in);
}
Also used : GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)
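
Unlike output(...), rnnTimeStep(...) keeps the recurrent layers' internal state between calls, so a long sequence can be fed one step at a time. A minimal sketch (illustrative names; no preprocessor this time, so each step is a 3d array of shape [miniBatch, nIn, 1]):

import org.deeplearning4j.nn.conf.ComputationGraphConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class RnnTimeStepSketch {

    public static void main(String[] args) {
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .graphBuilder()
                .addInputs("in")
                .addLayer("lstm", new GravesLSTM.Builder().nIn(10).nOut(10)
                        .activation(Activation.TANH).build(), "in")
                .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "lstm")
                .setOutputs("out")
                .build();
        ComputationGraph net = new ComputationGraph(conf);
        net.init();

        //Feed a sequence one time step at a time; the LSTM state carries over between calls
        for (int t = 0; t < 5; t++) {
            INDArray step = Nd4j.rand(new int[] { 1, 10, 1 });
            INDArray out = net.rnnTimeStep(step)[0];
        }

        //Reset the stored state before starting a new, unrelated sequence
        net.rnnClearPreviousState();
    }
}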

Aggregations

ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 96 usages
Test (org.junit.Test): 84 usages
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 63 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 51 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 50 usages
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 28 usages
DataSet (org.nd4j.linalg.dataset.DataSet): 22 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 20 usages
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 17 usages
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener): 17 usages
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 17 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 14 usages
Random (java.util.Random): 13 usages
ParameterAveragingTrainingMaster (org.deeplearning4j.spark.impl.paramavg.ParameterAveragingTrainingMaster): 11 usages
InMemoryModelSaver (org.deeplearning4j.earlystopping.saver.InMemoryModelSaver): 10 usages
MaxEpochsTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxEpochsTerminationCondition): 10 usages
MultiDataSet (org.nd4j.linalg.dataset.MultiDataSet): 10 usages
MaxTimeIterationTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxTimeIterationTerminationCondition): 9 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 9 usages
RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer): 9 usages