Example 66 with NormalDistribution

Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.

From the class GradientCheckTests, method testGradientMLP2LayerIrisL1L2Simple.

@Test
public void testGradientMLP2LayerIrisL1L2Simple() {
    //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
    //Need to run gradient through updater, so that L2 can be applied
    String[] activFns = { "sigmoid", "tanh" };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "tanh" };
    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();
    //use l2vals[i] with l1vals[i]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
    for (String afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunction lf = lossFunctions[i];
                    String outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .regularization(true).l2(l2).l1(l1)
                            .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                            .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                            .seed(12345L)
                            .list()
                            .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 1))
                                    .updater(Updater.NONE).activation(afn).build())
                            .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 1))
                                    .updater(Updater.NONE).activation(outputActivation).build())
                            .pretrain(false).backprop(true).build();
                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++) mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
                        for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    String msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                    assertTrue(msg, gradOK);
                }
            }
        }
    }
}
Also used: NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler), IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator), DataSet (org.nd4j.linalg.dataset.DataSet), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator), Test (org.junit.Test)
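
A note on what GradientCheckUtil.checkGradients does above: it compares the network's analytic gradients against central-difference estimates, parameter by parameter. Below is a minimal, framework-free sketch of that idea; the class and tolerances are illustrative, not GradientCheckUtil's actual implementation.

import java.util.function.ToDoubleFunction;

public class NumericalGradientCheck {

    // Estimate d(loss)/d(params[i]) as (f(x + eps) - f(x - eps)) / (2 * eps)
    // and compare it to the analytic gradient using a relative-error test.
    static boolean checkGradients(ToDoubleFunction<double[]> loss, double[] params,
                                  double[] analyticGrad, double eps, double maxRelError) {
        for (int i = 0; i < params.length; i++) {
            double orig = params[i];
            params[i] = orig + eps;
            double scorePlus = loss.applyAsDouble(params);
            params[i] = orig - eps;
            double scoreMinus = loss.applyAsDouble(params);
            params[i] = orig; // restore the parameter
            double numericGrad = (scorePlus - scoreMinus) / (2 * eps);
            double denom = Math.max(Math.abs(analyticGrad[i]) + Math.abs(numericGrad), 1e-12);
            if (Math.abs(analyticGrad[i] - numericGrad) / denom > maxRelError)
                return false;
        }
        return true;
    }

    public static void main(String[] args) {
        // loss(w) = w0^2 + 3*w1, so the analytic gradient is [2*w0, 3]
        double[] w = { 1.5, -2.0 };
        double[] grad = { 2 * w[0], 3.0 };
        System.out.println(checkGradients(p -> p[0] * p[0] + 3 * p[1], w, grad, 1e-6, 1e-5));
    }
}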

Example 67 with NormalDistribution

Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.

From the class LayerConfigTest, method testWeightBiasInitLayerwiseOverride.

@Test
public void testWeightBiasInitLayerwiseOverride() {
    //Without layerwise override:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
            .biasInit(1)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(0).getLayer().getWeightInit());
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(1).getLayer().getWeightInit());
    assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(0).getLayer().getDist().toString());
    assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(1).getLayer().getDist().toString());
    assertEquals(1, conf.getConf(0).getLayer().getBiasInit(), 0.0);
    assertEquals(1, conf.getConf(1).getLayer().getBiasInit(), 0.0);
    //With:
    conf = new NeuralNetConfiguration.Builder()
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
            .biasInit(1)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .layer(1, new DenseLayer.Builder().nIn(2).nOut(2)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                    .biasInit(0).build())
            .build();
    net = new MultiLayerNetwork(conf);
    net.init();
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(0).getLayer().getWeightInit());
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(1).getLayer().getWeightInit());
    assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(0).getLayer().getDist().toString());
    assertEquals("UniformDistribution{lower=0.0, upper=1.0}", conf.getConf(1).getLayer().getDist().toString());
    assertEquals(1, conf.getConf(0).getLayer().getBiasInit(), 0.0);
    assertEquals(0, conf.getConf(1).getLayer().getBiasInit(), 0.0);
}
Also used: NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)
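
Conceptually, the two weight-init distributions asserted above sample as follows. This is a plain java.util.Random sketch of the sampling rules only; dl4j's actual RNG plumbing and weight-init code differ.

import java.util.Random;

public class WeightInitSketch {
    public static void main(String[] args) {
        Random rng = new Random(12345);
        // NormalDistribution(mean=0.0, std=1.0): mean + std * standard normal sample
        double normalSample = 0.0 + 1.0 * rng.nextGaussian();
        // UniformDistribution(lower=0.0, upper=1.0): lower + (upper - lower) * U(0,1)
        double uniformSample = 0.0 + (1.0 - 0.0) * rng.nextDouble();
        System.out.println(normalSample + " " + uniformSample);
    }
}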

Example 68 with NormalDistribution

Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.

From the class ComputationGraphTestRNN, method testTruncatedBPTTSimple.

@Test
public void testTruncatedBPTTSimple() {
    //Extremely simple test of the 'does it throw an exception' variety
    int timeSeriesLength = 12;
    int miniBatchSize = 7;
    int nIn = 5;
    int nOut = 4;
    int nTimeSlices = 20;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(7).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7).nOut(8).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "0")
            .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(8).nOut(nOut).activation(Activation.SOFTMAX)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "1")
            .setOutputs("out")
            .pretrain(false).backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTBackwardLength(timeSeriesLength).tBPTTForwardLength(timeSeriesLength)
            .build();
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    INDArray inputLong = Nd4j.rand(new int[] { miniBatchSize, nIn, nTimeSlices * timeSeriesLength });
    INDArray labelsLong = Nd4j.rand(new int[] { miniBatchSize, nOut, nTimeSlices * timeSeriesLength });
    graph.fit(new INDArray[] { inputLong }, new INDArray[] { labelsLong });
}
Also used: RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer), GravesLSTM (org.deeplearning4j.nn.layers.recurrent.GravesLSTM), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration), Test (org.junit.Test)
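
With tBPTTForwardLength and tBPTTBackwardLength set to 12, the 240-step input above (20 slices of 12 steps) is processed as 20 consecutive segments: within each segment gradients are backpropagated normally, while hidden state (but not gradient flow) is carried across segment boundaries. Below is a framework-free sketch of that segmentation; the loop is hypothetical and not dl4j's internal implementation.

public class TbpttSegments {
    public static void main(String[] args) {
        int totalLength = 20 * 12; // nTimeSlices * timeSeriesLength from the test
        int tbpttLength = 12;
        for (int start = 0; start < totalLength; start += tbpttLength) {
            int end = Math.min(start + tbpttLength, totalLength);
            // forward + backward pass over [start, end); the hidden state from the
            // previous segment is kept, but gradients do not cross the boundary
            System.out.println("segment [" + start + ", " + end + ")");
        }
    }
}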

Example 69 with NormalDistribution

Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.

From the class ComputationGraphTestRNN, method testTruncatedBPTTVsBPTT.

@Test
public void testTruncatedBPTTVsBPTT() {
    //Under some (limited) circumstances, we expect BPTT and truncated BPTT to be identical
    //Specifically TBPTT over entire data vector
    int timeSeriesLength = 12;
    int miniBatchSize = 7;
    int nIn = 5;
    int nOut = 4;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(7).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7).nOut(8).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "0")
            .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(8).nOut(nOut).activation(Activation.SOFTMAX)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "1")
            .setOutputs("out")
            .backprop(true)
            .build();
    assertEquals(BackpropType.Standard, conf.getBackpropType());
    ComputationGraphConfiguration confTBPTT = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(7).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7).nOut(8).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "0")
            .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(8).nOut(nOut).activation(Activation.SOFTMAX)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "1")
            .setOutputs("out")
            .backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(timeSeriesLength).tBPTTBackwardLength(timeSeriesLength)
            .build();
    assertEquals(BackpropType.TruncatedBPTT, confTBPTT.getBackpropType());
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graphTBPTT = new ComputationGraph(confTBPTT);
    graphTBPTT.init();
    assertTrue(graphTBPTT.getConfiguration().getBackpropType() == BackpropType.TruncatedBPTT);
    assertTrue(graphTBPTT.getConfiguration().getTbpttFwdLength() == timeSeriesLength);
    assertTrue(graphTBPTT.getConfiguration().getTbpttBackLength() == timeSeriesLength);
    INDArray inputData = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    INDArray labels = Nd4j.rand(new int[] { miniBatchSize, nOut, timeSeriesLength });
    graph.setInput(0, inputData);
    graph.setLabel(0, labels);
    graphTBPTT.setInput(0, inputData);
    graphTBPTT.setLabel(0, labels);
    graph.computeGradientAndScore();
    graphTBPTT.computeGradientAndScore();
    Pair<Gradient, Double> graphPair = graph.gradientAndScore();
    Pair<Gradient, Double> graphTbpttPair = graphTBPTT.gradientAndScore();
    assertEquals(graphPair.getFirst().gradientForVariable(), graphTbpttPair.getFirst().gradientForVariable());
    assertEquals(graphPair.getSecond(), graphTbpttPair.getSecond());
    //Check states: expect stateMap to be empty but tBpttStateMap to not be
    Map<String, INDArray> l0StateMLN = graph.rnnGetPreviousState(0);
    Map<String, INDArray> l0StateTBPTT = graphTBPTT.rnnGetPreviousState(0);
    Map<String, INDArray> l1StateMLN = graph.rnnGetPreviousState(1);
    Map<String, INDArray> l1StateTBPTT = graphTBPTT.rnnGetPreviousState(1);
    Map<String, INDArray> l0TBPTTState = ((BaseRecurrentLayer<?>) graph.getLayer(0)).rnnGetTBPTTState();
    Map<String, INDArray> l0TBPTTStateTBPTT = ((BaseRecurrentLayer<?>) graphTBPTT.getLayer(0)).rnnGetTBPTTState();
    Map<String, INDArray> l1TBPTTState = ((BaseRecurrentLayer<?>) graph.getLayer(1)).rnnGetTBPTTState();
    Map<String, INDArray> l1TBPTTStateTBPTT = ((BaseRecurrentLayer<?>) graphTBPTT.getLayer(1)).rnnGetTBPTTState();
    assertTrue(l0StateMLN.isEmpty());
    assertTrue(l0StateTBPTT.isEmpty());
    assertTrue(l1StateMLN.isEmpty());
    assertTrue(l1StateTBPTT.isEmpty());
    assertTrue(l0TBPTTState.isEmpty());
    assertTrue(l0TBPTTStateTBPTT.size() == 2);
    assertTrue(l1TBPTTState.isEmpty());
    assertTrue(l1TBPTTStateTBPTT.size() == 2);
    INDArray tbpttActL0 = l0TBPTTStateTBPTT.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    INDArray tbpttActL1 = l1TBPTTStateTBPTT.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
    Map<String, INDArray> activations = graph.feedForward(inputData, true);
    INDArray l0Act = activations.get("0");
    INDArray l1Act = activations.get("1");
    INDArray expL0Act = l0Act.tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    INDArray expL1Act = l1Act.tensorAlongDimension(timeSeriesLength - 1, 1, 0);
    assertEquals(tbpttActL0, expL0Act);
    assertEquals(tbpttActL1, expL1Act);
}
Also used: RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer), Gradient (org.deeplearning4j.nn.gradient.Gradient), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), GravesLSTM (org.deeplearning4j.nn.layers.recurrent.GravesLSTM), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration), BaseRecurrentLayer (org.deeplearning4j.nn.layers.recurrent.BaseRecurrentLayer), Test (org.junit.Test)
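
The final assertions check that the stored TBPTT state equals the network activations at the last time step. The plain-array sketch below shows what extracting the time-t slice of a [miniBatchSize, nOut, timeSeriesLength] activations array means conceptually; the exact axis ordering of the view returned by Nd4j's tensorAlongDimension is a library detail not reproduced here.

public class LastStepSlice {
    public static void main(String[] args) {
        int miniBatch = 2, nOut = 3, timeSeriesLength = 4;
        double[][][] act = new double[miniBatch][nOut][timeSeriesLength];
        act[1][2][timeSeriesLength - 1] = 42.0;
        // take the slice at the final time index: shape [miniBatch, nOut]
        double[][] lastStep = new double[miniBatch][nOut];
        for (int m = 0; m < miniBatch; m++)
            for (int o = 0; o < nOut; o++)
                lastStep[m][o] = act[m][o][timeSeriesLength - 1];
        System.out.println(lastStep[1][2]); // 42.0
    }
}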

Example 70 with NormalDistribution

Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.

From the class ComputationGraphTestRNN, method testTBPTTLongerThanTS.

@Test
public void testTBPTTLongerThanTS() {
    int tbpttLength = 100;
    int timeSeriesLength = 20;
    int miniBatchSize = 7;
    int nIn = 5;
    int nOut = 4;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(nIn).nOut(7).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                    .nIn(7).nOut(8).activation(Activation.TANH)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "0")
            .addLayer("out", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(8).nOut(nOut).activation(Activation.SOFTMAX)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 0.5))
                    .build(), "1")
            .setOutputs("out")
            .pretrain(false).backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTBackwardLength(tbpttLength).tBPTTForwardLength(tbpttLength)
            .build();
    Nd4j.getRandom().setSeed(12345);
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    INDArray inputLong = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    INDArray labelsLong = Nd4j.rand(new int[] { miniBatchSize, nOut, timeSeriesLength });
    INDArray initialParams = graph.params().dup();
    graph.fit(new INDArray[] { inputLong }, new INDArray[] { labelsLong });
    INDArray afterParams = graph.params();
    assertNotEquals(initialParams, afterParams);
}
Also used: RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer), GravesLSTM (org.deeplearning4j.nn.layers.recurrent.GravesLSTM), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration), Test (org.junit.Test)
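
Here the configured tBPTT length (100) exceeds the 20-step series, so the entire sequence fits into a single segment and training is effectively standard BPTT; the test only verifies that fitting runs without an exception and that the parameters change. Illustrative arithmetic, not dl4j internals:

public class TbpttClamp {
    public static void main(String[] args) {
        int tbpttLength = 100, timeSeriesLength = 20;
        int effectiveSegment = Math.min(tbpttLength, timeSeriesLength); // 20 steps
        int numSegments = (timeSeriesLength + tbpttLength - 1) / tbpttLength; // 1 segment
        System.out.println(effectiveSegment + " steps in " + numSegments + " segment");
    }
}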

Aggregations

NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 90 usages
Test (org.junit.Test): 87 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 76 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 49 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 43 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 41 usages
Random (java.util.Random): 28 usages
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 28 usages
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 22 usages
GravesLSTM (org.deeplearning4j.nn.layers.recurrent.GravesLSTM): 13 usages
DataSet (org.nd4j.linalg.dataset.DataSet): 13 usages
RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer): 12 usages
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 9 usages
RnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor): 6 usages
Activation (org.nd4j.linalg.activations.Activation): 5 usages
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 5 usages
DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization): 5 usages
NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler): 5 usages
LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction): 5 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 4 usages