Search in sources:

Example 16 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.

From the class BNGradientCheckTest, method testGradientBNWithCNNandSubsamplingCompGraph:

@Test
public void testGradientBNWithCNNandSubsamplingCompGraph() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    // (d) l1 and l2 values
    Activation[] activFns = { Activation.SIGMOID, Activation.TANH, Activation.IDENTITY };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
    double[] l2vals = { 0.0, 0.1, 0.1 };
    //i.e., use l2vals[j] with l1vals[j]
    double[] l1vals = { 0.0, 0.0, 0.2 };
    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int depth = 2;
    int hw = 5;
    int nOut = 3;
    INDArray input = Nd4j.rand(new int[] { minibatch, depth, hw, hw });
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
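    //Create random one-hot labels: each row of 'labels' gets a single 1.0 in a randomly chosen column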
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }
    DataSet ds = new DataSet(input, labels);
    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int j = 0; j < l2vals.length; j++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];
                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                            .seed(12345)
                            .regularization(l1vals[j] > 0 || l2vals[j] > 0)
                            .l1(l1vals[j]).l2(l2vals[j])
                            .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                            .updater(Updater.NONE)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                            .seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3).activation(afn).build(), "in")
                            .addLayer("1", new BatchNormalization.Builder().build(), "0")
                            .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2).stride(1, 1).build(), "1")
                            .addLayer("3", new BatchNormalization(), "2")
                            .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                            .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut).build(), "4")
                            .setOutputs("5")
                            .setInputTypes(InputType.convolutional(hw, hw, depth))
                            .pretrain(false).backprop(true)
                            .build();
                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();
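                    //Get the name of the enclosing test method via an anonymous inner class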
                    String name = new Object() {
                    }.getClass().getEnclosingMethod().getName();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        net.setInput(0, ds.getFeatures());
                        net.setLabels(ds.getLabels());
                        net.computeGradientAndScore();
                        double scoreBefore = net.score();
                        for (int k = 0; k < 5; k++) net.fit(ds);
                        net.computeGradientAndScore();
                        double scoreAfter = net.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
                        for (int k = 0; k < net.getNumLayers(); k++) System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { input }, new INDArray[] { labels });
                    assertTrue(gradOK);
                }
            }
        }
    }
}
Also used: DataSet (org.nd4j.linalg.dataset.DataSet), Activation (org.nd4j.linalg.activations.Activation), Random (java.util.Random), ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph), UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution), LossFunctions (org.nd4j.linalg.lossfunctions.LossFunctions), INDArray (org.nd4j.linalg.api.ndarray.INDArray), ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration), Test (org.junit.Test)

Example 17 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.

From the class LayerConfigTest, method testWeightBiasInitLayerwiseOverride:

@Test
public void testWeightBiasInitLayerwiseOverride() {
    //Without layerwise override:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)).biasInit(1)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(0).getLayer().getWeightInit());
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(1).getLayer().getWeightInit());
    assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(0).getLayer().getDist().toString());
    assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(1).getLayer().getDist().toString());
    assertEquals(1, conf.getConf(0).getLayer().getBiasInit(), 0.0);
    assertEquals(1, conf.getConf(1).getLayer().getBiasInit(), 0.0);
    //With:
    conf = new NeuralNetConfiguration.Builder()
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0)).biasInit(1)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .layer(1, new DenseLayer.Builder().nIn(2).nOut(2)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                    .biasInit(0).build())
            .build();
    net = new MultiLayerNetwork(conf);
    net.init();
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(0).getLayer().getWeightInit());
    assertEquals(WeightInit.DISTRIBUTION, conf.getConf(1).getLayer().getWeightInit());
    assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(0).getLayer().getDist().toString());
    assertEquals("UniformDistribution{lower=0.0, upper=1.0}", conf.getConf(1).getLayer().getDist().toString());
    assertEquals(1, conf.getConf(0).getLayer().getBiasInit(), 0.0);
    assertEquals(0, conf.getConf(1).getLayer().getBiasInit(), 0.0);
}
Also used: NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)

Example 18 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.

From the class TestDropout, method testDropoutMultiLayer:

@Test
public void testDropoutMultiLayer() throws Exception {
    //Testing dropout with multiple layers
    //Layer input: values should be set to either 0.0 or 2.0x original value
    //However: we don't have access to 'original' activations easily
    //Instead: use sigmoid + weight initialization that saturates
    int nIn = 8;
    int layerSize = 10;
    int nOut = 4;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.SGD).iterations(1)
            .regularization(true).dropOut(0.5)
            .learningRate(1e-9)
            .weightInit(WeightInit.DISTRIBUTION)
            //Weight init to cause sigmoid saturation
            .dist(new UniformDistribution(10, 11))
            .list()
            .layer(0, new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(layerSize).build())
            .layer(1, new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(layerSize).nOut(layerSize).build())
            .layer(2, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(layerSize).nOut(nOut).weightInit(WeightInit.XAVIER).build())
            .backprop(true).pretrain(false)
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
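    //Access BaseLayer's dropoutMask field via reflection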
    Field dropoutMaskField = BaseLayer.class.getDeclaredField("dropoutMask");
    dropoutMaskField.setAccessible(true);
    int nTests = 15;
    Nd4j.getRandom().setSeed(12345);
    int noDropoutCount = 0;
    for (int i = 0; i < nTests; i++) {
        INDArray in = Nd4j.rand(1, nIn).dup('c');
        INDArray out = Nd4j.rand(1, nOut).dup('c');
        INDArray inCopy = in.dup('c');
        net.fit(new DataSet(in, out));
        INDArray l0Input = net.getLayer(0).input().dup('c');
        //Dropout occurred. Expect inputs to be either scaled 2x original, or set to 0.0 (with dropout = 0.5)
        NdIndexIterator iter = new NdIndexIterator(inCopy.shape());
        boolean anyDropped = false;
        while (iter.hasNext()) {
            int[] idx = iter.next();
            double origValue = inCopy.getDouble(idx);
            double doValue = l0Input.getDouble(idx);
            if (doValue > 0.0) {
                //Input was kept -> should be scaled by factor of (1.0/0.5 = 2)
                assertEquals(origValue * 2.0, doValue, 0.0001);
            } else {
                anyDropped = true;
            }
        }
        //Count the (rare) iterations in which no input element was dropped; checked against nTests below
        if (!anyDropped) noDropoutCount++;
        //With the saturating sigmoid weight init, inputs to layers 1 and 2 should
        //all be ~1.0 before dropout -> either 0 or ~2.0 after dropout
        for (int j = 1; j < 3; j++) {
            INDArray ljInput = net.getLayer(j).input();
            for (int k = 0; k < ljInput.length(); k++) {
                double doValue = ljInput.getDouble(k);
                if (doValue > 0.0) {
                    //Input was kept -> should be scaled by factor of (1.0/0.5 = 2)
                    //Sigmoid is saturated -> inputs should be ~1.0 -> 2.0 after dropout
                    assertEquals(2.0, doValue, 0.1);
                }
            }
        }
        //Do forward pass
        //(1) ensure dropout ISN'T being applied for forward pass at test time
        //(2) ensure dropout ISN'T being applied for test time scoring
        //If dropout is applied at test time: outputs + score will differ between passes
        INDArray in2 = Nd4j.rand(1, nIn);
        INDArray out2 = Nd4j.rand(1, nOut);
        INDArray outTest1 = net.output(in2, false);
        INDArray outTest2 = net.output(in2, false);
        INDArray outTest3 = net.output(in2, false);
        assertEquals(outTest1, outTest2);
        assertEquals(outTest1, outTest3);
        double score1 = net.score(new DataSet(in2, out2), false);
        double score2 = net.score(new DataSet(in2, out2), false);
        double score3 = net.score(new DataSet(in2, out2), false);
        assertEquals(score1, score2, 0.0);
        assertEquals(score1, score3, 0.0);
    }
    if (noDropoutCount >= nTests / 3) {
        //at 0.5 dropout ratio and more than a few inputs, expect only a very small number of instances where
        //no dropout occurs, just due to random chance
        fail("Too many instances of dropout not being applied");
    }
}
Also used: OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer), NdIndexIterator (org.nd4j.linalg.api.iter.NdIndexIterator), DataSet (org.nd4j.linalg.dataset.DataSet), UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Field (java.lang.reflect.Field), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer), INDArray (org.nd4j.linalg.api.ndarray.INDArray), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)
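
The saturation trick in this test can be checked by hand: with weights drawn from UniformDistribution(10, 11) and non-negative random inputs, the pre-activations are large and positive, so the sigmoid output is effectively 1.0; inverted dropout at rate 0.5 then rescales kept activations by 1/0.5 = 2, which is why the assertions above expect values near 2.0. A minimal standalone sketch of that arithmetic (the class name and the numbers below are illustrative, not part of the test):

public class DropoutScalingSketch {

    static double sigmoid(double x) {
        return 1.0 / (1.0 + Math.exp(-x));
    }

    public static void main(String[] args) {
        int nIn = 8;              // number of inputs, as in the test above
        double minWeight = 10.0;  // lower bound of UniformDistribution(10, 11)
        double meanInput = 0.5;   // Nd4j.rand(...) draws inputs from [0, 1)

        // A rough average-case pre-activation: far into the sigmoid's flat region
        double preActivation = nIn * minWeight * meanInput;
        double activation = sigmoid(preActivation);
        System.out.println("sigmoid(" + preActivation + ") = " + activation); // ~1.0

        // Inverted dropout with rate 0.5 scales kept units by 1/0.5 = 2
        double dropout = 0.5;
        System.out.println("kept activation after scaling: " + activation / dropout); // ~2.0
    }
}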

Example 19 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.

From the class GravesBidirectionalLSTMTest, method testGravesBidirectionalLSTMForwardPassHelper:

@Test
public void testGravesBidirectionalLSTMForwardPassHelper() throws Exception {
    //GravesBidirectionalLSTM.activateHelper() has different behaviour (due to optimizations) when forBackprop==true vs false
    //But should otherwise provide identical activations
    Nd4j.getRandom().setSeed(12345);
    final int nIn = 10;
    final int layerSize = 15;
    final int miniBatchSize = 4;
    final int timeSeriesLength = 7;
    final NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                    .nIn(nIn).nOut(layerSize)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                    .activation(Activation.TANH).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    final GravesBidirectionalLSTM lstm = (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
    final INDArray input = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    lstm.setInput(input);
    final INDArray fwdPassFalse = LSTMHelpers.activateHelper(lstm, lstm.conf(), new ActivationSigmoid(), lstm.input(),
            lstm.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS),
            lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS),
            lstm.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS),
            false, null, null, false, true,
            GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, null).fwdPassOutput;
    final INDArray[] fwdPassTrue = LSTMHelpers.activateHelper(lstm, lstm.conf(), new ActivationSigmoid(), lstm.input(),
            lstm.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS),
            lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS),
            lstm.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS),
            false, null, null, true, true,
            GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, null).fwdPassOutputAsArrays;
    //I have no idea what the heck this does --Ben
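    //Compare each time-step slice of the single 3D fwdPassOutput against the corresponding 2D array in fwdPassOutputAsArrays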
    for (int i = 0; i < timeSeriesLength; i++) {
        final INDArray sliceFalse = fwdPassFalse.tensorAlongDimension(i, 1, 0);
        final INDArray sliceTrue = fwdPassTrue[i];
        assertTrue(sliceFalse.equals(sliceTrue));
    }
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution), ActivationSigmoid (org.nd4j.linalg.activations.impl.ActivationSigmoid), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Test (org.junit.Test)

Example 20 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.

From the class GravesBidirectionalLSTMTest, method testGetSetParmas:

@Test
public void testGetSetParmas() {
    final int nIn = 2;
    final int layerSize = 3;
    final int miniBatchSize = 2;
    final int timeSeriesLength = 10;
    Nd4j.getRandom().setSeed(12345);
    final NeuralNetConfiguration confBidirectional = new NeuralNetConfiguration.Builder()
            .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                    .nIn(nIn).nOut(layerSize)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.1, 0.1))
                    .activation(Activation.TANH).build())
            .build();
    int numParams = confBidirectional.getLayer().initializer().numParams(confBidirectional);
    INDArray params = Nd4j.create(1, numParams);
    final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getLayer().instantiate(confBidirectional, null, 0, params, true);
    final INDArray sig = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    final INDArray act1 = bidirectionalLSTM.activate(sig);
    params = bidirectionalLSTM.params();
    bidirectionalLSTM.setParams(params);
    final INDArray act2 = bidirectionalLSTM.activate(sig);
    assertArrayEquals(act2.data().asDouble(), act1.data().asDouble(), 1e-8);
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Test (org.junit.Test)

Aggregations

UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution): 22 usages
Test (org.junit.Test): 20 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 19 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 14 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 10 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 10 usages
Random (java.util.Random): 6 usages
DataSet (org.nd4j.linalg.dataset.DataSet): 6 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 4 usages
Field (java.lang.reflect.Field): 3 usages
ArrayList (java.util.ArrayList): 3 usages
org.deeplearning4j.nn.conf.layers (org.deeplearning4j.nn.conf.layers): 3 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 3 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 3 usages
Activation (org.nd4j.linalg.activations.Activation): 3 usages
ILossFunction (org.nd4j.linalg.lossfunctions.ILossFunction): 3 usages
LossFunctions (org.nd4j.linalg.lossfunctions.LossFunctions): 3 usages
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 2 usages
Evaluation (org.deeplearning4j.eval.Evaluation): 2 usages
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 2 usages
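
Across all of the examples above, the recurring pattern is the same: select WeightInit.DISTRIBUTION and pass a UniformDistribution(lower, upper) via dist(...), either globally on the builder or as a per-layer override. A minimal sketch of that pattern, assuming the same 0.x-era DL4J builder API used in these tests (the class name, layer sizes, loss function, and bounds below are illustrative):

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class UniformDistributionSketch {
    public static void main(String[] args) {
        //Weights drawn uniformly from [-0.1, 0.1]; the output layer overrides this with [0, 1]
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .weightInit(WeightInit.DISTRIBUTION)
                .dist(new UniformDistribution(-0.1, 0.1))
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                        .activation(Activation.TANH).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX)
                        .weightInit(WeightInit.DISTRIBUTION)
                        .dist(new UniformDistribution(0, 1))   //layer-wise override
                        .nIn(3).nOut(3).build())
                .pretrain(false).backprop(true)
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
    }
}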