
Example 1 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in the project deeplearning4j, by deeplearning4j.

From the class GradientCheckTests, method testGradientCnnFfRnn.

@Test
public void testGradientCnnFfRnn() {
    //Test gradients with CNN -> FF -> LSTM -> RnnOutputLayer
    //time series input/output (i.e., video classification or similar)
    int nChannelsIn = 3;
    //10px x 10px x 3 channels
    int inputSize = 10 * 10 * nChannelsIn;
    int miniBatchSize = 4;
    int timeSeriesLength = 10;
    int nClasses = 3;
    //Generate random input and random one-hot labels (one class per example, per time step)
    Nd4j.getRandom().setSeed(12345);
    INDArray input = Nd4j.rand(new int[] { miniBatchSize, inputSize, timeSeriesLength });
    INDArray labels = Nd4j.zeros(miniBatchSize, nClasses, timeSeriesLength);
    Random r = new Random(12345);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < timeSeriesLength; j++) {
            int idx = r.nextInt(nClasses);
            labels.putScalar(new int[] { i, idx, j }, 1.0);
        }
    }
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE).seed(12345)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2)).list()
                    .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).nOut(5).stride(1, 1)
                                    .activation(Activation.TANH).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                                    .stride(1, 1).build())
                    .layer(2, new DenseLayer.Builder().nIn(5 * 5 * 5).nOut(4).activation(Activation.TANH).build())
                    .layer(3, new GravesLSTM.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                    .layer(4, new RnnOutputLayer.Builder().lossFunction(LossFunction.MCXENT).nIn(3).nOut(nClasses)
                                    .activation(Activation.SOFTMAX).build())
                    .setInputType(InputType.convolutional(10, 10, 3)).pretrain(false).backprop(true).build();
    //Here: ConvolutionLayerSetup in config builder doesn't know that we are expecting time series input, not standard FF input -> override it here
    conf.getInputPreProcessors().put(0, new RnnToCnnPreProcessor(10, 10, 3));
    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    mln.init();
    System.out.println("Params per layer:");
    for (int i = 0; i < mln.getnLayers(); i++) {
        System.out.println("layer " + i + "\t" + mln.getLayer(i).numParams());
    }
    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
    assertTrue(gradOK);
}
Also used : RnnToCnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Random(java.util.Random) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)
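
The configuration pattern shared by these examples is weightInit(WeightInit.DISTRIBUTION) combined with .dist(new UniformDistribution(lower, upper)): when the network is initialized, every weight is drawn uniformly from [lower, upper]. The sketch below isolates just that pattern, assuming the same 0.x-era builder API as the tests above; the class name, seed, and layer sizes are arbitrary and only for illustration.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

// Minimal sketch (not from the project): uniform weight initialization in [-2, 2]
public class UniformInitSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .seed(12345).updater(Updater.NONE)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                        .list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                        .layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3)
                                        .activation(Activation.SOFTMAX).build())
                        .pretrain(false).backprop(true).build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init(); // weights are sampled here, uniformly from [-2, 2]
        System.out.println("Total parameters: " + net.numParams());
    }
}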

Example 2 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in the project deeplearning4j, by deeplearning4j.

From the class LossFunctionGradientCheck, method lossFunctionGradientCheckLossLayer.

@Test
public void lossFunctionGradientCheckLossLayer() {
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(),
                    new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(),
                    new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(),
                    new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(),
                    new LossSquaredHinge() };
    String[] outputActivationFn = new String[] {
                    "sigmoid",  //xent
                    "sigmoid",  //xent
                    "tanh",     //cosine
                    "tanh",     //hinge -> trying to predict 1 or -1
                    "sigmoid",  //kld -> probab so should be between 0 and 1
                    "softmax",  //kld + softmax
                    "tanh",     //l1
                    "softmax",  //l1 + softmax
                    "tanh",     //l2
                    "softmax",  //l2 + softmax
                    "identity", //mae
                    "softmax",  //mae + softmax
                    "identity", //mape
                    "softmax",  //mape + softmax
                    "softmax",  //mcxent
                    "identity", //mse
                    "softmax",  //mse + softmax
                    "sigmoid",  //msle - requires positive labels/activations due to log
                    "softmax",  //msle + softmax
                    "sigmoid",  //nll
                    "softmax",  //nll + softmax
                    "sigmoid",  //poisson - requires positive predictions due to log... not sure if this is the best option
                    "tanh"      //squared hinge
    };
    int[] nOut = new int[] {
                    1, //xent
                    3, //xent
                    5, //cosine
                    3, //hinge
                    3, //kld
                    3, //kld + softmax
                    3, //l1
                    3, //l1 + softmax
                    3, //l2
                    3, //l2 + softmax
                    3, //mae
                    3, //mae + softmax
                    3, //mape
                    3, //mape + softmax
                    3, //mcxent
                    3, //mse
                    3, //mse + softmax
                    3, //msle
                    3, //msle + softmax
                    3, //nll
                    3, //nll + softmax
                    3, //poisson
                    3  //squared hinge
    };
    int[] minibatchSizes = new int[] { 1, 3 };
    //        int[] minibatchSizes = new int[]{3};
    List<String> passed = new ArrayList<>();
    List<String> failed = new ArrayList<>();
    for (int i = 0; i < lossFunctions.length; i++) {
        for (int j = 0; j < minibatchSizes.length; j++) {
            String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
            Nd4j.getRandom().setSeed(12345);
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
                            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
                            .updater(Updater.NONE).regularization(false)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2)).list()
                            .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i])
                                            .activation(Activation.TANH).build())
                            .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i])
                                            .activation(outputActivationFn[i]).build())
                            .pretrain(false).backprop(true).build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            assertTrue(((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn().getClass() == lossFunctions[i].getClass());
            INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
            INDArray input = inOut[0];
            INDArray labels = inOut[1];
            log.info(" ***** Starting test: {} *****", testName);
            //                System.out.println(Arrays.toString(labels.data().asDouble()));
            //                System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
            //                System.out.println(net.score(new DataSet(input,labels)));
            boolean gradOK;
            try {
                gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
            } catch (Exception e) {
                e.printStackTrace();
                failed.add(testName + "\t" + "EXCEPTION");
                continue;
            }
            if (gradOK) {
                passed.add(testName);
            } else {
                failed.add(testName);
            }
            System.out.println("\n\n");
        }
    }
    System.out.println("---- Passed ----");
    for (String s : passed) {
        System.out.println(s);
    }
    System.out.println("---- Failed ----");
    for (String s : failed) {
        System.out.println(s);
    }
    assertEquals("Tests failed", 0, failed.size());
}
Also used : ArrayList(java.util.ArrayList) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) LossLayer(org.deeplearning4j.nn.conf.layers.LossLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)
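
One detail worth noting in this example: LossLayer, unlike OutputLayer, has no weights of its own, so the preceding DenseLayer must already produce nOut[i] outputs and the LossLayer only applies the output activation and the loss function. Below is a minimal sketch of that pairing outside the test harness; the impl package for the concrete loss class (org.nd4j.linalg.lossfunctions.impl) is assumed, and the class name and sizes are arbitrary.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LossLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;

// Minimal sketch (not from the project): the DenseLayer holds the weights, the LossLayer is parameter-free
public class LossLayerSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                        .list()
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                        .layer(1, new LossLayer.Builder().lossFunction(new LossMSE())
                                        .activation("identity").build())
                        .pretrain(false).backprop(true).build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        System.out.println("LossLayer params: " + net.getLayer(1).numParams()); // expected: 0
    }
}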

Example 3 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in the project deeplearning4j, by deeplearning4j.

From the class LossFunctionGradientCheck, method lossFunctionWeightedGradientCheck.

@Test
public void lossFunctionWeightedGradientCheck() {
    INDArray[] weights = new INDArray[] { Nd4j.create(new double[] { 0.2, 0.3, 0.5 }), Nd4j.create(new double[] { 1.0, 0.5, 2.0 }) };
    List<String> passed = new ArrayList<>();
    List<String> failed = new ArrayList<>();
    for (INDArray w : weights) {
        ILossFunction[] lossFunctions = new ILossFunction[] {
                        new LossBinaryXENT(w), new LossL1(w), new LossL1(w), new LossL2(w), new LossL2(w),
                        new LossMAE(w), new LossMAE(w), new LossMAPE(w), new LossMAPE(w), new LossMCXENT(w),
                        new LossMSE(w), new LossMSE(w), new LossMSLE(w), new LossMSLE(w),
                        new LossNegativeLogLikelihood(w), new LossNegativeLogLikelihood(w) };
        String[] outputActivationFn = new String[] {
                        "sigmoid",  //xent
                        "tanh",     //l1
                        "softmax",  //l1 + softmax
                        "tanh",     //l2
                        "softmax",  //l2 + softmax
                        "identity", //mae
                        "softmax",  //mae + softmax
                        "identity", //mape
                        "softmax",  //mape + softmax
                        "softmax",  //mcxent
                        "identity", //mse
                        "softmax",  //mse + softmax
                        "sigmoid",  //msle - requires positive labels/activations due to log
                        "softmax",  //msle + softmax
                        "sigmoid",  //nll
                        "softmax"   //nll + softmax
        };
        int[] minibatchSizes = new int[] { 1, 3 };
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int j = 0; j < minibatchSizes.length; j++) {
                String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j] + "; weights = " + w;
                Nd4j.getRandom().setSeed(12345);
                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
                                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
                                .updater(Updater.NONE).regularization(false)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-3, 3)).list()
                                .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                                .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i])
                                                .activation(outputActivationFn[i]).nIn(4).nOut(3).build())
                                .pretrain(false).backprop(true).build();
                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();
                INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, 3, 12345);
                INDArray input = inOut[0];
                INDArray labels = inOut[1];
                log.info(" ***** Starting test: {} *****", testName);
                //                System.out.println(Arrays.toString(labels.data().asDouble()));
                //                System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
                //                System.out.println(net.score(new DataSet(input,labels)));
                boolean gradOK;
                try {
                    gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                } catch (Exception e) {
                    e.printStackTrace();
                    failed.add(testName + "\t" + "EXCEPTION");
                    continue;
                }
                if (gradOK) {
                    passed.add(testName);
                } else {
                    failed.add(testName);
                }
                System.out.println("\n\n");
            }
        }
    }
    System.out.println("---- Passed ----");
    for (String s : passed) {
        System.out.println(s);
    }
    System.out.println("---- Failed ----");
    for (String s : failed) {
        System.out.println(s);
    }
    assertEquals("Tests failed", 0, failed.size());
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) ArrayList(java.util.ArrayList) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)
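
The weighted variants above differ from Example 2 only in how the loss is constructed: a per-output weight vector is passed to the loss constructor (e.g. new LossL1(w)), and each output column's contribution to the score and gradient is then scaled by the corresponding weight. Below is a short sketch of building such a weighted loss and attaching it to an output layer; the impl package for LossMCXENT (org.nd4j.linalg.lossfunctions.impl) is assumed, and the weight values are taken from the test above.

import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossMCXENT;

// Minimal sketch (not from the project): a class-weighted MCXENT loss
public class WeightedLossSketch {
    public static void main(String[] args) {
        // One weight per output/class: class 2 contributes twice as much as class 0, class 1 half as much
        INDArray w = Nd4j.create(new double[] { 1.0, 0.5, 2.0 });
        ILossFunction weightedLoss = new LossMCXENT(w);

        // The weighted loss plugs in exactly like an unweighted one
        OutputLayer out = new OutputLayer.Builder().lossFunction(weightedLoss)
                        .activation(Activation.SOFTMAX).nIn(4).nOut(3).build();
        System.out.println(out);
    }
}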

Example 4 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in the project deeplearning4j, by deeplearning4j.

From the class TestComputationGraphNetwork, method testPreTraining.

@Test
public void testPreTraining() {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().iterations(100)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
                    .updater(Updater.SGD).learningRate(1e-6).regularization(true).l2(2e-4)
                    .graphBuilder().addInputs("in")
                    .addLayer("layer0", new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, RBM.VisibleUnit.GAUSSIAN)
                                    .nIn(4).nOut(3).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new UniformDistribution(0, 1)).activation(Activation.TANH)
                                    .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build(), "in")
                    .addLayer("layer1", new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, RBM.VisibleUnit.GAUSSIAN)
                                    .nIn(4).nOut(3).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new UniformDistribution(0, 1)).activation(Activation.TANH)
                                    .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build(), "in")
                    .addLayer("layer2", new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, RBM.VisibleUnit.GAUSSIAN)
                                    .nIn(3).nOut(3).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new UniformDistribution(0, 1)).activation(Activation.TANH)
                                    .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build(), "layer1")
                    .addLayer("out", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(3 + 3).nOut(3).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new UniformDistribution(0, 1)).activation(Activation.SOFTMAX).build(),
                                    "layer0", "layer2")
                    .setOutputs("out").pretrain(true).backprop(false).build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(1));
    DataSetIterator iter = new IrisDataSetIterator(10, 150);
    net.fit(iter);
}
Also used : IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) org.deeplearning4j.nn.conf(org.deeplearning4j.nn.conf) ScoreIterationListener(org.deeplearning4j.optimize.listeners.ScoreIterationListener) IrisDataSetIterator(org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) RecordReaderMultiDataSetIterator(org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator) MultiDataSetIterator(org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator) Test(org.junit.Test)
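
Unlike the earlier examples, this one sets weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1)) on each layer's builder rather than once on the NeuralNetConfiguration.Builder, so different layers could use different initialization ranges. The sketch below shows the same per-layer override on a plain MultiLayerConfiguration; calling weightInit/dist on DenseLayer.Builder is assumed to behave like the RBM and OutputLayer builders above, and the class name and ranges are arbitrary.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

// Minimal sketch (not from the project): per-layer uniform initialization ranges
public class PerLayerDistSketch {
    public static MultiLayerConfiguration build() {
        return new NeuralNetConfiguration.Builder().seed(12345)
                        .list()
                        // Hidden layer drawn from U(0, 1), as in the RBM layers above
                        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3)
                                        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                        .activation(Activation.TANH).build())
                        // Output layer drawn from a wider range, U(-1, 1)
                        .layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3)
                                        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-1, 1))
                                        .activation(Activation.SOFTMAX).build())
                        .pretrain(false).backprop(true).build();
    }
}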

Example 5 with UniformDistribution

Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in the project deeplearning4j, by deeplearning4j.

From the class GravesBidirectionalLSTMTest, method testSimpleForwardsAndBackwardsActivation.

@Test
public void testSimpleForwardsAndBackwardsActivation() {
    final int nIn = 2;
    final int layerSize = 3;
    final int miniBatchSize = 1;
    final int timeSeriesLength = 5;
    Nd4j.getRandom().setSeed(12345);
    final NeuralNetConfiguration confBidirectional = new NeuralNetConfiguration.Builder()
                    .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder().nIn(nIn)
                                    .nOut(layerSize).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new UniformDistribution(-0.1, 0.1)).activation(Activation.TANH)
                                    .updater(Updater.NONE).build())
                    .build();
    final NeuralNetConfiguration confForwards = new NeuralNetConfiguration.Builder()
                    .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                    .weightInit(WeightInit.ZERO).activation(Activation.TANH).build())
                    .build();
    int numParams = confForwards.getLayer().initializer().numParams(confForwards);
    INDArray params = Nd4j.create(1, numParams);
    int numParamsBD = confBidirectional.getLayer().initializer().numParams(confBidirectional);
    INDArray paramsBD = Nd4j.create(1, numParamsBD);
    final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getLayer().instantiate(confBidirectional, null, 0, paramsBD, true);
    final GravesLSTM forwardsLSTM = (GravesLSTM) confForwards.getLayer().instantiate(confForwards, null, 0, params, true);
    bidirectionalLSTM.setBackpropGradientsViewArray(Nd4j.create(1, confBidirectional.getLayer().initializer().numParams(confBidirectional)));
    forwardsLSTM.setBackpropGradientsViewArray(Nd4j.create(1, confForwards.getLayer().initializer().numParams(confForwards)));
    final INDArray sig = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    final INDArray sigb = sig.dup();
    reverseColumnsInPlace(sigb.slice(0));
    final INDArray recurrentWeightsF = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS);
    final INDArray inputWeightsF = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS);
    final INDArray biasWeightsF = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS);
    final INDArray recurrentWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    final INDArray inputWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    final INDArray biasWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.BIAS_KEY);
    //assert that the forwards part of the bidirectional layer is equal to that of the regular LSTM
    assertArrayEquals(recurrentWeightsF2.shape(), recurrentWeightsF.shape());
    assertArrayEquals(inputWeightsF2.shape(), inputWeightsF.shape());
    assertArrayEquals(biasWeightsF2.shape(), biasWeightsF.shape());
    forwardsLSTM.setParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, recurrentWeightsF);
    forwardsLSTM.setParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, inputWeightsF);
    forwardsLSTM.setParam(GravesLSTMParamInitializer.BIAS_KEY, biasWeightsF);
    //copy forwards weights to make the forwards activations do the same thing
    final INDArray recurrentWeightsB = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    final INDArray inputWeightsB = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    final INDArray biasWeightsB = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);
    //assert that the forwards and backwards are the same shapes
    assertArrayEquals(recurrentWeightsF.shape(), recurrentWeightsB.shape());
    assertArrayEquals(inputWeightsF.shape(), inputWeightsB.shape());
    assertArrayEquals(biasWeightsF.shape(), biasWeightsB.shape());
    //zero out backwards layer
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS, Nd4j.zeros(recurrentWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, Nd4j.zeros(inputWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS, Nd4j.zeros(biasWeightsB.shape()));
    forwardsLSTM.setInput(sig);
    //compare activations
    final INDArray activation1 = forwardsLSTM.activate(sig).slice(0);
    final INDArray activation2 = bidirectionalLSTM.activate(sig).slice(0);
    assertArrayEquals(activation1.data().asFloat(), activation2.data().asFloat(), 1e-5f);
    final INDArray randSig = Nd4j.rand(new int[] { 1, layerSize, timeSeriesLength });
    final INDArray randSigBackwards = randSig.dup();
    reverseColumnsInPlace(randSigBackwards.slice(0));
    final Pair<Gradient, INDArray> backprop1 = forwardsLSTM.backpropGradient(randSig);
    final Pair<Gradient, INDArray> backprop2 = bidirectionalLSTM.backpropGradient(randSig);
    //compare gradients
    assertArrayEquals(backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY).dup().data().asFloat(), backprop2.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS).dup().data().asFloat(), 1e-5f);
    assertArrayEquals(backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY).dup().data().asFloat(), backprop2.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS).dup().data().asFloat(), 1e-5f);
    assertArrayEquals(backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.BIAS_KEY).dup().data().asFloat(), backprop2.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS).dup().data().asFloat(), 1e-5f);
    //copy forwards to backwards
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS, bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS, bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS));
    //zero out forwards layer
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS, Nd4j.zeros(recurrentWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, Nd4j.zeros(inputWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS, Nd4j.zeros(biasWeightsB.shape()));
    //run on reversed signal
    final INDArray activation3 = bidirectionalLSTM.activate(sigb).slice(0);
    final INDArray activation3Reverse = activation3.dup();
    reverseColumnsInPlace(activation3Reverse);
    assertEquals(activation3Reverse, activation1);
    assertArrayEquals(activation3Reverse.shape(), activation1.shape());
    //test backprop now
    final INDArray refBackGradientReccurrent = backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    final INDArray refBackGradientInput = backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    final INDArray refBackGradientBias = backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.BIAS_KEY);
    //reverse weights only with backwards signal should yield same result as forwards weights with forwards signal
    final Pair<Gradient, INDArray> backprop3 = bidirectionalLSTM.backpropGradient(randSigBackwards);
    final INDArray backGradientRecurrent = backprop3.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    final INDArray backGradientInput = backprop3.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    final INDArray backGradientBias = backprop3.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);
    assertArrayEquals(refBackGradientBias.dup().data().asDouble(), backGradientBias.dup().data().asDouble(), 1e-6);
    assertArrayEquals(refBackGradientInput.dup().data().asDouble(), backGradientInput.dup().data().asDouble(), 1e-6);
    assertArrayEquals(refBackGradientReccurrent.dup().data().asDouble(), backGradientRecurrent.dup().data().asDouble(), 1e-6);
    final INDArray refEpsilon = backprop1.getSecond().dup();
    final INDArray backEpsilon = backprop3.getSecond().dup();
    reverseColumnsInPlace(refEpsilon.slice(0));
    assertArrayEquals(backEpsilon.dup().data().asDouble(), refEpsilon.dup().data().asDouble(), 1e-6);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)

Aggregations

UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution): 22 usages
Test (org.junit.Test): 20 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 19 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 14 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 10 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 10 usages
Random (java.util.Random): 6 usages
DataSet (org.nd4j.linalg.dataset.DataSet): 6 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 4 usages
Field (java.lang.reflect.Field): 3 usages
ArrayList (java.util.ArrayList): 3 usages
org.deeplearning4j.nn.conf.layers (org.deeplearning4j.nn.conf.layers): 3 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 3 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 3 usages
Activation (org.nd4j.linalg.activations.Activation): 3 usages
ILossFunction (org.nd4j.linalg.lossfunctions.ILossFunction): 3 usages
LossFunctions (org.nd4j.linalg.lossfunctions.LossFunctions): 3 usages
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 2 usages
Evaluation (org.deeplearning4j.eval.Evaluation): 2 usages
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 2 usages