
Example 6 with Activation

Use of org.nd4j.linalg.activations.Activation in project deeplearning4j by deeplearning4j.

The class BNGradientCheckTest, method testGradientBNWithCNNandSubsamplingCompGraph.

@Test
public void testGradientBNWithCNNandSubsamplingCompGraph() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    // (d) l1 and l2 values
    Activation[] activFns = { Activation.SIGMOID, Activation.TANH, Activation.IDENTITY };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
    double[] l2vals = { 0.0, 0.1, 0.1 };
    //i.e., use l2vals[j] with l1vals[j]
    double[] l1vals = { 0.0, 0.0, 0.2 };
    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int depth = 2;
    int hw = 5;
    int nOut = 3;
    INDArray input = Nd4j.rand(new int[] { minibatch, depth, hw, hw });
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }
    DataSet ds = new DataSet(input, labels);
    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int j = 0; j < l2vals.length; j++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];
                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                            .seed(12345)
                            .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j]).l2(l2vals[j])
                            .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                            .updater(Updater.NONE)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                            .seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3).activation(afn).build(), "in")
                            .addLayer("1", new BatchNormalization.Builder().build(), "0")
                            .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2).stride(1, 1).build(), "1")
                            .addLayer("3", new BatchNormalization(), "2")
                            .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                            .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut).build(), "4")
                            .setOutputs("5")
                            .setInputTypes(InputType.convolutional(hw, hw, depth))
                            .pretrain(false).backprop(true)
                            .build();
                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();
                    String name = new Object() {
                    }.getClass().getEnclosingMethod().getName();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        net.setInput(0, ds.getFeatures());
                        net.setLabels(ds.getLabels());
                        net.computeGradientAndScore();
                        double scoreBefore = net.score();
                        for (int k = 0; k < 5; k++) net.fit(ds);
                        net.computeGradientAndScore();
                        double scoreAfter = net.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
                        for (int k = 0; k < net.getNumLayers(); k++) System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { input }, new INDArray[] { labels });
                    assertTrue(gradOK);
                }
            }
        }
    }
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) Activation(org.nd4j.linalg.activations.Activation) Random(java.util.Random) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) LossFunctions(org.nd4j.linalg.lossfunctions.LossFunctions) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
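
A conceptual sketch of what the GradientCheckUtil.checkGradients(...) call above verifies: for each parameter, a central-difference estimate of the gradient is compared against the analytic gradient from backprop. This is an illustrative re-implementation of the idea only, not the actual utility (which, for example, also applies the minimum-absolute-error cutoff passed as DEFAULT_MIN_ABS_ERROR); the helper name is made up here, and the network is assumed to already have its inputs and labels set.

private static boolean numericGradientCheck(ComputationGraph net, double eps, double maxRelError) {
    net.computeGradientAndScore();
    INDArray analytic = net.gradient().gradient().dup();   //analytic gradient from backprop
    INDArray params = net.params().dup();
    for (int p = 0; p < params.length(); p++) {
        double orig = params.getDouble(p);
        //Central difference: perturb a single parameter up and down by eps
        params.putScalar(p, orig + eps);
        net.setParams(params);
        net.computeGradientAndScore();
        double scorePlus = net.score();
        params.putScalar(p, orig - eps);
        net.setParams(params);
        net.computeGradientAndScore();
        double scoreMinus = net.score();
        params.putScalar(p, orig);
        net.setParams(params);
        double numeric = (scorePlus - scoreMinus) / (2 * eps);
        double denom = Math.max(Math.abs(analytic.getDouble(p)), Math.abs(numeric));
        double relError = denom == 0.0 ? 0.0 : Math.abs(analytic.getDouble(p) - numeric) / denom;
        if (relError > maxRelError) return false;
    }
    return true;
}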

Example 7 with Activation

Use of org.nd4j.linalg.activations.Activation in project deeplearning4j by deeplearning4j.

The class GradientCheckTests, method testGradientGravesBidirectionalLSTMFull.

@Test
public void testGradientGravesBidirectionalLSTMFull() {
    Activation[] activFns = { Activation.TANH, Activation.SOFTSIGN };
    LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
    int timeSeriesLength = 4;
    int nIn = 2;
    int layerSize = 2;
    int nOut = 2;
    int miniBatchSize = 3;
    Random r = new Random(12345L);
    INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < nIn; j++) {
            for (int k = 0; k < timeSeriesLength; k++) {
                input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
            }
        }
    }
    INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < timeSeriesLength; j++) {
            int idx = r.nextInt(nOut);
            labels.putScalar(new int[] { i, idx, j }, 1.0f);
        }
    }
    //use l2vals[i] with l1vals[i]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
    for (Activation afn : activFns) {
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];
                NeuralNetConfiguration.Builder conf = new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0);
                if (l1 > 0.0)
                    conf.l1(l1);
                if (l2 > 0.0)
                    conf.l2(l2);
                if (biasL2[k] > 0)
                    conf.l2Bias(biasL2[k]);
                if (biasL1[k] > 0)
                    conf.l1Bias(biasL1[k]);
                MultiLayerConfiguration mlc = conf.seed(12345L)
                        .list()
                        .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .activation(afn).updater(Updater.NONE).build())
                        .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize).nOut(nOut)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .updater(Updater.NONE).build())
                        .pretrain(false).backprop(true)
                        .build();
                MultiLayerNetwork mln = new MultiLayerNetwork(mlc);
                mln.init();
                if (PRINT_RESULTS) {
                    System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1);
                    for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }
                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                String msg = "testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK);
            }
        }
    }
}
Also used : Activation(org.nd4j.linalg.activations.Activation) IActivation(org.nd4j.linalg.activations.IActivation) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) Random(java.util.Random) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) LossFunction(org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)
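
For reference, here is one combination from the loops above written out as a standalone sketch (the specific values are chosen for illustration, using only builder calls that already appear in the test): TANH as the GravesBidirectionalLSTM activation, MCXENT loss paired with a SOFTMAX output activation, and L2 on the weights plus a separate L2 term on the biases via l2Bias(...).

MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder()
        .regularization(true).l2(0.4).l2Bias(0.2)   //weight and bias regularization are configured separately
        .seed(12345L)
        .list()
        .layer(0, new GravesBidirectionalLSTM.Builder().nIn(2).nOut(2)
                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                .activation(Activation.TANH).updater(Updater.NONE).build())
        .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX)
                .nIn(2).nOut(2)
                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                .updater(Updater.NONE).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork mln = new MultiLayerNetwork(mlc);
mln.init();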

Example 8 with Activation

Use of org.nd4j.linalg.activations.Activation in project deeplearning4j by deeplearning4j.

The class TestMasking, method testPerOutputMaskingMLN.

@Test
public void testPerOutputMaskingMLN() {
    //Idea: for per-output masking, the contents of the masked label entries should make zero difference to either
    // the score or the gradients
    int nIn = 6;
    int layerSize = 4;
    INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0 });
    INDArray mask3 = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 0, 1, 0 }, { 1, 0, 0, 1, 1 } });
    INDArray[] labelMasks = new INDArray[] { mask1, mask3 };
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(),
            //new LossCosineProximity(),    //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
            new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(),
            //new LossMCXENT(),             //Per output masking on MCXENT+Softmax: not yet supported
            new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    Activation[] act = new Activation[] {
            Activation.SIGMOID, //XENT
            Activation.TANH,    //Hinge
            Activation.SIGMOID, //KLD
            Activation.SOFTMAX, //KLD + softmax
            Activation.TANH,    //L1
            Activation.TANH,    //L2
            Activation.TANH,    //MAE
            Activation.SOFTMAX, //MAE + softmax
            Activation.TANH,    //MAPE
            Activation.SOFTMAX, //MAPE + softmax
            Activation.SIGMOID, //MCXENT + sigmoid
            Activation.TANH,    //MSE
            Activation.SOFTMAX, //MSE + softmax
            Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
            Activation.SOFTMAX, //MSLE + softmax
            Activation.SIGMOID, //NLL
            Activation.SIGMOID, //Poisson
            Activation.TANH     //Squared hinge
    };
    for (INDArray labelMask : labelMasks) {
        int minibatch = labelMask.size(0);
        int nOut = labelMask.size(1);
        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .seed(12345)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
                    .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            net.setLayerMaskArrays(null, labelMask);
            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];
            net.setInput(features);
            net.setLabels(labels);
            net.computeGradientAndScore();
            double score1 = net.score();
            INDArray grad1 = net.gradient().gradient();
            //Now: change the label values for the masked steps. The score and gradients should be unchanged
            INDArray maskZeroLocations = Nd4j.getExecutioner().execAndReturn(new Not(labelMask.dup()));
            INDArray rand = Nd4j.rand(maskZeroLocations.shape()).muli(0.5);
            //Only the masked values are changed
            INDArray newLabels = labels.add(rand.muli(maskZeroLocations));
            net.setLabels(newLabels);
            net.computeGradientAndScore();
            assertNotEquals(labels, newLabels);
            double score2 = net.score();
            INDArray grad2 = net.gradient().gradient();
            assertEquals(score1, score2, 1e-6);
            assertEquals(grad1, grad2);
            //Do the same for CompGraph
            ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                    .updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .seed(12345)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in")
                    .addLayer("1", new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0")
                    .setOutputs("1")
                    .build();
            ComputationGraph graph = new ComputationGraph(conf2);
            graph.init();
            graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
            graph.setInputs(features);
            graph.setLabels(labels);
            graph.computeGradientAndScore();
            double gScore1 = graph.score();
            INDArray gGrad1 = graph.gradient().gradient();
            graph.setLabels(newLabels);
            graph.computeGradientAndScore();
            double gScore2 = graph.score();
            INDArray gGrad2 = graph.gradient().gradient();
            assertEquals(gScore1, gScore2, 1e-6);
            assertEquals(gGrad1, gGrad2);
        }
    }
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Activation(org.nd4j.linalg.activations.Activation) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) Not(org.nd4j.linalg.api.ops.impl.transforms.Not) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) Test(org.junit.Test)
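
The same per-output masking behaviour can be seen directly at the loss-function level, without building a network. Below is a small sketch (shapes and values are made up for illustration) using the ILossFunction and IActivation APIs: entries where the mask is 0 do not contribute to the score, so changing only those label entries leaves the score unchanged.

INDArray labels = Nd4j.create(new double[][] { { 1, 0, 1 }, { 0, 1, 0 } });
INDArray preOut = Nd4j.rand(2, 3);                                           //pre-activation network output
INDArray mask = Nd4j.create(new double[][] { { 1, 1, 0 }, { 0, 1, 1 } });    //0 = ignore this output

ILossFunction lf = new LossMSE();
IActivation act = Activation.TANH.getActivationFunction();

double score1 = lf.computeScore(labels, preOut, act, mask, true);

//Change only the masked label entries (where mask == 0)
INDArray labels2 = labels.dup();
labels2.putScalar(new int[] { 0, 2 }, 0.0);
labels2.putScalar(new int[] { 1, 0 }, 1.0);
double score2 = lf.computeScore(labels2, preOut, act, mask, true);
//score1 and score2 should be equal (up to numerical precision)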

Example 9 with Activation

Use of org.nd4j.linalg.activations.Activation in project nd4j by deeplearning4j.

The class SameDiffTests, method testActivationBackprop.

@Test
public void testActivationBackprop() {
    Activation[] afns = new Activation[] { Activation.TANH, Activation.SIGMOID, Activation.ELU, Activation.SOFTPLUS, Activation.SOFTSIGN, Activation.HARDTANH,
            Activation.CUBE,      // WRONG output - see issue https://github.com/deeplearning4j/nd4j/issues/2426
            Activation.RELU,      // JVM crash
            Activation.LEAKYRELU  // JVM crash
    };
    for (Activation a : afns) {
        SameDiff sd = SameDiff.create();
        INDArray inArr = Nd4j.linspace(-3, 3, 7);
        INDArray labelArr = Nd4j.linspace(-3, 3, 7).muli(0.5);
        SDVariable in = sd.var("in", inArr.dup());
        // System.out.println("inArr: " + inArr);
        INDArray outExp;
        SDVariable out;
        switch(a) {
            case ELU:
                out = sd.elu("out", in);
                outExp = Transforms.elu(inArr, true);
                break;
            case HARDTANH:
                out = sd.hardTanh("out", in);
                outExp = Transforms.hardTanh(inArr, true);
                break;
            case LEAKYRELU:
                out = sd.leakyRelu("out", in, 0.01);
                outExp = Transforms.leakyRelu(inArr, true);
                break;
            case RELU:
                out = sd.relu("out", in, 0.0);
                outExp = Transforms.relu(inArr, true);
                break;
            case SIGMOID:
                out = sd.sigmoid("out", in);
                outExp = Transforms.sigmoid(inArr, true);
                break;
            case SOFTPLUS:
                out = sd.softplus("out", in);
                outExp = Transforms.softPlus(inArr, true);
                break;
            case SOFTSIGN:
                out = sd.softsign("out", in);
                outExp = Transforms.softsign(inArr, true);
                break;
            case TANH:
                out = sd.tanh("out", in);
                outExp = Transforms.tanh(inArr, true);
                break;
            case CUBE:
                out = sd.cube("out", in);
                outExp = Transforms.pow(inArr, 3, true);
                break;
            default:
                throw new RuntimeException(a.toString());
        }
        // Sum squared error loss:
        SDVariable label = sd.var("label", labelArr.dup());
        SDVariable diff = label.sub("diff", out);
        SDVariable sqDiff = diff.mul("sqDiff", diff);
        // Loss function...
        SDVariable totSum = sd.sum("totSum", sqDiff, Integer.MAX_VALUE);
        sd.exec();
        INDArray outAct = sd.getVariable("out").getArr();
        assertEquals(a.toString(), outExp, outAct);
        // L = sum_i (label - out)^2
        // dL/dOut = 2(out - label)
        INDArray dLdOutExp = outExp.sub(labelArr).mul(2);
        INDArray dLdInExp = a.getActivationFunction().backprop(inArr.dup(), dLdOutExp.dup()).getFirst();
        sd.execBackwards();
        SameDiff gradFn = sd.getFunction("grad");
        INDArray dLdOutAct = gradFn.getVariable("out-grad").getArr();
        INDArray dLdInAct = gradFn.getVariable("in-grad").getArr();
        assertEquals(a.toString(), dLdOutExp, dLdOutAct);
        assertEquals(a.toString(), dLdInExp, dLdInAct);
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) Activation(org.nd4j.linalg.activations.Activation) Test(org.junit.Test)
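
Outside of SameDiff, the same forward and backward computations can be obtained directly from the Activation enum, as the expected-value lines in the test above do via getActivationFunction(). A minimal sketch (the upstream gradient values here are just illustrative placeholders):

IActivation tanh = Activation.TANH.getActivationFunction();

INDArray in = Nd4j.linspace(-3, 3, 7);
INDArray out = tanh.getActivation(in.dup(), true);             //forward pass, applied in place on the copy

INDArray dLdOut = Nd4j.ones(1, 7);                             //upstream gradient dL/dOut (placeholder values)
INDArray dLdIn = tanh.backprop(in.dup(), dLdOut).getFirst();   //gradient w.r.t. the input, dL/dIn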

Aggregations

Test (org.junit.Test) 9
Activation (org.nd4j.linalg.activations.Activation) 9
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 9
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration) 6
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork) 6
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution) 5
Random (java.util.Random) 4
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration) 3
UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution) 3
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph) 3
DataSet (org.nd4j.linalg.dataset.DataSet) 3
ILossFunction (org.nd4j.linalg.lossfunctions.ILossFunction) 3
LossFunctions (org.nd4j.linalg.lossfunctions.LossFunctions) 3
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration) 2
Convolution1DLayer (org.deeplearning4j.nn.conf.layers.Convolution1DLayer) 1
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer) 1
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer) 1
RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer) 1
Subsampling1DLayer (org.deeplearning4j.nn.conf.layers.Subsampling1DLayer) 1
SubsamplingLayer (org.deeplearning4j.nn.conf.layers.SubsamplingLayer) 1