Example 1 with LossFunction

Use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.

From the class VaeGradientCheckTests, method testVaeAsMLP.

@Test
public void testVaeAsMLP() {
    //Post pre-training: a VAE can be used as an MLP by taking the mean value of p(z|x) as the output
    //This gradient check tests that usage
    //Activation functions such as relu and hardtanh may randomly fail the check, due to discontinuities in their derivatives
    String[] activFns = { "identity", "tanh" };
    LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "tanh" };
    //use l2vals[i] with l1vals[i]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
    int[][] encoderLayerSizes = new int[][] { { 5 }, { 5, 6 } };
    int[][] decoderLayerSizes = new int[][] { { 6 }, { 7, 8 } };
    Nd4j.getRandom().setSeed(12345);
    for (int minibatch : new int[] { 1, 5 }) {
        INDArray input = Nd4j.rand(minibatch, 4);
        INDArray labels = Nd4j.create(minibatch, 3);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, i % 3, 1.0);
        }
        for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
            int[] encoderSizes = encoderLayerSizes[ls];
            int[] decoderSizes = decoderLayerSizes[ls];
            for (String afn : activFns) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int k = 0; k < l2vals.length; k++) {
                        LossFunction lf = lossFunctions[i];
                        String outputActivation = outputActivations[i];
                        double l2 = l2vals[k];
                        double l1 = l1vals[k];
                        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                                .regularization(true).l2(l2).l1(l1)
                                .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                                .learningRate(1.0).seed(12345L)
                                .list()
                                .layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3)
                                        .encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes)
                                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                        .activation(afn).updater(Updater.SGD).build())
                                .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nIn(3).nOut(3)
                                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                        .updater(Updater.SGD).build())
                                .pretrain(false).backprop(true)
                                .build();
                        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                        mln.init();
                        String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", encLayerSizes = " + Arrays.toString(encoderSizes) + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
                        if (PRINT_RESULTS) {
                            System.out.println(msg);
                            for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                        }
                        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                        assertTrue(msg, gradOK);
                    }
                }
            }
        }
    }
}
Also used: MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)
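
The relu/hardtanh caveat in the comments above comes from how gradient checks work: the analytic gradient is compared against a central-difference estimate, and near a kink in the activation the two-sided estimate straddles the discontinuity in the derivative. A minimal plain-Java sketch of the idea; the class name, epsilon, and test points are illustrative, not DL4J's GradientCheckUtil internals:

import java.util.function.DoubleUnaryOperator;

public class CentralDifferenceCheck {
    // Central-difference estimate: (f(x + eps) - f(x - eps)) / (2 * eps)
    static double numericGradient(DoubleUnaryOperator f, double x, double eps) {
        return (f.applyAsDouble(x + eps) - f.applyAsDouble(x - eps)) / (2 * eps);
    }

    public static void main(String[] args) {
        double eps = 1e-6;

        // Smooth activation (tanh): analytic derivative is 1 - tanh(x)^2,
        // and the numeric estimate agrees to high precision.
        double x = 0.3;
        double analytic = 1 - Math.tanh(x) * Math.tanh(x);
        double numeric = numericGradient(Math::tanh, x, eps);
        System.out.println("tanh rel. error: " + Math.abs(analytic - numeric) / Math.abs(analytic));

        // relu at its kink: the estimate is 0.5, matching neither one-sided
        // derivative (0 or 1). This is the spurious failure the test avoids
        // by restricting activFns to identity and tanh.
        DoubleUnaryOperator relu = v -> Math.max(0, v);
        System.out.println("relu numeric grad at 0: " + numericGradient(relu, 0.0, eps));
    }
}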

Example 2 with LossFunction

Use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.

From the class MultiNeuralNetConfLayerBuilderTest, method testNeuralNetConfigAPI.

@Test
public void testNeuralNetConfigAPI() {
    LossFunction newLoss = LossFunction.SQUARED_LOSS;
    int newNumIn = numIn + 1;
    int newNumOut = numOut + 1;
    WeightInit newWeight = WeightInit.UNIFORM;
    double newDrop = 0.5;
    int[] newFS = new int[] { 3, 3 };
    int newFD = 7;
    int[] newStride = new int[] { 3, 3 };
    Convolution.Type newConvType = Convolution.Type.SAME;
    PoolingType newPoolType = PoolingType.AVG;
    double newCorrupt = 0.5;
    double newSparsity = 0.5;
    HiddenUnit newHidden = HiddenUnit.BINARY;
    VisibleUnit newVisible = VisibleUnit.BINARY;
    MultiLayerConfiguration multiConf1 = new NeuralNetConfiguration.Builder().list()
            .layer(0, new DenseLayer.Builder().nIn(newNumIn).nOut(newNumOut).activation(act).build())
            .layer(1, new DenseLayer.Builder().nIn(newNumIn + 1).nOut(newNumOut + 1).activation(act).build())
            .build();
    NeuralNetConfiguration firstLayer = multiConf1.getConf(0);
    NeuralNetConfiguration secondLayer = multiConf1.getConf(1);
    assertFalse(firstLayer.equals(secondLayer));
}
Also used: HiddenUnit (org.deeplearning4j.nn.conf.layers.RBM.HiddenUnit), VisibleUnit (org.deeplearning4j.nn.conf.layers.RBM.VisibleUnit), WeightInit (org.deeplearning4j.nn.weights.WeightInit), PoolingType (org.deeplearning4j.nn.conf.layers.SubsamplingLayer.PoolingType), Convolution (org.nd4j.linalg.convolution.Convolution), DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer), LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction), Test (org.junit.Test)
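
Across the tests on this page, each LossFunction is paired with a compatible output activation: MCXENT with softmax, XENT with sigmoid, MSE with tanh or identity. A hedged sketch of such a pairing in the same 0.x-era builder API used above; the class name and layer sizes are illustrative:

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction;

public class LossActivationPairing {
    public static void main(String[] args) {
        // MCXENT expects each output row to be a probability distribution,
        // so it is paired with a softmax output activation; MSE would instead
        // pair with tanh or identity, as in the lossFunctions/outputActivations
        // arrays of the gradient-check tests.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345L)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(10).activation("tanh").build())
                .layer(1, new OutputLayer.Builder(LossFunction.MCXENT)
                        .activation("softmax").nIn(10).nOut(3).build())
                .pretrain(false).backprop(true)
                .build();
        System.out.println(conf.toJson());
    }
}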

Example 3 with LossFunction

Use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.

From the class GradientCheckTests, method testRbm.

@Test
public void testRbm() {
    //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
    //Need to run gradient through updater, so that L2 can be applied
    RBM.HiddenUnit[] hiddenFunc = { RBM.HiddenUnit.BINARY, RBM.HiddenUnit.RECTIFIED };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunction[] lossFunctions = { LossFunction.MSE, LossFunction.KL_DIVERGENCE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "sigmoid" };
    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();
    double[] l2vals = { 0.4, 0.0, 0.4 };
    //i.e., use l2vals[i] with l1vals[i]
    double[] l1vals = { 0.0, 0.5, 0.5 };
    for (RBM.HiddenUnit hidunit : hiddenFunc) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunction lf = lossFunctions[i];
                    String outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .regularization(true).l2(l2).l1(l1)
                            .learningRate(1.0)
                            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                            .seed(12345L)
                            .list()
                            .layer(0, new RBM.Builder(hidunit, RBM.VisibleUnit.BINARY).nIn(4).nOut(3)
                                    .weightInit(WeightInit.UNIFORM).updater(Updater.SGD).build())
                            .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                    .weightInit(WeightInit.XAVIER).updater(Updater.SGD)
                                    .activation(outputActivation).build())
                            .pretrain(false).backprop(true)
                            .build();
                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++) mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
                        for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    String msg = "testGradMLP2LayerIrisSimple() - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                    assertTrue(msg, gradOK);
                }
            }
        }
    }
}
Also used: NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler), IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator), DataSet (org.nd4j.linalg.dataset.DataSet), DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator), Test (org.junit.Test)
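
The two loss functions exercised here reduce to short formulas: MSE is the mean of squared differences, and MCXENT (multi-class cross-entropy) is -Σ label_i * log(pred_i) over a row of softmax probabilities. A self-contained plain-Java sketch, independent of ND4J; the class name and values are illustrative:

public class LossSketch {
    // MSE: mean of squared differences over all entries.
    static double mse(double[] labels, double[] preds) {
        double sum = 0;
        for (int i = 0; i < labels.length; i++) {
            double d = preds[i] - labels[i];
            sum += d * d;
        }
        return sum / labels.length;
    }

    // MCXENT for one example: -sum(label_i * log(pred_i)),
    // where preds is a probability distribution (e.g. a softmax output row).
    static double mcxent(double[] labels, double[] preds) {
        double sum = 0;
        for (int i = 0; i < labels.length; i++) {
            sum -= labels[i] * Math.log(preds[i]);
        }
        return sum;
    }

    public static void main(String[] args) {
        double[] oneHot = { 0, 1, 0 };
        double[] softmaxOut = { 0.2, 0.7, 0.1 };
        System.out.println("MSE    = " + mse(oneHot, softmaxOut));
        System.out.println("MCXENT = " + mcxent(oneHot, softmaxOut));
    }
}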

Example 4 with LossFunction

Use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.

From the class GradientCheckTests, method testAutoEncoder.

@Test
public void testAutoEncoder() {
    //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
    //Need to run gradient through updater, so that L2 can be applied
    String[] activFns = { "sigmoid", "tanh" };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "tanh" };
    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();
    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(ds);
    norm.transform(ds);
    double[] l2vals = { 0.2, 0.0, 0.2 };
    //i.e., use l2vals[i] with l1vals[i]
    double[] l1vals = { 0.0, 0.3, 0.3 };
    for (String afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunction lf = lossFunctions[i];
                    String outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];
                    Nd4j.getRandom().setSeed(12345);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .regularization(true).learningRate(1.0).l2(l2).l1(l1)
                            .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                            .seed(12345L)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                            .updater(Updater.SGD)
                            .list()
                            .layer(0, new AutoEncoder.Builder().nIn(4).nOut(3).activation(afn).build())
                            .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3).activation(outputActivation).build())
                            .pretrain(true).backprop(true)
                            .build();
                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++) mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
                        for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    String msg = "testAutoEncoder() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                    assertTrue(msg, gradOK);
                }
            }
        }
    }
}
Also used: NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler), IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator), DataSet (org.nd4j.linalg.dataset.DataSet), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction), NormalizerStandardize (org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator), Test (org.junit.Test)
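
testAutoEncoder applies two preprocessors: NormalizerMinMaxScaler maps each feature into [0, 1], and NormalizerStandardize rescales to zero mean and unit variance. The arithmetic behind both is simple; a plain-Java sketch for a single feature column (class name and values are illustrative, not the ND4J implementation):

public class NormalizationSketch {
    public static void main(String[] args) {
        double[] col = { 4.9, 5.8, 6.3, 7.1 };

        // Min-max scaling: x' = (x - min) / (max - min), mapping values into [0, 1].
        double min = Double.MAX_VALUE, max = -Double.MAX_VALUE;
        for (double v : col) { min = Math.min(min, v); max = Math.max(max, v); }
        for (double v : col) System.out.println("minmax: " + (v - min) / (max - min));

        // Standardization: x' = (x - mean) / std, giving zero mean and unit variance.
        double mean = 0;
        for (double v : col) mean += v;
        mean /= col.length;
        double var = 0;
        for (double v : col) var += (v - mean) * (v - mean);
        double std = Math.sqrt(var / col.length);
        for (double v : col) System.out.println("standardized: " + (v - mean) / std);
    }
}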

Example 5 with LossFunction

Use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.

From the class GradientCheckTests, method testGradientGravesLSTMFull.

@Test
public void testGradientGravesLSTMFull() {
    String[] activFns = { "tanh", "softsign" };
    LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "tanh" };
    int timeSeriesLength = 8;
    int nIn = 7;
    int layerSize = 9;
    int nOut = 4;
    int miniBatchSize = 6;
    Random r = new Random(12345L);
    INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < nIn; j++) {
            for (int k = 0; k < timeSeriesLength; k++) {
                input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
            }
        }
    }
    INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < timeSeriesLength; j++) {
            int idx = r.nextInt(nOut);
            labels.putScalar(new int[] { i, idx, j }, 1.0f);
        }
    }
    //use l2vals[i] with l1vals[i]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
    for (String afn : activFns) {
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                LossFunction lf = lossFunctions[i];
                String outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];
                NeuralNetConfiguration.Builder conf = new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0).seed(12345L);
                if (l1 > 0.0)
                    conf.l1(l1);
                if (l2 > 0.0)
                    conf.l2(l2);
                if (biasL2[k] > 0)
                    conf.l2Bias(biasL2[k]);
                if (biasL1[k] > 0)
                    conf.l1Bias(biasL1[k]);
                NeuralNetConfiguration.ListBuilder conf2 = conf.list()
                        .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .activation(afn).updater(Updater.NONE).build())
                        .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation)
                                .nIn(layerSize).nOut(nOut)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .updater(Updater.NONE).build())
                        .pretrain(false).backprop(true);
                MultiLayerNetwork mln = new MultiLayerNetwork(conf2.build());
                mln.init();
                if (PRINT_RESULTS) {
                    System.out.println("testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1);
                    for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }
                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
                assertTrue(msg, gradOK);
            }
        }
    }
}
Also used: NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Random (java.util.Random), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)
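
The labels in this test use the recurrent layout [miniBatchSize, nOut, timeSeriesLength]: for each (example, time step) pair exactly one class index is set to 1.0. The same construction with plain Java arrays, which makes the layout easier to see; the class name and sizes are illustrative:

import java.util.Arrays;
import java.util.Random;

public class OneHotTimeSeries {
    public static void main(String[] args) {
        int miniBatchSize = 2, nOut = 4, timeSeriesLength = 3;
        Random r = new Random(12345L);
        // Layout: [example][class][time step], matching Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength).
        double[][][] labels = new double[miniBatchSize][nOut][timeSeriesLength];
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < timeSeriesLength; j++) {
                labels[i][r.nextInt(nOut)][j] = 1.0; // one active class per (example, step)
            }
        }
        System.out.println(Arrays.deepToString(labels));
    }
}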

Aggregations

Test (org.junit.Test): 8 uses
LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction): 8 uses
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 7 uses
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 7 uses
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 6 uses
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 6 uses
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 4 uses
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 4 uses
DataSet (org.nd4j.linalg.dataset.DataSet): 4 uses
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 4 uses
DataNormalization (org.nd4j.linalg.dataset.api.preprocessor.DataNormalization): 4 uses
NormalizerMinMaxScaler (org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler): 4 uses
Random (java.util.Random): 2 uses
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 1 use
HiddenUnit (org.deeplearning4j.nn.conf.layers.RBM.HiddenUnit): 1 use
VisibleUnit (org.deeplearning4j.nn.conf.layers.RBM.VisibleUnit): 1 use
PoolingType (org.deeplearning4j.nn.conf.layers.SubsamplingLayer.PoolingType): 1 use
WeightInit (org.deeplearning4j.nn.weights.WeightInit): 1 use
Activation (org.nd4j.linalg.activations.Activation): 1 use
IActivation (org.nd4j.linalg.activations.IActivation): 1 use