Example 1 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

The class GradientCheckTestsMasking, method testPerOutputMaskingRnn.

@Test
public void testPerOutputMaskingRnn() {
    //For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard
    // 2d masks (used for per *example* masking)
    int nIn = 4;
    int layerSize = 4;
    int nOut = 4;
    //1 example, TS length 3
    INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 }, new int[] { 1, nOut, 3 }, 'f');
    //1 example, TS length 1
    INDArray mask2 = Nd4j.create(new double[] { 1, 1, 0, 1 }, new int[] { 1, nOut, 1 }, 'f');
    //3 examples, TS length 3
    //Values in 'f' order: example index varies most frequently, then output index (within each time step), followed by time index (least frequently)
    INDArray mask3 = Nd4j.create(new double[] { 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 },
                    new int[] { 3, nOut, 3 }, 'f');
    INDArray[] labelMasks = new INDArray[] { mask1, mask2, mask3 };
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(),
                    //new LossCosineProximity(),    //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
                    new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(),
                    new LossMAE(), new LossMAPE(), new LossMAPE(),
                    //new LossMCXENT(),             //Per output masking on MCXENT+Softmax: not yet supported
                    new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    Activation[] act = new Activation[] {
                    Activation.SIGMOID, //XENT
                    Activation.TANH,    //Hinge
                    Activation.SIGMOID, //KLD
                    Activation.SOFTMAX, //KLD + softmax
                    Activation.TANH,    //L1
                    Activation.TANH,    //L2
                    Activation.TANH,    //MAE
                    Activation.SOFTMAX, //MAE + softmax
                    Activation.TANH,    //MAPE
                    Activation.SOFTMAX, //MAPE + softmax
                    Activation.SIGMOID, //MCXENT + sigmoid
                    Activation.TANH,    //MSE
                    Activation.SOFTMAX, //MSE + softmax
                    Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
                    Activation.SOFTMAX, //MSLE + softmax
                    Activation.SIGMOID, //NLL
                    Activation.SIGMOID, //Poisson
                    Activation.TANH };  //Squared hinge
    for (INDArray labelMask : labelMasks) {
        int minibatch = labelMask.size(0);
        int tsLength = labelMask.size(2);
        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];
            Nd4j.getRandom().setSeed(12345);
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345).list()
                            .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
                            .layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
                            .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            net.setLayerMaskArrays(null, labelMask);
            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, new int[] { minibatch, nIn, tsLength }, new int[] { minibatch, nOut, tsLength }, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];
            String msg = "testPerOutputMaskingRnn(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a;
            System.out.println(msg);
            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
            assertTrue(msg, gradOK);
            //Check the equivalent compgraph:
            Nd4j.getRandom().setSeed(12345);
            ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).seed(12345)
                            .graphBuilder().addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0")
                            .setOutputs("1").build();
            ComputationGraph graph = new ComputationGraph(cg);
            graph.init();
            graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
            gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { features }, new INDArray[] { labels });
            assertTrue(msg + " (compgraph)", gradOK);
        }
    }
}
Also used : Activation(org.nd4j.linalg.activations.Activation) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
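
For orientation, here is a minimal sketch of building a per-output 3d label mask with the [minibatch, nOut, timeSeriesLength] shape the test uses and attaching it via setLayerMaskArrays, exactly as the test does. It assumes an already initialized MultiLayerNetwork called net with an RnnOutputLayer; the dimensions and masked positions are illustrative.

// Hypothetical dimensions, chosen only for illustration
int minibatch = 2;
int nOut = 4;
int tsLength = 3;

// Start from an all-ones mask (nothing masked), then zero out individual (example, output, time) entries
INDArray labelMask = Nd4j.ones(new int[] { minibatch, nOut, tsLength });
labelMask.putScalar(new int[] { 0, 1, 2 }, 0.0);   //mask output 1 of example 0 at time step 2
labelMask.putScalar(new int[] { 1, 3, 0 }, 0.0);   //mask output 3 of example 1 at time step 0

// Attach the label mask (no feature mask), as the test does before checking gradients; 'net' is assumed to exist
net.setLayerMaskArrays(null, labelMask);

A value of 1 keeps the corresponding output/time-step entry in the loss; a value of 0 excludes it.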

Example 2 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

The class LossFunctionGradientCheck, method lossFunctionGradientCheckLossLayer.

@Test
public void lossFunctionGradientCheckLossLayer() {
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(), new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    String[] outputActivationFn = new String[] {
                    "sigmoid",  //xent
                    "sigmoid",  //xent
                    "tanh",     //cosine
                    "tanh",     //hinge -> trying to predict 1 or -1
                    "sigmoid",  //kld -> probab so should be between 0 and 1
                    "softmax",  //kld + softmax
                    "tanh",     //l1
                    "softmax",  //l1 + softmax
                    "tanh",     //l2
                    "softmax",  //l2 + softmax
                    "identity", //mae
                    "softmax",  //mae + softmax
                    "identity", //mape
                    "softmax",  //mape + softmax
                    "softmax",  //mcxent
                    "identity", //mse
                    "softmax",  //mse + softmax
                    "sigmoid",  //msle - requires positive labels/activations due to log
                    "softmax",  //msle + softmax
                    "sigmoid",  //nll
                    "softmax",  //nll + softmax
                    "sigmoid",  //poisson - requires positive predictions due to log... not sure if this is the best option
                    "tanh" };   //squared hinge
    int[] nOut = new int[] {
                    1, //xent
                    3, //xent
                    5, //cosine
                    3, //hinge
                    3, //kld
                    3, //kld + softmax
                    3, //l1
                    3, //l1 + softmax
                    3, //l2
                    3, //l2 + softmax
                    3, //mae
                    3, //mae + softmax
                    3, //mape
                    3, //mape + softmax
                    3, //mcxent
                    3, //mse
                    3, //mse + softmax
                    3, //msle
                    3, //msle + softmax
                    3, //nll
                    3, //nll + softmax
                    3, //poisson
                    3 }; //squared hinge
    int[] minibatchSizes = new int[] { 1, 3 };
    //        int[] minibatchSizes = new int[]{3};
    List<String> passed = new ArrayList<>();
    List<String> failed = new ArrayList<>();
    for (int i = 0; i < lossFunctions.length; i++) {
        for (int j = 0; j < minibatchSizes.length; j++) {
            String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
            Nd4j.getRandom().setSeed(12345);
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
                            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
                            .updater(Updater.NONE).regularization(false)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2)).list()
                            .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
                            .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i]).activation(outputActivationFn[i]).build())
                            .pretrain(false).backprop(true).build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            assertTrue(((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn().getClass() == lossFunctions[i].getClass());
            INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
            INDArray input = inOut[0];
            INDArray labels = inOut[1];
            log.info(" ***** Starting test: {} *****", testName);
            //                System.out.println(Arrays.toString(labels.data().asDouble()));
            //                System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
            //                System.out.println(net.score(new DataSet(input,labels)));
            boolean gradOK;
            try {
                gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
            } catch (Exception e) {
                e.printStackTrace();
                failed.add(testName + "\t" + "EXCEPTION");
                continue;
            }
            if (gradOK) {
                passed.add(testName);
            } else {
                failed.add(testName);
            }
            System.out.println("\n\n");
        }
    }
    System.out.println("---- Passed ----");
    for (String s : passed) {
        System.out.println(s);
    }
    System.out.println("---- Failed ----");
    for (String s : failed) {
        System.out.println(s);
    }
    assertEquals("Tests failed", 0, failed.size());
}
Also used : ArrayList(java.util.ArrayList) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) LossLayer(org.deeplearning4j.nn.conf.layers.LossLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)
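
For contrast with the gradient-check harness, the following is a minimal training-oriented sketch of the same DenseLayer + LossLayer arrangement. It reuses only builder calls that appear in the test above; the layer sizes, the LossMSE/identity pairing, and the random training data are illustrative assumptions.

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                .updater(Updater.NONE)
                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2)).list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                //LossLayer carries no parameters of its own: it only applies the activation + loss to the previous layer's output
                .layer(1, new LossLayer.Builder().lossFunction(new LossMSE()).activation("identity").build())
                .pretrain(false).backprop(true).build();

MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();

//Random illustrative data: features [minibatch, 4], labels [minibatch, 3]
INDArray features = Nd4j.rand(10, 4);
INDArray labels = Nd4j.rand(10, 3);
net.fit(features, labels);

Because the LossLayer has no trainable weights, the preceding DenseLayer provides all parameters, which is what the gradient check above exercises.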

Example 3 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

The class LossFunctionGradientCheck, method lossFunctionWeightedGradientCheck.

@Test
public void lossFunctionWeightedGradientCheck() {
    INDArray[] weights = new INDArray[] { Nd4j.create(new double[] { 0.2, 0.3, 0.5 }), Nd4j.create(new double[] { 1.0, 0.5, 2.0 }) };
    List<String> passed = new ArrayList<>();
    List<String> failed = new ArrayList<>();
    for (INDArray w : weights) {
        ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(w), new LossL1(w), new LossL1(w), new LossL2(w), new LossL2(w), new LossMAE(w), new LossMAE(w), new LossMAPE(w), new LossMAPE(w), new LossMCXENT(w), new LossMSE(w), new LossMSE(w), new LossMSLE(w), new LossMSLE(w), new LossNegativeLogLikelihood(w), new LossNegativeLogLikelihood(w) };
        String[] outputActivationFn = new String[] {
                        "sigmoid",  //xent
                        "tanh",     //l1
                        "softmax",  //l1 + softmax
                        "tanh",     //l2
                        "softmax",  //l2 + softmax
                        "identity", //mae
                        "softmax",  //mae + softmax
                        "identity", //mape
                        "softmax",  //mape + softmax
                        "softmax",  //mcxent
                        "identity", //mse
                        "softmax",  //mse + softmax
                        "sigmoid",  //msle - requires positive labels/activations due to log
                        "softmax",  //msle + softmax
                        "sigmoid",  //nll
                        "softmax" }; //nll + softmax
        int[] minibatchSizes = new int[] { 1, 3 };
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int j = 0; j < minibatchSizes.length; j++) {
                String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j] + "; weights = " + w;
                Nd4j.getRandom().setSeed(12345);
                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
                                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
                                .updater(Updater.NONE).regularization(false)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-3, 3)).list()
                                .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                                .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i]).activation(outputActivationFn[i]).nIn(4).nOut(3).build())
                                .pretrain(false).backprop(true).build();
                MultiLayerNetwork net = new MultiLayerNetwork(conf);
                net.init();
                INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, 3, 12345);
                INDArray input = inOut[0];
                INDArray labels = inOut[1];
                log.info(" ***** Starting test: {} *****", testName);
                //                System.out.println(Arrays.toString(labels.data().asDouble()));
                //                System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
                //                System.out.println(net.score(new DataSet(input,labels)));
                boolean gradOK;
                try {
                    gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                } catch (Exception e) {
                    e.printStackTrace();
                    failed.add(testName + "\t" + "EXCEPTION");
                    continue;
                }
                if (gradOK) {
                    passed.add(testName);
                } else {
                    failed.add(testName);
                }
                System.out.println("\n\n");
            }
        }
    }
    System.out.println("---- Passed ----");
    for (String s : passed) {
        System.out.println(s);
    }
    System.out.println("---- Failed ----");
    for (String s : failed) {
        System.out.println(s);
    }
    assertEquals("Tests failed", 0, failed.size());
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) ArrayList(java.util.ArrayList) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)
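
The weighting in this test enters purely through the loss-function constructors (e.g. new LossMCXENT(w)). As a minimal sketch, assuming a 3-output classifier, a weighted loss could be attached to an output layer as follows; the weight values and layer sizes are illustrative and only constructors/builders already shown in the test are used.

//One weight per output unit: down-weight the first output, emphasize the third (assumed values)
INDArray lossWeights = Nd4j.create(new double[] { 0.5, 1.0, 2.0 });

OutputLayer outputLayer = new OutputLayer.Builder()
                .lossFunction(new LossMCXENT(lossWeights))   //weighted multi-class cross entropy
                .activation("softmax")
                .nIn(4).nOut(3)
                .build();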

Example 4 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

The class VariationalAutoencoder, method reconstructionError.

/**
     * Return the reconstruction error for this variational autoencoder.<br>
     * <b>NOTE (important):</b> This method is used ONLY for VAEs that have a standard neural network loss function (i.e.,
     * an {@link org.nd4j.linalg.lossfunctions.ILossFunction} instance such as mean squared error) instead of using a
     * probabilistic reconstruction distribution P(x|z) for the reconstructions (as presented in the VAE architecture by
     * Kingma and Welling).<br>
     * You can check if the VAE has a loss function using {@link #hasLossFunction()}<br>
     * Consequently, the reconstruction error is a simple deterministic function (no Monte-Carlo sampling is required,
     * unlike {@link #reconstructionProbability(INDArray, int)} and {@link #reconstructionLogProbability(INDArray, int)})
     *
     * @param data       The data to calculate the reconstruction error on
     * @return Column vector of reconstruction errors for each example (shape: [numExamples,1])
     */
public INDArray reconstructionError(INDArray data) {
    if (!hasLossFunction()) {
        throw new IllegalStateException("Cannot use reconstructionError method unless the variational autoencoder is " + "configured with a standard loss function (via LossFunctionWrapper). For VAEs utilizing a reconstruction " + "distribution, use the reconstructionProbability or reconstructionLogProbability methods");
    }
    INDArray pZXMean = activate(data, false);
    //Not probabilistic -> "mean" == output
    INDArray reconstruction = generateAtMeanGivenZ(pZXMean);
    if (reconstructionDistribution instanceof CompositeReconstructionDistribution) {
        CompositeReconstructionDistribution c = (CompositeReconstructionDistribution) reconstructionDistribution;
        return c.computeLossFunctionScoreArray(data, reconstruction);
    } else {
        LossFunctionWrapper lfw = (LossFunctionWrapper) reconstructionDistribution;
        ILossFunction lossFunction = lfw.getLossFunction();
        //The "reconstruction" here is the network output with the activation function already applied,
        // so we don't want to apply it again. i.e., we are passing the output, not the pre-output - hence ActivationIdentity
        return lossFunction.computeScoreArray(data, reconstruction, new ActivationIdentity(), null);
    }
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) ActivationIdentity(org.nd4j.linalg.activations.impl.ActivationIdentity) CompositeReconstructionDistribution(org.deeplearning4j.nn.conf.layers.variational.CompositeReconstructionDistribution) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) LossFunctionWrapper(org.deeplearning4j.nn.conf.layers.variational.LossFunctionWrapper)
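
A hedged usage sketch for the method above: it assumes net is an initialized MultiLayerNetwork whose layer 0 is the variational autoencoder layer, configured with a LossFunctionWrapper rather than a probabilistic reconstruction distribution, and that data is a [numExamples, nIn] feature matrix. Only methods referenced in the Javadoc above (hasLossFunction, reconstructionError, reconstructionLogProbability) are used; the layer index and sample count are assumptions.

//Assumes: net is an initialized/trained MultiLayerNetwork, layer 0 is the VAE layer, data is [numExamples, nIn]
org.deeplearning4j.nn.layers.variational.VariationalAutoencoder vae =
                (org.deeplearning4j.nn.layers.variational.VariationalAutoencoder) net.getLayer(0);

if (vae.hasLossFunction()) {
    //Deterministic, no Monte-Carlo sampling: one reconstruction error per example, shape [numExamples, 1]
    INDArray reconstructionErrors = vae.reconstructionError(data);
    System.out.println(reconstructionErrors);
} else {
    //Probabilistic reconstruction distribution: use reconstructionLogProbability instead (5 Monte-Carlo samples, assumed)
    INDArray logProb = vae.reconstructionLogProbability(data, 5);
    System.out.println(logProb);
}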

Example 5 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

The class BaseOutputLayer, method getGradientsAndDelta.

/** Returns a pair {Gradient, Delta} given the pre-output activations (preOut) */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d();
    if (labels2d.size(1) != preOut.size(1)) {
        throw new DL4JInvalidInputException("Labels array numColumns (size(1) = " + labels2d.size(1) + ") does not match output layer" + " number of outputs (nOut = " + preOut.size(1) + ")");
    }
    //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);
    Gradient gradient = new DefaultGradient();
    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
    //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0);
    biasGradView.assign(delta.sum(0));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);
    gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    return new Pair<>(gradient, delta);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException) Pair(org.deeplearning4j.berkeley.Pair)
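
To make the gemm comment above concrete, here is a small sketch (with illustrative shapes) checking that Nd4j.gemm(input, delta, out, true, false, 1.0, 0.0) matches input.transpose().mmul(delta):

int minibatch = 5, nIn = 3, nOut = 2;
INDArray input = Nd4j.rand(minibatch, nIn);
INDArray delta = Nd4j.rand(minibatch, nOut);

//gemm writes input^T * delta directly into 'out' (transposeA = true, alpha = 1, beta = 0);
//the result array is created in 'f' order, matching the f-ordered gradient view used in the layer code
INDArray out = Nd4j.create(new int[] { nIn, nOut }, 'f');
Nd4j.gemm(input, delta, out, true, false, 1.0, 0.0);

INDArray reference = input.transpose().mmul(delta);
System.out.println(out.equalsWithEps(reference, 1e-6));   //expected: true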

Aggregations

ILossFunction (org.nd4j.linalg.lossfunctions.ILossFunction): 18
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 17
Test (org.junit.Test): 6
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 5
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 5
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 4
ArrayList (java.util.ArrayList): 3
Pair (org.deeplearning4j.berkeley.Pair): 3
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 3
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 3
UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution): 3
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 3
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 3
Gradient (org.deeplearning4j.nn.gradient.Gradient): 3
Activation (org.nd4j.linalg.activations.Activation): 3
DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException): 2
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 2
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 1
LossLayer (org.deeplearning4j.nn.conf.layers.LossLayer): 1
RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer): 1