
Example 11 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

From the class GradientCheckTestsMasking, the method testPerOutputMaskingMLP:

@Test
public void testPerOutputMaskingMLP() {
    int nIn = 6;
    int layerSize = 4;
    INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0 });
    INDArray mask3 = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 0, 1, 0 }, { 1, 0, 0, 1, 1 } });
    INDArray[] labelMasks = new INDArray[] { mask1, mask3 };
    ILossFunction[] lossFunctions = new ILossFunction[] {
            new LossBinaryXENT(),
            //new LossCosineProximity(),    //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
            new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(),
            new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(),
            //new LossMCXENT(),             //Per output masking on MCXENT+Softmax: not yet supported
            new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
            new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    Activation[] act = new Activation[] {
            Activation.SIGMOID,  //XENT
            Activation.TANH,     //Hinge
            Activation.SIGMOID,  //KLD
            Activation.SOFTMAX,  //KLD + softmax
            Activation.TANH,     //L1
            Activation.TANH,     //L2
            Activation.TANH,     //MAE
            Activation.SOFTMAX,  //MAE + softmax
            Activation.TANH,     //MAPE
            Activation.SOFTMAX,  //MAPE + softmax
            Activation.SIGMOID,  //MCXENT + sigmoid
            Activation.TANH,     //MSE
            Activation.SOFTMAX,  //MSE + softmax
            Activation.SIGMOID,  //MSLE - needs positive labels/activations (due to log)
            Activation.SOFTMAX,  //MSLE + softmax
            Activation.SIGMOID,  //NLL
            Activation.SIGMOID,  //Poisson
            Activation.TANH };   //Squared hinge
    for (INDArray labelMask : labelMasks) {
        int minibatch = labelMask.size(0);
        int nOut = labelMask.size(1);
        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .seed(12345)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
                    .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            net.setLayerMaskArrays(null, labelMask);
            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];
            String msg = "testPerOutputMaskingMLP(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a;
            System.out.println(msg);
            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
            assertTrue(msg, gradOK);
        }
    }
}
Also used : Activation(org.nd4j.linalg.activations.Activation) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) Test(org.junit.Test)
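In the test above, the property being checked is that each entry of the label mask selects whether the corresponding (example, output) pair contributes to the loss at all. The framework-free sketch below (plain Java, with a hypothetical helper name maskedMseScore that is not part of the DL4J or ND4J API) illustrates that idea for a mean-squared-error loss; details such as how the average is normalised are assumptions of this sketch, not a statement about DL4J internals.

// Simplified illustration of per-output masking (not DL4J code).
// Entries with mask == 0 contribute nothing to the score, so their label values are irrelevant.
public class MaskedLossSketch {

    // Hypothetical helper: masked mean squared error, averaged over the unmasked entries.
    static double maskedMseScore(double[][] predictions, double[][] labels, double[][] mask) {
        double sum = 0.0;
        int count = 0;
        for (int i = 0; i < predictions.length; i++) {
            for (int j = 0; j < predictions[i].length; j++) {
                if (mask[i][j] == 0.0) continue;              // masked output: skipped entirely
                double diff = predictions[i][j] - labels[i][j];
                sum += diff * diff;
                count++;
            }
        }
        return count == 0 ? 0.0 : sum / count;
    }

    public static void main(String[] args) {
        double[][] pred = { { 0.2, 0.8, 0.5 } };
        double[][] lab  = { { 0.0, 1.0, 999.0 } };            // last label is nonsense...
        double[][] mask = { { 1.0, 1.0, 0.0 } };              // ...but it is masked, so the score ignores it
        System.out.println(maskedMseScore(pred, lab, mask));  // 0.04, regardless of the masked label
    }
}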

Example 12 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

From the class LossFunctionGradientCheck, the method lossFunctionGradientCheck:

@Test
public void lossFunctionGradientCheck() {
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(), new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL1(), new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    String[] outputActivationFn = new String[] {
            "sigmoid",      //xent
            "sigmoid",      //xent
            "tanh",         //cosine
            "tanh",         //hinge -> trying to predict 1 or -1
            "sigmoid",      //kld -> probab so should be between 0 and 1
            "softmax",      //kld + softmax
            "tanh",         //l1
            "rationaltanh", //l1
            "softmax",      //l1 + softmax
            "tanh",         //l2
            "softmax",      //l2 + softmax
            "identity",     //mae
            "softmax",      //mae + softmax
            "identity",     //mape
            "softmax",      //mape + softmax
            "softmax",      //mcxent
            "identity",     //mse
            "softmax",      //mse + softmax
            "sigmoid",      //msle - requires positive labels/activations due to log
            "softmax",      //msle + softmax
            "sigmoid",      //nll
            "softmax",      //nll + softmax
            "sigmoid",      //poisson - requires positive predictions due to log... not sure if this is the best option
            "tanh" };       //squared hinge
    int[] nOut = new int[] {
            1, //xent
            3, //xent
            5, //cosine
            3, //hinge
            3, //kld
            3, //kld + softmax
            3, //l1
            3, //l1
            3, //l1 + softmax
            3, //l2
            3, //l2 + softmax
            3, //mae
            3, //mae + softmax
            3, //mape
            3, //mape + softmax
            3, //mcxent
            3, //mse
            3, //mse + softmax
            3, //msle
            3, //msle + softmax
            3, //nll
            3, //nll + softmax
            3, //poisson
            3 }; //squared hinge
    int[] minibatchSizes = new int[] { 1, 3 };
    //        int[] minibatchSizes = new int[]{3};
    List<String> passed = new ArrayList<>();
    List<String> failed = new ArrayList<>();
    for (int i = 0; i < lossFunctions.length; i++) {
        for (int j = 0; j < minibatchSizes.length; j++) {
            String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
            Nd4j.getRandom().setSeed(12345);
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .iterations(1).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .seed(12345).updater(Updater.NONE).regularization(false)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i]).activation(outputActivationFn[i]).nIn(4).nOut(nOut[i]).build())
                    .pretrain(false).backprop(true).build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
            INDArray input = inOut[0];
            INDArray labels = inOut[1];
            log.info(" ***** Starting test: {} *****", testName);
            //                System.out.println(Arrays.toString(labels.data().asDouble()));
            //                System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
            //                System.out.println(net.score(new DataSet(input,labels)));
            boolean gradOK;
            try {
                gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
            } catch (Exception e) {
                e.printStackTrace();
                failed.add(testName + "\t" + "EXCEPTION");
                continue;
            }
            if (gradOK) {
                passed.add(testName);
            } else {
                failed.add(testName);
            }
            System.out.println("\n\n");
        }
    }
    System.out.println("---- Passed ----");
    for (String s : passed) {
        System.out.println(s);
    }
    System.out.println("---- Failed ----");
    for (String s : failed) {
        System.out.println(s);
    }
    assertEquals("Tests failed", 0, failed.size());
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) ArrayList(java.util.ArrayList) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)
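As background, GradientCheckUtil.checkGradients compares the analytic (backprop) gradient against a numeric gradient estimated with a central difference, (f(theta + eps) - f(theta - eps)) / (2 * eps), and flags a failure when the relative error exceeds the configured threshold. The sketch below shows that core idea on a toy scalar function; it is an illustration only, not the DL4J implementation, and the names used are hypothetical.

import java.util.function.DoubleUnaryOperator;

// Toy illustration of a central-difference gradient check (not DL4J code).
public class NumericGradientSketch {

    // Numeric estimate of df/dtheta via central differences.
    static double numericGradient(DoubleUnaryOperator f, double theta, double eps) {
        return (f.applyAsDouble(theta + eps) - f.applyAsDouble(theta - eps)) / (2.0 * eps);
    }

    public static void main(String[] args) {
        DoubleUnaryOperator f = x -> x * x * x;   // f(x) = x^3, analytic gradient 3x^2
        double theta = 2.0;
        double eps = 1e-6;

        double analytic = 3.0 * theta * theta;
        double numeric = numericGradient(f, theta, eps);

        // Relative error, conceptually the quantity the gradient-check utilities threshold
        double relError = Math.abs(analytic - numeric) / Math.max(Math.abs(analytic), Math.abs(numeric));
        System.out.println("analytic=" + analytic + ", numeric=" + numeric + ", relError=" + relError);
    }
}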

Example 13 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

From the class TestMasking, the method testPerOutputMaskingMLN:

@Test
public void testPerOutputMaskingMLN() {
    //Idea: for per-output masking, the contents of the masked label entries should make zero difference to either
    // the score or the gradients
    int nIn = 6;
    int layerSize = 4;
    INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0 });
    INDArray mask3 = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 0, 1, 0 }, { 1, 0, 0, 1, 1 } });
    INDArray[] labelMasks = new INDArray[] { mask1, mask3 };
    ILossFunction[] lossFunctions = new ILossFunction[] {
            new LossBinaryXENT(),
            //new LossCosineProximity(),    //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
            new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(),
            new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(),
            //new LossMCXENT(),             //Per output masking on MCXENT+Softmax: not yet supported
            new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
            new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    Activation[] act = new Activation[] {
            Activation.SIGMOID,  //XENT
            Activation.TANH,     //Hinge
            Activation.SIGMOID,  //KLD
            Activation.SOFTMAX,  //KLD + softmax
            Activation.TANH,     //L1
            Activation.TANH,     //L2
            Activation.TANH,     //MAE
            Activation.SOFTMAX,  //MAE + softmax
            Activation.TANH,     //MAPE
            Activation.SOFTMAX,  //MAPE + softmax
            Activation.SIGMOID,  //MCXENT + sigmoid
            Activation.TANH,     //MSE
            Activation.SOFTMAX,  //MSE + softmax
            Activation.SIGMOID,  //MSLE - needs positive labels/activations (due to log)
            Activation.SOFTMAX,  //MSLE + softmax
            Activation.SIGMOID,  //NLL
            Activation.SIGMOID,  //Poisson
            Activation.TANH };   //Squared hinge
    for (INDArray labelMask : labelMasks) {
        int minibatch = labelMask.size(0);
        int nOut = labelMask.size(1);
        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .seed(12345)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
                    .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            net.setLayerMaskArrays(null, labelMask);
            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];
            net.setInput(features);
            net.setLabels(labels);
            net.computeGradientAndScore();
            double score1 = net.score();
            INDArray grad1 = net.gradient().gradient();
            //Now: change the label values at the masked positions. The masked entries should make no difference to the score or gradients.
            INDArray maskZeroLocations = Nd4j.getExecutioner().execAndReturn(new Not(labelMask.dup()));
            INDArray rand = Nd4j.rand(maskZeroLocations.shape()).muli(0.5);
            //Only the masked values are changed
            INDArray newLabels = labels.add(rand.muli(maskZeroLocations));
            net.setLabels(newLabels);
            net.computeGradientAndScore();
            assertNotEquals(labels, newLabels);
            double score2 = net.score();
            INDArray grad2 = net.gradient().gradient();
            assertEquals(score1, score2, 1e-6);
            assertEquals(grad1, grad2);
            //Do the same for CompGraph
            ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                    .updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .seed(12345)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in")
                    .addLayer("1", new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0")
                    .setOutputs("1").build();
            ComputationGraph graph = new ComputationGraph(conf2);
            graph.init();
            graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
            graph.setInputs(features);
            graph.setLabels(labels);
            graph.computeGradientAndScore();
            double gScore1 = graph.score();
            INDArray gGrad1 = graph.gradient().gradient();
            graph.setLabels(newLabels);
            graph.computeGradientAndScore();
            double gScore2 = graph.score();
            INDArray gGrad2 = graph.gradient().gradient();
            assertEquals(gScore1, gScore2, 1e-6);
            assertEquals(gGrad1, gGrad2);
        }
    }
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Activation(org.nd4j.linalg.activations.Activation) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) Not(org.nd4j.linalg.api.ops.impl.transforms.Not) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) Test(org.junit.Test)

Example 14 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

From the class CenterLossOutputLayer, the method computeScore:

/** Compute score after labels and input have been set.
     * @param fullNetworkL1 L1 regularization term for the entire network
     * @param fullNetworkL2 L2 regularization term for the entire network
     * @param training whether score should be calculated at train or test time (this affects things like application of
     *                 dropout, etc)
     * @return score (loss function)
     */
@Override
public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) {
    if (input == null || labels == null)
        throw new IllegalStateException("Cannot calculate score without input and labels");
    this.fullNetworkL1 = fullNetworkL1;
    this.fullNetworkL2 = fullNetworkL2;
    INDArray preOut = preOutput2d(training);
    // center loss has two components:
    // the first is the usual classification loss (encouraging inter-class separation); the second penalizes
    // intra-class dissimilarity (the squared L2 norm of each example's distance from its class centre)
    ILossFunction interClassLoss = layerConf().getLossFn();
    // calculate the intra-class score component
    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray centersForExamples = labels.mmul(centers);
    //        double intraClassScore = intraClassLoss.computeScore(centersForExamples, input, Activation.IDENTITY.getActivationFunction(), maskArray, false);
    INDArray norm2DifferenceSquared = input.sub(centersForExamples).norm2(1);
    norm2DifferenceSquared.muli(norm2DifferenceSquared);
    double sum = norm2DifferenceSquared.sumNumber().doubleValue();
    double lambda = layerConf().getLambda();
    double intraClassScore = lambda / 2.0 * sum;
    //        intraClassScore = intraClassScore * layerConf().getLambda() / 2;
    if (System.getenv("PRINT_CENTERLOSS") != null) {
        System.out.println("Center loss is " + intraClassScore);
    }
    // now calculate the inter-class score component
    double interClassScore = interClassLoss.computeScore(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray, false);
    double score = interClassScore + intraClassScore;
    score += fullNetworkL1 + fullNetworkL2;
    score /= getInputMiniBatchSize();
    this.score = score;
    return score;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction)
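For clarity, the intra-class term computed above is lambda / 2 * sum_i ||x_i - c_{y_i}||^2, where x_i is the pre-output activation of example i and c_{y_i} is the centre of its class; labels.mmul(centers) selects that centre per example when the labels are one-hot, as in typical classification setups. The plain-Java sketch below (hypothetical names, no ND4J) spells out the same quantity. In the layer above, this value is then added to the inter-class loss and the L1/L2 terms, and the total is divided by the minibatch size.

// Simplified sketch of the intra-class (center loss) penalty: lambda/2 * sum_i ||x_i - c_{y_i}||^2.
// Labels are assumed one-hot, so the class index of example i is the argmax of oneHotLabels[i].
public class CenterLossSketch {

    static double intraClassScore(double[][] features, double[][] oneHotLabels, double[][] centers, double lambda) {
        double sum = 0.0;
        for (int i = 0; i < features.length; i++) {
            int classIdx = argmax(oneHotLabels[i]);
            double sq = 0.0;
            for (int j = 0; j < features[i].length; j++) {
                double d = features[i][j] - centers[classIdx][j];
                sq += d * d;                     // squared L2 distance to the class centre
            }
            sum += sq;
        }
        return lambda / 2.0 * sum;
    }

    static int argmax(double[] v) {
        int best = 0;
        for (int i = 1; i < v.length; i++) if (v[i] > v[best]) best = i;
        return best;
    }
}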

Example 15 with ILossFunction

Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.

From the class RnnOutputLayer, the method computeScoreForExamples:

/**Compute the score for each example individually, after labels and input have been set.
     *
     * @param fullNetworkL1 L1 regularization term for the entire network (or, 0.0 to not include regularization)
     * @param fullNetworkL2 L2 regularization term for the entire network (or, 0.0 to not include regularization)
     * @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith example
     */
@Override
public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) {
    if (input == null || labels == null)
        throw new IllegalStateException("Cannot calculate score without input and labels");
    INDArray preOut = preOutput2d(false);
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray scoreArray = lossFunction.computeScoreArray(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);
    //scoreArray: shape [minibatch*timeSeriesLength, 1]
    //Reshape it to [minibatch, timeSeriesLength] then sum over time step
    INDArray scoreArrayTs = TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, input.size(0));
    INDArray summedScores = scoreArrayTs.sum(1);
    double l1l2 = fullNetworkL1 + fullNetworkL2;
    if (l1l2 != 0.0) {
        summedScores.addi(l1l2);
    }
    return summedScores;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction)
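The reshape-and-sum step above turns a flat vector of per-timestep scores, shaped [minibatch * timeSeriesLength, 1], into one score per example by summing over the time dimension, with the network-level L1/L2 penalty added once per example. A minimal plain-Java version of that aggregation is sketched below; it assumes the flat scores are grouped example-by-example (index = example * tsLength + t), which may not match the exact memory layout used by TimeSeriesUtils, so treat it as an illustration only.

// Sketch: sum per-timestep scores into one score per example (not the ND4J implementation).
public class PerExampleScoreSketch {

    static double[] sumOverTime(double[] flatScores, int minibatch, int tsLength, double l1l2Penalty) {
        double[] perExample = new double[minibatch];
        for (int i = 0; i < minibatch; i++) {
            double s = 0.0;
            for (int t = 0; t < tsLength; t++) {
                s += flatScores[i * tsLength + t];   // assumed layout: example-major, time-minor
            }
            perExample[i] = s + l1l2Penalty;         // regularisation added once per example, as in the layer above
        }
        return perExample;
    }
}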

Aggregations

ILossFunction (org.nd4j.linalg.lossfunctions.ILossFunction) - 18 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray) - 17 usages
Test (org.junit.Test) - 6 usages
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration) - 5 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork) - 5 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer) - 4 usages
ArrayList (java.util.ArrayList) - 3 usages
Pair (org.deeplearning4j.berkeley.Pair) - 3 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration) - 3 usages
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution) - 3 usages
UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution) - 3 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer) - 3 usages
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient) - 3 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient) - 3 usages
Activation (org.nd4j.linalg.activations.Activation) - 3 usages
DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException) - 2 usages
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph) - 2 usages
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration) - 1 usage
LossLayer (org.deeplearning4j.nn.conf.layers.LossLayer) - 1 usage
RnnOutputLayer (org.deeplearning4j.nn.conf.layers.RnnOutputLayer) - 1 usage