
Example 76 with ComputationGraph

use of org.deeplearning4j.nn.graph.ComputationGraph in project deeplearning4j by deeplearning4j.

the class CenterLossOutputLayerTest method getGraph.

private ComputationGraph getGraph(int numLabels, double lambda) {
    Nd4j.getRandom().setSeed(12345);
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
            .updater(Updater.NONE).learningRate(1.0)
            .graphBuilder()
            .addInputs("input1")
            .addLayer("l1", new DenseLayer.Builder().nIn(4).nOut(5)
                    .activation(Activation.RELU).build(), "input1")
            .addLayer("lossLayer", new CenterLossOutputLayer.Builder()
                    .lossFunction(LossFunctions.LossFunction.MCXENT)
                    .nIn(5).nOut(numLabels).lambda(lambda)
                    .activation(Activation.SOFTMAX).build(), "l1")
            .setOutputs("lossLayer")
            .pretrain(false).backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    return graph;
}
Also used : NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph)
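
A minimal usage sketch for this helper, assuming JUnit's assertTrue is in scope (the test name, batch size, and one-hot label construction are illustrative, not from the original test):

@Test
public void centerLossScoreIsFinite() {
    //3 labels, lambda = 0.5; the helper seeds the RNG, so the run is repeatable
    ComputationGraph graph = getGraph(3, 0.5);
    INDArray features = Nd4j.rand(8, 4);
    INDArray labels = Nd4j.zeros(8, 3);
    for (int i = 0; i < 8; i++) {
        //one-hot label per example
        labels.putScalar(new int[] { i, i % 3 }, 1.0);
    }
    graph.setInputs(features);
    graph.setLabels(labels);
    graph.computeGradientAndScore();
    assertTrue(Double.isFinite(graph.score()));
}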

Example 77 with ComputationGraph

use of org.deeplearning4j.nn.graph.ComputationGraph in project deeplearning4j by deeplearning4j.

the class MultiLayerTestRNN method testRnnTimeStepWithPreprocessorGraph.

@Test
public void testRnnTimeStepWithPreprocessorGraph() {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10)
                    .activation(Activation.TANH).build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10)
                    .activation(Activation.TANH).build(), "0")
            .addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "1")
            .setOutputs("2")
            .inputPreProcessor("0", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true)
            .build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    INDArray in = Nd4j.rand(1, 10);
    net.rnnTimeStep(in);
}
Also used : GravesLSTM(org.deeplearning4j.nn.layers.recurrent.GravesLSTM) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)
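
Since rnnTimeStep carries LSTM hidden state across calls, a natural follow-on is to step through a sequence one frame at a time and then reset. A sketch, assuming the same net as above (loop bounds are illustrative):

INDArray out = null;
for (int t = 0; t < 5; t++) {
    //each call consumes one time step; the hidden state is carried over from the previous call
    out = net.rnnTimeStep(Nd4j.rand(1, 10))[0];
}
//clear the stored state before feeding an unrelated sequence
net.rnnClearPreviousState();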

Example 78 with ComputationGraph

use of org.deeplearning4j.nn.graph.ComputationGraph in project deeplearning4j by deeplearning4j.

the class TestMasking method testPerOutputMaskingMLN.

@Test
public void testPerOutputMaskingMLN() {
    //Idea: for per-output masking, the contents of the masked label entries should make zero difference to either
    // the score or the gradients
    int nIn = 6;
    int layerSize = 4;
    INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0 });
    INDArray mask3 = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 0, 1, 0 }, { 1, 0, 0, 1, 1 } });
    INDArray[] labelMasks = new INDArray[] { mask1, mask3 };
    ILossFunction[] lossFunctions = new ILossFunction[] {
            new LossBinaryXENT(),
            //new LossCosineProximity(),   //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
            new LossHinge(),
            new LossKLD(), new LossKLD(),
            new LossL1(), new LossL2(),
            new LossMAE(), new LossMAE(),
            new LossMAPE(), new LossMAPE(),
            //new LossMCXENT(),            //Per-output masking on MCXENT+Softmax: not yet supported
            new LossMCXENT(),
            new LossMSE(), new LossMSE(),
            new LossMSLE(), new LossMSLE(),
            new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    Activation[] act = new Activation[] {
            Activation.SIGMOID,  //XENT
            Activation.TANH,     //Hinge
            Activation.SIGMOID,  //KLD
            Activation.SOFTMAX,  //KLD + softmax
            Activation.TANH,     //L1
            Activation.TANH,     //L2
            Activation.TANH,     //MAE
            Activation.SOFTMAX,  //MAE + softmax
            Activation.TANH,     //MAPE
            Activation.SOFTMAX,  //MAPE + softmax
            Activation.SIGMOID,  //MCXENT + sigmoid
            Activation.TANH,     //MSE
            Activation.SOFTMAX,  //MSE + softmax
            Activation.SIGMOID,  //MSLE - needs positive labels/activations (due to log)
            Activation.SOFTMAX,  //MSLE + softmax
            Activation.SIGMOID,  //NLL
            Activation.SIGMOID,  //Poisson
            Activation.TANH      //Squared hinge
    };
    for (INDArray labelMask : labelMasks) {
        int minibatch = labelMask.size(0);
        int nOut = labelMask.size(1);
        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .seed(12345)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize)
                            .activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut)
                            .lossFunction(lf).activation(a).build())
                    .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            net.setLayerMaskArrays(null, labelMask);
            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];
            net.setInput(features);
            net.setLabels(labels);
            net.computeGradientAndScore();
            double score1 = net.score();
            INDArray grad1 = net.gradient().gradient();
            //Now: change the label values for the masked steps. The masked entries should make
            // zero difference to either the score or the gradients.
            INDArray maskZeroLocations = Nd4j.getExecutioner().execAndReturn(new Not(labelMask.dup()));
            INDArray rand = Nd4j.rand(maskZeroLocations.shape()).muli(0.5);
            //Only the masked values are changed
            INDArray newLabels = labels.add(rand.muli(maskZeroLocations));
            net.setLabels(newLabels);
            net.computeGradientAndScore();
            assertNotEquals(labels, newLabels);
            double score2 = net.score();
            INDArray grad2 = net.gradient().gradient();
            assertEquals(score1, score2, 1e-6);
            assertEquals(grad1, grad2);
            //Do the same for CompGraph
            ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                    .updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                    .seed(12345)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize)
                            .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new OutputLayer.Builder().nIn(layerSize).nOut(nOut)
                            .lossFunction(lf).activation(a).build(), "0")
                    .setOutputs("1")
                    .build();
            ComputationGraph graph = new ComputationGraph(conf2);
            graph.init();
            graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
            graph.setInputs(features);
            graph.setLabels(labels);
            graph.computeGradientAndScore();
            double gScore1 = graph.score();
            INDArray gGrad1 = graph.gradient().gradient();
            graph.setLabels(newLabels);
            graph.computeGradientAndScore();
            double gScore2 = graph.score();
            INDArray gGrad2 = graph.gradient().gradient();
            assertEquals(gScore1, gScore2, 1e-6);
            assertEquals(gGrad1, gGrad2);
        }
    }
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) RnnOutputLayer(org.deeplearning4j.nn.conf.layers.RnnOutputLayer) Activation(org.nd4j.linalg.activations.Activation) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) Not(org.nd4j.linalg.api.ops.impl.transforms.Not) INDArray(org.nd4j.linalg.api.ndarray.INDArray) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) Test(org.junit.Test)
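
For clarity, the Not op used above complements a binary mask in place; a minimal standalone sketch of that inversion (values illustrative):

INDArray mask = Nd4j.create(new double[][] { { 1, 0, 1 }, { 0, 1, 1 } });
//execAndReturn applies the op to the copy and returns the same array
INDArray inverted = Nd4j.getExecutioner().execAndReturn(new Not(mask.dup()));
//inverted is { { 0, 1, 0 }, { 1, 0, 0 } }: ones exactly where label entries are masked out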

Example 79 with ComputationGraph

use of org.deeplearning4j.nn.graph.ComputationGraph in project deeplearning4j by deeplearning4j.

the class TransferLearningCompGraphTest method simpleFineTune.

@Test
public void simpleFineTune() {
    long rng = 12345L;
    DataSet randomData = new DataSet(Nd4j.rand(10, 4), Nd4j.rand(10, 3));
    //original conf
    ComputationGraphConfiguration confToChange = new NeuralNetConfiguration.Builder().seed(rng)
            .optimizationAlgo(OptimizationAlgorithm.LBFGS)
            .updater(Updater.NESTEROVS).momentum(0.99).learningRate(0.01)
            .graphBuilder()
            .addInputs("layer0In")
            .setInputTypes(InputType.feedForward(4))
            .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In")
            .addLayer("layer1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                    LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX)
                    .nIn(3).nOut(3).build(), "layer0")
            .setOutputs("layer1")
            .build();
    //conf with learning parameters changed
    ComputationGraphConfiguration expectedConf = new NeuralNetConfiguration.Builder().seed(rng)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.RMSPROP).learningRate(0.2).regularization(true)
            .graphBuilder()
            .addInputs("layer0In")
            .setInputTypes(InputType.feedForward(4))
            .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).build(), "layer0In")
            .addLayer("layer1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                    LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX)
                    .nIn(3).nOut(3).build(), "layer0")
            .setOutputs("layer1")
            .build();
    ComputationGraph expectedModel = new ComputationGraph(expectedConf);
    expectedModel.init();
    ComputationGraph modelToFineTune = new ComputationGraph(expectedConf);
    modelToFineTune.init();
    modelToFineTune.setParams(expectedModel.params());
    //model after applying changes with transfer learning
    ComputationGraph modelNow = new TransferLearning.GraphBuilder(modelToFineTune)
            .fineTuneConfiguration(new FineTuneConfiguration.Builder().seed(rng)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .updater(Updater.RMSPROP).learningRate(0.2).regularization(true)
                    .build())
            .build();
    //Check json
    assertEquals(expectedConf.toJson(), modelNow.getConfiguration().toJson());
    //Check params after fit
    modelNow.fit(randomData);
    expectedModel.fit(randomData);
    assertEquals(modelNow.score(), expectedModel.score(), 1e-8);
    assertEquals(modelNow.params(), expectedModel.params());
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) DataSet(org.nd4j.linalg.dataset.DataSet) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)
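
The same GraphBuilder pattern accepts any FineTuneConfiguration; a hedged variant on the modelToFineTune above (the ADAM updater and 1e-3 learning rate are illustrative choices, not from the test):

ComputationGraph tuned = new TransferLearning.GraphBuilder(modelToFineTune)
        .fineTuneConfiguration(new FineTuneConfiguration.Builder()
                .updater(Updater.ADAM)
                .learningRate(1e-3)
                .build())
        .build();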

Example 80 with ComputationGraph

use of org.deeplearning4j.nn.graph.ComputationGraph in project deeplearning4j by deeplearning4j.

the class TransferLearningCompGraphTest method testAllWithCNN.

@Test
public void testAllWithCNN() {
    DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10));
    ComputationGraph modelToFineTune = new ComputationGraph(new NeuralNetConfiguration.Builder()
            .seed(123).iterations(1).learningRate(.01).weightInit(WeightInit.XAVIER)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.NESTEROVS).momentum(0.9)
            .graphBuilder()
            .addInputs("layer0In")
            .setInputTypes(InputType.convolutionalFlat(28, 28, 3))
            .addLayer("layer0", new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1).nOut(20)
                    .activation(Activation.IDENTITY).build(), "layer0In")
            .addLayer("layer1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(2, 2).stride(2, 2).build(), "layer0")
            .addLayer("layer2", new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
                    .activation(Activation.IDENTITY).build(), "layer1")
            .addLayer("layer3", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(2, 2).stride(2, 2).build(), "layer2")
            .addLayer("layer4", new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build(), "layer3")
            .addLayer("layer5", new DenseLayer.Builder().activation(Activation.RELU).nOut(250).build(), "layer4")
            .addLayer("layer6", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .nOut(100).activation(Activation.SOFTMAX).build(), "layer5")
            .setOutputs("layer6")
            .backprop(true).pretrain(false)
            .build());
    modelToFineTune.init();
    //this will override the learning configuration set in the model
    NeuralNetConfiguration.Builder overallConf = new NeuralNetConfiguration.Builder().seed(456).learningRate(0.001).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
    FineTuneConfiguration fineTuneConfiguration = new FineTuneConfiguration.Builder().seed(456).learningRate(0.001).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD).build();
    ComputationGraph modelNow = new TransferLearning.GraphBuilder(modelToFineTune)
            .fineTuneConfiguration(fineTuneConfiguration)
            .setFeatureExtractor("layer1")
            .nOutReplace("layer4", 600, WeightInit.XAVIER)
            .removeVertexAndConnections("layer5")
            .removeVertexAndConnections("layer6")
            .addLayer("layer5", new DenseLayer.Builder().activation(Activation.RELU)
                    .nIn(600).nOut(300).build(), "layer4")
            .addLayer("layer6", new DenseLayer.Builder().activation(Activation.RELU)
                    .nIn(300).nOut(150).build(), "layer5")
            .addLayer("layer7", new DenseLayer.Builder().activation(Activation.RELU)
                    .nIn(150).nOut(50).build(), "layer6")
            .addLayer("layer8", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .activation(Activation.SOFTMAX).nIn(50).nOut(10).build(), "layer7")
            .setOutputs("layer8")
            .build();
    ComputationGraph modelExpectedArch = new ComputationGraph(overallConf.graphBuilder()
            .addInputs("layer0In")
            .setInputTypes(InputType.convolutionalFlat(28, 28, 3))
            .addLayer("layer0", new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1).nOut(20)
                    .activation(Activation.IDENTITY).build(), "layer0In")
            .addLayer("layer1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(2, 2).stride(2, 2).build(), "layer0")
            .addLayer("layer2", new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
                    .activation(Activation.IDENTITY).build(), "layer1")
            .addLayer("layer3", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(2, 2).stride(2, 2).build(), "layer2")
            .addLayer("layer4", new DenseLayer.Builder().activation(Activation.RELU).nOut(600).build(), "layer3")
            .addLayer("layer5", new DenseLayer.Builder().activation(Activation.RELU).nOut(300).build(), "layer4")
            .addLayer("layer6", new DenseLayer.Builder().activation(Activation.RELU).nOut(150).build(), "layer5")
            .addLayer("layer7", new DenseLayer.Builder().activation(Activation.RELU).nOut(50).build(), "layer6")
            .addLayer("layer8", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .nOut(10).activation(Activation.SOFTMAX).build(), "layer7")
            .setOutputs("layer8")
            .backprop(true).pretrain(false)
            .build());
    modelExpectedArch.init();
    modelExpectedArch.getVertex("layer0").setLayerAsFrozen();
    modelExpectedArch.getVertex("layer1").setLayerAsFrozen();
    assertEquals(modelExpectedArch.getConfiguration().toJson(), modelNow.getConfiguration().toJson());
    modelNow.setParams(modelExpectedArch.params());
    for (int i = 0; i < 5; i++) {
        modelExpectedArch.fit(randomData);
        modelNow.fit(randomData);
    }
    assertEquals(modelExpectedArch.params(), modelNow.params());
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DataSet(org.nd4j.linalg.dataset.DataSet) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ConvolutionLayer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) Test(org.junit.Test)
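
Because setFeatureExtractor("layer1") freezes "layer0" and "layer1", their parameters should not move during training; a hedged sanity check along those lines (not part of the original test):

INDArray frozenBefore = modelNow.getLayer("layer0").params().dup();
modelNow.fit(randomData);
//frozen layer parameters are untouched by the fit
assertEquals(frozenBefore, modelNow.getLayer("layer0").params());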

Aggregations

ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 109 uses
Test (org.junit.Test): 73 uses
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 63 uses
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 62 uses
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 36 uses
DataSet (org.nd4j.linalg.dataset.DataSet): 25 uses
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 22 uses
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 21 uses
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 19 uses
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 19 uses
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener): 17 uses
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 17 uses
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 14 uses
Layer (org.deeplearning4j.nn.api.Layer): 14 uses
Random (java.util.Random): 11 uses
InMemoryModelSaver (org.deeplearning4j.earlystopping.saver.InMemoryModelSaver): 10 uses
MaxEpochsTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxEpochsTerminationCondition): 10 uses
TrainingMaster (org.deeplearning4j.spark.api.TrainingMaster): 10 uses
MaxTimeIterationTerminationCondition (org.deeplearning4j.earlystopping.termination.MaxTimeIterationTerminationCondition): 9 uses
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 9 uses