
Example 46 with MultiLayerNetwork

Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

From class VaeGradientCheckTests, method testVaePretrain:

@Test
public void testVaePretrain() {
    //Activation functions such as relu and hardtanh may randomly fail gradient checks due to discontinuities
    String[] activFns = { "identity", "identity", "tanh", "tanh" };
    String[] pzxAfns = { "identity", "tanh", "identity", "tanh" };
    String[] pxzAfns = { "tanh", "identity", "tanh", "identity" };
    //use l2vals[i] with l1vals[i]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
    int[][] encoderLayerSizes = new int[][] { { 5 }, { 5, 6 } };
    int[][] decoderLayerSizes = new int[][] { { 6 }, { 7, 8 } };
    Nd4j.getRandom().setSeed(12345);
    for (int minibatch : new int[] { 1, 5 }) {
        INDArray features = Nd4j.rand(minibatch, 4);
        for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
            int[] encoderSizes = encoderLayerSizes[ls];
            int[] decoderSizes = decoderLayerSizes[ls];
            for (int j = 0; j < activFns.length; j++) {
                String afn = activFns[j];
                String pzxAfn = pzxAfns[j];
                String pxzAfn = pxzAfns[j];
                //Ideally we'd do the cartesian product of l1/l2 and the activation functions, but that takes too long...
                double l2 = l2vals[j];
                double l1 = l1vals[j];
                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .regularization(true).l2(l2).l1(l1)
                        .l2Bias(biasL2[j]).l1Bias(biasL1[j])
                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                        .learningRate(1.0).seed(12345L)
                        .weightInit(WeightInit.XAVIER)
                        .list()
                        .layer(0, new VariationalAutoencoder.Builder()
                                .nIn(4).nOut(3)
                                .encoderLayerSizes(encoderSizes)
                                .decoderLayerSizes(decoderSizes)
                                .pzxActivationFunction(pzxAfn)
                                .reconstructionDistribution(new GaussianReconstructionDistribution(pxzAfn))
                                .activation(afn)
                                .updater(Updater.SGD)
                                .build())
                        .pretrain(true).backprop(false)
                        .build();
                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                mln.initGradientsView();
                org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
                String msg = "testVaePretrain() - activationFn=" + afn + ", p(z|x) afn = " + pzxAfn + ", p(x|z) afn = " + pxzAfn + ", encLayerSizes = " + Arrays.toString(encoderSizes) + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
                if (PRINT_RESULTS) {
                    System.out.println(msg);
                    for (int l = 0; l < mln.getnLayers(); l++) {
                        System.out.println("Layer " + l + " # params: " + mln.getLayer(l).numParams());
                    }
                }
                boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, 12345);
                assertTrue(msg, gradOK);
            }
        }
    }
}
Also used: MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Layer (org.deeplearning4j.nn.api.Layer), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)
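The DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS and RETURN_ON_FIRST_FAILURE fields are constants of the test class and are not shown in this excerpt. A minimal sketch of plausible values follows; the exact numbers in VaeGradientCheckTests may differ, and gradient checks of this kind are normally run with ND4J set to double precision:

// Plausible values for the test-class constants referenced above; these are
// assumptions, not the values from the real VaeGradientCheckTests class.
private static final double DEFAULT_EPS = 1e-6;            // finite-difference step size
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;  // maximum allowed relative error
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;  // differences below this are ignored
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;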

Example 47 with MultiLayerNetwork

Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

From class TestComputationGraphNetwork, method testForwardBasicIris:

@Test
public void testForwardBasicIris() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();
    MultiLayerConfiguration mlc = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();
    DataSetIterator iris = new IrisDataSetIterator(150, 150);
    DataSet ds = iris.next();
    graph.setInput(0, ds.getFeatureMatrix());
    Map<String, INDArray> activations = graph.feedForward(false);
    //2 layers + 1 input node
    assertEquals(3, activations.size());
    assertTrue(activations.containsKey("input"));
    assertTrue(activations.containsKey("firstLayer"));
    assertTrue(activations.containsKey("outputLayer"));
    //Now: set parameters of both networks to be identical. Then feedforward, and check we get the same outputs
    Nd4j.getRandom().setSeed(12345);
    int nParams = getNumParams();
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());
    List<INDArray> mlnAct = net.feedForward(ds.getFeatureMatrix(), false);
    activations = graph.feedForward(ds.getFeatureMatrix(), false);
    assertEquals(mlnAct.get(0), activations.get("input"));
    assertEquals(mlnAct.get(1), activations.get("firstLayer"));
    assertEquals(mlnAct.get(2), activations.get("outputLayer"));
}
Also used: IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator), INDArray (org.nd4j.linalg.api.ndarray.INDArray), DataSet (org.nd4j.linalg.dataset.DataSet), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator), RecordReaderMultiDataSetIterator (org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator), MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator), Test (org.junit.Test)
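The getIrisGraphConfiguration() and getIrisMLNConfiguration() helpers are defined elsewhere in TestComputationGraphNetwork. As a rough sketch, the MultiLayerNetwork variant could look like the following; the 4 -> 5 -> 3 layer sizes and activations are assumptions chosen to fit the Iris data and the layer names asserted above:

// Hypothetical stand-in for getIrisMLNConfiguration(); layer sizes are assumed.
private static MultiLayerConfiguration getIrisMLNConfiguration() {
    return new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .list()
            // corresponds to "firstLayer" in the graph version
            .layer(0, new DenseLayer.Builder().nIn(4).nOut(5).activation("tanh").build())
            // corresponds to "outputLayer" in the graph version
            .layer(1, new OutputLayer.Builder(LossFunction.MCXENT)
                    .activation("softmax").nIn(5).nOut(3).build())
            .pretrain(false).backprop(true)
            .build();
}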

Example 48 with MultiLayerNetwork

Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

From class TestComputationGraphNetwork, method testIrisFit:

@Test
public void testIrisFit() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();
    MultiLayerConfiguration mlnConfig = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlnConfig);
    net.init();
    Nd4j.getRandom().setSeed(12345);
    int nParams = getNumParams();
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());
    DataSetIterator iris = new IrisDataSetIterator(75, 150);
    net.fit(iris);
    iris.reset();
    graph.fit(iris);
    //Check that parameters are equal for both models after fitting:
    INDArray paramsMLN = net.params();
    INDArray paramsGraph = graph.params();
    assertNotEquals(params, paramsGraph);
    assertEquals(paramsMLN, paramsGraph);
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator), RecordReaderMultiDataSetIterator (org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator), MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator), Test (org.junit.Test)
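getNumParams() is another test-class helper. Assuming the 4 -> 5 -> 3 sizes sketched above, it would simply count weights plus biases for both layers:

// Hypothetical getNumParams() under the assumed 4 -> 5 -> 3 layer sizes:
// (4*5 + 5) parameters in the first layer plus (5*3 + 3) in the output layer.
private static int getNumParams() {
    return (4 * 5 + 5) + (5 * 3 + 3); // = 43
}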

Example 49 with MultiLayerNetwork

Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

From class TestComputationGraphNetwork, method testScoringDataSet:

@Test
public void testScoringDataSet() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();
    MultiLayerConfiguration mlc = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();
    DataSetIterator iris = new IrisDataSetIterator(150, 150);
    DataSet ds = iris.next();
    //Now: set parameters of both networks to be identical. Then feedforward, and check we get the same score
    Nd4j.getRandom().setSeed(12345);
    int nParams = getNumParams();
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());
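    // The boolean passed to score(...) is the 'training' flag; false scores in
    // inference mode, so both models compute the loss without train-time
    // behavior such as dropout.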
    double scoreMLN = net.score(ds, false);
    double scoreCG = graph.score(ds, false);
    assertEquals(scoreMLN, scoreCG, 1e-4);
}
Also used: IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator), INDArray (org.nd4j.linalg.api.ndarray.INDArray), DataSet (org.nd4j.linalg.dataset.DataSet), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator), RecordReaderMultiDataSetIterator (org.deeplearning4j.datasets.datavec.RecordReaderMultiDataSetIterator), MultiDataSetIterator (org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator), Test (org.junit.Test)

Example 50 with MultiLayerNetwork

Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

From class VaeGradientCheckTests, method testVaeAsMLP:

@Test
public void testVaeAsMLP() {
    //Post pre-training: a VAE can be used as an MLP by taking the mean value from p(z|x) as the output
    //This gradient check tests that part
    //Activation functions such as relu and hardtanh may randomly fail gradient checks due to discontinuities
    String[] activFns = { "identity", "tanh" };
    LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "tanh" };
    //use l2vals[i] with l1vals[i]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
    int[][] encoderLayerSizes = new int[][] { { 5 }, { 5, 6 } };
    int[][] decoderLayerSizes = new int[][] { { 6 }, { 7, 8 } };
    Nd4j.getRandom().setSeed(12345);
    for (int minibatch : new int[] { 1, 5 }) {
        INDArray input = Nd4j.rand(minibatch, 4);
        INDArray labels = Nd4j.create(minibatch, 3);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, i % 3, 1.0);
        }
        for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
            int[] encoderSizes = encoderLayerSizes[ls];
            int[] decoderSizes = decoderLayerSizes[ls];
            for (String afn : activFns) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int k = 0; k < l2vals.length; k++) {
                        LossFunction lf = lossFunctions[i];
                        String outputActivation = outputActivations[i];
                        double l2 = l2vals[k];
                        double l1 = l1vals[k];
                        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                                .regularization(true).l2(l2).l1(l1)
                                .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                                .learningRate(1.0).seed(12345L)
                                .list()
                                .layer(0, new VariationalAutoencoder.Builder()
                                        .nIn(4).nOut(3)
                                        .encoderLayerSizes(encoderSizes)
                                        .decoderLayerSizes(decoderSizes)
                                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                        .activation(afn)
                                        .updater(Updater.SGD)
                                        .build())
                                .layer(1, new OutputLayer.Builder(lf)
                                        .activation(outputActivation)
                                        .nIn(3).nOut(3)
                                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                        .updater(Updater.SGD)
                                        .build())
                                .pretrain(false).backprop(true)
                                .build();
                        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                        mln.init();
                        String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", encLayerSizes = " + Arrays.toString(encoderSizes) + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
                        if (PRINT_RESULTS) {
                            System.out.println(msg);
                            for (int j = 0; j < mln.getnLayers(); j++) {
                                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                            }
                        }
                        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                        assertTrue(msg, gradOK);
                    }
                }
            }
        }
    }
}
Also used: MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution), LossFunction (org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)
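Once trained this way, the network is used like any other MLP: the forward pass takes the mean of p(z|x) as the VAE layer's activation and feeds it into the output layer. A minimal usage sketch, where the input row is hypothetical:

// Hypothetical single 4-feature example; mln is a trained network as above.
INDArray input = Nd4j.rand(1, 4);
// Inference-mode forward pass: VAE layer (mean of p(z|x)) -> output layer.
INDArray prediction = mln.output(input, false);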

Aggregations

MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 326 uses
Test (org.junit.Test): 277 uses
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 206 uses
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 166 uses
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 111 uses
DataSet (org.nd4j.linalg.dataset.DataSet): 91 uses
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 70 uses
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 49 uses
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 43 uses
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener): 41 uses
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 40 uses
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 38 uses
Random (java.util.Random): 34 uses
MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator): 30 uses
ConvolutionLayer (org.deeplearning4j.nn.conf.layers.ConvolutionLayer): 28 uses
DL4JException (org.deeplearning4j.exception.DL4JException): 20 uses
Layer (org.deeplearning4j.nn.api.Layer): 20 uses
ClassPathResource (org.nd4j.linalg.io.ClassPathResource): 20 uses
File (java.io.File): 19 uses
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 19 uses