Search in sources :

Example 56 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class ParameterServerParallelWrapperTest method testWrapper.

@Test
public void testWrapper() throws Exception {
    int nChannels = 1;
    int outputNum = 10;
    // for GPU you usually want to have higher batchSize
    int batchSize = 128;
    int nEpochs = 10;
    int iterations = 1;
    int seed = 123;
    log.info("Load data....");
    DataSetIterator mnistTrain = new MnistDataSetIterator(batchSize, 1000);
    DataSetIterator mnistTest = new MnistDataSetIterator(batchSize, false, 12345);
    log.info("Build model....");
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(seed).iterations(iterations)
                    .regularization(true).l2(0.0005)
                    .learningRate(0.01) //.biasLearningRate(0.02)
                    .weightInit(WeightInit.XAVIER)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .updater(Updater.NESTEROVS).momentum(0.9).list()
                    .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(nChannels).stride(1, 1).nOut(20)
                                    .activation(Activation.IDENTITY).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                                    .stride(2, 2).build())
                    .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50)
                                    .activation(Activation.IDENTITY).build())
                    .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2)
                                    .stride(2, 2).build())
                    .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
                    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .nOut(outputNum).activation(Activation.SOFTMAX).build())
                    .setInputType(InputType.convolutionalFlat(28, 28, 1))
                    .backprop(true).pretrain(false);
    MultiLayerConfiguration conf = builder.build();
    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    ParameterServerParallelWrapper parameterServerParallelWrapper = ParameterServerParallelWrapper.builder()
                    .model(model).multiLayerNetwork(model)
                    .numEpochs(10).numWorkers(Runtime.getRuntime().availableProcessors())
                    .statusServerPort(33000).preFetchSize(3)
                    .build();
    parameterServerParallelWrapper.fit(mnistTrain);
    parameterServerParallelWrapper.close();
    Thread.sleep(30000);
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) SubsamplingLayer(org.deeplearning4j.nn.conf.layers.SubsamplingLayer) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ConvolutionLayer(org.deeplearning4j.nn.conf.layers.ConvolutionLayer) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) DataSetIterator(org.nd4j.linalg.dataset.api.iterator.DataSetIterator) MnistDataSetIterator(org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) Test(org.junit.Test)
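
Note that the test above constructs a mnistTest iterator but never evaluates against it. A minimal sketch of how the trained model could be scored on that held-out iterator, assuming the usual Evaluation API (org.deeplearning4j.eval.Evaluation) and DataSet (org.nd4j.linalg.dataset.DataSet) of this DL4J generation; variable names carry over from the test:

log.info("Evaluate model....");
Evaluation eval = new Evaluation(outputNum);
while (mnistTest.hasNext()) {
    DataSet ds = mnistTest.next();
    // forward pass in inference mode, then accumulate accuracy / precision / recall statistics
    INDArray output = model.output(ds.getFeatures(), false);
    eval.eval(ds.getLabels(), output);
}
log.info(eval.stats());
mnistTest.reset();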

Example 57 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class ModelSerializer method taskByModel.

/**
     * Infers a heartbeat {@link Task} from the given model, recording its network type
     * (MultilayerNetwork or ComputationalGraph) and architecture type (e.g. convolutional, recurrent, RBM).
     *
     * @param model the model to inspect
     * @return the populated task, with UNKNOWN/DenseNetwork defaults if the type cannot be determined
     */
public static Task taskByModel(Model model) {
    Task task = new Task();
    try {
        task.setArchitectureType(Task.ArchitectureType.RECURRENT);
        if (model instanceof ComputationGraph) {
            task.setNetworkType(Task.NetworkType.ComputationalGraph);
            ComputationGraph network = (ComputationGraph) model;
            try {
                if (network.getLayers() != null && network.getLayers().length > 0) {
                    for (Layer layer : network.getLayers()) {
                        if (layer instanceof RBM || layer instanceof org.deeplearning4j.nn.layers.feedforward.rbm.RBM) {
                            task.setArchitectureType(Task.ArchitectureType.RBM);
                            break;
                        }
                        if (layer.type().equals(Layer.Type.CONVOLUTIONAL)) {
                            task.setArchitectureType(Task.ArchitectureType.CONVOLUTION);
                            break;
                        } else if (layer.type().equals(Layer.Type.RECURRENT) || layer.type().equals(Layer.Type.RECURSIVE)) {
                            task.setArchitectureType(Task.ArchitectureType.RECURRENT);
                            break;
                        }
                    }
                } else {
                    task.setArchitectureType(Task.ArchitectureType.UNKNOWN);
                }
            } catch (Exception e) {
                // do nothing here
            }
        } else if (model instanceof MultiLayerNetwork) {
            task.setNetworkType(Task.NetworkType.MultilayerNetwork);
            MultiLayerNetwork network = (MultiLayerNetwork) model;
            try {
                if (network.getLayers() != null && network.getLayers().length > 0) {
                    for (Layer layer : network.getLayers()) {
                        if (layer instanceof RBM || layer instanceof org.deeplearning4j.nn.layers.feedforward.rbm.RBM) {
                            task.setArchitectureType(Task.ArchitectureType.RBM);
                            break;
                        }
                        if (layer.type().equals(Layer.Type.CONVOLUTIONAL)) {
                            task.setArchitectureType(Task.ArchitectureType.CONVOLUTION);
                            break;
                        } else if (layer.type().equals(Layer.Type.RECURRENT) || layer.type().equals(Layer.Type.RECURSIVE)) {
                            task.setArchitectureType(Task.ArchitectureType.RECURRENT);
                            break;
                        }
                    }
                } else {
                    task.setArchitectureType(Task.ArchitectureType.UNKNOWN);
                }
            } catch (Exception e) {
                // do nothing here
            }
        }
        return task;
    } catch (Exception e) {
        task.setArchitectureType(Task.ArchitectureType.UNKNOWN);
        task.setNetworkType(Task.NetworkType.DenseNetwork);
        return task;
    }
}
Also used : Task(org.nd4j.linalg.heartbeat.reports.Task) Layer(org.deeplearning4j.nn.api.Layer) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) RBM(org.deeplearning4j.nn.conf.layers.RBM) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork)
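
A brief usage sketch for taskByModel, assuming the Task bean exposes getters matching the setters used above (getNetworkType(), getArchitectureType()); the configuration is the CNN conf from Example 56:

MultiLayerNetwork model = new MultiLayerNetwork(conf);
model.init();
Task task = ModelSerializer.taskByModel(model);
// For the Example 56 network this should report MultilayerNetwork / CONVOLUTION,
// since the first matching layer found is a ConvolutionLayer.
System.out.println(task.getNetworkType() + " / " + task.getArchitectureType());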

Example 58 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class GradientCheckTestsMasking method testBidirectionalLSTMMasking.

@Test
public void testBidirectionalLSTMMasking() {
    //Basic test of GravesLSTM layer
    Nd4j.getRandom().setSeed(12345L);
    int timeSeriesLength = 5;
    int nIn = 5;
    int layerSize = 4;
    int nOut = 3;
    int miniBatchSize = 3;
    INDArray[] masks = new INDArray[] { null,
                    Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 1 } }),
                    Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 0 }, { 1, 1, 1, 0, 0 } }),
                    Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 1, 1, 1 }, { 0, 0, 1, 1, 1 } }) };
    int testNum = 0;
    for (INDArray mask : masks) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false)
                        .updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION)
                        .dist(new NormalDistribution(0, 1.0)).seed(12345L).list()
                        .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)
                                        .activation(Activation.TANH).build())
                        .layer(1, new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize)
                                        .activation(Activation.TANH).build())
                        .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                        .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).build())
                        .pretrain(false).backprop(true).build();
        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();
        Random r = new Random(12345L);
        INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
        for (int i = 0; i < miniBatchSize; i++) {
            for (int j = 0; j < nIn; j++) {
                for (int k = 0; k < timeSeriesLength; k++) {
                    input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
                }
            }
        }
        INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
        for (int i = 0; i < miniBatchSize; i++) {
            // one random one-hot label per time step (timeSeriesLength == nIn == 5 in this test)
            for (int j = 0; j < timeSeriesLength; j++) {
                labels.putScalar(i, r.nextInt(nOut), j, 1.0);
            }
        }
        mln.setLayerMaskArrays(mask, mask);
        if (PRINT_RESULTS) {
            System.out.println("testBidirectionalLSTMMasking() - testNum = " + testNum++);
            for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
        }
        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
        assertTrue(gradOK);
    }
}
Also used : MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Random(java.util.Random) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)
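
The 2d masks used above are per-example, per-time-step masks of shape [miniBatchSize, timeSeriesLength]: a 0 marks a time step to be ignored for that example. A minimal sketch of applying such a mask outside of a gradient check, using the same MultiLayerNetwork API as the test (the mask values here are illustrative):

// miniBatchSize = 3, timeSeriesLength = 5, as in the test above
INDArray featuresMask = Nd4j.create(new double[][] {
        { 1, 1, 1, 1, 1 },
        { 1, 1, 1, 0, 0 },
        { 1, 1, 0, 0, 0 } });
// same mask for input features and labels in this sketch
mln.setLayerMaskArrays(featuresMask, featuresMask);
INDArray out = mln.output(input);   // forward pass with masking applied
mln.clearLayerMaskArrays();         // clear masks before reusing the network on unmasked data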

Example 59 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class GradientCheckTestsMasking method testPerOutputMaskingRnn.

@Test
public void testPerOutputMaskingRnn() {
    //For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard
    // 2d masks (used for per *example* masking)
    int nIn = 4;
    int layerSize = 4;
    int nOut = 4;
    //1 example, TS length 3
    INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 }, new int[] { 1, nOut, 3 }, 'f');
    //1 example, TS length 1
    INDArray mask2 = Nd4j.create(new double[] { 1, 1, 0, 1 }, new int[] { 1, nOut, 1 }, 'f');
    //3 examples, TS length 3
    // Values in 'f' (column-major) order: example index varies most rapidly, then output unit (per time step),
    // followed by time index (least frequently)
    INDArray mask3 = Nd4j.create(new double[] { 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1,
                    1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 }, new int[] { 3, nOut, 3 }, 'f');
    INDArray[] labelMasks = new INDArray[] { mask1, mask2, mask3 };
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(),
                    //new LossCosineProximity(),    //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
                    new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(),
                    new LossMAE(), new LossMAPE(), new LossMAPE(),
                    //new LossMCXENT(),             //Per output masking on MCXENT+Softmax: not yet supported
                    new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    Activation[] act = new Activation[] { Activation.SIGMOID, //XENT
                    Activation.TANH, //Hinge
                    Activation.SIGMOID, //KLD
                    Activation.SOFTMAX, //KLD + softmax
                    Activation.TANH, //L1
                    Activation.TANH, //L2
                    Activation.TANH, //MAE
                    Activation.SOFTMAX, //MAE + softmax
                    Activation.TANH, //MAPE
                    Activation.SOFTMAX, //MAPE + softmax
                    Activation.SIGMOID, //MCXENT + sigmoid
                    Activation.TANH, //MSE
                    Activation.SOFTMAX, //MSE + softmax
                    Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
                    Activation.SOFTMAX, //MSLE + softmax
                    Activation.SIGMOID, //NLL
                    Activation.SIGMOID, //Poisson
                    Activation.TANH }; //Squared hinge
    for (INDArray labelMask : labelMasks) {
        int minibatch = labelMask.size(0);
        int tsLength = labelMask.size(2);
        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];
            Nd4j.getRandom().setSeed(12345);
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345).list()
                            .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                            .activation(Activation.TANH).build())
                            .layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf)
                                            .activation(a).build())
                            .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            net.setLayerMaskArrays(null, labelMask);
            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, new int[] { minibatch, nIn, tsLength }, new int[] { minibatch, nOut, tsLength }, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];
            String msg = "testPerOutputMaskingRnn(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a;
            System.out.println(msg);
            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
            assertTrue(msg, gradOK);
            //Check the equivalent compgraph:
            Nd4j.getRandom().setSeed(12345);
            ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).seed(12345)
                            .graphBuilder().addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                            .activation(Activation.TANH).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf)
                                            .activation(a).build(), "0")
                            .setOutputs("1").build();
            ComputationGraph graph = new ComputationGraph(cg);
            graph.init();
            net.setLayerMaskArrays(null, labelMask);
            gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { features }, new INDArray[] { labels });
            assertTrue(msg + " (compgraph)", gradOK);
        }
    }
}
Also used : Activation(org.nd4j.linalg.activations.Activation) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) ComputationGraph(org.deeplearning4j.nn.graph.ComputationGraph) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) INDArray(org.nd4j.linalg.api.ndarray.INDArray) NormalDistribution(org.deeplearning4j.nn.conf.distribution.NormalDistribution) ComputationGraphConfiguration(org.deeplearning4j.nn.conf.ComputationGraphConfiguration) Test(org.junit.Test)
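
As the comment at the start of the test explains, RNN per-output masking uses a 3d mask with the same shape as the labels, whereas per-example masking uses a 2d mask. A small standalone sketch contrasting the two shapes for the sizes used above (the specific zeroed entries are illustrative only):

int miniBatch = 3, nOut = 4, tsLength = 3;

// Per-example mask: shape [miniBatch, tsLength]; a 0 removes an entire time step for that example
INDArray perExampleMask = Nd4j.ones(miniBatch, tsLength);
perExampleMask.putScalar(new int[] { 2, 2 }, 0.0);

// Per-output mask: shape [miniBatch, nOut, tsLength]; a 0 removes a single output unit at a single time step
INDArray perOutputMask = Nd4j.ones(miniBatch, nOut, tsLength);
perOutputMask.putScalar(new int[] { 0, 1, 2 }, 0.0);

// Either can be passed as the label mask, exactly as in the test:
// net.setLayerMaskArrays(null, perOutputMask);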

Example 60 with MultiLayerNetwork

use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.

the class LossFunctionGradientCheck method lossFunctionGradientCheckLossLayer.

@Test
public void lossFunctionGradientCheckLossLayer() {
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(),
                    new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(),
                    new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(),
                    new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(),
                    new LossSquaredHinge() };
    String[] outputActivationFn = new String[] { "sigmoid", //xent
                    "sigmoid", //xent
                    "tanh", //cosine
                    "tanh", //hinge -> trying to predict 1 or -1
                    "sigmoid", //kld -> probab so should be between 0 and 1
                    "softmax", //kld + softmax
                    "tanh", //l1
                    "softmax", //l1 + softmax
                    "tanh", //l2
                    "softmax", //l2 + softmax
                    "identity", //mae
                    "softmax", //mae + softmax
                    "identity", //mape
                    "softmax", //mape + softmax
                    "softmax", //mcxent
                    "identity", //mse
                    "softmax", //mse + softmax
                    "sigmoid", //msle  -   requires positive labels/activations due to log
                    "softmax", //msle + softmax
                    "sigmoid", //nll
                    "softmax", //nll + softmax
                    "sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option
                    "tanh" }; //squared hinge
    int[] nOut = new int[] { 1, //xent
                    3, //xent
                    5, //cosine
                    3, //hinge
                    3, //kld
                    3, //kld + softmax
                    3, //l1
                    3, //l1 + softmax
                    3, //l2
                    3, //l2 + softmax
                    3, //mae
                    3, //mae + softmax
                    3, //mape
                    3, //mape + softmax
                    3, //mcxent
                    3, //mse
                    3, //mse + softmax
                    3, //msle
                    3, //msle + softmax
                    3, //nll
                    3, //nll + softmax
                    3, //poisson
                    3 }; //squared hinge
    int[] minibatchSizes = new int[] { 1, 3 };
    //        int[] minibatchSizes = new int[]{3};
    List<String> passed = new ArrayList<>();
    List<String> failed = new ArrayList<>();
    for (int i = 0; i < lossFunctions.length; i++) {
        for (int j = 0; j < minibatchSizes.length; j++) {
            String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
            Nd4j.getRandom().setSeed(12345);
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
                            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
                            .updater(Updater.NONE).regularization(false).weightInit(WeightInit.DISTRIBUTION)
                            .dist(new UniformDistribution(-2, 2)).list()
                            .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
                            .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i])
                                            .activation(outputActivationFn[i]).build())
                            .pretrain(false).backprop(true).build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            assertTrue(((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn().getClass() == lossFunctions[i].getClass());
            INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
            INDArray input = inOut[0];
            INDArray labels = inOut[1];
            log.info(" ***** Starting test: {} *****", testName);
            //                System.out.println(Arrays.toString(labels.data().asDouble()));
            //                System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
            //                System.out.println(net.score(new DataSet(input,labels)));
            boolean gradOK;
            try {
                gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
            } catch (Exception e) {
                e.printStackTrace();
                failed.add(testName + "\t" + "EXCEPTION");
                continue;
            }
            if (gradOK) {
                passed.add(testName);
            } else {
                failed.add(testName);
            }
            System.out.println("\n\n");
        }
    }
    System.out.println("---- Passed ----");
    for (String s : passed) {
        System.out.println(s);
    }
    System.out.println("---- Failed ----");
    for (String s : failed) {
        System.out.println(s);
    }
    assertEquals("Tests failed", 0, failed.size());
}
Also used : ArrayList(java.util.ArrayList) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) LossLayer(org.deeplearning4j.nn.conf.layers.LossLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Test(org.junit.Test)
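
A LossLayer has no parameters of its own; it applies the configured activation and loss function to the preceding layer's output, which is why the DenseLayer before it must already emit nOut[i] values. A minimal sketch of scoring one generated feature/label pair with such a network, reusing the variable names from the loop body above and assuming DataSet is org.nd4j.linalg.dataset.DataSet:

// input: shape [minibatch, 4]; labels: shape [minibatch, nOut[i]]
DataSet ds = new DataSet(input, labels);
double score = net.score(ds);               // loss value produced by the LossLayer for this batch
INDArray predictions = net.output(input);   // LossLayer activations (e.g. softmax probabilities)
System.out.println(testName + " score = " + score);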

Aggregations

MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork) 326
Test (org.junit.Test) 277
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration) 206
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 166
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration) 111
DataSet (org.nd4j.linalg.dataset.DataSet) 91
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator) 70
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator) 49
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution) 43
ScoreIterationListener (org.deeplearning4j.optimize.listeners.ScoreIterationListener) 41
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer) 40
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer) 38
Random (java.util.Random) 34
MnistDataSetIterator (org.deeplearning4j.datasets.iterator.impl.MnistDataSetIterator) 30
ConvolutionLayer (org.deeplearning4j.nn.conf.layers.ConvolutionLayer) 28
DL4JException (org.deeplearning4j.exception.DL4JException) 20
Layer (org.deeplearning4j.nn.api.Layer) 20
ClassPathResource (org.nd4j.linalg.io.ClassPathResource) 20
File (java.io.File) 19
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph) 19