Search in sources :

Example 1 with RnnToCnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor in project deeplearning4j by deeplearning4j.

In the class GradientCheckTests, method testGradientCnnFfRnn:

/**
 * Gradient check for a CNN -> FF -> LSTM -> RnnOutputLayer stack that is fed
 * time series input/output (i.e., video classification or similar).
 */
@Test
public void testGradientCnnFfRnn() {
    final int nChannelsIn = 3;
    //Each time step carries one flattened 10px x 10px x 3 channels frame
    final int inputSize = 10 * 10 * nChannelsIn;
    final int miniBatchSize = 4;
    final int timeSeriesLength = 10;
    final int nClasses = 3;

    //Random features, plus a one-hot label at a random class for every (example, time step) pair
    Nd4j.getRandom().setSeed(12345);
    INDArray input = Nd4j.rand(new int[] { miniBatchSize, inputSize, timeSeriesLength });
    INDArray labels = Nd4j.zeros(miniBatchSize, nClasses, timeSeriesLength);
    Random labelRng = new Random(12345);
    for (int example = 0; example < miniBatchSize; example++) {
        for (int step = 0; step < timeSeriesLength; step++) {
            labels.putScalar(new int[] { example, labelRng.nextInt(nClasses), step }, 1.0);
        }
    }

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(Updater.NONE)
            .seed(12345)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new UniformDistribution(-2, 2))
            .list()
            .layer(0, new ConvolutionLayer.Builder(5, 5)
                    .nIn(nChannelsIn)
                    .nOut(5)
                    .stride(1, 1)
                    .activation(Activation.TANH)
                    .build())
            .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(2, 2)
                    .stride(1, 1)
                    .build())
            .layer(2, new DenseLayer.Builder()
                    .nIn(5 * 5 * 5)
                    .nOut(4)
                    .activation(Activation.TANH)
                    .build())
            .layer(3, new GravesLSTM.Builder()
                    .nIn(4)
                    .nOut(3)
                    .activation(Activation.TANH)
                    .build())
            .layer(4, new RnnOutputLayer.Builder()
                    .lossFunction(LossFunction.MCXENT)
                    .nIn(3)
                    .nOut(nClasses)
                    .activation(Activation.SOFTMAX)
                    .build())
            .setInputType(InputType.convolutional(10, 10, nChannelsIn))
            .pretrain(false)
            .backprop(true)
            .build();

    //The config builder's convolutional setup doesn't know that we are expecting time series input
    //rather than standard FF input, so the layer 0 preprocessor is overridden here
    conf.getInputPreProcessors().put(0, new RnnToCnnPreProcessor(10, 10, nChannelsIn));

    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    mln.init();

    System.out.println("Params per layer:");
    for (int layerIdx = 0; layerIdx < mln.getnLayers(); layerIdx++) {
        System.out.println("layer " + layerIdx + "\t" + mln.getLayer(layerIdx).numParams());
    }

    assertTrue(GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
            DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels));
}
Also used : RnnToCnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor) UniformDistribution(org.deeplearning4j.nn.conf.distribution.UniformDistribution) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Random(java.util.Random) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Example 2 with RnnToCnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor in project deeplearning4j by deeplearning4j.

In the class MultiLayerTest, method testSummary:

/**
 * Builds a conv -> subsampling -> conv -> dense -> LSTM -> RNN output network,
 * derives a second network from it through {@code TransferLearning.Builder}
 * with {@code setFeatureExtractor(2)}, and prints {@code summary()} for both.
 * Nothing is asserted; the test only exercises the calls.
 */
@Test
public void testSummary() {
    int frameWidth = 130;
    int frameHeight = 130;
    int frameCount = 150;

    MultiLayerConfiguration confForArchitecture = new NeuralNetConfiguration.Builder()
            .seed(12345)
            //l2 regularization on all layers
            .regularization(true)
            .l2(0.001)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .learningRate(0.4)
            .list()
            .layer(0, new ConvolutionLayer.Builder(10, 10)
                    //3 channels: RGB
                    .nIn(3)
                    .nOut(30)
                    .stride(4, 4)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(3, 3)
                    .stride(2, 2)
                    .build())
            .layer(2, new ConvolutionLayer.Builder(3, 3)
                    .nIn(30)
                    .nOut(10)
                    .stride(2, 2)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            .layer(3, new DenseLayer.Builder()
                    .activation(Activation.RELU)
                    .nIn(490)
                    .nOut(50)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.5)
                    .build())
            .layer(4, new GravesLSTM.Builder()
                    .activation(Activation.SOFTSIGN)
                    .nIn(50)
                    .nOut(50)
                    .weightInit(WeightInit.XAVIER)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.6)
                    .build())
            .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(50)
                    //4 possible shapes: circle, square, arc, line
                    .nOut(4)
                    .updater(Updater.ADAGRAD)
                    .weightInit(WeightInit.XAVIER)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .build())
            .inputPreProcessor(0, new RnnToCnnPreProcessor(frameHeight, frameWidth, 3))
            .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
            .inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(frameCount / 5)
            .tBPTTBackwardLength(frameCount / 5)
            .build();

    MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(confForArchitecture);
    modelExpectedArch.init();

    MultiLayerNetwork modelNow = new TransferLearning.Builder(modelExpectedArch)
            .setFeatureExtractor(2)
            .build();

    System.out.println(modelExpectedArch.summary());
    System.out.println(modelNow.summary());
}
Also used : RnnToCnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor) CnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) Test(org.junit.Test)

Example 3 with RnnToCnnPreProcessor

use of org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor in project deeplearning4j by deeplearning4j.

In the class TransferLearningMLNTest, method testRemoveAndProcessing:

/**
 * Starts from a network whose layers 1-5 differ from the reference
 * architecture, removes the last five layers with
 * {@code TransferLearning.Builder}, re-adds the reference layers and
 * preprocessors, and asserts that the per-layer JSON configuration and the
 * parameter shapes then match the reference network (layer 1, the
 * subsampling layer, is not compared).
 */
@Test
public void testRemoveAndProcessing() {
    int frameWidth = 130;
    int frameHeight = 130;
    int frameCount = 150;

    //Reference architecture that the transfer-learned model is expected to reproduce
    MultiLayerConfiguration confForArchitecture = new NeuralNetConfiguration.Builder()
            .seed(12345)
            //l2 regularization on all layers
            .regularization(true)
            .l2(0.001)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            .learningRate(0.4)
            .list()
            .layer(0, new ConvolutionLayer.Builder(10, 10)
                    //3 channels: RGB
                    .nIn(3)
                    .nOut(30)
                    .stride(4, 4)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(3, 3)
                    .stride(2, 2)
                    .build())
            .layer(2, new ConvolutionLayer.Builder(3, 3)
                    .nIn(30)
                    .nOut(10)
                    .stride(2, 2)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            .layer(3, new DenseLayer.Builder()
                    .activation(Activation.RELU)
                    .nIn(490)
                    .nOut(50)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.5)
                    .build())
            .layer(4, new GravesLSTM.Builder()
                    .activation(Activation.SOFTSIGN)
                    .nIn(50)
                    .nOut(50)
                    .weightInit(WeightInit.XAVIER)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.6)
                    .build())
            .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(50)
                    //4 possible shapes: circle, square, arc, line
                    .nOut(4)
                    .updater(Updater.ADAGRAD)
                    .weightInit(WeightInit.XAVIER)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .build())
            .inputPreProcessor(0, new RnnToCnnPreProcessor(frameHeight, frameWidth, 3))
            .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
            .inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(frameCount / 5)
            .tBPTTBackwardLength(frameCount / 5)
            .build();
    MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(confForArchitecture);
    modelExpectedArch.init();

    //Starting point for transfer learning: only layer 0 is configured like the reference
    MultiLayerConfiguration confToTweak = new NeuralNetConfiguration.Builder()
            .seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1)
            //change learning rate
            .learningRate(0.1)
            // change updater
            .updater(Updater.RMSPROP)
            .list()
            //Only keep the first layer the same
            .layer(0, new ConvolutionLayer.Builder(10, 10)
                    //3 channels: RGB
                    .nIn(3)
                    .nOut(30)
                    .stride(4, 4)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            //change kernel size
            .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(5, 5)
                    .stride(2, 2)
                    .build())
            //change here
            .layer(2, new ConvolutionLayer.Builder(6, 6)
                    .nIn(30)
                    .nOut(10)
                    .stride(2, 2)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .build())
            //change here
            .layer(3, new DenseLayer.Builder()
                    .activation(Activation.RELU)
                    .nIn(250)
                    .nOut(50)
                    .weightInit(WeightInit.RELU)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.01)
                    .build())
            //change here
            .layer(4, new GravesLSTM.Builder()
                    .activation(Activation.SOFTSIGN)
                    .nIn(50)
                    .nOut(25)
                    .weightInit(WeightInit.XAVIER)
                    .build())
            .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(25)
                    .nOut(4)
                    .weightInit(WeightInit.XAVIER)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .build())
            .inputPreProcessor(0, new RnnToCnnPreProcessor(frameHeight, frameWidth, 3))
            .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(5, 5, 10))
            .inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
            .pretrain(false)
            .backprop(true)
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(frameCount / 5)
            .tBPTTBackwardLength(frameCount / 5)
            .build();
    MultiLayerNetwork modelToTweak = new MultiLayerNetwork(confToTweak);
    modelToTweak.init();

    //Drop the last five layers and re-add the reference layers 1-5 and their preprocessors
    MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToTweak)
            .fineTuneConfiguration(new FineTuneConfiguration.Builder()
                    .seed(12345)
                    //l2 regularization on all layers
                    .regularization(true)
                    .l2(0.001)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .updater(Updater.ADAGRAD)
                    .weightInit(WeightInit.RELU)
                    .iterations(1)
                    .learningRate(0.4)
                    .build())
            .removeLayersFromOutput(5)
            .addLayer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(3, 3)
                    .stride(2, 2)
                    .build())
            .addLayer(new ConvolutionLayer.Builder(3, 3)
                    .nIn(30)
                    .nOut(10)
                    .stride(2, 2)
                    .activation(Activation.RELU)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .build())
            .addLayer(new DenseLayer.Builder()
                    .activation(Activation.RELU)
                    .nIn(490)
                    .nOut(50)
                    .weightInit(WeightInit.RELU)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.5)
                    .build())
            .addLayer(new GravesLSTM.Builder()
                    .activation(Activation.SOFTSIGN)
                    .nIn(50)
                    .nOut(50)
                    .weightInit(WeightInit.XAVIER)
                    .updater(Updater.ADAGRAD)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .learningRate(0.6)
                    .build())
            .addLayer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX)
                    .nIn(50)
                    //4 possible shapes: circle, square, arc, line
                    .nOut(4)
                    .updater(Updater.ADAGRAD)
                    .weightInit(WeightInit.XAVIER)
                    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                    .gradientNormalizationThreshold(10)
                    .build())
            .setInputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
            .setInputPreProcessor(4, new FeedForwardToRnnPreProcessor())
            .build();

    //Layer 1 (subsampling) is left out of both comparisons: the original notes say some
    //learning related info on the subsampling layer will not be overwritten, and that
    //subsampling has no params
    int[] comparedLayers = { 0, 2, 3, 4, 5 };

    //modelNow should have the same architecture as modelExpectedArch
    for (int layer : comparedLayers) {
        assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(layer).toJson(),
                modelNow.getLayerWiseConfigurations().getConf(layer).toJson());
    }

    //Parameter shapes must agree for the whole network and then layer by layer
    assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape());
    for (int layer : comparedLayers) {
        assertArrayEquals(modelExpectedArch.getLayer(layer).params().shape(),
                modelNow.getLayer(layer).params().shape());
    }
}
Also used : RnnToCnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor) CnnToFeedForwardPreProcessor(org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor) FeedForwardToRnnPreProcessor(org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)

Aggregations

RnnToCnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.RnnToCnnPreProcessor): 3, Test (org.junit.Test): 3, CnnToFeedForwardPreProcessor (org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor): 2, FeedForwardToRnnPreProcessor (org.deeplearning4j.nn.conf.preprocessor.FeedForwardToRnnPreProcessor): 2, MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 2, Random (java.util.Random): 1, MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 1, UniformDistribution (org.deeplearning4j.nn.conf.distribution.UniformDistribution): 1, INDArray (org.nd4j.linalg.api.ndarray.INDArray): 1