use of org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor in project deeplearning4j by deeplearning4j.
the class ConvolutionLayerSetupTest method complete.
public MultiLayerConfiguration.Builder complete() {
final int numRows = 28;
final int numColumns = 28;
int nChannels = 1;
int outputNum = 10;
int iterations = 10;
int seed = 123;
MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .iterations(iterations)
        .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
        .list()
        .layer(0, new org.deeplearning4j.nn.conf.layers.ConvolutionLayer.Builder(new int[] { 10, 10 }, new int[] { 2, 2 })
                .nIn(nChannels).nOut(6).build())
        .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] { 2, 2 })
                .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build())
        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                .nIn(5 * 5 * 1 * 6) // 150: the 5x5x6 subsampling output, flattened by the preprocessor below
                .nOut(outputNum).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build())
        .inputPreProcessor(0, new FeedForwardToCnnPreProcessor(numRows, numColumns, nChannels))
        .inputPreProcessor(2, new CnnToFeedForwardPreProcessor(5, 5, 6))
        .backprop(true).pretrain(false);
return builder;
}
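The nIn of the output layer and the (5, 5, 6) arguments to CnnToFeedForwardPreProcessor follow from the standard convolution arithmetic, out = (in - kernel) / stride + 1. A minimal sketch of that calculation (the helper variables below are illustrative, not part of the test):

int convOut = (28 - 10) / 2 + 1;       // = 10: feature map side after the 10x10 convolution, stride 2
int poolOut = (convOut - 2) / 2 + 1;   // = 5: after 2x2 max pooling, stride 2
int flattened = poolOut * poolOut * 6; // = 150, i.e. 5 * 5 * 1 * 6, the nIn of the output layer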
use of org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor in project deeplearning4j by deeplearning4j.
the class TestGraphNodes method testJSON.
@Test
public void testJSON() {
//The config here is nonsense, but that doesn't matter for the config -> json -> config test
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder()
        .addInputs("in")
        .addVertex("v1", new ElementWiseVertex(ElementWiseVertex.Op.Add), "in")
        .addVertex("v2", new org.deeplearning4j.nn.conf.graph.MergeVertex(), "in", "in")
        .addVertex("v3", new PreprocessorVertex(new CnnToFeedForwardPreProcessor(1, 2, 1)), "in")
        .addVertex("v4", new org.deeplearning4j.nn.conf.graph.SubsetVertex(0, 1), "in")
        .addVertex("v5", new DuplicateToTimeSeriesVertex("in"), "in")
        .addVertex("v6", new LastTimeStepVertex("in"), "in")
        .addVertex("v7", new org.deeplearning4j.nn.conf.graph.StackVertex(), "in")
        .addVertex("v8", new org.deeplearning4j.nn.conf.graph.UnstackVertex(0, 1), "in")
        .addLayer("out", new OutputLayer.Builder().nIn(1).nOut(1).build(), "in")
        .setOutputs("out")
        .build();
String json = conf.toJson();
ComputationGraphConfiguration conf2 = ComputationGraphConfiguration.fromJson(json);
assertEquals(conf, conf2);
}
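The same round trip holds for MultiLayerConfiguration as well. A minimal sketch, assuming the same 0.x-era builder API used throughout this page (the layer sizes are arbitrary):

MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list()
        .layer(0, new OutputLayer.Builder().nIn(150).nOut(10).build())
        .inputPreProcessor(0, new CnnToFeedForwardPreProcessor(5, 5, 6))
        .build();
assertEquals(mlc, MultiLayerConfiguration.fromJson(mlc.toJson()));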
use of org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor in project deeplearning4j by deeplearning4j.
the class TransferLearningMLNTest method testAllWithCNNNew.
@Test
public void testAllWithCNNNew() {
DataSet randomData = new DataSet(Nd4j.rand(10, 28 * 28 * 3).reshape(10, 3, 28, 28), Nd4j.rand(10, 10));
MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(new NeuralNetConfiguration.Builder()
        .seed(123)
        .iterations(1)
        .learningRate(.01)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(Updater.NESTEROVS).momentum(0.9)
        .list()
        .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(3).stride(1, 1).nOut(20).activation(Activation.IDENTITY).build())
        .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2).stride(2, 2).build())
        .layer(2, new ConvolutionLayer.Builder(5, 5).stride(1, 1).nOut(50).activation(Activation.IDENTITY).build())
        .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2).stride(2, 2).build())
        .layer(4, new DenseLayer.Builder().activation(Activation.RELU).nOut(500).build())
        .layer(5, new DenseLayer.Builder().activation(Activation.RELU).nOut(250).build())
        .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nOut(100).activation(Activation.SOFTMAX).build())
        .setInputType(InputType.convolutionalFlat(28, 28, 3)) //See note below
        .backprop(true).pretrain(false)
        .build());
modelToFineTune.init();
//10x20x12x12
INDArray asFrozenFeatures = modelToFineTune.feedForwardToLayer(2, randomData.getFeatures(), false).get(2);
NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().learningRate(0.2).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD);
FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().learningRate(0.2).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(Updater.SGD).build();
MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune)
        .fineTuneConfiguration(overallConf)
        .setFeatureExtractor(1)
        .removeLayersFromOutput(5)
        .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(12 * 12 * 20).nOut(300).build())
        .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(300).nOut(150).build())
        .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(150).nOut(50).build())
        .addLayer(new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).activation(Activation.SOFTMAX).nIn(50).nOut(10).build())
        .setInputPreProcessor(2, new CnnToFeedForwardPreProcessor(12, 12, 20))
        .build();
MultiLayerNetwork notFrozen = new MultiLayerNetwork(equivalentConf.list()
        .layer(0, new DenseLayer.Builder().activation(Activation.RELU).nIn(12 * 12 * 20).nOut(300).build())
        .layer(1, new DenseLayer.Builder().activation(Activation.RELU).nIn(300).nOut(150).build())
        .layer(2, new DenseLayer.Builder().activation(Activation.RELU).nIn(150).nOut(50).build())
        .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).nIn(50).nOut(10).activation(Activation.SOFTMAX).build())
        .inputPreProcessor(0, new CnnToFeedForwardPreProcessor(12, 12, 20))
        .backprop(true).pretrain(false)
        .build());
notFrozen.init();
assertArrayEquals(modelToFineTune.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape());
//subsampling has no params
//assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape());
assertArrayEquals(notFrozen.getLayer(0).params().shape(), modelNow.getLayer(2).params().shape());
modelNow.getLayer(2).setParams(notFrozen.getLayer(0).params());
assertArrayEquals(notFrozen.getLayer(1).params().shape(), modelNow.getLayer(3).params().shape());
modelNow.getLayer(3).setParams(notFrozen.getLayer(1).params());
assertArrayEquals(notFrozen.getLayer(2).params().shape(), modelNow.getLayer(4).params().shape());
modelNow.getLayer(4).setParams(notFrozen.getLayer(2).params());
assertArrayEquals(notFrozen.getLayer(3).params().shape(), modelNow.getLayer(5).params().shape());
modelNow.getLayer(5).setParams(notFrozen.getLayer(3).params());
for (int i = 0; i < 3; i++) {
    notFrozen.fit(new DataSet(asFrozenFeatures, randomData.getLabels()));
    modelNow.fit(randomData);
}
INDArray expectedParams = Nd4j.hstack(modelToFineTune.getLayer(0).params(), notFrozen.params());
assertEquals(expectedParams, modelNow.params());
}
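The final assertion depends on params() returning a flattened row vector laid out layer by layer, so the network's full parameter vector is the horizontal concatenation of the frozen layer-0 block and the retrained layers. A minimal sketch of the Nd4j.hstack behavior being relied on (the arrays are illustrative):

INDArray first = Nd4j.create(new double[] { 1, 2 });
INDArray second = Nd4j.create(new double[] { 3, 4 });
// hstack concatenates row vectors along dimension 1
assertEquals(Nd4j.create(new double[] { 1, 2, 3, 4 }), Nd4j.hstack(first, second));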
use of org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor in project deeplearning4j by deeplearning4j.
the class TransferLearningMLNTest method testRemoveAndProcessing.
@Test
public void testRemoveAndProcessing() {
int V_WIDTH = 130;
int V_HEIGHT = 130;
int V_NFRAMES = 150;
MultiLayerConfiguration confForArchitecture = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .regularization(true).l2(0.001) //l2 regularization on all layers
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .iterations(1)
        .learningRate(0.4)
        .list()
        .layer(0, new ConvolutionLayer.Builder(10, 10)
                .nIn(3) //3 channels: RGB
                .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build())
        .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3).stride(2, 2).build())
        .layer(2, new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build())
        .layer(3, new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.5).build())
        .layer(4, new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.6).build())
        .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(50)
                .nOut(4) //4 possible shapes: circle, square, arc, line
                .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).build())
        .inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
        .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
        .inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
        .pretrain(false).backprop(true)
        .backpropType(BackpropType.TruncatedBPTT)
        .tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5)
        .build();
MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(confForArchitecture);
modelExpectedArch.init();
MultiLayerNetwork modelToTweak = new MultiLayerNetwork(new NeuralNetConfiguration.Builder()
        .seed(12345)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .iterations(1)
        .learningRate(0.1) //change learning rate
        .updater(Updater.RMSPROP) //change updater
        .list()
        .layer(0, new ConvolutionLayer.Builder(10, 10) //Only keep the first layer the same
                .nIn(3) //3 channels: RGB
                .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build())
        .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) //change kernel size
                .kernelSize(5, 5).stride(2, 2).build())
        .layer(2, new ConvolutionLayer.Builder(6, 6) //change here
                .nIn(30).nOut(10).stride(2, 2).activation(Activation.RELU).weightInit(WeightInit.RELU).build())
        .layer(3, new DenseLayer.Builder() //change here
                .activation(Activation.RELU).nIn(250).nOut(50).weightInit(WeightInit.RELU)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.01).build())
        .layer(4, new GravesLSTM.Builder() //change here
                .activation(Activation.SOFTSIGN).nIn(50).nOut(25).weightInit(WeightInit.XAVIER).build())
        .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(25).nOut(4).weightInit(WeightInit.XAVIER)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).build())
        .inputPreProcessor(0, new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
        .inputPreProcessor(3, new CnnToFeedForwardPreProcessor(5, 5, 10))
        .inputPreProcessor(4, new FeedForwardToRnnPreProcessor())
        .pretrain(false).backprop(true)
        .backpropType(BackpropType.TruncatedBPTT)
        .tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5)
        .build());
modelToTweak.init();
MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToTweak)
        .fineTuneConfiguration(new FineTuneConfiguration.Builder() //l2 regularization on all layers
                .seed(12345)
                .regularization(true).l2(0.001)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .updater(Updater.ADAGRAD)
                .weightInit(WeightInit.RELU)
                .iterations(1)
                .learningRate(0.4)
                .build())
        .removeLayersFromOutput(5)
        .addLayer(new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(3, 3).stride(2, 2).build())
        .addLayer(new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2).activation(Activation.RELU).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD).build())
        .addLayer(new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50).weightInit(WeightInit.RELU).updater(Updater.ADAGRAD)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.5).build())
        .addLayer(new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50).nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).learningRate(0.6).build())
        .addLayer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(50)
                .nOut(4) //4 possible shapes: circle, square, arc, line
                .updater(Updater.ADAGRAD).weightInit(WeightInit.XAVIER)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(10).build())
        .setInputPreProcessor(3, new CnnToFeedForwardPreProcessor(7, 7, 10))
        .setInputPreProcessor(4, new FeedForwardToRnnPreProcessor())
        .build();
//modelNow should have the same architecture as modelExpectedArch
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(0).toJson(), modelNow.getLayerWiseConfigurations().getConf(0).toJson());
//some learning-related info in the subsampling layer will not be overwritten
//assertTrue(modelExpectedArch.getLayerWiseConfigurations().getConf(1).toJson().equals(modelNow.getLayerWiseConfigurations().getConf(1).toJson()));
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(2).toJson(), modelNow.getLayerWiseConfigurations().getConf(2).toJson());
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(3).toJson(), modelNow.getLayerWiseConfigurations().getConf(3).toJson());
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(4).toJson(), modelNow.getLayerWiseConfigurations().getConf(4).toJson());
assertEquals(modelExpectedArch.getLayerWiseConfigurations().getConf(5).toJson(), modelNow.getLayerWiseConfigurations().getConf(5).toJson());
assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape());
assertArrayEquals(modelExpectedArch.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape());
//subsampling has no params
//assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(2).params().shape(), modelNow.getLayer(2).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(3).params().shape(), modelNow.getLayer(3).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(4).params().shape(), modelNow.getLayer(4).params().shape());
assertArrayEquals(modelExpectedArch.getLayer(5).params().shape(), modelNow.getLayer(5).params().shape());
}
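In both configurations, the CnnToFeedForwardPreProcessor dimensions and the dense layer's nIn follow from the convolution arithmetic, out = (in - kernel) / stride + 1, applied to the 130x130 frames. A sketch for confForArchitecture (the modelToTweak variant works the same way: the 5x5 pooling kernel gives 14, then the 6x6 convolution gives 5, hence CnnToFeedForwardPreProcessor(5, 5, 10) and nIn = 250; the helper variables are illustrative):

int conv1 = (130 - 10) / 4 + 1;    // = 31 after the 10x10 convolution, stride 4
int pool1 = (conv1 - 3) / 2 + 1;   // = 15 after 3x3 max pooling, stride 2
int conv2 = (pool1 - 3) / 2 + 1;   // = 7, hence CnnToFeedForwardPreProcessor(7, 7, 10)
int denseNIn = conv2 * conv2 * 10; // = 490, the dense layer's nIn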
use of org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor in project deeplearning4j by deeplearning4j.
the class RegressionTest071 method regressionTestCNN1.
@Test
public void regressionTestCNN1() throws Exception {
File f = new ClassPathResource("regression_testing/071/071_ModelSerializer_Regression_CNN_1.zip").getTempFileFromArchive();
MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true);
MultiLayerConfiguration conf = net.getLayerWiseConfigurations();
assertEquals(3, conf.getConfs().size());
assertTrue(conf.isBackprop());
assertFalse(conf.isPretrain());
ConvolutionLayer l0 = (ConvolutionLayer) conf.getConf(0).getLayer();
assertEquals("tanh", l0.getActivationFn().toString());
assertEquals(3, l0.getNIn());
assertEquals(3, l0.getNOut());
assertEquals(WeightInit.RELU, l0.getWeightInit());
assertEquals(Updater.RMSPROP, l0.getUpdater());
assertEquals(0.96, l0.getRmsDecay(), 1e-6);
assertEquals(0.15, l0.getLearningRate(), 1e-6);
assertArrayEquals(new int[] { 2, 2 }, l0.getKernelSize());
assertArrayEquals(new int[] { 1, 1 }, l0.getStride());
assertArrayEquals(new int[] { 0, 0 }, l0.getPadding());
assertEquals(ConvolutionMode.Same, l0.getConvolutionMode());
SubsamplingLayer l1 = (SubsamplingLayer) conf.getConf(1).getLayer();
assertArrayEquals(new int[] { 2, 2 }, l1.getKernelSize());
assertArrayEquals(new int[] { 1, 1 }, l1.getStride());
assertArrayEquals(new int[] { 0, 0 }, l1.getPadding());
assertEquals(PoolingType.MAX, l1.getPoolingType());
assertEquals(ConvolutionMode.Same, l1.getConvolutionMode());
OutputLayer l2 = (OutputLayer) conf.getConf(2).getLayer();
assertEquals("sigmoid", l1.getActivationFn().toString());
assertEquals(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, l2.getLossFunction());
//TODO
assertTrue(l2.getLossFn() instanceof LossNegativeLogLikelihood);
assertEquals(26 * 26 * 3, l2.getNIn());
assertEquals(5, l2.getNOut());
assertEquals(WeightInit.RELU, l2.getWeightInit());
assertEquals(Updater.RMSPROP, l2.getUpdater());
assertEquals(0.96, l2.getRmsDecay(), 1e-6);
assertEquals(0.15, l2.getLearningRate(), 1e-6);
assertTrue(conf.getInputPreProcess(2) instanceof CnnToFeedForwardPreProcessor);
int numParams = net.numParams();
assertEquals(Nd4j.linspace(1, numParams, numParams), net.params());
int updaterSize = net.getUpdater().stateSizeForLayer(net);
assertEquals(Nd4j.linspace(1, updaterSize, updaterSize), net.getUpdater().getStateViewArray());
}
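The inverse direction can be sketched with the same ModelSerializer API (a minimal sketch, assuming writeModel(Model, File, boolean) as in this DL4J version; the temp file name is arbitrary, and the boolean flag controls whether updater state is persisted/loaded):

File tmp = File.createTempFile("regression_cnn", ".zip");
ModelSerializer.writeModel(net, tmp, true); // true: also persist updater state
MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(tmp, true);
assertEquals(net.params(), restored.params());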