Use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
The class TestVariableLengthTSCG, method testInputMasking:
@Test
public void testInputMasking() {
    //Idea: have masking on the input with 2 dense layers on input
    //Ensure that the parameter gradients don't depend on the input values at masked time steps
    int[] miniBatchSizes = {1, 2, 5};
    int nIn = 2;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345)
                .graphBuilder()
                .addInputs("in")
                .addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in")
                .addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "0")
                .addLayer("2", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "1")
                .addLayer("3", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build(), "2")
                .setOutputs("3")
                .inputPreProcessor("0", new RnnToFeedForwardPreProcessor())
                .inputPreProcessor("2", new FeedForwardToRnnPreProcessor())
                .build();
        ComputationGraph net = new ComputationGraph(conf);
        net.init();

        //in1: 4 time steps; in2: 5 time steps, the first 4 identical to in1
        INDArray in1 = Nd4j.rand(new int[] {nExamples, 2, 4});
        INDArray in2 = Nd4j.rand(new int[] {nExamples, 2, 5});
        in2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true)}, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        INDArray labels1 = Nd4j.rand(new int[] {nExamples, 1, 4});
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true)}, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        //Mask the last (5th) time step of every example
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] {j, 4}, 0);
        }

        net.setInput(0, in1);
        net.setLabel(0, labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map = g1.gradientForVariable();
        for (String s : map.keySet()) {
            //Gradients are views; need to dup otherwise they will be modified by the next computeGradientAndScore
            map.put(s, map.get(s).dup());
        }

        net.setInput(0, in2);
        net.setLabel(0, labels2);
        net.setLayerMaskArrays(new INDArray[] {inputMask}, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        Map<String, INDArray> activations2 = net.feedForward();

        //Scores should differ here: we are masking the input, not the output, so 5 time steps are scored vs. 4
        assertNotEquals(score1, score2, 0.01);

        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            assertNotEquals(s, g1s, g2s);
        }

        //Modify the values at the masked time step, and check that the gradients, score and activations do not change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] {j, k, 4}, r.nextDouble());
            }
            net.setInput(0, in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }
            Map<String, INDArray> activations2a = net.feedForward();
            for (String s : activations2.keySet()) {
                assertEquals(activations2.get(s), activations2a.get(s));
            }
        }

        //Finally: check that the activations of the first two (dense) layers are zero at the masked time step
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get("0");
        INDArray l1Before = activations2.get("1");
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
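For reference, the input-masking pattern exercised above can be used on its own. A minimal sketch, assuming a single-input graph net like the one built in this test and using only calls that appear above (array sizes and names are illustrative):

//Feature masks have shape [miniBatch, tsLength]; 1.0 = use the step, 0.0 = skip it
INDArray features = Nd4j.rand(new int[] {3, 2, 5}); //[miniBatch, nIn, tsLength]
INDArray labels = Nd4j.rand(new int[] {3, 1, 5});
INDArray featureMask = Nd4j.ones(3, 5);
featureMask.putScalar(new int[] {0, 4}, 0.0); //ignore the last time step of example 0

net.setInput(0, features);
net.setLabel(0, labels);
net.setLayerMaskArrays(new INDArray[] {featureMask}, null); //input mask only
net.computeGradientAndScore();
double score = net.score();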
The class TestVariableLengthTSCG, method testOutputMasking:
@Test
public void testOutputMasking() {
    //If labels are masked: we expect zero output for those time steps.
    int nIn = 3;
    int[] timeSeriesLengths = {3, 10};
    int[] outputSizes = {1, 2, 5};
    int[] miniBatchSizes = {1, 4};
    Random r = new Random(12345);
    for (int tsLength : timeSeriesLengths) {
        for (int nOut : outputSizes) {
            for (int miniBatch : miniBatchSizes) {
                for (int nToMask = 0; nToMask < tsLength - 1; nToMask++) {
                    INDArray labelMaskArray = Nd4j.ones(miniBatch, tsLength);
                    for (int i = 0; i < miniBatch; i++) {
                        //For each example: select which outputs to mask...
                        int nMasked = 0;
                        while (nMasked < nToMask) {
                            int tryIdx = r.nextInt(tsLength);
                            if (labelMaskArray.getDouble(i, tryIdx) == 0.0)
                                continue;
                            labelMaskArray.putScalar(new int[] {i, tryIdx}, 0.0);
                            nMasked++;
                        }
                    }

                    INDArray input = Nd4j.rand(new int[] {miniBatch, nIn, tsLength});

                    //Net 1: MSE loss with identity activation
                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                            .regularization(false).seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE)
                                    .activation(Activation.IDENTITY).nIn(5).nOut(nOut)
                                    .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build(), "0")
                            .setOutputs("1").pretrain(false).backprop(true).build();
                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();

                    //Net 2: MCXENT loss with softmax activation
                    ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                            .regularization(false).seed(12345L)
                            .graphBuilder()
                            .addInputs("in")
                            .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build(), "in")
                            .addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nIn(5).nOut(nOut)
                                    .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build(), "0")
                            .setOutputs("1").pretrain(false).backprop(true).build();
                    ComputationGraph net2 = new ComputationGraph(conf2);
                    net2.init();

                    net.setLayerMaskArrays(null, new INDArray[] {labelMaskArray});
                    net2.setLayerMaskArrays(null, new INDArray[] {labelMaskArray});

                    INDArray out = net.output(input)[0];
                    INDArray out2 = net2.output(input)[0];
                    for (int i = 0; i < miniBatch; i++) {
                        for (int j = 0; j < tsLength; j++) {
                            double m = labelMaskArray.getDouble(i, j);
                            if (m == 0.0) {
                                //Expect outputs to be exactly 0.0
                                INDArray outRow = out.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(j));
                                INDArray outRow2 = out2.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(j));
                                for (int k = 0; k < nOut; k++) {
                                    assertEquals(outRow.getDouble(k), 0.0, 0.0);
                                    assertEquals(outRow2.getDouble(k), 0.0, 0.0);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
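The key call is setLayerMaskArrays(null, new INDArray[] {labelMask}): with only a label mask set, the output at masked time steps is zeroed (and, per DL4J's masking semantics, those steps should not contribute to the loss). A minimal sketch, assuming a single-input graph net like either of the two above (sizes are illustrative):

int miniBatch = 4, tsLength = 10;
INDArray input = Nd4j.rand(new int[] {miniBatch, 3, tsLength}); //[miniBatch, nIn, tsLength]
INDArray labelMask = Nd4j.ones(miniBatch, tsLength);
labelMask.putScalar(new int[] {0, tsLength - 1}, 0.0); //mask the last step of example 0

net.setLayerMaskArrays(null, new INDArray[] {labelMask}); //label mask only, no feature mask
INDArray out = net.output(input)[0]; //shape [miniBatch, nOut, tsLength]
//out.getDouble(0, k, tsLength - 1) is exactly 0.0 for every output unit k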
The class TestCompGraphCNN, method testCNNComputationGraphSingleOutFeatureMap:
@Test
public void testCNNComputationGraphSingleOutFeatureMap() {
    int imageWidth = 23;
    int imageHeight = 23;
    int nChannels = 1;
    int classes = 2;
    int numSamples = 200;
    int kernelHeight = 3;
    int kernelWidth = 3;
    DataSet trainInput;

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1).seed(123)
            .graphBuilder()
            .addInputs("input")
            .setInputTypes(InputType.convolutional(imageHeight, imageWidth, nChannels))
            .addLayer("conv1", new ConvolutionLayer.Builder()
                    .kernelSize(kernelHeight, kernelWidth).stride(1, 1)
                    .nIn(nChannels).nOut(1) //check that a single output feature map (nOut = 1) works
                    .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build(), "input")
            .addLayer("pool1", new SubsamplingLayer.Builder()
                    .poolingType(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(imageHeight - kernelHeight, 1).stride(1, 1).build(), "conv1")
            .addLayer("output", new OutputLayer.Builder().nOut(classes).build(), "pool1")
            .setOutputs("output").backprop(true).pretrain(false).build();
    ComputationGraph model = new ComputationGraph(conf);
    model.init();

    INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels);
    INDArray emptyLabels = Nd4j.zeros(numSamples, classes);
    trainInput = new DataSet(emptyFeatures, emptyLabels);
    model.fit(trainInput);
}
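As a shape sanity check for the graph above (my arithmetic, not something the test asserts): with zero padding and stride 1, each spatial dimension shrinks by kernel - 1 per layer.

int convOutH = imageHeight - kernelHeight + 1; //23 - 3 + 1 = 21
int convOutW = imageWidth - kernelWidth + 1;   //21
int poolOutH = convOutH - (imageHeight - kernelHeight) + 1; //21 - 20 + 1 = 2
int poolOutW = convOutW - 1 + 1;                            //21, pooling kernel width is 1
//setInputTypes(...) adds the preprocessor that flattens the pooled activations for the OutputLayer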
The class TestConvolutionModes, method testGlobalLocalConfigCompGraph:
@Test
public void testGlobalLocalConfigCompGraph() {
    for (ConvolutionMode cm : new ConvolutionMode[] {ConvolutionMode.Strict, ConvolutionMode.Truncate, ConvolutionMode.Same}) {
        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .weightInit(WeightInit.XAVIER).convolutionMode(cm)
                .graphBuilder()
                .addInputs("in")
                .addLayer("0", new ConvolutionLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "in")
                .addLayer("1", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Strict).kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "0")
                .addLayer("2", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Truncate).kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "1")
                .addLayer("3", new ConvolutionLayer.Builder().convolutionMode(ConvolutionMode.Same).kernelSize(3, 3).stride(3, 3).padding(0, 0).nIn(3).nOut(3).build(), "2")
                .addLayer("4", new SubsamplingLayer.Builder().kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "3")
                .addLayer("5", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Strict).kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "4")
                .addLayer("6", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Truncate).kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "5")
                .addLayer("7", new SubsamplingLayer.Builder().convolutionMode(ConvolutionMode.Same).kernelSize(3, 3).stride(3, 3).padding(0, 0).build(), "6")
                .addLayer("8", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT).nOut(3).build(), "7")
                .setOutputs("8").build();

        //Layers "0" and "4" set no mode of their own, so they should inherit the global mode cm
        assertEquals(cm, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("0")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Strict, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("1")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Truncate, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("2")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Same, ((ConvolutionLayer) ((LayerVertex) conf.getVertices().get("3")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(cm, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("4")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Strict, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("5")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Truncate, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("6")).getLayerConf().getLayer()).getConvolutionMode());
        assertEquals(ConvolutionMode.Same, ((SubsamplingLayer) ((LayerVertex) conf.getVertices().get("7")).getLayerConf().getLayer()).getConvolutionMode());
    }
}
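For context, a summary of the three modes from my reading of the ConvolutionMode javadoc (not something this test asserts): Strict requires (in - kernel + 2*padding) to be exactly divisible by the stride and fails otherwise, Truncate floors the division, and Same computes its own padding so that out = ceil(in / stride). A small worked sketch:

//Output sizes for in = 10, kernel = 3, stride = 3, padding = 0 (my arithmetic):
int in = 10, k = 3, s = 3;
//Strict: (10 - 3) % 3 != 0, so the configuration would be rejected
int truncateOut = (in - k) / s + 1;             //(10 - 3) / 3 + 1 = 3
int sameOut = (int) Math.ceil(in / (double) s); //ceil(10 / 3) = 4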
The class MultiLayerTestRNN, method testRnnTimeStepWithPreprocessorGraph:
@Test
public void testRnnTimeStepWithPreprocessorGraph() {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10)
                    .activation(Activation.TANH).build(), "in")
            .addLayer("1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10)
                    .activation(Activation.TANH).build(), "0")
            .addLayer("2", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "1")
            .setOutputs("2")
            .inputPreProcessor("0", new FeedForwardToRnnPreProcessor())
            .pretrain(false).backprop(true).build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();

    INDArray in = Nd4j.rand(1, 10);
    net.rnnTimeStep(in);
}
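rnnTimeStep performs inference one step at a time, keeping the RNN layers' internal state between calls; here the FeedForwardToRnnPreProcessor lets plain 2d [miniBatch, nIn] input be fed per step. A minimal sketch of stepwise inference with the net above, assuming the same API:

//Stepwise inference: state carries over between rnnTimeStep calls
for (int t = 0; t < 3; t++) {
    INDArray stepIn = Nd4j.rand(1, 10);           //one time step, shape [miniBatch, nIn]
    INDArray[] stepOut = net.rnnTimeStep(stepIn); //one output array per graph output
}
net.rnnClearPreviousState(); //reset the stored state before starting a new sequence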