Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestVariableLengthTSCG, method testInputMasking:
@Test
public void testInputMasking() {
    //Idea: apply masking on the input, with 2 dense layers between the input and the recurrent layer
    //Ensure that the parameter gradients do not depend on the input values at the masked time steps
    int[] miniBatchSizes = {1, 2, 5};
    int nIn = 2;
    Random r = new Random(12345);

    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);

        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                        .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345)
                        .graphBuilder()
                        .addInputs("in")
                        .addLayer("0", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in")
                        .addLayer("1", new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "0")
                        .addLayer("2", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "1")
                        .addLayer("3", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build(), "2")
                        .setOutputs("3")
                        .inputPreProcessor("0", new RnnToFeedForwardPreProcessor())
                        .inputPreProcessor("2", new FeedForwardToRnnPreProcessor())
                        .build();

        ComputationGraph net = new ComputationGraph(conf);
        net.init();
        INDArray in1 = Nd4j.rand(new int[] {nExamples, 2, 4});
        INDArray in2 = Nd4j.rand(new int[] {nExamples, 2, 5});
        in2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true)}, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        INDArray labels1 = Nd4j.rand(new int[] {nExamples, 1, 4});
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true)}, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        //Mask out the last time step of the input (but not the labels): mask value 0 at time step 4
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] {j, 4}, 0);
        }
        //First: compute score and gradients on the length-4 data, with no masking
        net.setInput(0, in1);
        net.setLabel(0, labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map = g1.gradientForVariable();
        for (String s : map.keySet()) {
            //Gradients are views; need to dup, otherwise they will be modified by the next computeGradientAndScore call
            map.put(s, map.get(s).dup());
        }

        //Second: compute score and gradients on the length-5 data, with the last input time step masked
        net.setInput(0, in2);
        net.setLabel(0, labels2);
        net.setLayerMaskArrays(new INDArray[] {inputMask}, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        Map<String, INDArray> activations2 = net.feedForward();

        //Scores should differ here: the input is masked but the output is not, so 5 output time steps contribute to score2 vs. 4 to score1
        assertNotEquals(score1, score2, 0.01);

        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            assertNotEquals(s, g1s, g2s);
        }
        //Modify the values at the masked time step, and check that the score, gradients and activations do not change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] {j, k, 4}, r.nextDouble());
            }
            net.setInput(0, in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }

            Map<String, INDArray> activations2a = net.feedForward();
            for (String s : activations2.keySet()) {
                assertEquals(activations2.get(s), activations2a.get(s));
            }
        }
        //Finally: check that the activations of the first two (dense) layers are zero at the masked time step
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get("0");
        INDArray l1Before = activations2.get("1");
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);

        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
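The dup() loop in the middle of this test is its crux: the arrays returned by Gradient.gradientForVariable() are views into the network's single flattened gradient buffer, so the next computeGradientAndScore() call overwrites them in place. A minimal sketch of that pattern as a standalone helper; the name copyGradients is ours (not part of DL4J), it assumes the usual java.util imports, and otherwise uses only the API calls already seen in the test above:

private static Map<String, INDArray> copyGradients(Gradient g) {
    //Take a defensive copy of every per-variable gradient array
    Map<String, INDArray> copy = new LinkedHashMap<>();
    for (Map.Entry<String, INDArray> entry : g.gradientForVariable().entrySet()) {
        //dup() detaches the array from the flattened gradient view
        copy.put(entry.getKey(), entry.getValue().dup());
    }
    return copy;
}

Usage mirrors the test: call Gradient g1 = net.gradient(); Map<String, INDArray> saved = copyGradients(g1); before invoking computeGradientAndScore() a second time.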
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestComputationGraphNetwork, method testGradientUpdate:
@Test
public void testGradientUpdate() {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);

    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("first_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("first_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("output_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("output_b", Nd4j.ones(1, 3));

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .graphBuilder()
                    .addInputs("input")
                    .addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
                    .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).build(), "first")
                    .setOutputs("output")
                    .pretrain(false).backprop(true)
                    .build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    net.fit(iter.next());

    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    net.update(expectedGradient);
    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    // Update params with set
    net.setParam("first_W", Nd4j.ones(4, 5));
    net.setParam("first_b", Nd4j.ones(1, 5));
    net.setParam("output_W", Nd4j.ones(5, 3));
    net.setParam("output_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();

    // Confirm params
    assertEquals(Nd4j.ones(1, 43), actualParams);

    net.update(expectedGradient);
    actualParams = net.params();
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}
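The constant 43 in the two assertions above is just the total parameter count of this graph: the "first" dense layer contributes 4 * 5 weights plus 5 biases (25), and the "output" layer 5 * 3 weights plus 3 biases (18). The final assertion also shows that update(Gradient) adds the supplied gradient values directly onto the current parameters. A small sketch of the parameter-count check, assuming the ComputationGraph.numParams() accessor (not used in the test itself):

int expectedParamCount = (4 * 5 + 5)   // first_W (4x5) + first_b (1x5) = 25
                + (5 * 3 + 3);         // output_W (5x3) + output_b (1x3) = 18
assertEquals(expectedParamCount, net.numParams());  // 43, matching Nd4j.ones(1, 43) above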
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class TestComputationGraphNetwork, method testBackwardIrisBasic:
@Test
public void testBackwardIrisBasic() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();

    MultiLayerConfiguration mlc = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();

    DataSetIterator iris = new IrisDataSetIterator(150, 150);
    DataSet ds = iris.next();

    //Now: set the parameters of both networks to be identical, then compute gradients and check they match
    Nd4j.getRandom().setSeed(12345);
    int nParams = (4 * 5 + 5) + (5 * 3 + 3);
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());

    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();
    graph.setInput(0, input.dup());
    graph.setLabel(0, labels.dup());
    net.setInput(input.dup());
    net.setLabels(labels.dup());

    //Compute gradients
    net.computeGradientAndScore();
    Pair<Gradient, Double> netGradScore = net.gradientAndScore();
    graph.computeGradientAndScore();
    Pair<Gradient, Double> graphGradScore = graph.gradientAndScore();
    assertEquals(netGradScore.getSecond(), graphGradScore.getSecond(), 1e-3);

    //Compare gradients
    Gradient netGrad = netGradScore.getFirst();
    Gradient graphGrad = graphGradScore.getFirst();
    assertNotNull(graphGrad);
    assertEquals(netGrad.gradientForVariable().size(), graphGrad.gradientForVariable().size());
    assertEquals(netGrad.getGradientFor("0_W"), graphGrad.getGradientFor("firstLayer_W"));
    assertEquals(netGrad.getGradientFor("0_b"), graphGrad.getGradientFor("firstLayer_b"));
    assertEquals(netGrad.getGradientFor("1_W"), graphGrad.getGradientFor("outputLayer_W"));
    assertEquals(netGrad.getGradientFor("1_b"), graphGrad.getGradientFor("outputLayer_b"));
}
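The four assertions at the end encode the parameter-key naming difference between the two model types: MultiLayerNetwork keys use the layer index ("0_W", "1_b", ...), while ComputationGraph keys use the vertex name ("firstLayer_W", "outputLayer_b", ...). A sketch of the same comparison driven by an explicit mapping table; the map itself is ours, built from the assertions above, and assumes java.util.HashMap is imported:

//Mapping from MultiLayerNetwork parameter keys to the corresponding ComputationGraph keys
Map<String, String> keyMapping = new HashMap<>();
keyMapping.put("0_W", "firstLayer_W");
keyMapping.put("0_b", "firstLayer_b");
keyMapping.put("1_W", "outputLayer_W");
keyMapping.put("1_b", "outputLayer_b");

for (Map.Entry<String, String> e : keyMapping.entrySet()) {
    assertEquals(netGrad.getGradientFor(e.getKey()), graphGrad.getGradientFor(e.getValue()));
}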
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class SubsamplingLayerTest, method createPrevGradient:
private Gradient createPrevGradient() {
    Gradient gradient = new DefaultGradient();
    INDArray pseudoGradients = Nd4j.ones(nExamples, nChannelsIn, inputHeight, inputWidth);
    gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, pseudoGradients);
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, pseudoGradients);
    return gradient;
}
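This helper populates the gradient by writing straight into the map returned by gradientForVariable(). DefaultGradient also exposes setGradientFor(...), as used in testGradientUpdate above, which achieves the same thing a little more explicitly. A sketch of an equivalent helper written that way; the method name is ours, and nExamples, nChannelsIn, inputHeight and inputWidth are the same test-class fields used above:

private Gradient createPrevGradientWithSetter() {
    Gradient gradient = new DefaultGradient();
    //Same all-ones pseudo-gradient as above, registered via setGradientFor instead of the raw map
    INDArray pseudoGradients = Nd4j.ones(nExamples, nChannelsIn, inputHeight, inputWidth);
    gradient.setGradientFor(DefaultParamInitializer.BIAS_KEY, pseudoGradients);
    gradient.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, pseudoGradients);
    return gradient;
}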
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From the class SubsamplingLayerTest, method testSubSampleLayerNoneBackprop:
@Test
public void testSubSampleLayerNoneBackprop() throws Exception {
    INDArray expectedContainedEpsilonInput =
                    Nd4j.create(new double[] {1., 1., 1., 1., 1., 1., 1., 1.}, new int[] {1, 2, 2, 2});
    INDArray expectedContainedEpsilonResult =
                    Nd4j.create(new double[] {1., 1., 1., 1., 1., 1., 1., 1.}, new int[] {1, 2, 2, 2});
    INDArray input = getContainedData();

    Layer layer = getSubsamplingLayer(SubsamplingLayer.PoolingType.NONE);
    layer.setInput(input);

    Pair<Gradient, INDArray> containedOutput = layer.backpropGradient(expectedContainedEpsilonInput);
    assertEquals(expectedContainedEpsilonResult, containedOutput.getSecond());
    //Subsampling layers have no trainable parameters, so there is no "W" gradient
    assertEquals(null, containedOutput.getFirst().getGradientFor("W"));
    assertEquals(expectedContainedEpsilonResult.shape().length, containedOutput.getSecond().shape().length);

    INDArray input2 = getData();
    layer.activate(input2);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon);
    // depth retained
    assertEquals(depth, out.getSecond().size(1));
}
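The interesting part of this example is the Gradient half of the returned Pair: a subsampling layer has no trainable parameters, so it contributes nothing to a parameter update, and with PoolingType.NONE the expected backpropagated epsilon equals the epsilon that went in. A slightly stronger version of the "no W gradient" check, under the assumption (implied but not asserted by the test) that the returned gradient map is empty rather than merely missing the "W" entry; assertTrue would come from the usual org.junit.Assert static imports:

Pair<Gradient, INDArray> noneOutput = layer.backpropGradient(expectedContainedEpsilonInput);
//Assumption: no trainable parameters means an empty per-variable gradient map
assertTrue(noneOutput.getFirst().gradientForVariable().isEmpty());
//With PoolingType.NONE the expected epsilon out equals the epsilon in (per the test's expected arrays)
assertEquals(expectedContainedEpsilonInput, noneOutput.getSecond());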