Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From class TestDecayPolicies, method testLearningRateExponentialDecaySingleLayer.
@Test
public void testLearningRateExponentialDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
            .learningRateDecayPolicy(LearningRatePolicy.Exponential).lrPolicyDecayRate(decayRate)
            .iterations(iterations)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcExponentialDecay(lr, decayRate, i);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
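The fixture fields nIn, nOut, weightGradient and biasGradient, and the helper calcExponentialDecay, are defined elsewhere in TestDecayPolicies and are not shown in this snippet. A minimal sketch of the expected-value helper, assuming the Caffe-style exponential policy (base learning rate multiplied by the decay rate raised to the iteration) that LearningRatePolicy.Exponential is being compared against here:

// Sketch only: assumed to mirror the decay applied by LearningRatePolicy.Exponential
private static double calcExponentialDecay(double lr, double decayRate, double iteration) {
    return lr * Math.pow(decayRate, iteration);
}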
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From class TestDecayPolicies, method testLearningRateStepDecaySingleLayer.
@Test
public void testLearningRateStepDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double steps = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr)
            .learningRateDecayPolicy(LearningRatePolicy.Step).lrPolicyDecayRate(decayRate)
            .lrPolicySteps(steps).iterations(iterations)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcStepDecay(lr, decayRate, i, steps);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
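Like the exponential case, the helper calcStepDecay is defined elsewhere in the test class. A minimal sketch, assuming the Caffe-style step policy that decays once every lrPolicySteps iterations:

// Sketch only: assumed to mirror LearningRatePolicy.Step, which decays every 'steps' iterations
private static double calcStepDecay(double lr, double decayRate, double iteration, double steps) {
    return lr * Math.pow(decayRate, Math.floor(iteration / steps));
}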
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From class TestVariableLengthTS, method testVariableLengthSimple.
@Test
public void testVariableLengthSimple() {
    //Test: Simple RNN layer + RNNOutputLayer
    //Length of 4 for standard
    //Length of 5 with last time step output mask set to 0
    //Expect the same gradients etc in both cases...
    int[] miniBatchSizes = { 1, 2, 5 };
    int nOut = 1;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345).list()
                .layer(0, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                .layer(1, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labelMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            labelMask.putScalar(new int[] { j, 4 }, 0);
        }
        net.setInput(in1);
        net.setLabels(labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        net.setInput(in2);
        net.setLabels(labels2);
        net.setLayerMaskArrays(null, labelMask);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        //Scores and gradients should be identical for two cases (given mask array)
        assertEquals(score1, score2, 0.0);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            assertEquals(s, g1s, g2s);
        }
        //Modify the labels at the masked (last) time step; neither (a) score nor (b) gradients should change
        for (int i = 0; i < nExamples; i++) {
            for (int j = 0; j < nOut; j++) {
                double d = r.nextDouble();
                labels2.putScalar(new int[] { i, j, 4 }, d);
            }
            net.setLabels(labels2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 0.0);
            for (String s : g2map.keySet()) {
                INDArray g2s = g2map.get(s);
                INDArray g2sa = g2a.getGradientFor(s);
                assertEquals(s, g2s, g2sa);
            }
        }
    }
}
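The test drives masking through setLayerMaskArrays directly so it can compare scores and gradients; in ordinary training the same per-time-step label mask is attached to the data instead. A minimal sketch, assuming the standard four-argument org.nd4j.linalg.dataset.DataSet constructor (features, labels, feature mask, label mask):

// Sketch: package the padded arrays and the label mask into a DataSet, then fit as usual.
// A null feature mask means every input time step is used; the label mask zeros out step 4.
DataSet padded = new DataSet(in2, labels2, null, labelMask);
net.fit(padded);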
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From class TestVariableLengthTS, method testInputMasking.
@Test
public void testInputMasking() {
    //Idea: have masking on the input with 2 dense layers on input
    //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked
    int[] miniBatchSizes = { 1, 2, 5 };
    int nIn = 2;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345).list()
                .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build())
                .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
                .inputPreProcessor(2, new FeedForwardToRnnPreProcessor())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] { j, 4 }, 0);
        }
        net.setInput(in1);
        net.setLabels(labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map1 = g1.gradientForVariable();
        for (String s : map1.keySet()) {
            //Note: gradients are a view normally -> second computeGradientAndScore would have modified the original gradient map values...
            map1.put(s, map1.get(s).dup());
        }
        net.setInput(in2);
        net.setLabels(labels2);
        net.setLayerMaskArrays(inputMask, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        List<INDArray> activations2 = net.feedForward();
        //Scores should differ here: masking the input, not the output. Therefore 4 vs. 5 time step outputs
        assertNotEquals(score1, score2, 0.01);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            System.out.println("-------");
            System.out.println("Variable: " + s);
            System.out.println(Arrays.toString(g1s.dup().data().asFloat()));
            System.out.println(Arrays.toString(g2s.dup().data().asFloat()));
            assertNotEquals(s, g1s, g2s);
        }
        //Modify the values at the masked time step, and check that the gradients, score and activations do not change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] { j, k, 4 }, r.nextDouble());
            }
            net.setInput(in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }
            List<INDArray> activations2a = net.feedForward();
            for (int k = 1; k < activations2.size(); k++) {
                assertEquals(activations2.get(k), activations2a.get(k));
            }
        }
        //Finally: check that the activations for the first two (dense) layers are zero at the appropriate time step
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get(1);
        INDArray l1Before = activations2.get(2);
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
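The FeedForwardToRnnPreProcessor used at the end reshapes 2d feed-forward activations of shape [miniBatchSize * timeSeriesLength, layerSize] back into the 3d RNN shape [miniBatchSize, layerSize, timeSeriesLength], which is why the per-time-step activations can then be indexed with getDouble(example, unit, timeStep). A small shape-only sketch of that round trip, assuming RnnToFeedForwardPreProcessor has the same preProcess(INDArray, int) signature the test uses for its counterpart (values here are illustrative, not from the test):

// Sketch of the reshaping performed by the preprocessors (3 examples, 2 units, 5 time steps)
INDArray rnnActs = Nd4j.rand(new int[] { 3, 2, 5 });                           // [miniBatch, size, tsLength]
INDArray ffActs = new RnnToFeedForwardPreProcessor().preProcess(rnnActs, 3);   // -> shape [15, 2]
INDArray backToRnn = new FeedForwardToRnnPreProcessor().preProcess(ffActs, 3); // -> shape [3, 2, 5]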
Use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.
From class MultiLayerTest, method testBackpropGradient.
@Test
public void testBackpropGradient() {
    //Testing: MultiLayerNetwork.backpropGradient()
    //i.e., specifically without an output layer
    int nIn = 10;
    int nOut = 40;
    int miniBatch = 5;
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(org.deeplearning4j.nn.conf.Updater.SGD).learningRate(0.1).list()
            .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(20).activation(Activation.RELU).weightInit(WeightInit.XAVIER).build())
            .layer(1, new DenseLayer.Builder().nIn(20).nOut(30).activation(Activation.RELU).weightInit(WeightInit.XAVIER).build())
            .layer(2, new DenseLayer.Builder().nIn(30).nOut(nOut).activation(Activation.RELU).weightInit(WeightInit.XAVIER).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    Nd4j.getRandom().setSeed(12345);
    INDArray eps = Nd4j.rand(miniBatch, nOut);
    INDArray input = Nd4j.rand(miniBatch, nIn);
    //Need to feed forward before backprop
    net.feedForward(input);
    Pair<Gradient, INDArray> pair = net.backpropGradient(eps);
    INDArray epsOut = pair.getSecond();
    assertNotNull(epsOut);
    assertArrayEquals(new int[] { miniBatch, nIn }, epsOut.shape());
    Gradient g = pair.getFirst();
    Map<String, INDArray> gradMap = g.gradientForVariable();
    //3 layers, weight + bias gradients for each
    assertEquals(6, gradMap.size());
    String[] expKeys = { "0_" + DefaultParamInitializer.WEIGHT_KEY, "0_" + DefaultParamInitializer.BIAS_KEY,
            "1_" + DefaultParamInitializer.WEIGHT_KEY, "1_" + DefaultParamInitializer.BIAS_KEY,
            "2_" + DefaultParamInitializer.WEIGHT_KEY, "2_" + DefaultParamInitializer.BIAS_KEY };
    Set<String> keys = gradMap.keySet();
    for (String s : expKeys) {
        assertTrue(keys.contains(s));
    }
    /*
    System.out.println(pair);
    //Use updater to go from raw gradients -> updates
    //Apply learning rate, gradient clipping, adagrad/momentum/rmsprop etc
    Updater updater = UpdaterCreator.getUpdater(net);
    updater.update(net, g, 0, miniBatch);
    StepFunction stepFunction = new NegativeGradientStepFunction();
    INDArray params = net.params();
    System.out.println(Arrays.toString(params.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 10)).dup().data().asFloat()));
    stepFunction.step(params, g.gradient());
    net.setParams(params); //params() may not be in-place
    System.out.println(Arrays.toString(params.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 10)).dup().data().asFloat()));
    */
}
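The commented-out block above sketches the usual follow-up: an Updater turns the raw backprop gradients into updates (learning rate, clipping, momentum, etc.) before a step function applies them to the parameters. For plain SGD with no clipping, that step amounts to subtracting the scaled flattened gradient from the flattened parameters; a minimal hand-rolled sketch of that idea (an illustration, not the API the test exercises):

// Hand-rolled SGD step, equivalent in spirit to the commented-out updater/step-function code
double learningRate = 0.1;
INDArray flatParams = net.params().dup();            // dup(): params() may be a view
flatParams.subi(g.gradient().mul(learningRate));     // theta <- theta - lr * dL/dtheta
net.setParams(flatParams);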