Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
From the class OutputLayerTest, method test3:
@Test
public void test3() {
    org.nd4j.linalg.dataset.api.iterator.DataSetIterator iter = new IrisDataSetIterator(150, 150);
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .iterations(3).miniBatch(false)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(4).nOut(3).activation(Activation.SOFTMAX)
                                    .weightInit(WeightInit.XAVIER).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    org.deeplearning4j.nn.layers.OutputLayer layer =
                    (org.deeplearning4j.nn.layers.OutputLayer) conf.getLayer().instantiate(conf, null, 0, params, true);
    layer.setBackpropGradientsViewArray(Nd4j.create(1, params.length()));
    DataSet next = iter.next();
    next.normalizeZeroMeanZeroUnitVariance();
    layer.setListeners(new ScoreIterationListener(1));
    layer.fit(next);
}
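A minimal follow-up sketch, not part of the original test, showing how the fitted layer could be scored on the same normalized batch. It assumes the DL4J 0.x-era APIs used above (org.deeplearning4j.eval.Evaluation, BaseOutputLayer#output(INDArray), DataSet#getFeatureMatrix()):

// Illustrative only: evaluate the fitted output layer on the training batch.
Evaluation eval = new Evaluation(3); // three Iris classes
INDArray predictions = layer.output(next.getFeatureMatrix());
eval.eval(next.getLabels(), predictions);
System.out.println(eval.stats());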
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
From the class GravesBidirectionalLSTMTest, method testSimpleForwardsAndBackwardsActivation:
@Test
public void testSimpleForwardsAndBackwardsActivation() {
    final int nIn = 2;
    final int layerSize = 3;
    final int miniBatchSize = 1;
    final int timeSeriesLength = 5;
    Nd4j.getRandom().setSeed(12345);
    final NeuralNetConfiguration confBidirectional = new NeuralNetConfiguration.Builder()
                    .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                                    .nIn(nIn).nOut(layerSize)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.1, 0.1))
                                    .activation(Activation.TANH).updater(Updater.NONE).build())
                    .build();
    final NeuralNetConfiguration confForwards = new NeuralNetConfiguration.Builder()
                    .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                                    .nIn(nIn).nOut(layerSize)
                                    .weightInit(WeightInit.ZERO)
                                    .activation(Activation.TANH).build())
                    .build();
    int numParams = confForwards.getLayer().initializer().numParams(confForwards);
    INDArray params = Nd4j.create(1, numParams);
    int numParamsBD = confBidirectional.getLayer().initializer().numParams(confBidirectional);
    INDArray paramsBD = Nd4j.create(1, numParamsBD);
    final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getLayer()
                    .instantiate(confBidirectional, null, 0, paramsBD, true);
    final GravesLSTM forwardsLSTM = (GravesLSTM) confForwards.getLayer()
                    .instantiate(confForwards, null, 0, params, true);
    bidirectionalLSTM.setBackpropGradientsViewArray(
                    Nd4j.create(1, confBidirectional.getLayer().initializer().numParams(confBidirectional)));
    forwardsLSTM.setBackpropGradientsViewArray(
                    Nd4j.create(1, confForwards.getLayer().initializer().numParams(confForwards)));
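    // Random input signal, plus a copy of its single example with the time axis reversed
    // (used later to exercise the backwards half of the bidirectional layer).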
    final INDArray sig = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
    final INDArray sigb = sig.dup();
    reverseColumnsInPlace(sigb.slice(0));
    final INDArray recurrentWeightsF = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS);
    final INDArray inputWeightsF = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS);
    final INDArray biasWeightsF = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS);
    final INDArray recurrentWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    final INDArray inputWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    final INDArray biasWeightsF2 = forwardsLSTM.getParam(GravesLSTMParamInitializer.BIAS_KEY);
    //assert that the forwards half of the bidirectional layer has the same parameter shapes as the regular LSTM
    assertArrayEquals(recurrentWeightsF2.shape(), recurrentWeightsF.shape());
    assertArrayEquals(inputWeightsF2.shape(), inputWeightsF.shape());
    assertArrayEquals(biasWeightsF2.shape(), biasWeightsF.shape());
    //copy the forwards weights into the regular LSTM so its activations match the forwards half
    forwardsLSTM.setParam(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY, recurrentWeightsF);
    forwardsLSTM.setParam(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY, inputWeightsF);
    forwardsLSTM.setParam(GravesLSTMParamInitializer.BIAS_KEY, biasWeightsF);
    final INDArray recurrentWeightsB = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    final INDArray inputWeightsB = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    final INDArray biasWeightsB = bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);
    //assert that the forwards and backwards params have the same shapes
    assertArrayEquals(recurrentWeightsF.shape(), recurrentWeightsB.shape());
    assertArrayEquals(inputWeightsF.shape(), inputWeightsB.shape());
    assertArrayEquals(biasWeightsF.shape(), biasWeightsB.shape());
    //zero out the backwards half
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS, Nd4j.zeros(recurrentWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, Nd4j.zeros(inputWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS, Nd4j.zeros(biasWeightsB.shape()));
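    // With the backwards half zeroed, the bidirectional layer's output should exactly
    // match the plain (forwards-only) LSTM on the same signal.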
    forwardsLSTM.setInput(sig);
    //compare activations
    final INDArray activation1 = forwardsLSTM.activate(sig).slice(0);
    final INDArray activation2 = bidirectionalLSTM.activate(sig).slice(0);
    assertArrayEquals(activation1.data().asFloat(), activation2.data().asFloat(), 1e-5f);
    final INDArray randSig = Nd4j.rand(new int[] { 1, layerSize, timeSeriesLength });
    final INDArray randSigBackwards = randSig.dup();
    reverseColumnsInPlace(randSigBackwards.slice(0));
    final Pair<Gradient, INDArray> backprop1 = forwardsLSTM.backpropGradient(randSig);
    final Pair<Gradient, INDArray> backprop2 = bidirectionalLSTM.backpropGradient(randSig);
    //compare the gradients of the forwards half against the regular LSTM
    assertArrayEquals(backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY).dup().data().asFloat(),
                    backprop2.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS).dup().data().asFloat(), 1e-5f);
    assertArrayEquals(backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY).dup().data().asFloat(),
                    backprop2.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS).dup().data().asFloat(), 1e-5f);
    assertArrayEquals(backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.BIAS_KEY).dup().data().asFloat(),
                    backprop2.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS).dup().data().asFloat(), 1e-5f);
    //copy the forwards weights into the backwards half
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS,
                    bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS,
                    bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS,
                    bidirectionalLSTM.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS));
    //zero out the forwards half
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS, Nd4j.zeros(recurrentWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, Nd4j.zeros(inputWeightsB.shape()));
    bidirectionalLSTM.setParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS, Nd4j.zeros(biasWeightsB.shape()));
    //run on the reversed signal: un-reversing the output should reproduce the forwards activations
    final INDArray activation3 = bidirectionalLSTM.activate(sigb).slice(0);
    final INDArray activation3Reverse = activation3.dup();
    reverseColumnsInPlace(activation3Reverse);
    assertArrayEquals(activation3Reverse.shape(), activation1.shape());
    assertEquals(activation3Reverse, activation1);
    //test backprop now
    final INDArray refBackGradientRecurrent = backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
    final INDArray refBackGradientInput = backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
    final INDArray refBackGradientBias = backprop1.getFirst().getGradientFor(GravesLSTMParamInitializer.BIAS_KEY);
    //reversed weights with the backwards signal should yield the same gradients as forwards weights with the forwards signal
    final Pair<Gradient, INDArray> backprop3 = bidirectionalLSTM.backpropGradient(randSigBackwards);
    final INDArray backGradientRecurrent = backprop3.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    final INDArray backGradientInput = backprop3.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    final INDArray backGradientBias = backprop3.getFirst().getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);
    assertArrayEquals(refBackGradientBias.dup().data().asDouble(), backGradientBias.dup().data().asDouble(), 1e-6);
    assertArrayEquals(refBackGradientInput.dup().data().asDouble(), backGradientInput.dup().data().asDouble(), 1e-6);
    assertArrayEquals(refBackGradientRecurrent.dup().data().asDouble(), backGradientRecurrent.dup().data().asDouble(), 1e-6);
    final INDArray refEpsilon = backprop1.getSecond().dup();
    final INDArray backEpsilon = backprop3.getSecond().dup();
    reverseColumnsInPlace(refEpsilon.slice(0));
    assertArrayEquals(backEpsilon.dup().data().asDouble(), refEpsilon.dup().data().asDouble(), 1e-6);
}
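The helper reverseColumnsInPlace is defined elsewhere in the test class and is not shown above. A hypothetical reconstruction, assuming it simply reverses the column (time-step) order of a 2D [features, timeSeriesLength] slice using standard ND4J calls:

// Hypothetical sketch, not the original helper: reverse the column order of a 2D array in place.
private static void reverseColumnsInPlace(INDArray x) {
    final int nCols = x.columns();
    final INDArray copy = x.dup();
    for (int c = 0; c < nCols; c++) {
        x.putColumn(c, copy.getColumn(nCols - c - 1));
    }
}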
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
From the class GravesBidirectionalLSTMTest, method testGravesBackwardBasicHelper:
private static void testGravesBackwardBasicHelper(int nIn, int nOut, int lstmNHiddenUnits, int miniBatchSize, int timeSeriesLength) {
    INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength);
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                                    .nIn(nIn).nOut(lstmNHiddenUnits)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                    .activation(Activation.TANH).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    GravesBidirectionalLSTM lstm = (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
    lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getLayer().initializer().numParams(conf)));
    //Set input, do a forward pass:
    lstm.activate(inputData);
    assertNotNull(lstm.input());
    INDArray epsilon = Nd4j.ones(miniBatchSize, lstmNHiddenUnits, timeSeriesLength);
    Pair<Gradient, INDArray> out = lstm.backpropGradient(epsilon);
    Gradient outGradient = out.getFirst();
    INDArray nextEpsilon = out.getSecond();
    INDArray biasGradientF = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS);
    INDArray inWeightGradientF = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS);
    INDArray recurrentWeightGradientF = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS);
    assertNotNull(biasGradientF);
    assertNotNull(inWeightGradientF);
    assertNotNull(recurrentWeightGradientF);
    INDArray biasGradientB = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);
    INDArray inWeightGradientB = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
    INDArray recurrentWeightGradientB = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
    assertNotNull(biasGradientB);
    assertNotNull(inWeightGradientB);
    assertNotNull(recurrentWeightGradientB);
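    // Expected gradient shapes per direction: bias [1, 4*nHidden], input weights [nIn, 4*nHidden],
    // recurrent weights [nHidden, 4*nHidden + 3]; the extra 3 columns hold the peephole
    // weights of the Graves LSTM formulation.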
    assertArrayEquals(biasGradientF.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
    assertArrayEquals(inWeightGradientF.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
    assertArrayEquals(recurrentWeightGradientF.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });
    assertArrayEquals(biasGradientB.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
    assertArrayEquals(inWeightGradientB.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
    assertArrayEquals(recurrentWeightGradientB.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });
    assertNotNull(nextEpsilon);
    assertArrayEquals(nextEpsilon.shape(), new int[] { miniBatchSize, nIn, timeSeriesLength });
    //Check update:
    for (String s : outGradient.gradientForVariable().keySet()) {
        lstm.update(outGradient.getGradientFor(s), s);
    }
}
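The helper is parameterized so the surrounding test class can exercise many shape combinations. A hypothetical invocation (the concrete sizes here are illustrative, not taken from the original test class):

testGravesBackwardBasicHelper(13, 3, 17, 10, 7); // nIn, nOut, lstmNHiddenUnits, miniBatchSize, timeSeriesLength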
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
From the class GravesLSTMTest, method testLSTMGravesForwardBasic:
@Test
public void testLSTMGravesForwardBasic() {
    //Very basic test of the forward pass for an LSTM layer with a time series.
    //Essentially make sure it doesn't throw any exceptions and produces output of the correct shape.
    int nIn = 13;
    int nHiddenUnits = 17;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                    .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                                    .nIn(nIn).nOut(nHiddenUnits)
                                    .activation(Activation.TANH).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    GravesLSTM layer = (GravesLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
    //Input data has shape [miniBatchSize, nIn, timeSeriesLength];
    //output/activations have shape [miniBatchSize, nHiddenUnits, timeSeriesLength].
    INDArray dataSingleExampleTimeLength1 = Nd4j.ones(1, nIn, 1);
    INDArray activations1 = layer.activate(dataSingleExampleTimeLength1);
    assertArrayEquals(activations1.shape(), new int[] { 1, nHiddenUnits, 1 });
    INDArray dataMultiExampleLength1 = Nd4j.ones(10, nIn, 1);
    INDArray activations2 = layer.activate(dataMultiExampleLength1);
    assertArrayEquals(activations2.shape(), new int[] { 10, nHiddenUnits, 1 });
    INDArray dataSingleExampleLength12 = Nd4j.ones(1, nIn, 12);
    INDArray activations3 = layer.activate(dataSingleExampleLength12);
    assertArrayEquals(activations3.shape(), new int[] { 1, nHiddenUnits, 12 });
    INDArray dataMultiExampleLength15 = Nd4j.ones(10, nIn, 15);
    INDArray activations4 = layer.activate(dataMultiExampleLength15);
    assertArrayEquals(activations4.shape(), new int[] { 10, nHiddenUnits, 15 });
}
Use of org.deeplearning4j.nn.conf.NeuralNetConfiguration in project deeplearning4j by deeplearning4j.
From the class LocalResponseTest, method testLrnManual:
@Test
public void testLrnManual() {
    int wh = 5;
    int depth = 6;
    int minibatch = 3;
    int n = 4;
    double k = 2.0;
    double alpha = 1e-4;
    double beta = 0.75;
    INDArray in = Nd4j.rand(new int[] { minibatch, depth, wh, wh });
    INDArray outExp = Nd4j.zeros(minibatch, depth, wh, wh);
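    // Reference cross-channel LRN computed by hand:
    //   out[m,i,x,y] = in[m,i,x,y] / (k + alpha * sum_j in[m,j,x,y]^2)^beta
    // where j ranges over the channel window [i - n/2, i + n/2], clipped to [0, depth - 1].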
    for (int m = 0; m < minibatch; m++) {
        for (int x = 0; x < wh; x++) {
            for (int y = 0; y < wh; y++) {
                for (int i = 0; i < depth; i++) {
                    int jFrom = Math.max(0, i - n / 2);
                    int jTo = Math.min(depth - 1, i + n / 2);
                    double sum = 0.0;
                    for (int j = jFrom; j <= jTo; j++) {
                        double d = in.getDouble(m, j, x, y);
                        sum += d * d;
                    }
                    double out = in.getDouble(m, i, x, y) / Math.pow(k + alpha * sum, beta);
                    outExp.putScalar(m, i, x, y, out);
                }
            }
        }
    }
    LocalResponseNormalization lrn = new LocalResponseNormalization.Builder().build();
    NeuralNetConfiguration nnc = new NeuralNetConfiguration.Builder().layer(lrn).build();
    org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization layer =
                    (org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization) lrn.instantiate(nnc, null, 0, null, false);
    INDArray outAct = layer.activate(in, true);
    assertEquals(outExp, outAct);
}