Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
The class GravesBidirectionalLSTMTest, method testGravesBackwardBasicHelper:
private static void testGravesBackwardBasicHelper(int nIn, int nOut, int lstmNHiddenUnits, int miniBatchSize, int timeSeriesLength) {
INDArray inputData = Nd4j.ones(miniBatchSize, nIn, timeSeriesLength);
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                                .nIn(nIn).nOut(lstmNHiddenUnits)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                .activation(Activation.TANH).build())
                .build();
int numParams = conf.getLayer().initializer().numParams(conf);
INDArray params = Nd4j.create(1, numParams);
GravesBidirectionalLSTM lstm = (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
lstm.setBackpropGradientsViewArray(Nd4j.create(1, conf.getLayer().initializer().numParams(conf)));
//Set input, do a forward pass:
lstm.activate(inputData);
assertNotNull(lstm.input());
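// Simulate the epsilon (dL/dActivations) that a layer above would pass down during backprop.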
INDArray epsilon = Nd4j.ones(miniBatchSize, lstmNHiddenUnits, timeSeriesLength);
Pair<Gradient, INDArray> out = lstm.backpropGradient(epsilon);
Gradient outGradient = out.getFirst();
INDArray nextEpsilon = out.getSecond();
INDArray biasGradientF = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS);
INDArray inWeightGradientF = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS);
INDArray recurrentWeightGradientF = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS);
assertNotNull(biasGradientF);
assertNotNull(inWeightGradientF);
assertNotNull(recurrentWeightGradientF);
INDArray biasGradientB = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS);
INDArray inWeightGradientB = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS);
INDArray recurrentWeightGradientB = outGradient.getGradientFor(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS);
assertNotNull(biasGradientB);
assertNotNull(inWeightGradientB);
assertNotNull(recurrentWeightGradientB);
assertArrayEquals(biasGradientF.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
assertArrayEquals(inWeightGradientF.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
assertArrayEquals(recurrentWeightGradientF.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });
assertArrayEquals(biasGradientB.shape(), new int[] { 1, 4 * lstmNHiddenUnits });
assertArrayEquals(inWeightGradientB.shape(), new int[] { nIn, 4 * lstmNHiddenUnits });
assertArrayEquals(recurrentWeightGradientB.shape(), new int[] { lstmNHiddenUnits, 4 * lstmNHiddenUnits + 3 });
assertNotNull(nextEpsilon);
assertArrayEquals(nextEpsilon.shape(), new int[] { miniBatchSize, nIn, timeSeriesLength });
//Check update:
for (String s : outGradient.gradientForVariable().keySet()) {
    lstm.update(outGradient.getGradientFor(s), s);
}
}
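For context, the enclosing test class drives this helper with a range of sizes; the call below is purely illustrative (the numbers are assumptions, not values taken from the original suite) and shows how the parameters map onto the shapes asserted above. Note that the nOut argument is accepted but not used by the helper body.
// Hypothetical invocation; the sizes are illustrative assumptions only.
testGravesBackwardBasicHelper(13, 3, 17, 10, 7);
// With nIn=13, lstmNHiddenUnits=17, miniBatchSize=10, timeSeriesLength=7, the forward-direction
// gradients asserted above have the shapes:
//   bias:              [1, 4 * 17]      = [1, 68]
//   input weights:     [13, 4 * 17]     = [13, 68]
//   recurrent weights: [17, 4 * 17 + 3] = [17, 71]   (the 3 extra columns hold the peephole weights)
// and nextEpsilon has shape [10, 13, 7], matching the input.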
Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
The class GravesBidirectionalLSTMTest, method testConvergence:
@Test
@Ignore
public void testConvergence() {
Nd4j.getRandom().setSeed(12345);
final int state1Len = 100;
final int state2Len = 30;
//segment by signal mean
//Data: has shape [miniBatchSize,nIn,timeSeriesLength];
final INDArray sig1 = Nd4j.randn(new int[] { 1, 2, state1Len }).mul(0.1);
final INDArray sig2 = Nd4j.randn(new int[] { 1, 2, state2Len }).mul(0.1).add(Nd4j.ones(new int[] { 1, 2, state2Len }).mul(1.0));
INDArray sig = Nd4j.concat(2, sig1, sig2);
INDArray labels = Nd4j.zeros(new int[] { 1, 2, state1Len + state2Len });
for (int t = 0; t < state1Len; t++) {
    labels.putScalar(new int[] { 0, 0, t }, 1.0);
}
for (int t = state1Len; t < state1Len + state2Len; t++) {
    labels.putScalar(new int[] { 0, 1, t }, 1.0);
}
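// Each pass through the loop below doubles the sequence along the time axis, so sig and labels end up 8x their original length.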
for (int i = 0; i < 3; i++) {
    sig = Nd4j.concat(2, sig, sig);
    labels = Nd4j.concat(2, labels, labels);
}
final DataSet ds = new DataSet(sig, labels);
final MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .iterations(5).learningRate(0.1).rmsDecay(0.95)
                .regularization(true).l2(0.001)
                .updater(Updater.ADAGRAD).seed(12345)
                .list().pretrain(false)
                .layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                                .activation(Activation.TANH).nIn(2).nOut(2)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.05, 0.05)).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                                .activation(Activation.TANH).nIn(2).nOut(2)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.05, 0.05)).build())
                .layer(2, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder()
                                .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(2).nOut(2)
                                .activation(Activation.TANH).build())
                .backprop(true).build();
final MultiLayerNetwork net = new MultiLayerNetwork(conf);
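// 'score' is a field on the enclosing test class (not shown here); the listener below copies the
// model score into it after every iteration so the epoch loop can compare it against the previous value.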
final IterationListener scoreSaver = new IterationListener() {

    @Override
    public boolean invoked() {
        return false;
    }

    @Override
    public void invoke() {
    }

    @Override
    public void iterationDone(Model model, int iteration) {
        score = model.score();
    }
};
net.setListeners(scoreSaver, new ScoreIterationListener(1));
double oldScore = Double.POSITIVE_INFINITY;
net.init();
for (int iEpoch = 0; iEpoch < 3; iEpoch++) {
    net.fit(ds);
    System.out.print(String.format("score is %f%n", score));
    assertTrue(!Double.isNaN(score));
    assertTrue(score < 0.9 * oldScore);
    oldScore = score;
    final INDArray output = net.output(ds.getFeatureMatrix());
    Evaluation evaluation = new Evaluation();
    evaluation.evalTimeSeries(ds.getLabels(), output);
    System.out.print(evaluation.stats() + "\n");
}
}
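Every configuration on this page uses the same weight-initialization idiom: select WeightInit.DISTRIBUTION and supply the distribution through dist(...). A minimal sketch of the pattern in isolation (the DenseLayer type and the bounds are placeholders chosen for illustration, assuming the same 0.x-era DL4J builder API and imports used in the tests above):
// Minimal sketch; layer type and bounds are illustrative assumptions.
DenseLayer layer = new DenseLayer.Builder()
        .nIn(4).nOut(3)
        .weightInit(WeightInit.DISTRIBUTION)          // tell the initializer to sample from dist(...)
        .dist(new UniformDistribution(-0.05, 0.05))   // i.i.d. uniform draws on [-0.05, 0.05]
        .build();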
Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
The class MultiLayerTest, method testDbn:
@Test
public void testDbn() throws Exception {
Nd4j.MAX_SLICES_TO_PRINT = -1;
Nd4j.MAX_ELEMENTS_PER_SLICE = -1;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .iterations(100).momentum(0.9)
                .optimizationAlgo(OptimizationAlgorithm.LBFGS)
                .regularization(true).l2(2e-4).list()
                .layer(0, new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, RBM.VisibleUnit.GAUSSIAN)
                                .nIn(4).nOut(3)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                .activation(Activation.TANH)
                                .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .nIn(3).nOut(3)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                .activation(Activation.SOFTMAX).build())
                .build();
MultiLayerNetwork d = new MultiLayerNetwork(conf);
DataSetIterator iter = new IrisDataSetIterator(150, 150);
DataSet next = iter.next();
Nd4j.writeTxt(next.getFeatureMatrix(), "iris.txt", "\t");
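// Standardize the features (zero mean, unit variance), then split the 150 Iris examples into 110 train / 40 test.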
next.normalizeZeroMeanZeroUnitVariance();
SplitTestAndTrain testAndTrain = next.splitTestAndTrain(110);
DataSet train = testAndTrain.getTrain();
d.fit(train);
DataSet test = testAndTrain.getTest();
Evaluation eval = new Evaluation();
INDArray output = d.output(test.getFeatureMatrix());
eval.eval(test.getLabels(), output);
log.info("Score " + eval.stats());
}
Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
The class MultiLayerTest, method testTranspose:
@Test
public void testTranspose() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .iterations(100).momentum(0.9)
                .regularization(true).l2(2e-4).list()
                .layer(0, new RBM.Builder(RBM.HiddenUnit.GAUSSIAN, RBM.VisibleUnit.GAUSSIAN)
                                .nIn(4).nOut(3)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                .activation(Activation.TANH)
                                .lossFunction(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .nIn(3).nOut(3)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                .activation(Activation.SOFTMAX).build())
                .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
Layer layer = net.getLayer(0);
int nParamsBackprop = layer.numParams(true);
int nParamsBoth = layer.numParams(false);
Layer transposed = layer.transpose();
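// transpose() builds a layer with nIn and nOut swapped: its weight matrix is the transpose of the
// original's, and the hidden and visible bias vectors exchange roles (verified by the asserts below).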
assertArrayEquals(new int[] { 4, 3 }, layer.getParam(DefaultParamInitializer.WEIGHT_KEY).shape());
assertArrayEquals(new int[] { 1, 3 }, layer.getParam(DefaultParamInitializer.BIAS_KEY).shape());
assertArrayEquals(new int[] { 1, 4 }, layer.getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY).shape());
assertArrayEquals(new int[] { 3, 4 }, transposed.getParam(DefaultParamInitializer.WEIGHT_KEY).shape());
assertArrayEquals(new int[] { 1, 4 }, transposed.getParam(DefaultParamInitializer.BIAS_KEY).shape());
assertArrayEquals(new int[] { 1, 3 }, transposed.getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY).shape());
INDArray origWeights = layer.getParam(DefaultParamInitializer.WEIGHT_KEY);
INDArray transposedWeights = transposed.getParam(DefaultParamInitializer.WEIGHT_KEY);
assertEquals(origWeights.transpose(), transposedWeights);
assertEquals(layer.getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY), transposed.getParam(DefaultParamInitializer.BIAS_KEY));
assertEquals(layer.getParam(DefaultParamInitializer.BIAS_KEY), transposed.getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY));
assertEquals(3, ((FeedForwardLayer) transposed.conf().getLayer()).getNIn());
assertEquals(4, ((FeedForwardLayer) transposed.conf().getLayer()).getNOut());
}
Use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
The class GravesLSTMTest, method testGravesLSTMForwardPassHelper:
@Test
public void testGravesLSTMForwardPassHelper() throws Exception {
//GravesLSTM.activateHelper() has different behaviour (due to optimizations) when forBackprop==true vs false
//But should otherwise provide identical activations
Nd4j.getRandom().setSeed(12345);
int nIn = 10;
int layerSize = 15;
int miniBatchSize = 4;
int timeSeriesLength = 7;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .layer(new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                                .nIn(nIn).nOut(layerSize)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                                .activation(Activation.TANH).build())
                .build();
int numParams = conf.getLayer().initializer().numParams(conf);
INDArray params = Nd4j.create(1, numParams);
GravesLSTM lstm = (GravesLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
INDArray input = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
lstm.setInput(input);
Method actHelper = GravesLSTM.class.getDeclaredMethod("activateHelper", boolean.class, INDArray.class, INDArray.class, boolean.class);
actHelper.setAccessible(true);
//Call activateHelper with both forBackprop == true, and forBackprop == false and compare
Class<?> innerClass = Class.forName("org.deeplearning4j.nn.layers.recurrent.FwdPassReturn");
//GravesLSTM.FwdPassReturn object; want fwdPassOutput INDArray
Object oFalse = actHelper.invoke(lstm, false, null, null, false);
//want fwdPassOutputAsArrays object
Object oTrue = actHelper.invoke(lstm, false, null, null, true);
Field fwdPassOutput = innerClass.getDeclaredField("fwdPassOutput");
fwdPassOutput.setAccessible(true);
Field fwdPassOutputAsArrays = innerClass.getDeclaredField("fwdPassOutputAsArrays");
fwdPassOutputAsArrays.setAccessible(true);
INDArray fwdPassFalse = (INDArray) fwdPassOutput.get(oFalse);
INDArray[] fwdPassTrue = (INDArray[]) fwdPassOutputAsArrays.get(oTrue);
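// Compare the single fwdPassOutput array against the per-time-step arrays: tensorAlongDimension(i, 1, 0)
// pulls out the activations for time step i as a [miniBatchSize, layerSize] matrix matching fwdPassTrue[i].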
for (int i = 0; i < timeSeriesLength; i++) {
    INDArray sliceFalse = fwdPassFalse.tensorAlongDimension(i, 1, 0);
    INDArray sliceTrue = fwdPassTrue[i];
    assertTrue(sliceFalse.equals(sliceTrue));
}
}