Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class TestHistogramListener, method testUI:
@Test
public void testUI() throws Exception {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .iterations(1)
        .list()
        .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(4).nOut(4).build())
        .layer(1, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX).nIn(4).nOut(3).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
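// Attach listeners: HistogramIterationListener pushes parameter and gradient histograms to the DL4J UI every iteration;
// ScoreIterationListener logs the score every iteration.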
net.setListeners(new HistogramIterationListener(1), new ScoreIterationListener(1));
DataSetIterator iter = new IrisDataSetIterator(150, 150);
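// Fit repeatedly, pausing between calls so the UI can refresh; the long final sleep keeps the JVM (and the UI server) alive for manual inspection.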
for (int i = 0; i < 100; i++) {
net.fit(iter);
Thread.sleep(1000);
}
Thread.sleep(100000);
}
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class TestDecayPolicies, method testMomentumScheduleMLN:
@Test
public void testMomentumScheduleMLN() {
double lr = 1e-2;
double mu = 0.6;
Map<Integer, Double> momentumAfter = new HashMap<>();
momentumAfter.put(1, 0.2);
int iterations = 2;
int[] nIns = { 4, 2 };
int[] nOuts = { 2, 3 };
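// Both layers use the Nesterov updater: momentum starts at 0.6 and is scheduled (via momentumAfter) to change to 0.2 after iteration 1.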
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .learningRate(lr).momentum(mu).momentumAfter(momentumAfter)
        .iterations(iterations)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
                .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
        .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1])
                .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
        .backprop(true).pretrain(false)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
Updater updater = UpdaterCreator.getUpdater(net);
int stateSize = updater.stateSizeForLayer(net);
updater.setStateViewArray(net, Nd4j.create(1, stateSize), true);
String wKey, bKey;
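// Expected gradient: an all-ones array for each layer's weights and biases.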
Gradient gradientExpected = new DefaultGradient();
for (int k = 0; k < net.getnLayers(); k++) {
wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
gradientExpected.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
gradientExpected.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
}
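// The gradient actually passed to the updater: the same all-ones arrays, keyed per layer.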
Gradient gradientMLN = new DefaultGradient();
for (int j = 0; j < 2; j++) {
wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY;
gradientMLN.setGradientFor(wKey, Nd4j.ones(nIns[j], nOuts[j]));
bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY;
gradientMLN.setGradientFor(bKey, Nd4j.ones(1, nOuts[j]));
}
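// Apply the updater for two iterations; testNesterovsComputation (a helper in this test class) computes the expected Nesterov
// update and returns the momentum value that should be in effect, which is then compared against the layer configuration.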
for (int i = 0; i < 2; i++) {
updater.update(net, gradientMLN, i, 1);
mu = testNesterovsComputation(gradientMLN, gradientExpected, lr, mu, momentumAfter, i);
assertEquals(mu, net.getLayer(1).conf().getLayer().getMomentum(), 1e-4);
}
}
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class TestVariableLengthTS, method testVariableLengthSimple:
@Test
public void testVariableLengthSimple() {
//Test: Simple RNN layer + RNNOutputLayer
//Length of 4 for standard
//Length of 5 with last time step output mask set to 0
//Expect the same gradients etc in both cases...
int[] miniBatchSizes = { 1, 2, 5 };
int nOut = 1;
Random r = new Random(12345);
for (int nExamples : miniBatchSizes) {
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .iterations(1)
        .updater(Updater.SGD).learningRate(0.1)
        .seed(12345)
        .list()
        .layer(0, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
        .layer(1, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
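// in2 is in1 with one extra time step appended; the first four time steps of the inputs (and labels) are identical.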
INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
INDArray labels2 = Nd4j.create(nExamples, 1, 5);
labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
INDArray labelMask = Nd4j.ones(nExamples, 5);
for (int j = 0; j < nExamples; j++) {
labelMask.putScalar(new int[] { j, 4 }, 0);
}
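// Case 1: the length-4 input, no mask.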
net.setInput(in1);
net.setLabels(labels1);
net.computeGradientAndScore();
double score1 = net.score();
Gradient g1 = net.gradient();
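// Case 2: the length-5 input with the last time step masked out of the labels.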
net.setInput(in2);
net.setLabels(labels2);
net.setLayerMaskArrays(null, labelMask);
net.computeGradientAndScore();
double score2 = net.score();
Gradient g2 = net.gradient();
//Scores and gradients should be identical for two cases (given mask array)
assertEquals(score1, score2, 0.0);
Map<String, INDArray> g1map = g1.gradientForVariable();
Map<String, INDArray> g2map = g2.gradientForVariable();
for (String s : g1map.keySet()) {
INDArray g1s = g1map.get(s);
INDArray g2s = g2map.get(s);
assertEquals(s, g1s, g2s);
}
//Modify the labels at the masked time step; (a) the score and (b) the gradients should be unchanged
for (int i = 0; i < nExamples; i++) {
for (int j = 0; j < nOut; j++) {
double d = r.nextDouble();
labels2.putScalar(new int[] { i, j, 4 }, d);
}
net.setLabels(labels2);
net.computeGradientAndScore();
double score2a = net.score();
Gradient g2a = net.gradient();
assertEquals(score2, score2a, 0.0);
for (String s : g2map.keySet()) {
INDArray g2s = g2map.get(s);
INDArray g2sa = g2a.getGradientFor(s);
assertEquals(s, g2s, g2sa);
}
}
}
}
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class TestVariableLengthTS, method testOutputMasking:
@Test
public void testOutputMasking() {
//If labels are masked: want zero outputs for that time step.
int nIn = 3;
int[] timeSeriesLengths = { 3, 10 };
int[] outputSizes = { 1, 2, 5 };
int[] miniBatchSizes = { 1, 4 };
Random r = new Random(12345);
for (int tsLength : timeSeriesLengths) {
for (int nOut : outputSizes) {
for (int miniBatch : miniBatchSizes) {
for (int nToMask = 0; nToMask < tsLength - 1; nToMask++) {
INDArray labelMaskArray = Nd4j.ones(miniBatch, tsLength);
for (int i = 0; i < miniBatch; i++) {
//For each example: select which outputs to mask...
int nMasked = 0;
while (nMasked < nToMask) {
int tryIdx = r.nextInt(tsLength);
if (labelMaskArray.getDouble(i, tryIdx) == 0.0)
continue;
labelMaskArray.putScalar(new int[] { i, tryIdx }, 0.0);
nMasked++;
}
}
INDArray input = Nd4j.rand(new int[] { miniBatch, nIn, tsLength });
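// Two networks that differ only in the output layer: MSE + identity vs MCXENT + softmax.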
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .regularization(false)
        .seed(12345L)
        .list()
        .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5)
                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                .updater(Updater.NONE).build())
        .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE)
                .activation(Activation.IDENTITY).nIn(5).nOut(nOut)
                .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
        .regularization(false)
        .seed(12345L)
        .list()
        .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(5)
                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                .updater(Updater.NONE).build())
        .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                .activation(Activation.SOFTMAX).nIn(5).nOut(nOut)
                .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork mln2 = new MultiLayerNetwork(conf2);
mln2.init();
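// Apply the label mask to both networks, then check that the output is exactly zero at every masked time step.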
mln.setLayerMaskArrays(null, labelMaskArray);
mln2.setLayerMaskArrays(null, labelMaskArray);
INDArray out = mln.output(input);
INDArray out2 = mln2.output(input);
for (int i = 0; i < miniBatch; i++) {
for (int j = 0; j < tsLength; j++) {
double m = labelMaskArray.getDouble(i, j);
if (m == 0.0) {
//Expect outputs to be exactly 0.0
INDArray outRow = out.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(j));
INDArray outRow2 = out2.get(NDArrayIndex.point(i), NDArrayIndex.all(), NDArrayIndex.point(j));
for (int k = 0; k < nOut; k++) {
assertEquals(outRow.getDouble(k), 0.0, 0.0);
assertEquals(outRow2.getDouble(k), 0.0, 0.0);
}
}
}
}
}
}
}
}
}
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class TestVariableLengthTS, method testMaskingBidirectionalRnn:
@Test
public void testMaskingBidirectionalRnn() {
//Idea: mask some of the time steps, e.g. [1,1,1,0,0]. We expect the activations for the first 3 time steps
// to be the same as if we had fed in only those first 3 time steps for that example
Nd4j.getRandom().setSeed(12345);
int nIn = 4;
int layerSize = 3;
int nOut = 3;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .weightInit(WeightInit.XAVIER)
        .activation(Activation.TANH)
        .list()
        .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize).build())
        .layer(1, new GravesBidirectionalLSTM.Builder().nIn(layerSize).nOut(layerSize).build())
        .layer(2, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(layerSize).nOut(nOut).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
int tsLength = 5;
int minibatch = 3;
INDArray input = Nd4j.rand(new int[] { minibatch, nIn, tsLength });
INDArray labels = Nd4j.rand(new int[] { minibatch, nOut, tsLength });
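// Per-example masks: example 0 uses all 5 time steps, example 1 the first 4, example 2 the first 3.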
INDArray featuresMask = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 1, 1, 1, 1, 0 }, { 1, 1, 1, 0, 0 } });
INDArray labelsMask = featuresMask.dup();
net.setLayerMaskArrays(featuresMask, labelsMask);
INDArray outMasked = net.output(input);
net.clearLayerMaskArrays();
//Check forward pass:
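// For example i, the masked output over its first (tsLength - i) steps should equal the output of an
// unmasked forward pass on the input truncated to that length.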
for (int i = 0; i < minibatch; i++) {
INDArrayIndex[] idx = new INDArrayIndex[] { NDArrayIndex.interval(i, i, true), NDArrayIndex.all(), NDArrayIndex.interval(0, tsLength - i) };
INDArray expExampleOut = net.output(input.get(idx));
INDArray actualExampleOut = outMasked.get(idx);
// System.out.println(i);
assertEquals(expExampleOut, actualExampleOut);
}
//Also: check the score examples method...
DataSet ds = new DataSet(input, labels, featuresMask, labelsMask);
INDArray exampleScores = net.scoreExamples(ds, false);
//One score per example time series (summed over its time steps)
assertArrayEquals(new int[] { minibatch, 1 }, exampleScores.shape());
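// Each full (masked) example should score the same as its truncated, unmasked counterpart scored on its own.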
for (int i = 0; i < minibatch; i++) {
INDArrayIndex[] idx = new INDArrayIndex[] { NDArrayIndex.interval(i, i, true), NDArrayIndex.all(), NDArrayIndex.interval(0, tsLength - i) };
DataSet dsSingle = new DataSet(input.get(idx), labels.get(idx));
INDArray exampleSingleScore = net.scoreExamples(dsSingle, false);
double exp = exampleSingleScore.getDouble(0);
double act = exampleScores.getDouble(i);
// System.out.println(i + "\t" + exp + "\t" + act);
assertEquals(exp, act, 1e-6);
}
}