use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class TestSetGetParameters method testInitWithParamsCG.
@Test
public void testInitWithParamsCG() {
    Nd4j.getRandom().setSeed(12345);

    //Create configuration. Doesn't matter if this doesn't actually work for a forward/backward pass here
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
                    .addLayer("1", new GravesLSTM.Builder().nIn(10).nOut(10).build(), "in")
                    .addLayer("2", new GravesBidirectionalLSTM.Builder().nIn(10).nOut(10).build(), "in")
                    .addLayer("3", new ConvolutionLayer.Builder().nIn(10).nOut(10).kernelSize(2, 2)
                                    .stride(2, 2).padding(2, 2).build(), "in")
                    .addLayer("4", new OutputLayer.Builder(LossFunction.MCXENT).nIn(10).nOut(10).build(), "3")
                    .addLayer("5", new OutputLayer.Builder(LossFunction.MCXENT).nIn(10).nOut(10).build(), "0")
                    .addLayer("6", new RnnOutputLayer.Builder(LossFunction.MCXENT).nIn(10).nOut(10).build(), "1", "2")
                    .setOutputs("4", "5", "6")
                    .pretrain(false).backprop(true).build();

    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    INDArray params = net.params();

    ComputationGraph net2 = new ComputationGraph(conf);
    net2.init(params, true); //cloneParametersArray = true
    ComputationGraph net3 = new ComputationGraph(conf);
    net3.init(params, false); //cloneParametersArray = false

    assertEquals(params, net2.params());
    assertEquals(params, net3.params());

    //Different objects: the parameters array was cloned
    assertFalse(params == net2.params());
    //Same object: the parameters array was NOT cloned
    assertTrue(params == net3.params());

    Map<String, INDArray> paramsMap = net.paramTable();
    Map<String, INDArray> paramsMap2 = net2.paramTable();
    Map<String, INDArray> paramsMap3 = net3.paramTable();
    for (String s : paramsMap.keySet()) {
        assertEquals(paramsMap.get(s), paramsMap2.get(s));
        assertEquals(paramsMap.get(s), paramsMap3.get(s));
    }
}
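The two identity assertions above pin down the semantics of init(INDArray, boolean): passing true copies the parameter array into the new network, while false makes the new network reuse the caller's array directly. A minimal sketch of why the distinction matters (the helper name is ours, not DL4J's):

    //With cloneParametersArray == false, both networks read and write the SAME
    //array: updating one network's parameters silently changes the other's too.
    static ComputationGraph withSharedParams(ComputationGraphConfiguration conf, INDArray params) {
        ComputationGraph g = new ComputationGraph(conf);
        g.init(params, false); //no clone: g.params() == params (same object)
        return g;
    }

Use true (clone) when the two networks must evolve independently; false saves memory when the second network is a read-only view, e.g. for parallel inference.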
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class TestUpdaters method testUpdaters.
@Test
public void testUpdaters() throws Exception {
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .updater(Updater.NESTEROVS).momentum(0.9)
                    .graphBuilder()
                    .addInputs("input") //40x40x1
                    .addLayer("l0_cnn", new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {1, 1},
                                    new int[] {1, 1}).nOut(100).build(), "input") //out: 40x40x100
                    .addLayer("l1_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX,
                                    new int[] {2, 2}, new int[] {2, 2}, new int[] {1, 1}).build(), "l0_cnn") //out: 21x21x100
                    .addLayer("l2_cnn", new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {2, 2},
                                    new int[] {1, 1}).nOut(200).build(), "l1_max") //out: 11x11x200
                    .addLayer("l3_max", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX,
                                    new int[] {3, 3}, new int[] {2, 2}, new int[] {1, 1}).build(), "l2_cnn") //out: 6x6x200
                    .addLayer("l4_fc", new DenseLayer.Builder().nOut(1024).build(), "l3_max") //output: 1x1x1024
                    .addLayer("l5_out", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .nOut(10).activation(Activation.SOFTMAX).build(), "l4_fc")
                    .setOutputs("l5_out")
                    .backprop(true).pretrain(false)
                    .setInputTypes(InputType.convolutional(40, 40, 1))
                    .build();
    //First: check that the nIn values are set properly...
    Map<String, GraphVertex> map = conf.getVertices();
    LayerVertex l0_cnn = (LayerVertex) map.get("l0_cnn");
    LayerVertex l2_cnn = (LayerVertex) map.get("l2_cnn");
    LayerVertex l4_fc = (LayerVertex) map.get("l4_fc");
    LayerVertex l5_out = (LayerVertex) map.get("l5_out");
    assertEquals(1, ((FeedForwardLayer) l0_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(100, ((FeedForwardLayer) l2_cnn.getLayerConf().getLayer()).getNIn());
    assertEquals(6 * 6 * 200, ((FeedForwardLayer) l4_fc.getLayerConf().getLayer()).getNIn());
    assertEquals(1024, ((FeedForwardLayer) l5_out.getLayerConf().getLayer()).getNIn());

    //Check updater state:
    ComputationGraph g = new ComputationGraph(conf);
    g.init();
    g.initGradientsView();
    ComputationGraphUpdater updater = g.getUpdater();

    //First: get the updaters array (private field, accessed via reflection)
    Field layerUpdatersField = updater.getClass().getDeclaredField("layerUpdaters");
    layerUpdatersField.setAccessible(true);
    org.deeplearning4j.nn.api.Updater[] layerUpdaters =
                    (org.deeplearning4j.nn.api.Updater[]) layerUpdatersField.get(updater);

    //And get the map between layer names and updater indexes
    Field layerUpdatersMapField = updater.getClass().getDeclaredField("layerUpdatersMap");
    layerUpdatersMapField.setAccessible(true);
    Map<String, Integer> layerUpdatersMap = (Map<String, Integer>) layerUpdatersMapField.get(updater);

    //Go through each layer; check that the updater state size matches the parameters size
    org.deeplearning4j.nn.api.Layer[] layers = g.getLayers();
    for (org.deeplearning4j.nn.api.Layer l : layers) {
        String layerName = l.conf().getLayer().getLayerName();
        int nParams = l.numParams();
        Map<String, INDArray> paramTable = l.paramTable();

        Map<String, Integer> parameterSizeCounts = new LinkedHashMap<>();
        for (Map.Entry<String, INDArray> e : paramTable.entrySet()) {
            parameterSizeCounts.put(e.getKey(), e.getValue().length());
        }

        int updaterIdx = layerUpdatersMap.get(layerName);
        org.deeplearning4j.nn.api.Updater u = layerUpdaters[updaterIdx];
        LayerUpdater lu = (LayerUpdater) u;
        Field updaterForVariableField = LayerUpdater.class.getDeclaredField("updaterForVariable");
        updaterForVariableField.setAccessible(true);
        Map<String, GradientUpdater> updaterForVariable =
                        (Map<String, GradientUpdater>) updaterForVariableField.get(lu);

        Map<String, Integer> updaterStateSizeCounts = new HashMap<>();
        for (Map.Entry<String, GradientUpdater> entry : updaterForVariable.entrySet()) {
            GradientUpdater gu = entry.getValue();
            Nesterovs nesterovs = (Nesterovs) gu;
            INDArray v = nesterovs.getV();
            int length = (v == null ? -1 : v.length());
            updaterStateSizeCounts.put(entry.getKey(), length);
        }

        //Check subsampling layers: no parameters, hence no updater state either
        if (l.numParams() == 0) {
            assertEquals(0, updaterForVariable.size());
        }

        System.out.println(layerName + "\t" + nParams + "\t" + parameterSizeCounts + "\t Updater size: "
                        + updaterStateSizeCounts);

        //With the Nesterov updater: 1 history (velocity) value per parameter
        for (String s : parameterSizeCounts.keySet()) {
            int paramSize = parameterSizeCounts.get(s);
            int updaterSize = updaterStateSizeCounts.get(s);
            assertEquals(layerName + "/" + s, paramSize, updaterSize);
        }
    }

    //Finally, fit a minibatch of 2 examples: shape [minibatch, depth, height, width]
    INDArray in = Nd4j.create(2, 1, 40, 40);
    INDArray l = Nd4j.create(2, 10);
    DataSet ds = new DataSet(in, l);
    g.fit(ds);
}
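The one-history-value-per-parameter assertion is the defining property of momentum-style updaters: each parameter owns exactly one velocity entry, so the updater state array has the same length as the parameter array. A generic sketch of that idea over plain arrays (classical momentum; an illustration, not DL4J's exact Nesterov formulation):

    //velocity.length == params.length: one state value per parameter,
    //which is exactly what the reflection-based checks above assert.
    static void momentumStep(double[] params, double[] grad, double[] velocity, double lr, double mu) {
        for (int i = 0; i < params.length; i++) {
            velocity[i] = mu * velocity[i] - lr * grad[i];
            params[i] += velocity[i];
        }
    }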
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class TestVariableLengthTSCG method testVariableLengthSimple.
@Test
public void testVariableLengthSimple() {
    //Test: recurrent layer (GravesLSTM) + RnnOutputLayer
    //Length of 4 for the standard case
    //Length of 5 with the last time step's output mask set to 0
    //Expect the same scores and gradients in both cases...
    int[] miniBatchSizes = {1, 2, 5};
    int nOut = 1;
    Random r = new Random(12345);

    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);

        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                        .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345)
                        .graphBuilder()
                        .addInputs("in")
                        .addLayer("0", new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build(), "in")
                        .addLayer("1", new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
                                        .nIn(2).nOut(1).build(), "0")
                        .setOutputs("1").build();

        ComputationGraph net = new ComputationGraph(conf);
        net.init();

        INDArray in1 = Nd4j.rand(new int[] {nExamples, 2, 4});
        INDArray in2 = Nd4j.rand(new int[] {nExamples, 2, 5});
        //interval(0, 3, true) is inclusive of 3: copy in1 into time steps 0..3 of in2
        in2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true)}, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        INDArray labels1 = Nd4j.rand(new int[] {nExamples, 1, 4});
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true)}, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));

        //Mask out the last (5th) time step of every example
        INDArray labelMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            labelMask.putScalar(new int[] {j, 4}, 0);
        }

        net.setInput(0, in1);
        net.setLabel(0, labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();

        net.setInput(0, in2);
        net.setLabel(0, labels2);
        net.setLayerMaskArrays(null, new INDArray[] {labelMask});
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();

        //Scores and gradients should be identical for the two cases (given the mask array)
        assertEquals(score1, score2, 0.0);

        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            assertEquals(s, g1s, g2s);
        }

        //Modify the label values at the masked time step: (a) score and (b) gradients should be unchanged
        for (int i = 0; i < nExamples; i++) {
            for (int j = 0; j < nOut; j++) {
                double d = r.nextDouble();
                labels2.putScalar(new int[] {i, j, 4}, d);
            }
            net.setLabel(0, labels2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 0.0);
            for (String s : g2map.keySet()) {
                INDArray g2s = g2map.get(s);
                INDArray g2sa = g2a.getGradientFor(s);
                assertEquals(s, g2s, g2sa);
            }
        }
    }
}
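The invariant under test: with an output (label) mask in place, masked time steps contribute nothing to the score or the gradients, so whatever values sit at those positions are irrelevant. A plain-Java sketch of the masked-MSE idea (our own illustration, independent of the ND4J implementation):

    //mask[t] == 0.0 removes step t from the loss entirely, so editing
    //labels[t] at a masked step cannot change the result.
    static double maskedSquaredError(double[] predictions, double[] labels, double[] mask) {
        double sum = 0.0;
        for (int t = 0; t < predictions.length; t++) {
            double diff = predictions[t] - labels[t];
            sum += mask[t] * diff * diff;
        }
        return sum;
    }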
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class TestVariableLengthTSCG method testOutputMaskingScoreMagnitudes.
@Test
public void testOutputMaskingScoreMagnitudes() {
    //Idea: check the magnitude of the score with differing numbers of values masked out
    //i.e., MSE with zero weight init and 1.0 labels: we know exactly what score to expect
    int nIn = 3;
    int[] timeSeriesLengths = {3, 10};
    int[] outputSizes = {1, 2, 5};
    int[] miniBatchSizes = {1, 4};
    Random r = new Random(12345);

    for (int tsLength : timeSeriesLengths) {
        for (int nOut : outputSizes) {
            for (int miniBatch : miniBatchSizes) {
                for (int nToMask = 0; nToMask < tsLength - 1; nToMask++) {
                    String msg = "tsLen=" + tsLength + ", nOut=" + nOut + ", miniBatch=" + miniBatch;

                    INDArray labelMaskArray = Nd4j.ones(miniBatch, tsLength);
                    for (int i = 0; i < miniBatch; i++) {
                        //For each example: select which outputs to mask...
                        int nMasked = 0;
                        while (nMasked < nToMask) {
                            int tryIdx = r.nextInt(tsLength);
                            if (labelMaskArray.getDouble(i, tryIdx) == 0.0)
                                continue;
                            labelMaskArray.putScalar(new int[] {i, tryIdx}, 0.0);
                            nMasked++;
                        }
                    }

                    INDArray input = Nd4j.rand(new int[] {miniBatch, nIn, tsLength});
                    INDArray labels = Nd4j.ones(miniBatch, nOut, tsLength);

                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                                    .regularization(false).seed(12345L)
                                    .graphBuilder()
                                    .addInputs("in")
                                    .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(5)
                                                    .weightInit(WeightInit.DISTRIBUTION)
                                                    .dist(new NormalDistribution(0, 1))
                                                    .updater(Updater.NONE).build(), "in")
                                    .addLayer("1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE)
                                                    .activation(Activation.IDENTITY).nIn(5).nOut(nOut)
                                                    .weightInit(WeightInit.ZERO)
                                                    .updater(Updater.NONE).build(), "0")
                                    .setOutputs("1")
                                    .pretrain(false).backprop(true).build();

                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();

                    //MSE loss: 1/n * sum(squaredErrors). With zero weights the output is 0.0 at every
                    //step, and labels are 1.0, so each unmasked time step contributes exactly 1.0 per
                    //example. Scores are summed over examples, then divided by the minibatch size:
                    //expected score = tsLength - nToMask
                    double expScore = tsLength - nToMask;

                    net.setLayerMaskArrays(null, new INDArray[] {labelMaskArray});
                    net.setInput(0, input);
                    net.setLabel(0, labels);
                    net.computeGradientAndScore();
                    double score = net.score();
                    assertEquals(msg, expScore, score, 0.1);
                }
            }
        }
    }
}
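A concrete instance of the arithmetic: with tsLength = 10 and nToMask = 3, each example has 7 unmasked time steps. Every output is 0.0 (zero weights, identity activation) against a label of 1.0, so each unmasked step contributes a mean squared error of 1.0 (averaging over nOut identical errors), giving an expected score of 7.0 regardless of nOut and miniBatch, within the 0.1 tolerance used above.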
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class ComputationGraphTestRNN method testRnnTimeStepMultipleInOut.
@Test
public void testRnnTimeStepMultipleInOut() {
    //Test rnnTimeStep functionality with multiple inputs and outputs...
    Nd4j.getRandom().setSeed(12345);
    int timeSeriesLength = 12;

    //Network: two GravesLSTM layers feeding a shared DenseLayer, which feeds two RnnOutputLayers.
    //Hence this also tests the RNN <-> feed-forward preprocessors:
    //    in0 -> lstm0 \                / out0
    //                  > dense (merge)
    //    in1 -> lstm1 /                \ out1
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                    .graphBuilder()
                    .addInputs("in0", "in1")
                    .addLayer("lstm0", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                                    .nIn(5).nOut(6).activation(Activation.TANH)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build(), "in0")
                    .addLayer("lstm1", new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder()
                                    .nIn(4).nOut(5).activation(Activation.TANH)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build(), "in1")
                    .addLayer("dense", new DenseLayer.Builder()
                                    .nIn(6 + 5).nOut(9).activation(Activation.TANH)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build(), "lstm0", "lstm1")
                    .addLayer("out0", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(9).nOut(3).activation(Activation.SOFTMAX)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build(), "dense")
                    .addLayer("out1", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .nIn(9).nOut(4).activation(Activation.SOFTMAX)
                                    .weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build(), "dense")
                    .setOutputs("out0", "out1")
                    .inputPreProcessor("dense", new RnnToFeedForwardPreProcessor())
                    .inputPreProcessor("out0", new FeedForwardToRnnPreProcessor())
                    .inputPreProcessor("out1", new FeedForwardToRnnPreProcessor())
                    .pretrain(false).backprop(true).build();

    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();

    INDArray input0 = Nd4j.rand(new int[] {3, 5, timeSeriesLength});
    INDArray input1 = Nd4j.rand(new int[] {3, 4, timeSeriesLength});

    Map<String, INDArray> allOutputActivations = graph.feedForward(new INDArray[] {input0, input1}, true);
    INDArray fullActLSTM0 = allOutputActivations.get("lstm0");
    INDArray fullActLSTM1 = allOutputActivations.get("lstm1");
    INDArray fullActOut0 = allOutputActivations.get("out0");
    INDArray fullActOut1 = allOutputActivations.get("out1");

    assertArrayEquals(new int[] {3, 6, timeSeriesLength}, fullActLSTM0.shape());
    assertArrayEquals(new int[] {3, 5, timeSeriesLength}, fullActLSTM1.shape());
    assertArrayEquals(new int[] {3, 3, timeSeriesLength}, fullActOut0.shape());
    assertArrayEquals(new int[] {3, 4, timeSeriesLength}, fullActOut1.shape());

    int[] inputLengths = {1, 2, 3, 4, 6, 12};

    //Should get the same result regardless of step size; should be identical to the standard forward pass
    for (int i = 0; i < inputLengths.length; i++) {
        int inLength = inputLengths[i];
        int nSteps = timeSeriesLength / inLength; //each step of length inLength
        graph.rnnClearPreviousState();

        for (int j = 0; j < nSteps; j++) {
            int startTimeRange = j * inLength;
            int endTimeRange = startTimeRange + inLength;

            INDArray inputSubset0 = input0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                            NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertTrue(inputSubset0.size(2) == inLength);
            INDArray inputSubset1 = input1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                            NDArrayIndex.interval(startTimeRange, endTimeRange));
            if (inLength > 1)
                assertTrue(inputSubset1.size(2) == inLength);

            INDArray[] outArr = graph.rnnTimeStep(inputSubset0, inputSubset1);
            assertEquals(2, outArr.length);
            INDArray out0 = outArr[0];
            INDArray out1 = outArr[1];

            //Expected output: the corresponding time slice of the full forward pass
            INDArray expOutSubset0;
            if (inLength == 1) {
                int[] sizes = new int[] {fullActOut0.size(0), fullActOut0.size(1), 1};
                expOutSubset0 = Nd4j.create(sizes);
                expOutSubset0.tensorAlongDimension(0, 1, 0).assign(fullActOut0.get(NDArrayIndex.all(),
                                NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset0 = fullActOut0.get(NDArrayIndex.all(), NDArrayIndex.all(),
                                NDArrayIndex.interval(startTimeRange, endTimeRange));
            }
            INDArray expOutSubset1;
            if (inLength == 1) {
                int[] sizes = new int[] {fullActOut1.size(0), fullActOut1.size(1), 1};
                expOutSubset1 = Nd4j.create(sizes);
                expOutSubset1.tensorAlongDimension(0, 1, 0).assign(fullActOut1.get(NDArrayIndex.all(),
                                NDArrayIndex.all(), NDArrayIndex.point(startTimeRange)));
            } else {
                expOutSubset1 = fullActOut1.get(NDArrayIndex.all(), NDArrayIndex.all(),
                                NDArrayIndex.interval(startTimeRange, endTimeRange));
            }

            assertEquals(expOutSubset0, out0);
            assertEquals(expOutSubset1, out1);

            //Check that the stored LSTM state matches the activations at the last time step processed
            Map<String, INDArray> currLSTM0State = graph.rnnGetPreviousState("lstm0");
            Map<String, INDArray> currLSTM1State = graph.rnnGetPreviousState("lstm1");
            INDArray lastActL0 = currLSTM0State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray lastActL1 = currLSTM1State.get(GravesLSTM.STATE_KEY_PREV_ACTIVATION);
            INDArray expLastActL0 = fullActLSTM0.tensorAlongDimension(endTimeRange - 1, 1, 0);
            INDArray expLastActL1 = fullActLSTM1.tensorAlongDimension(endTimeRange - 1, 1, 0);
            assertEquals(expLastActL0, lastActL0);
            assertEquals(expLastActL1, lastActL1);
        }
    }
}
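The property verified above, that stepping through a sequence in chunks with rnnTimeStep() reproduces a single full-length feedForward(), is what makes the method suitable for streaming inference. A minimal usage sketch reusing the test's graph and inputs (the single-step loop is our own illustration; we assume rnnTimeStep() accepts a 2D [miniBatch, nIn] array as one time step):

    graph.rnnClearPreviousState(); //reset stored recurrent state before a new sequence
    for (int t = 0; t < timeSeriesLength; t++) {
        INDArray step0 = input0.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(t));
        INDArray step1 = input1.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(t));
        INDArray[] out = graph.rnnTimeStep(step0, step1); //hidden state carries over between calls
        //... consume out[0] and out[1] for time step t ...
    }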