Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
In the class TestVariableLengthTS, the method testInputMasking:
@Test
public void testInputMasking() {
    //Idea: apply masking on the input, with two dense layers processing the input before the recurrent layer
    //Ensure that the score, parameter gradients and activations don't depend on the input values at the masked time steps
    int[] miniBatchSizes = { 1, 2, 5 };
    int nIn = 2;
    Random r = new Random(12345);
    for (int nExamples : miniBatchSizes) {
        Nd4j.getRandom().setSeed(12345);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                        .iterations(1).updater(Updater.SGD).learningRate(0.1).seed(12345)
                        .list()
                        .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                        .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                        .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(2).nOut(2).build())
                        .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1).build())
                        .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
                        .inputPreProcessor(2, new FeedForwardToRnnPreProcessor())
                        .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        INDArray in1 = Nd4j.rand(new int[] { nExamples, 2, 4 });
        INDArray in2 = Nd4j.rand(new int[] { nExamples, 2, 5 });
        in2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, in1);
        assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray labels1 = Nd4j.rand(new int[] { nExamples, 1, 4 });
        INDArray labels2 = Nd4j.create(nExamples, 1, 5);
        labels2.put(new INDArrayIndex[] { NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 3, true) }, labels1);
        assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)));
        INDArray inputMask = Nd4j.ones(nExamples, 5);
        for (int j = 0; j < nExamples; j++) {
            inputMask.putScalar(new int[] { j, 4 }, 0);
        }
        net.setInput(in1);
        net.setLabels(labels1);
        net.computeGradientAndScore();
        double score1 = net.score();
        Gradient g1 = net.gradient();
        Map<String, INDArray> map1 = g1.gradientForVariable();
        for (String s : map1.keySet()) {
            //Note: gradients are a view normally -> second computeGradientAndScore would have modified the original gradient map values...
            map1.put(s, map1.get(s).dup());
        }
        net.setInput(in2);
        net.setLabels(labels2);
        net.setLayerMaskArrays(inputMask, null);
        net.computeGradientAndScore();
        double score2 = net.score();
        Gradient g2 = net.gradient();
        List<INDArray> activations2 = net.feedForward();
        //Scores should differ here: masking the input, not the output. Therefore 4 vs. 5 time step outputs
        assertNotEquals(score1, score2, 0.01);
        Map<String, INDArray> g1map = g1.gradientForVariable();
        Map<String, INDArray> g2map = g2.gradientForVariable();
        for (String s : g1map.keySet()) {
            INDArray g1s = g1map.get(s);
            INDArray g2s = g2map.get(s);
            System.out.println("-------");
            System.out.println("Variable: " + s);
            System.out.println(Arrays.toString(g1s.dup().data().asFloat()));
            System.out.println(Arrays.toString(g2s.dup().data().asFloat()));
            assertNotEquals(s, g1s, g2s);
        }
        //Modify the values at the masked time step, and check that the gradients, score and activations do not change
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                in2.putScalar(new int[] { j, k, 4 }, r.nextDouble());
            }
            net.setInput(in2);
            net.computeGradientAndScore();
            double score2a = net.score();
            Gradient g2a = net.gradient();
            assertEquals(score2, score2a, 1e-12);
            for (String s : g2.gradientForVariable().keySet()) {
                assertEquals(g2.getGradientFor(s), g2a.getGradientFor(s));
            }
            List<INDArray> activations2a = net.feedForward();
            for (int k = 1; k < activations2.size(); k++) {
                assertEquals(activations2.get(k), activations2a.get(k));
            }
        }
        //Finally: check that the activations for the first two (dense) layers are zero at the appropriate time step
        FeedForwardToRnnPreProcessor temp = new FeedForwardToRnnPreProcessor();
        INDArray l0Before = activations2.get(1);
        INDArray l1Before = activations2.get(2);
        INDArray l0After = temp.preProcess(l0Before, nExamples);
        INDArray l1After = temp.preProcess(l1Before, nExamples);
        for (int j = 0; j < nExamples; j++) {
            for (int k = 0; k < nIn; k++) {
                assertEquals(0.0, l0After.getDouble(j, k, 4), 0.0);
                assertEquals(0.0, l1After.getDouble(j, k, 4), 0.0);
            }
        }
    }
}
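
The mechanism exercised above is net.setLayerMaskArrays(featureMask, labelMask): the feature mask has shape [miniBatchSize, timeSeriesLength], where 1.0 marks a time step as present and 0.0 marks it as padding that must not influence the score, the parameter gradients, or the activations. A minimal sketch of that pattern in isolation, assuming the same 0.x-era MultiLayerNetwork API used in the test (variable names and shapes are illustrative):

// Mask out the last of 5 time steps for every example in a mini-batch of 3
INDArray features = Nd4j.rand(new int[] { 3, 2, 5 });   // [miniBatch, nIn, timeSteps]
INDArray featureMask = Nd4j.ones(3, 5);                 // [miniBatch, timeSteps]
for (int j = 0; j < 3; j++) {
    featureMask.putScalar(new int[] { j, 4 }, 0.0);     // 0.0 = padded / ignored step
}
net.setLayerMaskArrays(featureMask, null);              // null: labels are not masked
net.computeGradientAndScore();                          // score and gradients ignore masked steps
net.clearLayerMaskArrays();                             // remove masks before the next unmasked batch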
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
In the class TestVAE, the method testReconstructionDistributionsSimple:
@Test
public void testReconstructionDistributionsSimple() {
    int inOutSize = 6;
    ReconstructionDistribution[] reconstructionDistributions = new ReconstructionDistribution[] {
                    new GaussianReconstructionDistribution(Activation.IDENTITY),
                    new GaussianReconstructionDistribution(Activation.TANH),
                    new BernoulliReconstructionDistribution(Activation.SIGMOID),
                    new CompositeReconstructionDistribution.Builder()
                                    .addDistribution(2, new GaussianReconstructionDistribution(Activation.IDENTITY))
                                    .addDistribution(2, new BernoulliReconstructionDistribution())
                                    .addDistribution(2, new GaussianReconstructionDistribution(Activation.TANH))
                                    .build() };
    Nd4j.getRandom().setSeed(12345);
    for (int minibatch : new int[] { 1, 5 }) {
        for (int i = 0; i < reconstructionDistributions.length; i++) {
            INDArray data;
            switch (i) {
                case 0: //Gaussian + identity
                case 1: //Gaussian + tanh
                    data = Nd4j.rand(minibatch, inOutSize);
                    break;
                case 2: //Bernoulli
                    data = Nd4j.create(minibatch, inOutSize);
                    Nd4j.getExecutioner().exec(new BernoulliDistribution(data, 0.5), Nd4j.getRandom());
                    break;
                case 3: //Composite
                    data = Nd4j.create(minibatch, inOutSize);
                    data.get(NDArrayIndex.all(), NDArrayIndex.interval(0, 2)).assign(Nd4j.rand(minibatch, 2));
                    Nd4j.getExecutioner().exec(new BernoulliDistribution(data.get(NDArrayIndex.all(), NDArrayIndex.interval(2, 4)), 0.5),
                                    Nd4j.getRandom());
                    data.get(NDArrayIndex.all(), NDArrayIndex.interval(4, 6)).assign(Nd4j.rand(minibatch, 2));
                    break;
                default:
                    throw new RuntimeException();
            }
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                            .regularization(true).l2(0.2).l1(0.3)
                            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                            .learningRate(1.0).seed(12345L)
                            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                            .list()
                            .layer(0, new VariationalAutoencoder.Builder().nIn(inOutSize).nOut(3)
                                            .encoderLayerSizes(5).decoderLayerSizes(6)
                                            .pzxActivationFunction(Activation.TANH)
                                            .reconstructionDistribution(reconstructionDistributions[i])
                                            .activation(new ActivationTanH()).updater(Updater.SGD).build())
                            .pretrain(true).backprop(false).build();
            MultiLayerNetwork mln = new MultiLayerNetwork(conf);
            mln.init();
            mln.initGradientsView();
            mln.fit(data);
            org.deeplearning4j.nn.layers.variational.VariationalAutoencoder layer =
                            (org.deeplearning4j.nn.layers.variational.VariationalAutoencoder) mln.getLayer(0);
            assertFalse(layer.hasLossFunction());
            Nd4j.getRandom().setSeed(12345);
            INDArray reconstructionProb = layer.reconstructionProbability(data, 50);
            assertArrayEquals(new int[] { minibatch, 1 }, reconstructionProb.shape());
            Nd4j.getRandom().setSeed(12345);
            INDArray reconstructionLogProb = layer.reconstructionLogProbability(data, 50);
            assertArrayEquals(new int[] { minibatch, 1 }, reconstructionLogProb.shape());
            // System.out.println(reconstructionDistributions[i]);
            for (int j = 0; j < minibatch; j++) {
                double p = reconstructionProb.getDouble(j);
                double logp = reconstructionLogProb.getDouble(j);
                assertTrue(p >= 0.0 && p <= 1.0);
                assertTrue(logp <= 0.0);
                double pFromLogP = Math.exp(logp);
                assertEquals(p, pFromLogP, 1e-6);
            }
        }
    }
}
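
reconstructionProbability(data, numSamples) and reconstructionLogProbability(data, numSamples) are Monte-Carlo estimates over numSamples draws of the latent variable, which is why the test resets the RNG seed to 12345 before each call: both estimates then use the same samples and exp(logP) can be compared to P directly. A small sketch of the pattern, assuming the same layer API as above:

// Per-example reconstruction log-probability estimate from a pretrained VAE layer
org.deeplearning4j.nn.layers.variational.VariationalAutoencoder vaeLayer =
                (org.deeplearning4j.nn.layers.variational.VariationalAutoencoder) mln.getLayer(0);
int numSamples = 50;                                                          // more samples -> lower-variance estimate
INDArray logProb = vaeLayer.reconstructionLogProbability(data, numSamples);   // shape [minibatch, 1]
double logProbExample0 = logProb.getDouble(0);                                // estimated log p(x) for the first example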
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
In the class BackPropMLPTest, the method testMLP2:
@Test
public void testMLP2() {
    //Simple mini-batch test with multiple hidden layers
    MultiLayerConfiguration conf = getIrisMLPSimpleConfig(new int[] { 5, 15, 3 }, Activation.TANH);
    System.out.println(conf);
    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();
    DataSetIterator iter = new IrisDataSetIterator(12, 120);
    while (iter.hasNext()) {
        network.fit(iter.next());
    }
}
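
IrisDataSetIterator(12, 120) yields the 120 Iris examples in mini-batches of 12, so the while loop above performs exactly one epoch of fitting. A hedged follow-up sketch, assuming the evaluate(DataSetIterator) convenience method is available on MultiLayerNetwork in this version of the API:

iter.reset();                                    // rewind the iterator after the training pass
Evaluation eval = network.evaluate(iter);        // org.deeplearning4j.eval.Evaluation
System.out.println(eval.stats());                // accuracy, precision, recall, F1 per class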
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
In the class GravesLSTMOutputTest, the method getNetworkConf:
private MultiLayerConfiguration getNetworkConf(int iterations, boolean useTBPTT) {
    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .learningRate(0.1).regularization(true).l2(0.0025).iterations(iterations)
                    .stepFunction(new NegativeDefaultStepFunction())
                    .list()
                    .layer(0, new GravesLSTM.Builder().weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0.0, 0.01)).nIn(nIn).nOut(layerSize)
                                    .updater(Updater.ADAGRAD).activation(Activation.TANH).build())
                    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .updater(Updater.ADAGRAD).nIn(layerSize).nOut(nIn)
                                    .activation(Activation.SOFTMAX).build())
                    .inputPreProcessor(1, new RnnToFeedForwardPreProcessor())
                    .backprop(true).pretrain(false);
    if (useTBPTT) {
        builder.backpropType(BackpropType.TruncatedBPTT);
        builder.tBPTTBackwardLength(window / 3);
        builder.tBPTTForwardLength(window / 3);
    }
    return builder.build();
}
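
When useTBPTT is true, the configuration switches from standard backpropagation through time to truncated BPTT, with both the forward and backward truncation lengths set to window / 3 time steps (nIn, layerSize and window are fields of the test class). A minimal sketch of consuming the configuration, assuming those same fields:

MultiLayerConfiguration conf = getNetworkConf(1, true);   // 1 iteration, truncated BPTT enabled
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
// net.fit(features, labels) would then backpropagate in segments of window / 3 time steps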
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
In the class TestVAE, the method testForwardPass:
@Test
public void testForwardPass() {
    int[][] encLayerSizes = new int[][] { { 12 }, { 12, 13 }, { 12, 13, 14 } };
    for (int i = 0; i < encLayerSizes.length; i++) {
        MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list()
                        .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
                                        .nIn(10).nOut(5).encoderLayerSizes(encLayerSizes[i])
                                        .decoderLayerSizes(13).build())
                        .build();
        NeuralNetConfiguration c = mlc.getConf(0);
        org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae =
                        (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer();
        MultiLayerNetwork net = new MultiLayerNetwork(mlc);
        net.init();
        INDArray in = Nd4j.rand(1, 10);
        // net.output(in);
        List<INDArray> out = net.feedForward(in);
        assertArrayEquals(new int[] { 1, 10 }, out.get(0).shape());
        assertArrayEquals(new int[] { 1, 5 }, out.get(1).shape());
    }
}
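
feedForward(in) returns one activation array per layer, with index 0 holding the input itself; only the VAE layer's nOut (5 here) determines the shape of its activations, so the encoder and decoder hidden sizes can vary freely without changing the asserted shapes. A small sketch of reading those shapes, assuming the same network as above:

List<INDArray> acts = net.feedForward(Nd4j.rand(1, 10));
System.out.println(Arrays.toString(acts.get(0).shape()));   // [1, 10] - the input
System.out.println(Arrays.toString(acts.get(1).shape()));   // [1, 5]  - VAE layer output (nOut = 5)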