Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
From the class NeuralNetConfigurationTest, the method getRBMConfig:
private static NeuralNetConfiguration getRBMConfig(int nIn, int nOut, WeightInit weightInit, boolean pretrain) {
    RBM layer = new RBM.Builder().nIn(nIn).nOut(nOut).weightInit(weightInit)
                    .dist(new NormalDistribution(1, 1))
                    .visibleUnit(RBM.VisibleUnit.GAUSSIAN)
                    .hiddenUnit(RBM.HiddenUnit.RECTIFIED)
                    .activation(Activation.TANH)
                    .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build();
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(3)
                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .regularization(false)
                    .layer(layer).build();
    conf.setPretrain(pretrain);
    return conf;
}
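A caller would typically turn this configuration into a live layer before training. The sketch below is illustrative only (the argument values are hypothetical); the instantiation pattern mirrors the testMnist method further down this page:

    NeuralNetConfiguration conf = getRBMConfig(784, 600, WeightInit.DISTRIBUTION, true);
    // Allocate a parameter row vector of the right size, then instantiate the layer over it.
    // RBM here is the layer implementation class (org.deeplearning4j.nn.layers.feedforward.rbm.RBM),
    // not the conf class used inside getRBMConfig.
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    RBM rbm = (RBM) conf.getLayer().instantiate(conf, null, 0, params, true);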
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
From the class LayerConfigValidationTest, the method testPredefinedConfigValues:
@Test
public void testPredefinedConfigValues() {
double expectedMomentum = 0.9;
double expectedAdamMeanDecay = 0.9;
double expectedAdamVarDecay = 0.999;
double expectedRmsDecay = 0.95;
Distribution expectedDist = new NormalDistribution(0, 1);
double expectedL1 = 0.0;
double expectedL2 = 0.0;
// Nesterovs Updater
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(0.3)
                .updater(Updater.NESTEROVS).regularization(true).list()
                .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).build())
                .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).momentum(0.4).build())
                .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
Layer layerConf = net.getLayer(0).conf().getLayer();
assertEquals(expectedMomentum, layerConf.getMomentum(), 1e-3);
assertEquals(expectedL1, layerConf.getL1(), 1e-3);
assertEquals(0.5, layerConf.getL2(), 1e-3);
Layer layerConf1 = net.getLayer(1).conf().getLayer();
assertEquals(0.4, layerConf1.getMomentum(), 1e-3);
// Adam Updater
conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.ADAM)
                .regularization(true).weightInit(WeightInit.DISTRIBUTION).list()
                .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).l2(0.5).l1(0.3).build())
                .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build())
                .build();
net = new MultiLayerNetwork(conf);
net.init();
layerConf = net.getLayer(0).conf().getLayer();
assertEquals(0.3, layerConf.getL1(), 1e-3);
assertEquals(0.5, layerConf.getL2(), 1e-3);
layerConf1 = net.getLayer(1).conf().getLayer();
assertEquals(expectedAdamMeanDecay, layerConf1.getAdamMeanDecay(), 1e-3);
assertEquals(expectedAdamVarDecay, layerConf1.getAdamVarDecay(), 1e-3);
assertEquals(expectedDist, layerConf1.getDist());
// l1 & l2 local should still be set whether regularization true or false
assertEquals(expectedL1, layerConf1.getL1(), 1e-3);
assertEquals(expectedL2, layerConf1.getL2(), 1e-3);
// RMSProp Updater
conf = new NeuralNetConfiguration.Builder().learningRate(0.3).updater(Updater.RMSPROP).list()
                .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
                .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).rmsDecay(0.4).build())
                .build();
net = new MultiLayerNetwork(conf);
net.init();
layerConf = net.getLayer(0).conf().getLayer();
assertEquals(expectedRmsDecay, layerConf.getRmsDecay(), 1e-3);
assertEquals(expectedL1, layerConf.getL1(), 1e-3);
assertEquals(expectedL2, layerConf.getL2(), 1e-3);
layerConf1 = net.getLayer(1).conf().getLayer();
assertEquals(0.4, layerConf1.getRmsDecay(), 1e-3);
}
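The rule this test pins down is that builder-level settings act as global defaults, and a value set on an individual layer overrides the default for that layer only. A minimal sketch of the same pattern (the layer sizes and the 0.5 override are illustrative, not taken from the test):

    // Momentum defaults to 0.9 for Updater.NESTEROVS; only layer 1 overrides it.
    MultiLayerConfiguration demo = new NeuralNetConfiguration.Builder()
                    .learningRate(0.3).updater(Updater.NESTEROVS).list()
                    .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).build())                  // momentum == 0.9
                    .layer(1, new DenseLayer.Builder().nIn(4).nOut(4).momentum(0.5).build())    // momentum == 0.5
                    .build();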
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
From the class OutputLayerTest, the method testRnnOutputLayerIncEdgeCases:
@Test
public void testRnnOutputLayerIncEdgeCases() {
//Basic test + test edge cases: timeSeriesLength==1, miniBatchSize==1, both
int[] tsLength = { 5, 1, 5, 1 };
int[] miniBatch = { 7, 7, 1, 1 };
int nIn = 3;
int nOut = 6;
int layerSize = 4;
FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
for (int t = 0; t < tsLength.length; t++) {
Nd4j.getRandom().setSeed(12345);
int timeSeriesLength = tsLength[t];
int miniBatchSize = miniBatch[t];
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
}
}
}
INDArray labels3d = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
int idx = r.nextInt(nOut);
labels3d.putScalar(new int[] { i, idx, j }, 1.0f);
}
}
INDArray labels2d = proc.backprop(labels3d, miniBatchSize);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list()
                .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .activation(Activation.TANH).updater(Updater.NONE).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .updater(Updater.NONE).build())
                .inputPreProcessor(1, new RnnToFeedForwardPreProcessor())
                .pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
INDArray out2d = mln.feedForward(input).get(2);
INDArray out3d = proc.preProcess(out2d, miniBatchSize);
MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list()
                .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .activation(Activation.TANH).updater(Updater.NONE).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .updater(Updater.NONE).build())
                .pretrain(false).backprop(true).build();
MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
mlnRnn.init();
INDArray outRnn = mlnRnn.feedForward(input).get(2);
mln.setLabels(labels2d);
mlnRnn.setLabels(labels3d);
mln.computeGradientAndScore();
mlnRnn.computeGradientAndScore();
//score is average over all examples.
//However: OutputLayer version has miniBatch*timeSeriesLength "examples" (after reshaping)
//RnnOutputLayer has miniBatch examples
//Hence: expect difference in scores by factor of timeSeriesLength
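//Worked instance (illustrative, first loop iteration): with miniBatchSize = 7 and timeSeriesLength = 5,
//mln.score() averages the loss over 7 * 5 = 35 reshaped rows while mlnRnn.score() averages over
//7 sequences, so mln.score() * 5 should match mlnRnn.score() up to numerical error.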
double score = mln.score() * timeSeriesLength;
double scoreRNN = mlnRnn.score();
assertFalse(Double.isNaN(score));
assertFalse(Double.isNaN(scoreRNN));
double relError = Math.abs(score - scoreRNN) / (Math.abs(score) + Math.abs(scoreRNN));
System.out.println(relError);
assertTrue(relError < 1e-6);
//Check labels and inputs for output layer:
OutputLayer ol = (OutputLayer) mln.getOutputLayer();
assertArrayEquals(ol.getInput().shape(), new int[] { miniBatchSize * timeSeriesLength, layerSize });
assertArrayEquals(ol.getLabels().shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
RnnOutputLayer rnnol = (RnnOutputLayer) mlnRnn.getOutputLayer();
//assertArrayEquals(rnnol.getInput().shape(),new int[]{miniBatchSize,layerSize,timeSeriesLength});
//Input may be set by BaseLayer methods. Thus input may end up as reshaped 2d version instead of original 3d version.
//Not ideal, but everything else works.
assertArrayEquals(rnnol.getLabels().shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
//Check shapes of output for both:
assertArrayEquals(out2d.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray out = mln.output(input);
assertArrayEquals(out.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray act = mln.activate();
assertArrayEquals(act.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray preout = mln.preOutput(input);
assertArrayEquals(preout.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray outFFRnn = mlnRnn.feedForward(input).get(2);
assertArrayEquals(outFFRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray outRnn2 = mlnRnn.output(input);
assertArrayEquals(outRnn2.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray actRnn = mlnRnn.activate();
assertArrayEquals(actRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray preoutRnn = mlnRnn.preOutput(input);
assertArrayEquals(preoutRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
}
}
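Both branches of this test lean on the shape contract of FeedForwardToRnnPreProcessor: preProcess reshapes a 2d [miniBatchSize * timeSeriesLength, n] array into a 3d [miniBatchSize, n, timeSeriesLength] array, and backprop performs the inverse. A minimal round-trip sketch (the sizes are illustrative):

    FeedForwardToRnnPreProcessor p = new FeedForwardToRnnPreProcessor();
    INDArray ff = Nd4j.rand(7 * 5, 6);      // [miniBatch * tsLength, n] = [35, 6]
    INDArray rnn = p.preProcess(ff, 7);     // -> [7, 6, 5]
    INDArray back = p.backprop(rnn, 7);     // -> back to [35, 6]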
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
From the class RBMTests, the method testMnist:
@Test
public void testMnist() throws Exception {
MnistDataFetcher fetcher = new MnistDataFetcher(true);
Nd4j.ENFORCE_NUMERICAL_STABILITY = true;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().iterations(30)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .learningRate(1e-1f)
                .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder().nIn(784).nOut(600)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(1, 1e-5))
                                .lossFunction(LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY).build())
                .build();
conf.setPretrain(true);
org.deeplearning4j.nn.conf.layers.RBM layerConf = (org.deeplearning4j.nn.conf.layers.RBM) conf.getLayer();
fetcher.fetch(10);
DataSet d2 = fetcher.next();
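// Sanity check: sample and print a weight-shaped [nIn, nOut] = [784, 600] matrix from the same
// N(1, 1e-5) distribution used to initialize the layer.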
org.nd4j.linalg.api.rng.distribution.Distribution dist = Nd4j.getDistributions().createNormal(1, 1e-5);
System.out.println(dist.sample(new int[] { layerConf.getNIn(), layerConf.getNOut() }));
INDArray input = d2.getFeatureMatrix();
int numParams = conf.getLayer().initializer().numParams(conf);
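// Expected size (assuming the standard RBM parameter layout of weights plus hidden and visible biases):
// 784 * 600 weights + 600 hidden biases + 784 visible biases = 471,784 parameters.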
INDArray params = Nd4j.create(1, numParams);
RBM rbm = (RBM) conf.getLayer().instantiate(conf, null, 0, params, true);
rbm.fit(input);
}
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
From the class OutputLayerTest, the method testOutputLayersRnnForwardPass:
@Test
public void testOutputLayersRnnForwardPass() {
//Test output layer used with RNNs: expect all outputs, activations, etc. to be 2d
int nIn = 2;
int nOut = 5;
int layerSize = 4;
int timeSeriesLength = 6;
int miniBatchSize = 3;
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
}
}
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list()
                .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .activation(Activation.TANH).updater(Updater.NONE).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .updater(Updater.NONE).build())
                .inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
INDArray out2d = mln.feedForward(input).get(2);
assertArrayEquals(out2d.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray out = mln.output(input);
assertArrayEquals(out.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray act = mln.activate();
assertArrayEquals(act.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray preout = mln.preOutput(input);
assertArrayEquals(preout.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
//As above, but for RnnOutputLayer. Expect all activations etc. to be 3d
MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list()
                .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .activation(Activation.TANH).updater(Updater.NONE).build())
                .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT)
                                .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut)
                                .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                .updater(Updater.NONE).build())
                .build();
MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
mlnRnn.init();
INDArray out3d = mlnRnn.feedForward(input).get(2);
assertArrayEquals(out3d.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray outRnn = mlnRnn.output(input);
assertArrayEquals(outRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray actRnn = mlnRnn.activate();
assertArrayEquals(actRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray preoutRnn = mlnRnn.preOutput(input);
assertArrayEquals(preoutRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
}
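To relate the two output shapes directly, the 2d feed-forward output can be expanded back to 3d with a FeedForwardToRnnPreProcessor, exactly as the testRnnOutputLayerIncEdgeCases method above does. A minimal sketch using the variables from this test:

    FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
    // [miniBatchSize * timeSeriesLength, nOut] -> [miniBatchSize, nOut, timeSeriesLength], i.e. [18, 5] -> [3, 5, 6]
    INDArray out3dFromFF = proc.preProcess(out, miniBatchSize);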