use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
the class VaeGradientCheckTests method testVaeAsMLP.
@Test
public void testVaeAsMLP() {
//Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output
//This gradient check tests this part
//activation functions such as relu and hardtanh: may randomly fail due to discontinuities
String[] activFns = { "identity", "tanh" };
LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
//i.e., lossFunctions[i] used with outputActivations[i] here
String[] outputActivations = { "softmax", "tanh" };
//use l2vals[i] with l1vals[i]
double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
int[][] encoderLayerSizes = new int[][] { { 5 }, { 5, 6 } };
int[][] decoderLayerSizes = new int[][] { { 6 }, { 7, 8 } };
Nd4j.getRandom().setSeed(12345);
for (int minibatch : new int[] { 1, 5 }) {
INDArray input = Nd4j.rand(minibatch, 4);
INDArray labels = Nd4j.create(minibatch, 3);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, i % 3, 1.0);
}
for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
int[] encoderSizes = encoderLayerSizes[ls];
int[] decoderSizes = decoderLayerSizes[ls];
for (String afn : activFns) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
String outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k]).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0).seed(12345L).list().layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).activation(afn).updater(Updater.SGD).build()).layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nIn(3).nOut(3).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.SGD).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", encLayerSizes = " + Arrays.toString(encoderSizes) + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
}
}
}
}
}
}
use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
the class GradientCheckTests method testGradientGravesBidirectionalLSTMEdgeCases.
@Test
public void testGradientGravesBidirectionalLSTMEdgeCases() {
//Edge cases: T=1, miniBatchSize=1, both
int[] timeSeriesLength = { 1, 5, 1 };
int[] miniBatchSize = { 7, 1, 1 };
int nIn = 7;
int layerSize = 9;
int nOut = 4;
for (int i = 0; i < timeSeriesLength.length; i++) {
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize[i], nIn, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength[i]; k++) {
input.putScalar(new int[] { m, j, k }, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]);
for (int m = 0; m < miniBatchSize[i]; m++) {
for (int j = 0; j < timeSeriesLength[i]; j++) {
int idx = r.nextInt(nOut);
labels.putScalar(new int[] { m, idx, j }, 1.0f);
}
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).seed(12345L).list().layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()).layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradientGravesLSTMEdgeCases() - timeSeriesLength=" + timeSeriesLength[i] + ", miniBatchSize=" + miniBatchSize[i];
assertTrue(msg, gradOK);
}
}
use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
the class GradientCheckTests method testRbm.
@Test
public void testRbm() {
//As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
//Need to run gradient through updater, so that L2 can be applied
RBM.HiddenUnit[] hiddenFunc = { RBM.HiddenUnit.BINARY, RBM.HiddenUnit.RECTIFIED };
//If true: run some backprop steps first
boolean[] characteristic = { false, true };
LossFunction[] lossFunctions = { LossFunction.MSE, LossFunction.KL_DIVERGENCE };
//i.e., lossFunctions[i] used with outputActivations[i] here
String[] outputActivations = { "softmax", "sigmoid" };
DataNormalization scaler = new NormalizerMinMaxScaler();
DataSetIterator iter = new IrisDataSetIterator(150, 150);
scaler.fit(iter);
iter.setPreProcessor(scaler);
DataSet ds = iter.next();
INDArray input = ds.getFeatureMatrix();
INDArray labels = ds.getLabels();
double[] l2vals = { 0.4, 0.0, 0.4 };
//i.e., use l2vals[i] with l1vals[i]
double[] l1vals = { 0.0, 0.5, 0.5 };
for (RBM.HiddenUnit hidunit : hiddenFunc) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
String outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1).learningRate(1.0).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L).list().layer(0, new RBM.Builder(hidunit, RBM.VisibleUnit.BINARY).nIn(4).nOut(3).weightInit(WeightInit.UNIFORM).updater(Updater.SGD).build()).layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3).weightInit(WeightInit.XAVIER).updater(Updater.SGD).activation(outputActivation).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++) mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradMLP2LayerIrisSimple() - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
}
}
}
}
}
use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
the class DropoutLayerTest method testDropoutLayerWithConvMnist.
@Test
public void testDropoutLayerWithConvMnist() throws Exception {
DataSetIterator iter = new MnistDataSetIterator(2, 2);
DataSet next = iter.next();
// Run without separate activation layer
MultiLayerConfiguration confIntegrated = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).seed(123).list().layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20).activation(Activation.RELU).weightInit(WeightInit.XAVIER).build()).layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).dropOut(0.25).nOut(10).build()).backprop(true).pretrain(false).setInputType(InputType.convolutionalFlat(28, 28, 1)).build();
MultiLayerNetwork netIntegrated = new MultiLayerNetwork(confIntegrated);
netIntegrated.init();
netIntegrated.fit(next);
// Run with separate activation layer
MultiLayerConfiguration confSeparate = new NeuralNetConfiguration.Builder().optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).seed(123).list().layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20).activation(Activation.RELU).weightInit(WeightInit.XAVIER).build()).layer(1, new DropoutLayer.Builder(0.25).build()).layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()).backprop(true).pretrain(false).setInputType(InputType.convolutionalFlat(28, 28, 1)).build();
MultiLayerNetwork netSeparate = new MultiLayerNetwork(confSeparate);
netSeparate.init();
netSeparate.fit(next);
// check parameters
assertEquals(netIntegrated.getLayer(0).getParam("W"), netSeparate.getLayer(0).getParam("W"));
assertEquals(netIntegrated.getLayer(0).getParam("b"), netSeparate.getLayer(0).getParam("b"));
assertEquals(netIntegrated.getLayer(1).getParam("W"), netSeparate.getLayer(2).getParam("W"));
assertEquals(netIntegrated.getLayer(1).getParam("b"), netSeparate.getLayer(2).getParam("b"));
// check activations
netIntegrated.setInput(next.getFeatureMatrix());
netSeparate.setInput(next.getFeatureMatrix());
Nd4j.getRandom().setSeed(12345);
List<INDArray> actTrainIntegrated = netIntegrated.feedForward(true);
Nd4j.getRandom().setSeed(12345);
List<INDArray> actTrainSeparate = netSeparate.feedForward(true);
assertEquals(actTrainIntegrated.get(1), actTrainSeparate.get(1));
assertEquals(actTrainIntegrated.get(2), actTrainSeparate.get(3));
Nd4j.getRandom().setSeed(12345);
List<INDArray> actTestIntegrated = netIntegrated.feedForward(false);
Nd4j.getRandom().setSeed(12345);
List<INDArray> actTestSeparate = netSeparate.feedForward(false);
assertEquals(actTestIntegrated.get(1), actTrainSeparate.get(1));
assertEquals(actTestIntegrated.get(2), actTestSeparate.get(3));
}
use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
the class OutputLayerTest method testRnnOutputLayerIncEdgeCases.
@Test
public void testRnnOutputLayerIncEdgeCases() {
//Basic test + test edge cases: timeSeriesLength==1, miniBatchSize==1, both
int[] tsLength = { 5, 1, 5, 1 };
int[] miniBatch = { 7, 7, 1, 1 };
int nIn = 3;
int nOut = 6;
int layerSize = 4;
FeedForwardToRnnPreProcessor proc = new FeedForwardToRnnPreProcessor();
for (int t = 0; t < tsLength.length; t++) {
Nd4j.getRandom().setSeed(12345);
int timeSeriesLength = tsLength[t];
int miniBatchSize = miniBatch[t];
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
}
}
}
INDArray labels3d = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
int idx = r.nextInt(nOut);
labels3d.putScalar(new int[] { i, idx, j }, 1.0f);
}
}
INDArray labels2d = proc.backprop(labels3d, miniBatchSize);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L).list().layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).activation(Activation.TANH).updater(Updater.NONE).build()).layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()).inputPreProcessor(1, new RnnToFeedForwardPreProcessor()).pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
INDArray out2d = mln.feedForward(input).get(2);
INDArray out3d = proc.preProcess(out2d, miniBatchSize);
MultiLayerConfiguration confRnn = new NeuralNetConfiguration.Builder().seed(12345L).list().layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).activation(Activation.TANH).updater(Updater.NONE).build()).layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork mlnRnn = new MultiLayerNetwork(confRnn);
mlnRnn.init();
INDArray outRnn = mlnRnn.feedForward(input).get(2);
mln.setLabels(labels2d);
mlnRnn.setLabels(labels3d);
mln.computeGradientAndScore();
mlnRnn.computeGradientAndScore();
//score is average over all examples.
//However: OutputLayer version has miniBatch*timeSeriesLength "examples" (after reshaping)
//RnnOutputLayer has miniBatch examples
//Hence: expect difference in scores by factor of timeSeriesLength
double score = mln.score() * timeSeriesLength;
double scoreRNN = mlnRnn.score();
assertTrue(!Double.isNaN(score));
assertTrue(!Double.isNaN(scoreRNN));
double relError = Math.abs(score - scoreRNN) / (Math.abs(score) + Math.abs(scoreRNN));
System.out.println(relError);
assertTrue(relError < 1e-6);
//Check labels and inputs for output layer:
OutputLayer ol = (OutputLayer) mln.getOutputLayer();
assertArrayEquals(ol.getInput().shape(), new int[] { miniBatchSize * timeSeriesLength, layerSize });
assertArrayEquals(ol.getLabels().shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
RnnOutputLayer rnnol = (RnnOutputLayer) mlnRnn.getOutputLayer();
//assertArrayEquals(rnnol.getInput().shape(),new int[]{miniBatchSize,layerSize,timeSeriesLength});
//Input may be set by BaseLayer methods. Thus input may end up as reshaped 2d version instead of original 3d version.
//Not ideal, but everything else works.
assertArrayEquals(rnnol.getLabels().shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
//Check shapes of output for both:
assertArrayEquals(out2d.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray out = mln.output(input);
assertArrayEquals(out.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray act = mln.activate();
assertArrayEquals(act.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray preout = mln.preOutput(input);
assertArrayEquals(preout.shape(), new int[] { miniBatchSize * timeSeriesLength, nOut });
INDArray outFFRnn = mlnRnn.feedForward(input).get(2);
assertArrayEquals(outFFRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray outRnn2 = mlnRnn.output(input);
assertArrayEquals(outRnn2.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray actRnn = mlnRnn.activate();
assertArrayEquals(actRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
INDArray preoutRnn = mlnRnn.preOutput(input);
assertArrayEquals(preoutRnn.shape(), new int[] { miniBatchSize, nOut, timeSeriesLength });
}
}
Aggregations