Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
The class MultiLayerTestRNN, method testTruncatedBPTTSimple.
@Test
public void testTruncatedBPTTSimple() {
    //Extremely simple test of the 'does it throw an exception' variety
    int timeSeriesLength = 12;
    int miniBatchSize = 7;
    int nIn = 5;
    int nOut = 4;
    int nTimeSlices = 20;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7)
                                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
                                    .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build())
                    .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).nIn(8).nOut(nOut)
                                    .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
                                    .dist(new NormalDistribution(0, 0.5)).build())
                    .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
                    .tBPTTBackwardLength(timeSeriesLength).tBPTTForwardLength(timeSeriesLength).build();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    mln.init();

    INDArray inputLong = Nd4j.rand(new int[] {miniBatchSize, nIn, nTimeSlices * timeSeriesLength});
    INDArray labelsLong = Nd4j.rand(new int[] {miniBatchSize, nOut, nTimeSlices * timeSeriesLength});

    mln.fit(inputLong, labelsLong);
}
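For context, a small arithmetic sketch (not part of the test) of how the truncated-BPTT window relates to the long sequence built above: the input spans nTimeSlices * timeSeriesLength = 240 time steps, and with tBPTTForwardLength and tBPTTBackwardLength both set to 12, fit() processes each minibatch as a series of truncated forward/backward segments.

// Illustrative only; the values are copied from the test above.
int tbpttLength = 12;                                  // tBPTTForwardLength == tBPTTBackwardLength
int totalSteps = 20 * 12;                              // nTimeSlices * timeSeriesLength = 240
int segmentsPerMinibatch = totalSteps / tbpttLength;   // 20 truncated forward/backward passes
System.out.println("TBPTT segments per minibatch: " + segmentsPerMinibatch);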
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
The class CuDNNGradientChecks, method testBatchNormCnn.
@Test
public void testBatchNormCnn() throws Exception {
    //Note: CuDNN batch norm supports 4d only, as per 5.1 (according to api reference documentation)
    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int depth = 1;
    int hw = 4;
    int nOut = 4;
    INDArray input = Nd4j.rand(new int[] {minibatch, depth, hw, hw});
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }

    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().learningRate(1.0)
                    .regularization(false).updater(Updater.NONE).seed(12345L)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).list()
                    .layer(0, new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nIn(depth).nOut(2)
                                    .activation(Activation.IDENTITY).build())
                    .layer(1, new BatchNormalization.Builder().build())
                    .layer(2, new ActivationLayer.Builder().activation(Activation.TANH).build())
                    .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                    .activation(Activation.SOFTMAX).nOut(nOut).build())
                    .setInputType(InputType.convolutional(hw, hw, depth)).pretrain(false).backprop(true);

    MultiLayerNetwork mln = new MultiLayerNetwork(builder.build());
    mln.init();

    Field f = org.deeplearning4j.nn.layers.normalization.BatchNormalization.class.getDeclaredField("helper");
    f.setAccessible(true);

    org.deeplearning4j.nn.layers.normalization.BatchNormalization b =
                    (org.deeplearning4j.nn.layers.normalization.BatchNormalization) mln.getLayer(1);
    BatchNormalizationHelper bn = (BatchNormalizationHelper) f.get(b);
    assertTrue(bn instanceof CudnnBatchNormalizationHelper);

    if (PRINT_RESULTS) {
        for (int j = 0; j < mln.getnLayers(); j++)
            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
    }

    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

    assertTrue(gradOK);
}
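The pass/fail decision comes from GradientCheckUtil.checkGradients, which compares the analytic (backprop) gradients against numeric central-difference gradients. A rough sketch of the per-parameter comparison follows, with a hypothetical helper name; the real utility handles the parameter perturbation and bookkeeping internally.

// Hypothetical helper, not part of the DL4J API: the usual central-difference check.
// numeric ~= (score(p + eps) - score(p - eps)) / (2 * eps) for each parameter p.
static boolean gradientMatches(double analytic, double numeric, double maxRelError, double minAbsError) {
    double absDiff = Math.abs(analytic - numeric);
    if (absDiff < minAbsError)
        return true;                                    // both gradients effectively zero
    double relError = absDiff / (Math.abs(analytic) + Math.abs(numeric));
    return relError < maxRelError;
}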
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
The class RegressionTest060, method regressionTestMLP2.
@Test
public void regressionTestMLP2() throws Exception {
    File f = new ClassPathResource("regression_testing/060/060_ModelSerializer_Regression_MLP_2.zip")
                    .getTempFileFromArchive();
    MultiLayerNetwork net = ModelSerializer.restoreMultiLayerNetwork(f, true);

    MultiLayerConfiguration conf = net.getLayerWiseConfigurations();
    assertEquals(2, conf.getConfs().size());
    assertTrue(conf.isBackprop());
    assertFalse(conf.isPretrain());

    DenseLayer l0 = (DenseLayer) conf.getConf(0).getLayer();
    assertTrue(l0.getActivationFn() instanceof ActivationLReLU);
    assertEquals(3, l0.getNIn());
    assertEquals(4, l0.getNOut());
    assertEquals(WeightInit.DISTRIBUTION, l0.getWeightInit());
    assertEquals(new NormalDistribution(0.1, 1.2), l0.getDist());
    assertEquals(Updater.RMSPROP, l0.getUpdater());
    assertEquals(0.96, l0.getRmsDecay(), 1e-6);
    assertEquals(0.15, l0.getLearningRate(), 1e-6);
    assertEquals(0.6, l0.getDropOut(), 1e-6);
    assertEquals(0.1, l0.getL1(), 1e-6);
    assertEquals(0.2, l0.getL2(), 1e-6);
    assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l0.getGradientNormalization());
    assertEquals(1.5, l0.getGradientNormalizationThreshold(), 1e-5);

    OutputLayer l1 = (OutputLayer) conf.getConf(1).getLayer();
    assertEquals("identity", l1.getActivationFn().toString());
    assertEquals(LossFunctions.LossFunction.MSE, l1.getLossFunction());
    assertTrue(l1.getLossFn() instanceof LossMSE);
    assertEquals(4, l1.getNIn());
    assertEquals(5, l1.getNOut());
    assertEquals(WeightInit.DISTRIBUTION, l1.getWeightInit());
    assertEquals(new NormalDistribution(0.1, 1.2), l1.getDist());
    assertEquals(Updater.RMSPROP, l1.getUpdater());
    assertEquals(0.96, l1.getRmsDecay(), 1e-6);
    assertEquals(0.15, l1.getLearningRate(), 1e-6);
    assertEquals(0.6, l1.getDropOut(), 1e-6);
    assertEquals(0.1, l1.getL1(), 1e-6);
    assertEquals(0.2, l1.getL2(), 1e-6);
    assertEquals(GradientNormalization.ClipElementWiseAbsoluteValue, l1.getGradientNormalization());
    assertEquals(1.5, l1.getGradientNormalizationThreshold(), 1e-5);

    int numParams = net.numParams();
    assertEquals(Nd4j.linspace(1, numParams, numParams), net.params());
    int updaterSize = net.getUpdater().stateSizeForLayer(net);
    assertEquals(Nd4j.linspace(1, updaterSize, updaterSize), net.getUpdater().getStateViewArray());
}
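For reference, a minimal sketch of the save/restore round trip this regression test relies on; the file name below is hypothetical, and the boolean arguments control whether updater state is written and loaded along with the parameters.

// Minimal sketch, assuming the ModelSerializer API of this era.
File out = new File("mlp_060.zip");                              // hypothetical path
ModelSerializer.writeModel(net, out, true);                      // true: also save updater state
MultiLayerNetwork restored = ModelSerializer.restoreMultiLayerNetwork(out, true);
assertEquals(net.params(), restored.params());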
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
The class GradientCheckTests, method testAutoEncoder.
@Test
public void testAutoEncoder() {
    //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
    //Need to run gradient through updater, so that L2 can be applied

    String[] activFns = {"sigmoid", "tanh"};
    //If true: run some backprop steps first
    boolean[] characteristic = {false, true};

    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = {"softmax", "tanh"};

    DataNormalization scaler = new NormalizerMinMaxScaler();
    DataSetIterator iter = new IrisDataSetIterator(150, 150);
    scaler.fit(iter);
    iter.setPreProcessor(scaler);
    DataSet ds = iter.next();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();

    NormalizerStandardize norm = new NormalizerStandardize();
    norm.fit(ds);
    norm.transform(ds);

    double[] l2vals = {0.2, 0.0, 0.2};
    //i.e., use l2vals[i] with l1vals[i]
    double[] l1vals = {0.0, 0.3, 0.3};

    for (String afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunction lf = lossFunctions[i];
                    String outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];

                    Nd4j.getRandom().setSeed(12345);
                    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true)
                                    .learningRate(1.0).l2(l2).l1(l1)
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).seed(12345L)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                    .updater(Updater.SGD).list()
                                    .layer(0, new AutoEncoder.Builder().nIn(4).nOut(3).activation(afn).build())
                                    .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3)
                                                    .activation(outputActivation).build())
                                    .pretrain(true).backprop(true).build();

                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();

                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn="
                                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1
                                        + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < scoreBefore);
                    }

                    if (PRINT_RESULTS) {
                        System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + afn
                                        + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
                        for (int j = 0; j < mln.getnLayers(); j++)
                            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

                    String msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf
                                    + ", outputActivation=" + outputActivation + ", doLearningFirst="
                                    + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
                    assertTrue(msg, gradOK);
                }
            }
        }
    }
}
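Since the point of this test is the interaction of L1/L2 regularization with the gradients, here is a short sketch of the penalty the l1/l2 settings add to the score. This is the textbook form; DL4J's exact scaling of the L2 term may differ, and the helper name is hypothetical.

// Illustrative only: regularization penalty for a weight array w, using the
// same l1/l2 coefficients iterated over in the test above.
static double regPenalty(INDArray w, double l1, double l2) {
    double norm1 = w.norm1Number().doubleValue();    // sum of |w_i|
    double norm2 = w.norm2Number().doubleValue();    // sqrt(sum of w_i^2)
    return l1 * norm1 + 0.5 * l2 * norm2 * norm2;    // 0.5 factor is one common convention
}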
Use of org.deeplearning4j.nn.conf.distribution.NormalDistribution in project deeplearning4j by deeplearning4j.
The class GradientCheckTests, method testGradientGravesLSTMEdgeCases.
@Test
public void testGradientGravesLSTMEdgeCases() {
    //Edge cases: T=1, miniBatchSize=1, both
    int[] timeSeriesLength = {1, 5, 1};
    int[] miniBatchSize = {7, 1, 1};
    int nIn = 7;
    int layerSize = 9;
    int nOut = 4;

    for (int i = 0; i < timeSeriesLength.length; i++) {
        Random r = new Random(12345L);
        INDArray input = Nd4j.zeros(miniBatchSize[i], nIn, timeSeriesLength[i]);
        for (int m = 0; m < miniBatchSize[i]; m++) {
            for (int j = 0; j < nIn; j++) {
                for (int k = 0; k < timeSeriesLength[i]; k++) {
                    input.putScalar(new int[] {m, j, k}, r.nextDouble() - 0.5);
                }
            }
        }

        INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]);
        for (int m = 0; m < miniBatchSize[i]; m++) {
            for (int j = 0; j < timeSeriesLength[i]; j++) {
                int idx = r.nextInt(nOut);
                labels.putScalar(new int[] {m, idx, j}, 1.0f);
            }
        }

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(false).seed(12345L)
                        .list()
                        .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize)
                                        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
                                        .updater(Updater.NONE).build())
                        .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX)
                                        .nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION)
                                        .dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build())
                        .pretrain(false).backprop(true).build();

        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();

        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                        DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);

        String msg = "testGradientGravesLSTMEdgeCases() - timeSeriesLength=" + timeSeriesLength[i]
                        + ", miniBatchSize=" + miniBatchSize[i];
        assertTrue(msg, gradOK);
    }
}
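The nested loops above fill arrays in DL4J's recurrent data layout. A brief sketch of the shapes involved, using the i = 1 case from the arrays at the top of the test; this is illustrative and not part of the test.

// DL4J RNN layout: input [miniBatchSize, nIn, timeSeriesLength], labels [miniBatchSize, nOut, timeSeriesLength].
// The three loop iterations cover (miniBatchSize, timeSeriesLength) = (7, 1), (1, 5) and (1, 1).
INDArray exampleInput = Nd4j.zeros(1, 7, 5);     // miniBatchSize=1, nIn=7, timeSeriesLength=5
INDArray exampleLabels = Nd4j.zeros(1, 4, 5);    // miniBatchSize=1, nOut=4, timeSeriesLength=5
System.out.println(java.util.Arrays.toString(exampleInput.shape()));    // [1, 7, 5]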