use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
the class BNGradientCheckTest method testGradientBNWithCNNandSubsamplingCompGraph.
@Test
public void testGradientBNWithCNNandSubsamplingCompGraph() {
//Parameterized test, testing combinations of:
// (a) activation function
// (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
// (c) Loss function (with specified output activations)
// (d) l1 and l2 values
Activation[] activFns = { Activation.SIGMOID, Activation.TANH, Activation.IDENTITY };
//If true: run some backprop steps first
boolean[] characteristic = { false, true };
LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE };
//i.e., lossFunctions[i] used with outputActivations[i] here
Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
double[] l2vals = { 0.0, 0.1, 0.1 };
//i.e., use l2vals[j] with l1vals[j]
double[] l1vals = { 0.0, 0.0, 0.2 };
Nd4j.getRandom().setSeed(12345);
int minibatch = 10;
int depth = 2;
int hw = 5;
int nOut = 3;
INDArray input = Nd4j.rand(new int[] { minibatch, depth, hw, hw });
INDArray labels = Nd4j.zeros(minibatch, nOut);
Random r = new Random(12345);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, r.nextInt(nOut), 1.0);
}
DataSet ds = new DataSet(input, labels);
for (Activation afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < l2vals.length; j++) {
LossFunctions.LossFunction lf = lossFunctions[i];
Activation outputActivation = outputActivations[i];
ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .regularization(l1vals[j] > 0 || l2vals[j] > 0).l1(l1vals[j]).l2(l2vals[j])
        .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
        .updater(Updater.NONE)
        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
        .seed(12345L)
        .graphBuilder()
        .addInputs("in")
        .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3).activation(afn).build(), "in")
        .addLayer("1", new BatchNormalization.Builder().build(), "0")
        .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2).stride(1, 1).build(), "1")
        .addLayer("3", new BatchNormalization(), "2")
        .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
        .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut).build(), "4")
        .setOutputs("5")
        .setInputTypes(InputType.convolutional(hw, hw, depth))
        .pretrain(false).backprop(true)
        .build();
ComputationGraph net = new ComputationGraph(conf);
net.init();
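//Anonymous inner class trick: obtain the enclosing test method's name for use in assertion messages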
String name = new Object() {
}.getClass().getEnclosingMethod().getName();
if (doLearningFirst) {
//Run a number of iterations of learning
net.setInput(0, ds.getFeatures());
net.setLabels(ds.getLabels());
net.computeGradientAndScore();
double scoreBefore = net.score();
for (int k = 0; k < 5; k++) net.fit(ds);
net.computeGradientAndScore();
double scoreAfter = net.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
for (int k = 0; k < net.getNumLayers(); k++) System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { input }, new INDArray[] { labels });
assertTrue(gradOK);
}
}
}
}
}
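The combinatorial test scaffolding above can obscure the core pattern. As a minimal sketch, assuming the same deeplearning4j 0.x builder API used in these tests (layer sizes, activations and the loss function below are illustrative only, not taken from the test), uniform weight initialization amounts to selecting WeightInit.DISTRIBUTION and supplying a UniformDistribution with lower and upper bounds:
MultiLayerConfiguration uniformConf = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .weightInit(WeightInit.DISTRIBUTION)
        .dist(new UniformDistribution(-2, 2)) //weights drawn uniformly from [-2, 2]
        .list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
        .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.IDENTITY).nIn(3).nOut(2).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork uniformNet = new MultiLayerNetwork(uniformConf);
uniformNet.init(); //parameters are sampled here, using the configured distribution
The same distribution can instead be set per layer, as the next example shows.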
use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
the class LayerConfigTest method testWeightBiasInitLayerwiseOverride.
@Test
public void testWeightBiasInitLayerwiseOverride() {
//Without layerwise override:
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
        .biasInit(1)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
        .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
assertEquals(WeightInit.DISTRIBUTION, conf.getConf(0).getLayer().getWeightInit());
assertEquals(WeightInit.DISTRIBUTION, conf.getConf(1).getLayer().getWeightInit());
assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(0).getLayer().getDist().toString());
assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(1).getLayer().getDist().toString());
assertEquals(1, conf.getConf(0).getLayer().getBiasInit(), 0.0);
assertEquals(1, conf.getConf(1).getLayer().getBiasInit(), 0.0);
//With a layerwise override: layer 1 supplies its own UniformDistribution(0, 1) and biasInit(0), which take precedence over the builder-level defaults
conf = new NeuralNetConfiguration.Builder()
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1.0))
        .biasInit(1)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
        .layer(1, new DenseLayer.Builder().nIn(2).nOut(2)
                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                .biasInit(0).build())
        .build();
net = new MultiLayerNetwork(conf);
net.init();
assertEquals(WeightInit.DISTRIBUTION, conf.getConf(0).getLayer().getWeightInit());
assertEquals(WeightInit.DISTRIBUTION, conf.getConf(1).getLayer().getWeightInit());
assertEquals("NormalDistribution{mean=0.0, std=1.0}", conf.getConf(0).getLayer().getDist().toString());
assertEquals("UniformDistribution{lower=0.0, upper=1.0}", conf.getConf(1).getLayer().getDist().toString());
assertEquals(1, conf.getConf(0).getLayer().getBiasInit(), 0.0);
assertEquals(0, conf.getConf(1).getLayer().getBiasInit(), 0.0);
}
use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
the class TestDropout method testDropoutMultiLayer.
@Test
public void testDropoutMultiLayer() throws Exception {
//Testing dropout with multiple layers
//Layer input: values should be set to either 0.0 or 2.0x original value
//However: we don't have access to 'original' activations easily
//Instead: use sigmoid + weight initialization that saturates
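//With dropOut(0.5) the keep probability is p = 0.5 and dropout is "inverted": a kept activation x
//is scaled to x / p = 2x while a dropped one becomes 0, so the expectation is preserved
//(0.5 * 2x + 0.5 * 0 = x). The assertions below check for exactly this 0.0-or-2x pattern.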
int nIn = 8;
int layerSize = 10;
int nOut = 4;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(Updater.SGD).iterations(1)
        .regularization(true).dropOut(0.5)
        .learningRate(1e-9)
        .weightInit(WeightInit.DISTRIBUTION)
        //Weight init to cause sigmoid saturation
        .dist(new UniformDistribution(10, 11))
        .list()
        .layer(0, new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(nIn).nOut(layerSize).build())
        .layer(1, new DenseLayer.Builder().activation(Activation.SIGMOID).nIn(layerSize).nOut(layerSize).build())
        .layer(2, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(layerSize).nOut(nOut).weightInit(WeightInit.XAVIER).build())
        .backprop(true).pretrain(false)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
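//Reflective access to the dropoutMask field declared on BaseLayer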
Field dropoutMaskField = BaseLayer.class.getDeclaredField("dropoutMask");
dropoutMaskField.setAccessible(true);
int nTests = 15;
Nd4j.getRandom().setSeed(12345);
int noDropoutCount = 0;
for (int i = 0; i < nTests; i++) {
INDArray in = Nd4j.rand(1, nIn).dup('c');
INDArray out = Nd4j.rand(1, nOut).dup('c');
INDArray inCopy = in.dup('c');
net.fit(new DataSet(in, out));
INDArray l0Input = net.getLayer(0).input().dup('c');
//Dropout occurred. Expect inputs to be either scaled 2x original, or set to 0.0 (with dropout = 0.5)
NdIndexIterator iter = new NdIndexIterator(inCopy.shape());
while (iter.hasNext()) {
int[] idx = iter.next();
double origValue = inCopy.getDouble(idx);
double doValue = l0Input.getDouble(idx);
if (doValue > 0.0) {
//Input was kept -> should be scaled by factor of (1.0/0.5 = 2)
assertEquals(origValue * 2.0, doValue, 0.0001);
}
}
//Inputs to layers 1 and 2 come from saturated sigmoids, so they should all be ~1.0 before dropout -> either 0 or ~2.0 after dropout
for (int j = 1; j < 3; j++) {
INDArray ljInput = net.getLayer(j).input();
for (int k = 0; k < ljInput.length(); k++) {
double doValue = ljInput.getDouble(k);
if (doValue > 0.0) {
//Input was kept -> should be scaled by factor of (1.0/0.5 = 2)
//Sigmoid is saturated -> inputs should be ~1.0 -> 2.0 after dropout
assertEquals(2.0, doValue, 0.1);
}
}
}
//Do forward pass
//(1) ensure dropout ISN'T being applied for forward pass at test time
//(2) ensure dropout ISN'T being applied for test time scoring
//If dropout is applied at test time: outputs + score will differ between passes
INDArray in2 = Nd4j.rand(1, nIn);
INDArray out2 = Nd4j.rand(1, nOut);
INDArray outTest1 = net.output(in2, false);
INDArray outTest2 = net.output(in2, false);
INDArray outTest3 = net.output(in2, false);
assertEquals(outTest1, outTest2);
assertEquals(outTest1, outTest3);
double score1 = net.score(new DataSet(in2, out2), false);
double score2 = net.score(new DataSet(in2, out2), false);
double score3 = net.score(new DataSet(in2, out2), false);
assertEquals(score1, score2, 0.0);
assertEquals(score1, score3, 0.0);
}
if (noDropoutCount >= nTests / 3) {
//at 0.5 dropout ratio and more than a few inputs, expect only a very small number of instances where
//no dropout occurs, just due to random chance
fail("Too many instances of dropout not being applied");
}
}
use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
the class GravesBidirectionalLSTMTest method testGravesBidirectionalLSTMForwardPassHelper.
@Test
public void testGravesBidirectionalLSTMForwardPassHelper() throws Exception {
//GravesBidirectionalLSTM.activateHelper() has different behaviour (due to optimizations) when forBackprop==true vs false
//But should otherwise provide identical activations
Nd4j.getRandom().setSeed(12345);
final int nIn = 10;
final int layerSize = 15;
final int miniBatchSize = 4;
final int timeSeriesLength = 7;
final NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
        .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                .nIn(nIn).nOut(layerSize)
                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(0, 1))
                .activation(Activation.TANH).build())
        .build();
int numParams = conf.getLayer().initializer().numParams(conf);
INDArray params = Nd4j.create(1, numParams);
final GravesBidirectionalLSTM lstm = (GravesBidirectionalLSTM) conf.getLayer().instantiate(conf, null, 0, params, true);
final INDArray input = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
lstm.setInput(input);
//forBackprop == false:
final INDArray fwdPassFalse = LSTMHelpers.activateHelper(lstm, lstm.conf(), new ActivationSigmoid(), lstm.input(),
        lstm.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS),
        lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS),
        lstm.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS),
        false, null, null, false, true,
        GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, null).fwdPassOutput;
//forBackprop == true:
final INDArray[] fwdPassTrue = LSTMHelpers.activateHelper(lstm, lstm.conf(), new ActivationSigmoid(), lstm.input(),
        lstm.getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS),
        lstm.getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS),
        lstm.getParam(GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS),
        false, null, null, true, true,
        GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, null).fwdPassOutputAsArrays;
//I have no idea what the heck this does --Ben
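//(It checks, per time step, that the single 3D output from the forBackprop == false path matches the corresponding per-step array returned when forBackprop == true.)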
for (int i = 0; i < timeSeriesLength; i++) {
final INDArray sliceFalse = fwdPassFalse.tensorAlongDimension(i, 1, 0);
final INDArray sliceTrue = fwdPassTrue[i];
assertTrue(sliceFalse.equals(sliceTrue));
}
}
use of org.deeplearning4j.nn.conf.distribution.UniformDistribution in project deeplearning4j by deeplearning4j.
the class GravesBidirectionalLSTMTest method testGetSetParmas.
@Test
public void testGetSetParmas() {
final int nIn = 2;
final int layerSize = 3;
final int miniBatchSize = 2;
final int timeSeriesLength = 10;
Nd4j.getRandom().setSeed(12345);
final NeuralNetConfiguration confBidirectional = new NeuralNetConfiguration.Builder()
        .layer(new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                .nIn(nIn).nOut(layerSize)
                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-0.1, 0.1))
                .activation(Activation.TANH).build())
        .build();
int numParams = confBidirectional.getLayer().initializer().numParams(confBidirectional);
INDArray params = Nd4j.create(1, numParams);
final GravesBidirectionalLSTM bidirectionalLSTM = (GravesBidirectionalLSTM) confBidirectional.getLayer().instantiate(confBidirectional, null, 0, params, true);
final INDArray sig = Nd4j.rand(new int[] { miniBatchSize, nIn, timeSeriesLength });
final INDArray act1 = bidirectionalLSTM.activate(sig);
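//Round trip: extracting the parameter view and setting it back should leave the activations unchanged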
params = bidirectionalLSTM.params();
bidirectionalLSTM.setParams(params);
final INDArray act2 = bidirectionalLSTM.activate(sig);
assertArrayEquals(act2.data().asDouble(), act1.data().asDouble(), 1e-8);
}