Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class TestVAE, method testJsonYaml.
@Test
public void testJsonYaml() {
MultiLayerConfiguration config = new NeuralNetConfiguration.Builder().seed(12345).list()
    .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
        .reconstructionDistribution(new GaussianReconstructionDistribution("identity"))
        .nIn(3).nOut(4).encoderLayerSizes(5).decoderLayerSizes(6).build())
    .layer(1, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
        .reconstructionDistribution(new GaussianReconstructionDistribution("tanh"))
        .nIn(7).nOut(8).encoderLayerSizes(9).decoderLayerSizes(10).build())
    .layer(2, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
        .reconstructionDistribution(new BernoulliReconstructionDistribution())
        .nIn(11).nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build())
    .layer(3, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
        .reconstructionDistribution(new ExponentialReconstructionDistribution("tanh"))
        .nIn(11).nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build())
    .layer(4, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
        .lossFunction(new ActivationTanH(), LossFunctions.LossFunction.MSE)
        .nIn(11).nOut(12).encoderLayerSizes(13).decoderLayerSizes(14).build())
    .layer(5, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder()
        .reconstructionDistribution(new CompositeReconstructionDistribution.Builder()
            .addDistribution(5, new GaussianReconstructionDistribution())
            .addDistribution(5, new GaussianReconstructionDistribution("tanh"))
            .addDistribution(5, new BernoulliReconstructionDistribution()).build())
        .nIn(15).nOut(16).encoderLayerSizes(17).decoderLayerSizes(18).build())
    .layer(6, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE)
        .nIn(18).nOut(19).activation(new ActivationTanH()).build())
    .pretrain(true).backprop(true).build();
String asJson = config.toJson();
String asYaml = config.toYaml();
MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(asJson);
MultiLayerConfiguration fromYaml = MultiLayerConfiguration.fromYaml(asYaml);
assertEquals(config, fromJson);
assertEquals(config, fromYaml);
}
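Because the round trip relies only on toJson()/toYaml() and the matching static fromJson()/fromYaml() parsers, the same pattern covers persisting a configuration to disk. A minimal sketch: the helper class and its file handling are illustrative, only toJson()/fromJson() come from the test above.
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;

public class ConfigPersistenceSketch {
    //Write the configuration as JSON; toYaml()/fromYaml() would work identically.
    public static void save(MultiLayerConfiguration config, Path file) throws Exception {
        Files.write(file, config.toJson().getBytes(StandardCharsets.UTF_8));
    }

    //Restore the configuration; the test above asserts this is an exact inverse of toJson().
    public static MultiLayerConfiguration load(Path file) throws Exception {
        String json = new String(Files.readAllBytes(file), StandardCharsets.UTF_8);
        return MultiLayerConfiguration.fromJson(json);
    }
}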
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class MultiLayerTestRNN, method testRnnActivateUsingStoredState.
@Test
public void testRnnActivateUsingStoredState() {
int timeSeriesLength = 12;
int miniBatchSize = 7;
int nIn = 5;
int nOut = 4;
int nTimeSlices = 5;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).list()
    .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7)
        .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
        .dist(new NormalDistribution(0, 0.5)).build())
    .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
        .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
        .dist(new NormalDistribution(0, 0.5)).build())
    .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).nIn(8).nOut(nOut)
        .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
        .dist(new NormalDistribution(0, 0.5)).build())
    .build();
Nd4j.getRandom().setSeed(12345);
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
INDArray inputLong = Nd4j.rand(new int[] { miniBatchSize, nIn, nTimeSlices * timeSeriesLength });
INDArray input = inputLong.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, timeSeriesLength));
List<INDArray> outStandard = mln.feedForward(input, true);
List<INDArray> outRnnAct = mln.rnnActivateUsingStoredState(input, true, true);
//As the initial state is all zeros, expect these to be the same
assertEquals(outStandard, outRnnAct);
//Furthermore, expect multiple calls to this function to be the same:
for (int i = 0; i < 3; i++) {
assertEquals(outStandard, mln.rnnActivateUsingStoredState(input, true, true));
}
List<INDArray> outStandardLong = mln.feedForward(inputLong, true);
BaseRecurrentLayer<?> l0 = (BaseRecurrentLayer<?>) mln.getLayer(0);
BaseRecurrentLayer<?> l1 = (BaseRecurrentLayer<?>) mln.getLayer(1);
for (int i = 0; i < nTimeSlices; i++) {
INDArray inSlice = inputLong.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(i * timeSeriesLength, (i + 1) * timeSeriesLength));
List<INDArray> outSlice = mln.rnnActivateUsingStoredState(inSlice, true, true);
List<INDArray> expOut = new ArrayList<>();
for (INDArray temp : outStandardLong) {
expOut.add(temp.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(i * timeSeriesLength, (i + 1) * timeSeriesLength)));
}
for (int j = 0; j < expOut.size(); j++) {
INDArray exp = expOut.get(j);
INDArray act = outSlice.get(j);
System.out.println(j);
System.out.println(exp.sub(act));
assertEquals(exp, act);
}
assertEquals(expOut, outSlice);
//Again, expect multiple calls to give the same output
for (int j = 0; j < 3; j++) {
outSlice = mln.rnnActivateUsingStoredState(inSlice, true, true);
assertEquals(expOut, outSlice);
}
//Carry state forward: use the TBPTT state from the end of this slice as the stored
//state for the next slice, so successive slices match the single long forward pass
l0.rnnSetPreviousState(l0.rnnGetTBPTTState());
l1.rnnSetPreviousState(l1.rnnGetTBPTTState());
}
}
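The storeLastForTBPTT flag is what makes the per-slice loop above line up with the single long forward pass: the stored state only advances when the TBPTT state is explicitly copied back in at the end of each slice, which is also why repeated calls return identical output. For plain step-by-step inference the usual entry point is rnnTimeStep, which uses and updates the stored state on every call. A minimal sketch, assuming mln, input, timeSeriesLength and the [miniBatchSize, nIn, timeSteps] layout from the test above:
mln.rnnClearPreviousState(); //reset the stored state to zeros before a new sequence
for (int t = 0; t < timeSeriesLength; t++) {
    //one time step, shape [miniBatchSize, nIn, 1]
    INDArray step = input.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(t, t + 1));
    INDArray out = mln.rnnTimeStep(step); //uses and updates the stored state
}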
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class MultiLayerTestRNN, method testGravesTLSTMInitStacked.
@Test
public void testGravesTLSTMInitStacked() {
int nIn = 8;
int nOut = 25;
int[] nHiddenUnits = { 17, 19, 23 };
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().list()
    .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(17)
        .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
    .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(17).nOut(19)
        .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
    .layer(2, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(19).nOut(23)
        .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
    .layer(3, new RnnOutputLayer.Builder(LossFunction.MSE).nIn(23).nOut(nOut)
        .weightInit(WeightInit.DISTRIBUTION).activation(Activation.TANH).build())
    .build();
MultiLayerNetwork network = new MultiLayerNetwork(conf);
network.init();
//Ensure that we have the correct number of weights and biases, and that these have the correct shapes, for each layer
for (int i = 0; i < nHiddenUnits.length; i++) {
Layer layer = network.getLayer(i);
assertTrue(layer instanceof GravesLSTM);
Map<String, INDArray> paramTable = layer.paramTable();
//2 sets of weights, 1 set of biases
assertEquals(3, paramTable.size());
int layerNIn = (i == 0 ? nIn : nHiddenUnits[i - 1]);
INDArray recurrentWeights = paramTable.get(GravesLSTMParamInitializer.RECURRENT_WEIGHT_KEY);
//Should be shape: [layerSize,4*layerSize+3]
assertArrayEquals(recurrentWeights.shape(), new int[] { nHiddenUnits[i], 4 * nHiddenUnits[i] + 3 });
INDArray inputWeights = paramTable.get(GravesLSTMParamInitializer.INPUT_WEIGHT_KEY);
//Should be shape: [nIn,4*layerSize]
assertArrayEquals(inputWeights.shape(), new int[] { layerNIn, 4 * nHiddenUnits[i] });
INDArray biases = paramTable.get(GravesLSTMParamInitializer.BIAS_KEY);
//Should be shape: [1,4*layerSize]
assertArrayEquals(biases.shape(), new int[] { 1, 4 * nHiddenUnits[i] });
//Want forget gate biases to be initialized to > 0. See parameter initializer for details
INDArray forgetGateBiases = biases.get(NDArrayIndex.point(0), NDArrayIndex.interval(nHiddenUnits[i], 2 * nHiddenUnits[i]));
assertEquals(nHiddenUnits[i], (int) forgetGateBiases.gt(0).sum(Integer.MAX_VALUE).getDouble(0));
int nParams = recurrentWeights.length() + inputWeights.length() + biases.length();
assertEquals(nParams, layer.numParams());
}
}
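The three shape assertions imply a closed-form parameter count for a GravesLSTM layer: nIn * 4 * layerSize input weights, layerSize * (4 * layerSize + 3) recurrent weights (the 3 extra columns per row are the peephole connections), and 4 * layerSize biases. A small helper capturing that arithmetic; the method name is ours, not part of the DL4J API:
//Expected parameter count for one GravesLSTM layer, derived from the shapes asserted above.
static int gravesLstmParamCount(int nIn, int layerSize) {
    int inputWeights = nIn * 4 * layerSize;                  //shape [nIn, 4*layerSize]
    int recurrentWeights = layerSize * (4 * layerSize + 3);  //shape [layerSize, 4*layerSize+3]
    int biases = 4 * layerSize;                              //shape [1, 4*layerSize]
    //e.g. layer 0 above: gravesLstmParamCount(8, 17) == 544 + 1207 + 68 == 1819
    return inputWeights + recurrentWeights + biases;
}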
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class MultiLayerTestRNN, method testTruncatedBPTTSimple.
@Test
public void testTruncatedBPTTSimple() {
//Extremely simple test of the 'does it throw an exception' variety
int timeSeriesLength = 12;
int miniBatchSize = 7;
int nIn = 5;
int nOut = 4;
int nTimeSlices = 20;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list()
    .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(nIn).nOut(7)
        .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
        .dist(new NormalDistribution(0, 0.5)).build())
    .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(7).nOut(8)
        .activation(Activation.TANH).weightInit(WeightInit.DISTRIBUTION)
        .dist(new NormalDistribution(0, 0.5)).build())
    .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).nIn(8).nOut(nOut)
        .activation(Activation.SOFTMAX).weightInit(WeightInit.DISTRIBUTION)
        .dist(new NormalDistribution(0, 0.5)).build())
    .pretrain(false).backprop(true).backpropType(BackpropType.TruncatedBPTT)
    .tBPTTBackwardLength(timeSeriesLength).tBPTTForwardLength(timeSeriesLength).build();
Nd4j.getRandom().setSeed(12345);
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
INDArray inputLong = Nd4j.rand(new int[] { miniBatchSize, nIn, nTimeSlices * timeSeriesLength });
INDArray labelsLong = Nd4j.rand(new int[] { miniBatchSize, nOut, nTimeSlices * timeSeriesLength });
mln.fit(inputLong, labelsLong);
}
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
From the class MultiLayerTestRNN, method testRnnTimeStepWithPreprocessor.
@Test
public void testRnnTimeStepWithPreprocessor() {
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1).list()
    .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10)
        .activation(Activation.TANH).build())
    .layer(1, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().nIn(10).nOut(10)
        .activation(Activation.TANH).build())
    .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
        .activation(Activation.SOFTMAX).nIn(10).nOut(10).build())
    .inputPreProcessor(0, new FeedForwardToRnnPreProcessor())
    .pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray in = Nd4j.rand(1, 10);
net.rnnTimeStep(in);
}