Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class VaeGradientCheckTests, method testVaePretrain.
@Test
public void testVaePretrain() {
    //activation functions such as relu and hardtanh: may randomly fail due to discontinuities
    String[] activFns = {"identity", "identity", "tanh", "tanh"};
    String[] pzxAfns = {"identity", "tanh", "identity", "tanh"};
    String[] pxzAfns = {"tanh", "identity", "tanh", "identity"};
    //use l2vals[i] with l1vals[i]
    double[] l2vals = {0.4, 0.0, 0.4, 0.4};
    double[] l1vals = {0.0, 0.0, 0.5, 0.0};
    double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
    double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
    int[][] encoderLayerSizes = new int[][] {{5}, {5, 6}};
    int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}};
    Nd4j.getRandom().setSeed(12345);
    for (int minibatch : new int[] {1, 5}) {
        INDArray features = Nd4j.rand(minibatch, 4);
        for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
            int[] encoderSizes = encoderLayerSizes[ls];
            int[] decoderSizes = decoderLayerSizes[ls];
            for (int j = 0; j < activFns.length; j++) {
                String afn = activFns[j];
                String pzxAfn = pzxAfns[j];
                String pxzAfn = pxzAfns[j];
                //Ideally we'd do the cartesian product of l1/l2 and the activation functions, but that takes too long...
                double l2 = l2vals[j];
                double l1 = l1vals[j];
                MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                        .regularization(true).l2(l2).l1(l1)
                        .l2Bias(biasL2[j]).l1Bias(biasL1[j])
                        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                        .learningRate(1.0).seed(12345L)
                        .weightInit(WeightInit.XAVIER)
                        .list()
                        .layer(0, new VariationalAutoencoder.Builder()
                                .nIn(4).nOut(3)
                                .encoderLayerSizes(encoderSizes)
                                .decoderLayerSizes(decoderSizes)
                                .pzxActivationFunction(pzxAfn)
                                .reconstructionDistribution(new GaussianReconstructionDistribution(pxzAfn))
                                .activation(afn)
                                .updater(Updater.SGD).build())
                        .pretrain(true).backprop(false)
                        .build();
                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                mln.initGradientsView();
                org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
                String msg = "testVaePretrain() - activationFn=" + afn + ", p(z|x) afn = " + pzxAfn
                        + ", p(x|z) afn = " + pxzAfn + ", encLayerSizes = " + Arrays.toString(encoderSizes)
                        + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
                if (PRINT_RESULTS) {
                    System.out.println(msg);
                    for (int l = 0; l < mln.getnLayers(); l++)
                        System.out.println("Layer " + l + " # params: " + mln.getLayer(l).numParams());
                }
                boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS,
                        DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
                        RETURN_ON_FIRST_FAILURE, features, 12345);
                assertTrue(msg, gradOK);
            }
        }
    }
}
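The test above only gradient-checks the pretraining path. The following sketch (not part of VaeGradientCheckTests) shows how a similar single-VAE-layer configuration might actually be pretrained in an unsupervised way; the iterator name trainData and the hyperparameters are illustrative placeholders, not values from the original test.

// Minimal sketch, assuming a DataSetIterator named trainData with 4 input features.
// With pretrain(true) and backprop(false), fit() runs unsupervised layerwise pretraining only.
MultiLayerConfiguration pretrainConf = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .weightInit(WeightInit.XAVIER)
        .updater(Updater.SGD).learningRate(0.1)
        .list()
        .layer(0, new VariationalAutoencoder.Builder()
                .nIn(4).nOut(3)
                .encoderLayerSizes(5)
                .decoderLayerSizes(6)
                .pzxActivationFunction("identity")
                .reconstructionDistribution(new GaussianReconstructionDistribution("tanh"))
                .activation("tanh")
                .build())
        .pretrain(true).backprop(false)
        .build();
MultiLayerNetwork pretrainNet = new MultiLayerNetwork(pretrainConf);
pretrainNet.init();
pretrainNet.fit(trainData);   // trainData: hypothetical DataSetIterator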
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class TestComputationGraphNetwork, method testForwardBasicIris.
@Test
public void testForwardBasicIris() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();
    MultiLayerConfiguration mlc = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();
    DataSetIterator iris = new IrisDataSetIterator(150, 150);
    DataSet ds = iris.next();
    graph.setInput(0, ds.getFeatureMatrix());
    Map<String, INDArray> activations = graph.feedForward(false);
    //2 layers + 1 input node
    assertEquals(3, activations.size());
    assertTrue(activations.containsKey("input"));
    assertTrue(activations.containsKey("firstLayer"));
    assertTrue(activations.containsKey("outputLayer"));
    //Now: set parameters of both networks to be identical. Then feedforward, and check we get the same outputs
    Nd4j.getRandom().setSeed(12345);
    int nParams = getNumParams();
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());
    List<INDArray> mlnAct = net.feedForward(ds.getFeatureMatrix(), false);
    activations = graph.feedForward(ds.getFeatureMatrix(), false);
    assertEquals(mlnAct.get(0), activations.get("input"));
    assertEquals(mlnAct.get(1), activations.get("firstLayer"));
    assertEquals(mlnAct.get(2), activations.get("outputLayer"));
}
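Beyond comparing intermediate activations, the same parameter-sharing setup can be used to check that both network types produce identical final outputs. A minimal sketch (not from the test), assuming graph and net have already been given identical parameters as above:

// Compare final outputs of the two equivalent networks on the same batch.
// ComputationGraph.output(...) returns one INDArray per output vertex; this graph has one.
INDArray mlnOut = net.output(ds.getFeatureMatrix(), false);
INDArray graphOut = graph.output(false, ds.getFeatureMatrix())[0];
assertEquals(mlnOut, graphOut);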
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class TestComputationGraphNetwork, method testIrisFit.
@Test
public void testIrisFit() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();
    MultiLayerConfiguration mlnConfig = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlnConfig);
    net.init();
    Nd4j.getRandom().setSeed(12345);
    int nParams = getNumParams();
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());
    DataSetIterator iris = new IrisDataSetIterator(75, 150);
    net.fit(iris);
    iris.reset();
    graph.fit(iris);
    //Check that parameters are equal for both models after fitting:
    INDArray paramsMLN = net.params();
    INDArray paramsGraph = graph.params();
    assertNotEquals(params, paramsGraph);
    assertEquals(paramsMLN, paramsGraph);
}
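Equal parameters after fitting imply the two models should also make identical predictions. As a follow-on sketch (not part of the test, and assuming org.deeplearning4j.eval.Evaluation is imported), one could compare their accuracy on a batch from the same iterator:

// Sketch: evaluate both fitted models on the same Iris batch; accuracies should match
// because the parameters are identical.
iris.reset();
DataSet batch = iris.next();
Evaluation evalMLN = new Evaluation(3);
Evaluation evalCG = new Evaluation(3);
evalMLN.eval(batch.getLabels(), net.output(batch.getFeatureMatrix(), false));
evalCG.eval(batch.getLabels(), graph.output(false, batch.getFeatureMatrix())[0]);
assertEquals(evalMLN.accuracy(), evalCG.accuracy(), 1e-6);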
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class TestComputationGraphNetwork, method testScoringDataSet.
@Test
public void testScoringDataSet() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();
    MultiLayerConfiguration mlc = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();
    DataSetIterator iris = new IrisDataSetIterator(150, 150);
    DataSet ds = iris.next();
    //Now: set parameters of both networks to be identical. Then feedforward, and check we get the same score
    Nd4j.getRandom().setSeed(12345);
    int nParams = getNumParams();
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());
    double scoreMLN = net.score(ds, false);
    double scoreCG = graph.score(ds, false);
    assertEquals(scoreMLN, scoreCG, 1e-4);
}
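The false argument to score(...) excludes the L1/L2 regularization terms from the reported score. As a further hypothetical check (not in the test), per-example scores could be compared the same way; scoreExamples(DataSet, boolean) is assumed here to be available on both network classes:

// Sketch: per-example scores without regularization terms (assumed API: scoreExamples(DataSet, boolean)).
INDArray exampleScoresMLN = net.scoreExamples(ds, false);
INDArray exampleScoresCG = graph.scoreExamples(ds, false);
assertEquals(exampleScoresMLN.sumNumber().doubleValue(), exampleScoresCG.sumNumber().doubleValue(), 1e-4);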
Use of org.deeplearning4j.nn.multilayer.MultiLayerNetwork in project deeplearning4j by deeplearning4j.
From the class VaeGradientCheckTests, method testVaeAsMLP.
@Test
public void testVaeAsMLP() {
    //Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output
    //This gradient check tests this part
    //activation functions such as relu and hardtanh: may randomly fail due to discontinuities
    String[] activFns = {"identity", "tanh"};
    LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE};
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = {"softmax", "tanh"};
    //use l2vals[i] with l1vals[i]
    double[] l2vals = {0.4, 0.0, 0.4, 0.4};
    double[] l1vals = {0.0, 0.0, 0.5, 0.0};
    double[] biasL2 = {0.0, 0.0, 0.0, 0.2};
    double[] biasL1 = {0.0, 0.0, 0.6, 0.0};
    int[][] encoderLayerSizes = new int[][] {{5}, {5, 6}};
    int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}};
    Nd4j.getRandom().setSeed(12345);
    for (int minibatch : new int[] {1, 5}) {
        INDArray input = Nd4j.rand(minibatch, 4);
        INDArray labels = Nd4j.create(minibatch, 3);
        for (int i = 0; i < minibatch; i++) {
            labels.putScalar(i, i % 3, 1.0);
        }
        for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
            int[] encoderSizes = encoderLayerSizes[ls];
            int[] decoderSizes = decoderLayerSizes[ls];
            for (String afn : activFns) {
                for (int i = 0; i < lossFunctions.length; i++) {
                    for (int k = 0; k < l2vals.length; k++) {
                        LossFunction lf = lossFunctions[i];
                        String outputActivation = outputActivations[i];
                        double l2 = l2vals[k];
                        double l1 = l1vals[k];
                        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                                .regularization(true).l2(l2).l1(l1)
                                .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                                .learningRate(1.0).seed(12345L)
                                .list()
                                .layer(0, new VariationalAutoencoder.Builder()
                                        .nIn(4).nOut(3)
                                        .encoderLayerSizes(encoderSizes)
                                        .decoderLayerSizes(decoderSizes)
                                        .weightInit(WeightInit.DISTRIBUTION)
                                        .dist(new NormalDistribution(0, 1))
                                        .activation(afn)
                                        .updater(Updater.SGD).build())
                                .layer(1, new OutputLayer.Builder(lf)
                                        .activation(outputActivation)
                                        .nIn(3).nOut(3)
                                        .weightInit(WeightInit.DISTRIBUTION)
                                        .dist(new NormalDistribution(0, 1))
                                        .updater(Updater.SGD).build())
                                .pretrain(false).backprop(true)
                                .build();
                        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                        mln.init();
                        String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf
                                + ", outputActivation=" + outputActivation
                                + ", encLayerSizes = " + Arrays.toString(encoderSizes)
                                + ", decLayerSizes = " + Arrays.toString(decoderSizes)
                                + ", l2=" + l2 + ", l1=" + l1;
                        if (PRINT_RESULTS) {
                            System.out.println(msg);
                            for (int j = 0; j < mln.getnLayers(); j++)
                                System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                        }
                        boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS,
                                DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS,
                                RETURN_ON_FIRST_FAILURE, input, labels);
                        assertTrue(msg, gradOK);
                    }
                }
            }
        }
    }
}
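At inference time, using the VAE layer as the first layer of an MLP is just an ordinary forward pass: the VAE layer's activation is the mean of p(z|x), which feeds the output layer. A minimal sketch (not from the test), assuming a trained network mln built with a configuration like the one above:

// Sketch: once trained, the VAE-as-MLP network is used like any other MultiLayerNetwork.
INDArray newExamples = Nd4j.rand(10, 4);              // 10 hypothetical examples with 4 features
INDArray predictions = mln.output(newExamples, false); // forward pass in test (inference) mode
System.out.println("Predicted class distribution for first example: " + predictions.getRow(0));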