Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class GradientCheckTestsMasking, method testPerOutputMaskingRnn.
@Test
public void testPerOutputMaskingRnn() {
//For RNNs: per-output masking uses 3d masks (same shape as output/labels), as compared to the standard
// 2d masks (used for per *example* masking)
int nIn = 4;
int layerSize = 4;
int nOut = 4;
//1 example, TS length 3
INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 }, new int[] { 1, nOut, 3 }, 'f');
//1 example, TS length 1
INDArray mask2 = Nd4j.create(new double[] { 1, 1, 0, 1 }, new int[] { 1, nOut, 1 }, 'f');
//3 examples, TS length 3
INDArray mask3 = Nd4j.create(new double[] { //Values in 'f' (column-major) order: example index changes most quickly, then output index (per step), followed by time index (least frequently)
1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 }, new int[] { 3, nOut, 3 }, 'f');
INDArray[] labelMasks = new INDArray[] { mask1, mask2, mask3 };
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), // new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), // new LossMCXENT(), //Per output masking on MCXENT+Softmax: not yet supported
new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
Activation[] act = new Activation[] {
Activation.SIGMOID, //XENT
Activation.TANH, //Hinge
Activation.SIGMOID, //KLD
Activation.SOFTMAX, //KLD + softmax
Activation.TANH, //L1
Activation.TANH, //L2
Activation.TANH, //MAE
Activation.SOFTMAX, //MAE + softmax
Activation.TANH, //MAPE
Activation.SOFTMAX, //MAPE + softmax
Activation.SIGMOID, //MCXENT + sigmoid
Activation.TANH, //MSE
Activation.SOFTMAX, //MSE + softmax
Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
Activation.SOFTMAX, //MSLE + softmax
Activation.SIGMOID, //NLL
Activation.SIGMOID, //Poisson
Activation.TANH }; //Squared hinge
for (INDArray labelMask : labelMasks) {
int minibatch = labelMask.size(0);
int tsLength = labelMask.size(2);
for (int i = 0; i < lossFunctions.length; i++) {
ILossFunction lf = lossFunctions[i];
Activation a = act[i];
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE)
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
        .seed(12345)
        .list()
        .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
        .layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setLayerMaskArrays(null, labelMask);
INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, new int[] { minibatch, nIn, tsLength }, new int[] { minibatch, nOut, tsLength }, 12345);
INDArray features = fl[0];
INDArray labels = fl[1];
String msg = "testPerOutputMaskingRnn(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
assertTrue(msg, gradOK);
//Check the equivalent compgraph:
Nd4j.getRandom().setSeed(12345);
ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE)
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2))
        .seed(12345)
        .graphBuilder()
        .addInputs("in")
        .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in")
        .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0")
        .setOutputs("1")
        .build();
ComputationGraph graph = new ComputationGraph(cg);
graph.init();
graph.setLayerMaskArrays(null, new INDArray[] { labelMask }); //Set the per-output label mask on the ComputationGraph
gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { features }, new INDArray[] { labels });
assertTrue(msg + " (compgraph)", gradOK);
}
}
}
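For contrast with the 3d per-output masks built above, standard per-example RNN masking uses 2d masks of shape [minibatch, timeSeriesLength]: a 0 drops every output of an example at that time step, whereas a per-output mask of shape [minibatch, nOut, timeSeriesLength] can drop individual output units. A minimal sketch of the two shapes, assuming net is an initialized MultiLayerNetwork with an RNN output layer as in the test above:
//Per-example mask: shape [minibatch, tsLength]; 0 = ignore this example at this time step entirely
INDArray perExampleMask = Nd4j.create(new double[] { 1, 1, 0 }, new int[] { 1, 3 });
//Per-output mask: shape [minibatch, nOut, tsLength]; 0 = ignore only this output unit at this time step
INDArray perOutputMask = Nd4j.create(new double[] { 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 }, new int[] { 1, 4, 3 }, 'f');
//Both are applied the same way (here as a label mask only, with no feature mask):
net.setLayerMaskArrays(null, perExampleMask); //or perOutputMask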
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class LossFunctionGradientCheck, method lossFunctionGradientCheckLossLayer.
@Test
public void lossFunctionGradientCheckLossLayer() {
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(), new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
String[] outputActivationFn = new String[] {
"sigmoid", //xent
"sigmoid", //xent
"tanh", //cosine
"tanh", //hinge -> trying to predict 1 or -1
"sigmoid", //kld -> probab so should be between 0 and 1
"softmax", //kld + softmax
"tanh", //l1
"softmax", //l1 + softmax
"tanh", //l2
"softmax", //l2 + softmax
"identity", //mae
"softmax", //mae + softmax
"identity", //mape
"softmax", //mape + softmax
"softmax", //mcxent
"identity", //mse
"softmax", //mse + softmax
"sigmoid", //msle - requires positive labels/activations due to log
"softmax", //msle + softmax
"sigmoid", //nll
"softmax", //nll + softmax
"sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option
"tanh" }; //squared hinge
int[] nOut = new int[] {
1, //xent
3, //xent
5, //cosine
3, //hinge
3, //kld
3, //kld + softmax
3, //l1
3, //l1 + softmax
3, //l2
3, //l2 + softmax
3, //mae
3, //mae + softmax
3, //mape
3, //mape + softmax
3, //mcxent
3, //mse
3, //mse + softmax
3, //msle
3, //msle + softmax
3, //nll
3, //nll + softmax
3, //poisson
3 }; //squared hinge
int[] minibatchSizes = new int[] { 1, 3 };
// int[] minibatchSizes = new int[]{3};
List<String> passed = new ArrayList<>();
List<String> failed = new ArrayList<>();
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < minibatchSizes.length; j++) {
String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .iterations(1)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .seed(12345)
        .updater(Updater.NONE)
        .regularization(false)
        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
        .list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
        .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i]).activation(outputActivationFn[i]).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
assertTrue(((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn().getClass() == lossFunctions[i].getClass());
INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
INDArray input = inOut[0];
INDArray labels = inOut[1];
log.info(" ***** Starting test: {} *****", testName);
// System.out.println(Arrays.toString(labels.data().asDouble()));
// System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
// System.out.println(net.score(new DataSet(input,labels)));
boolean gradOK;
try {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
failed.add(testName + "\t" + "EXCEPTION");
continue;
}
if (gradOK) {
passed.add(testName);
} else {
failed.add(testName);
}
System.out.println("\n\n");
}
}
System.out.println("---- Passed ----");
for (String s : passed) {
System.out.println(s);
}
System.out.println("---- Failed ----");
for (String s : failed) {
System.out.println(s);
}
assertEquals("Tests failed", 0, failed.size());
}
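The pairings above follow the general rule that the output activation must produce values in the range the loss function expects: probabilities for XENT, KLD and MCXENT, strictly positive values for MSLE and Poisson (both take a log), and unbounded values (identity) for plain regression losses such as MSE and MAE. A minimal sketch of two typical pairings using the same OutputLayer builder API that appears elsewhere on this page (the layer sizes are arbitrary):
//Classification: MCXENT expects a probability distribution over classes, so pair it with softmax
OutputLayer classificationLayer = new OutputLayer.Builder().lossFunction(new LossMCXENT()).activation(Activation.SOFTMAX).nIn(4).nOut(3).build();
//Regression: MSE places no constraint on the output range, so identity is the natural choice
OutputLayer regressionLayer = new OutputLayer.Builder().lossFunction(new LossMSE()).activation(Activation.IDENTITY).nIn(4).nOut(3).build();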
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class LossFunctionGradientCheck, method lossFunctionWeightedGradientCheck.
@Test
public void lossFunctionWeightedGradientCheck() {
INDArray[] weights = new INDArray[] { Nd4j.create(new double[] { 0.2, 0.3, 0.5 }), Nd4j.create(new double[] { 1.0, 0.5, 2.0 }) };
List<String> passed = new ArrayList<>();
List<String> failed = new ArrayList<>();
for (INDArray w : weights) {
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(w), new LossL1(w), new LossL1(w), new LossL2(w), new LossL2(w), new LossMAE(w), new LossMAE(w), new LossMAPE(w), new LossMAPE(w), new LossMCXENT(w), new LossMSE(w), new LossMSE(w), new LossMSLE(w), new LossMSLE(w), new LossNegativeLogLikelihood(w), new LossNegativeLogLikelihood(w) };
String[] outputActivationFn = new String[] {
"sigmoid", //xent
"tanh", //l1
"softmax", //l1 + softmax
"tanh", //l2
"softmax", //l2 + softmax
"identity", //mae
"softmax", //mae + softmax
"identity", //mape
"softmax", //mape + softmax
"softmax", //mcxent
"identity", //mse
"softmax", //mse + softmax
"sigmoid", //msle - requires positive labels/activations due to log
"softmax", //msle + softmax
"sigmoid", //nll
"softmax" }; //nll + softmax
int[] minibatchSizes = new int[] { 1, 3 };
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < minibatchSizes.length; j++) {
String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j] + "; weights = " + w;
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .iterations(1)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .seed(12345)
        .updater(Updater.NONE)
        .regularization(false)
        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-3, 3))
        .list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
        .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i]).activation(outputActivationFn[i]).nIn(4).nOut(3).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, 3, 12345);
INDArray input = inOut[0];
INDArray labels = inOut[1];
log.info(" ***** Starting test: {} *****", testName);
// System.out.println(Arrays.toString(labels.data().asDouble()));
// System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
// System.out.println(net.score(new DataSet(input,labels)));
boolean gradOK;
try {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
failed.add(testName + "\t" + "EXCEPTION");
continue;
}
if (gradOK) {
passed.add(testName);
} else {
failed.add(testName);
}
System.out.println("\n\n");
}
}
}
System.out.println("---- Passed ----");
for (String s : passed) {
System.out.println(s);
}
System.out.println("---- Failed ----");
for (String s : failed) {
System.out.println(s);
}
assertEquals("Tests failed", 0, failed.size());
}
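The weighted variants scale the loss contribution of each output unit by a fixed weight vector, so the vector length must match the layer's nOut (3 in this test). A minimal sketch of plugging a weighted loss into an output layer, following the same pattern as the test:
//Per-output weights: output 2 counts twice as much as output 0, output 1 counts half
INDArray lossWeights = Nd4j.create(new double[] { 1.0, 0.5, 2.0 });
//The weight vector length must equal nOut of the layer that uses the loss
OutputLayer weightedLayer = new OutputLayer.Builder().lossFunction(new LossMCXENT(lossWeights)).activation(Activation.SOFTMAX).nIn(4).nOut(3).build();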
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class VaeGradientCheckTests, method testVaePretrain.
@Test
public void testVaePretrain() {
//activation functions such as relu and hardtanh: may randomly fail due to discontinuities
String[] activFns = { "identity", "identity", "tanh", "tanh" };
String[] pzxAfns = { "identity", "tanh", "identity", "tanh" };
String[] pxzAfns = { "tanh", "identity", "tanh", "identity" };
//use l2vals[i] with l1vals[i]
double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
int[][] encoderLayerSizes = new int[][] { { 5 }, { 5, 6 } };
int[][] decoderLayerSizes = new int[][] { { 6 }, { 7, 8 } };
Nd4j.getRandom().setSeed(12345);
for (int minibatch : new int[] { 1, 5 }) {
INDArray features = Nd4j.rand(minibatch, 4);
for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
int[] encoderSizes = encoderLayerSizes[ls];
int[] decoderSizes = decoderLayerSizes[ls];
for (int j = 0; j < activFns.length; j++) {
String afn = activFns[j];
String pzxAfn = pzxAfns[j];
String pxzAfn = pxzAfns[j];
//Ideally we'd do the cartesian product of l1/l2 and the activation functions, but that takes too long...
double l2 = l2vals[j];
double l1 = l1vals[j];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .regularization(true)
        .l2(l2).l1(l1)
        .l2Bias(biasL2[j]).l1Bias(biasL1[j])
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .learningRate(1.0)
        .seed(12345L)
        .weightInit(WeightInit.XAVIER)
        .list()
        .layer(0, new VariationalAutoencoder.Builder()
                .nIn(4).nOut(3)
                .encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes)
                .pzxActivationFunction(pzxAfn)
                .reconstructionDistribution(new GaussianReconstructionDistribution(pxzAfn))
                .activation(afn)
                .updater(Updater.SGD)
                .build())
        .pretrain(true).backprop(false)
        .build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
mln.initGradientsView();
org.deeplearning4j.nn.api.Layer layer = mln.getLayer(0);
String msg = "testVaePretrain() - activationFn=" + afn + ", p(z|x) afn = " + pzxAfn + ", p(x|z) afn = " + pxzAfn + ", encLayerSizes = " + Arrays.toString(encoderSizes) + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int l = 0; l < mln.getnLayers(); l++) System.out.println("Layer " + l + " # params: " + mln.getLayer(l).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradientsPretrainLayer(layer, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, 12345);
assertTrue(msg, gradOK);
}
}
}
}
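All of the tests on this page ultimately rely on GradientCheckUtil (checkGradients / checkGradientsPretrainLayer), which compares each analytically computed gradient component against a central-difference estimate obtained by perturbing one parameter at a time and, roughly, flags a failure when the relative error exceeds DEFAULT_MAX_REL_ERROR (with DEFAULT_MIN_ABS_ERROR as an absolute tolerance for near-zero gradients). A minimal sketch of the underlying idea on a plain function, not the DL4J implementation:
//Central-difference gradient check for f(x) = x0^2 + 3*x1 at x = (2, 1)
double eps = 1e-6;
double[] x = { 2.0, 1.0 };
double[] analytic = { 2 * x[0], 3.0 }; //known analytic gradient: (2*x0, 3)
for (int k = 0; k < x.length; k++) {
double orig = x[k];
x[k] = orig + eps;
double scorePlus = x[0] * x[0] + 3 * x[1];
x[k] = orig - eps;
double scoreMinus = x[0] * x[0] + 3 * x[1];
x[k] = orig;
double numeric = (scorePlus - scoreMinus) / (2 * eps);
double relError = Math.abs(analytic[k] - numeric) / (Math.abs(analytic[k]) + Math.abs(numeric));
System.out.println("param " + k + ": numeric = " + numeric + ", relative error = " + relError);
}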
Use of org.deeplearning4j.nn.conf.MultiLayerConfiguration in project deeplearning4j by deeplearning4j.
The class CustomPreprocessorTest, method testCustomPreprocessor.
@Test
public void testCustomPreprocessor() {
//First: Ensure that the MyCustomPreprocessor class is registered with the configuration mapper
ObjectMapper mapper = NeuralNetConfiguration.mapper();
AnnotatedClass ac = AnnotatedClass.construct(InputPreProcessor.class, mapper.getSerializationConfig().getAnnotationIntrospector(), null);
Collection<NamedType> types = mapper.getSubtypeResolver().collectAndResolveSubtypes(ac, mapper.getSerializationConfig(), mapper.getSerializationConfig().getAnnotationIntrospector());
boolean found = false;
for (NamedType nt : types) {
// System.out.println(nt);
if (nt.getType() == MyCustomPreprocessor.class) {
found = true;
break;
}
}
assertTrue("MyCustomPreprocessor: not registered with NeuralNetConfiguration mapper", found);
//Second: let's create a MultiLayerConfiguration with one, and check that the JSON and YAML configurations actually work...
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .learningRate(0.1)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
        .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(10).nOut(10).build())
        .inputPreProcessor(0, new MyCustomPreprocessor())
        .pretrain(false).backprop(true)
        .build();
String json = conf.toJson();
String yaml = conf.toYaml();
System.out.println(json);
MultiLayerConfiguration confFromJson = MultiLayerConfiguration.fromJson(json);
assertEquals(conf, confFromJson);
MultiLayerConfiguration confFromYaml = MultiLayerConfiguration.fromYaml(yaml);
assertEquals(conf, confFromYaml);
assertTrue(confFromJson.getInputPreProcess(0) instanceof MyCustomPreprocessor);
}