use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class GradientCheckTestsComputationGraph method testL2NormalizeVertex4d.
@Test
public void testL2NormalizeVertex4d() {
    Nd4j.getRandom().setSeed(12345);
    int h = 4;
    int w = 4;
    int dIn = 2;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
            .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0)
            .graphBuilder()
            .addInputs("in1")
            .addLayer("d1", new ConvolutionLayer.Builder().kernelSize(2, 2).stride(1, 1).nOut(2).build(), "in1")
            .addVertex("norm", new L2NormalizeVertex(), "d1")
            .addLayer("out1", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.L2)
                    .nOut(2).activation(Activation.IDENTITY).build(), "norm")
            .setOutputs("out1")
            .setInputTypes(InputType.convolutional(h, w, dIn))
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    int[] mbSizes = new int[] { 1, 3, 10 };
    for (int minibatch : mbSizes) {
        INDArray in1 = Nd4j.rand(new int[] { minibatch, dIn, h, w });
        INDArray labels1 = Nd4j.rand(minibatch, 2);
        String testName = "testL2NormalizeVertex4d() - minibatch = " + minibatch;
        if (PRINT_RESULTS) {
            System.out.println(testName);
            for (int j = 0; j < graph.getNumLayers(); j++)
                System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams());
        }
        boolean gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
                new INDArray[] { in1 }, new INDArray[] { labels1 });
        assertTrue(testName, gradOK);
    }
}
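A quick way to see what the L2NormalizeVertex does, beyond the gradient check, is to run a forward pass and inspect the activations map. The following is a minimal sketch (not part of the test above) that reuses the graph and the vertex name "norm" from the test; it assumes the usual java.util.Map import and that ComputationGraph.feedForward(INDArray, boolean) returns activations keyed by layer/vertex name.

// Minimal sketch, reusing "graph", "dIn", "h" and "w" from the test above
INDArray input = Nd4j.rand(new int[] { 3, dIn, h, w });
Map<String, INDArray> activations = graph.feedForward(input, false);
INDArray normed = activations.get("norm");
// L2NormalizeVertex rescales each example so its activations have (approximately) unit L2 norm
INDArray perExampleSqNorm = normed.mul(normed).sum(1, 2, 3);  // squared L2 norm per example
System.out.println(perExampleSqNorm);  // expected to be close to 1.0 for every example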
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class GradientCheckTestsComputationGraph method testBasicStackUnstack.
@Test
public void testBasicStackUnstack() {
    int layerSizes = 2;
    Nd4j.getRandom().setSeed(12345);
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
            .activation(Activation.TANH).updater(Updater.NONE).learningRate(1.0)
            .graphBuilder()
            .addInputs("in1", "in2")
            .addLayer("d0", new DenseLayer.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in1")
            .addLayer("d1", new DenseLayer.Builder().nIn(layerSizes).nOut(layerSizes).build(), "in2")
            .addVertex("stack", new StackVertex(), "d0", "d1")
            .addLayer("d2", new DenseLayer.Builder().nIn(layerSizes).nOut(layerSizes).build(), "stack")
            .addVertex("u1", new UnstackVertex(0, 2), "d2")
            .addVertex("u2", new UnstackVertex(1, 2), "d2")
            .addLayer("out1", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.L2)
                    .nIn(layerSizes).nOut(layerSizes).activation(Activation.IDENTITY).build(), "u1")
            .addLayer("out2", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.L2)
                    .nIn(layerSizes).nOut(2).activation(Activation.IDENTITY).build(), "u2")
            .setOutputs("out1", "out2")
            .pretrain(false).backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
    Nd4j.getRandom().setSeed(12345);
    int nParams = graph.numParams();
    INDArray newParams = Nd4j.rand(1, nParams);
    graph.setParams(newParams);
    int[] mbSizes = new int[] { 1, 3, 10 };
    for (int minibatch : mbSizes) {
        INDArray in1 = Nd4j.rand(minibatch, layerSizes);
        INDArray in2 = Nd4j.rand(minibatch, layerSizes);
        INDArray labels1 = Nd4j.rand(minibatch, 2);
        INDArray labels2 = Nd4j.rand(minibatch, 2);
        String testName = "testBasicStackUnstack() - minibatch = " + minibatch;
        if (PRINT_RESULTS) {
            System.out.println(testName);
            for (int j = 0; j < graph.getNumLayers(); j++)
                System.out.println("Layer " + j + " # params: " + graph.getLayer(j).numParams());
        }
        boolean gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
                new INDArray[] { in1, in2 }, new INDArray[] { labels1, labels2 });
        assertTrue(testName, gradOK);
    }
}
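For readers unfamiliar with the two vertices being checked here: StackVertex concatenates its input activations along dimension 0 (the example/minibatch dimension), and UnstackVertex(index, stackSize) takes the corresponding slice back out. The shapes involved can be sketched with plain ND4J calls (this sketch is illustrative only, it is not part of the test, and it assumes the org.nd4j.linalg.indexing.NDArrayIndex import):

// Shape sketch of stack/unstack semantics for two [3 x 2] inputs
INDArray a = Nd4j.rand(3, 2);
INDArray b = Nd4j.rand(3, 2);
INDArray stacked = Nd4j.vstack(a, b);  // [6, 2] - this mirrors what StackVertex produces
INDArray first = stacked.get(NDArrayIndex.interval(0, 3), NDArrayIndex.all());   // what UnstackVertex(0, 2) returns
INDArray second = stacked.get(NDArrayIndex.interval(3, 6), NDArrayIndex.all());  // what UnstackVertex(1, 2) returns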
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class GradientCheckTestsMasking method testPerOutputMaskingRnn.
@Test
public void testPerOutputMaskingRnn() {
    //For RNNs: per-output masking uses 3d masks (same shape as the output/labels), as compared to the standard
    // 2d masks (used for per-*example* masking)
    int nIn = 4;
    int layerSize = 4;
    int nOut = 4;
    //1 example, TS length 3
    INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0 }, new int[] { 1, nOut, 3 }, 'f');
    //1 example, TS length 1
    INDArray mask2 = Nd4j.create(new double[] { 1, 1, 0, 1 }, new int[] { 1, nOut, 1 }, 'f');
    //3 examples, TS length 3. Values are given in 'f' (column-major) order: the example index varies most
    // frequently, then the output unit, followed by the time index (least frequently)
    INDArray mask3 = Nd4j.create(new double[] { 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0,
                    1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0 },
                    new int[] { 3, nOut, 3 }, 'f');
    INDArray[] labelMasks = new INDArray[] { mask1, mask2, mask3 };
    ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(),
                    //new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
                    new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(),
                    new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(),
                    //new LossMCXENT(), //Per-output masking on MCXENT+Softmax: not yet supported
                    new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
                    new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
    Activation[] act = new Activation[] {
                    Activation.SIGMOID, //XENT
                    Activation.TANH,    //Hinge
                    Activation.SIGMOID, //KLD
                    Activation.SOFTMAX, //KLD + softmax
                    Activation.TANH,    //L1
                    Activation.TANH,    //L2
                    Activation.TANH,    //MAE
                    Activation.SOFTMAX, //MAE + softmax
                    Activation.TANH,    //MAPE
                    Activation.SOFTMAX, //MAPE + softmax
                    Activation.SIGMOID, //MCXENT + sigmoid
                    Activation.TANH,    //MSE
                    Activation.SOFTMAX, //MSE + softmax
                    Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
                    Activation.SOFTMAX, //MSLE + softmax
                    Activation.SIGMOID, //NLL
                    Activation.SIGMOID, //Poisson
                    Activation.TANH };  //Squared hinge
    for (INDArray labelMask : labelMasks) {
        int minibatch = labelMask.size(0);
        int tsLength = labelMask.size(2);
        for (int i = 0; i < lossFunctions.length; i++) {
            ILossFunction lf = lossFunctions[i];
            Activation a = act[i];
            Nd4j.getRandom().setSeed(12345);
            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345)
                    .list()
                    .layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
                    .layer(1, new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
                    .build();
            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();
            net.setLayerMaskArrays(null, labelMask);
            INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf,
                    new int[] { minibatch, nIn, tsLength }, new int[] { minibatch, nOut, tsLength }, 12345);
            INDArray features = fl[0];
            INDArray labels = fl[1];
            String msg = "testPerOutputMaskingRnn(): maskShape = " + Arrays.toString(labelMask.shape())
                    + ", loss function = " + lf + ", activation = " + a;
            System.out.println(msg);
            boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
            assertTrue(msg, gradOK);
            //Check the equivalent compgraph:
            Nd4j.getRandom().setSeed(12345);
            ComputationGraphConfiguration cg = new NeuralNetConfiguration.Builder().updater(Updater.NONE)
                    .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 2)).seed(12345)
                    .graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in")
                    .addLayer("1", new RnnOutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0")
                    .setOutputs("1")
                    .build();
            ComputationGraph graph = new ComputationGraph(cg);
            graph.init();
            //The label mask must be set on the graph (not the MultiLayerNetwork) for the compgraph check
            graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
            gradOK = GradientCheckUtil.checkGradients(graph, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
                    new INDArray[] { features }, new INDArray[] { labels });
            assertTrue(msg + " (compgraph)", gradOK);
        }
    }
}
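The per-output label masks used above have the same shape as the RNN labels, [minibatch, nOut, timeSeriesLength]: a value of 1 means the corresponding label/output entry contributes to the score and to the gradients, a value of 0 excludes it. A minimal sketch of building such a mask (hypothetical example, not taken from the test) that drops the final time step of every example:

// Sketch: per-output mask of shape [minibatch, nOut, tsLength] that excludes the last time step
int minibatch = 3, nOut = 4, tsLength = 3;
INDArray keep = Nd4j.ones(minibatch, nOut, tsLength - 1);
INDArray drop = Nd4j.zeros(minibatch, nOut, 1);
INDArray perOutputMask = Nd4j.concat(2, keep, drop);  // concatenate along the time dimension
// Applied as a label mask, exactly as in the test above:
net.setLayerMaskArrays(null, perOutputMask);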
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class TestCompGraphCNN method testCNNComputationGraphKernelTooLarge.
@Test(expected = InvalidInputTypeException.class)
public void testCNNComputationGraphKernelTooLarge() {
    int imageWidth = 23;
    int imageHeight = 19;
    int nChannels = 1;
    int classes = 2;
    int numSamples = 200;
    int kernelHeight = 3;
    int kernelWidth = imageWidth;
    DataSet trainInput;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1).seed(123)
            .graphBuilder()
            .addInputs("input")
            .setInputTypes(InputType.convolutional(nChannels, imageWidth, imageHeight))
            .addLayer("conv1", new ConvolutionLayer.Builder()
                    .kernelSize(kernelHeight, kernelWidth).stride(1, 1)
                    .nIn(nChannels).nOut(2)
                    .weightInit(WeightInit.XAVIER).activation(Activation.RELU).build(), "input")
            .addLayer("pool1", new SubsamplingLayer.Builder()
                    .poolingType(SubsamplingLayer.PoolingType.MAX)
                    .kernelSize(imageHeight - kernelHeight + 1, 1).stride(1, 1).build(), "conv1")
            .addLayer("output", new OutputLayer.Builder().nOut(classes).build(), "pool1")
            .setOutputs("output")
            .backprop(true).pretrain(false)
            .build();
    ComputationGraph model = new ComputationGraph(conf);
    model.init();
    INDArray emptyFeatures = Nd4j.zeros(numSamples, imageWidth * imageHeight * nChannels);
    INDArray emptyLabels = Nd4j.zeros(numSamples, classes);
    trainInput = new DataSet(emptyFeatures, emptyLabels);
    model.fit(trainInput);
}
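This test expects InvalidInputTypeException: with stride 1 and no padding, each spatial output dimension of a convolution is input - kernel + 1, so a kernel that is larger than the input in any dimension yields a non-positive output size and the configuration is rejected when the input types are validated. A small sketch of that arithmetic (hypothetical helper, not part of the test):

// Standard convolution output-size arithmetic; a result <= 0 means the kernel does not fit the input
static int convOutputSize(int inputSize, int kernelSize, int stride, int padding) {
    return (inputSize - kernelSize + 2 * padding) / stride + 1;
}
// e.g. convOutputSize(19, 3, 1, 0) == 17, whereas a kernel larger than the input gives a size <= 0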
use of org.deeplearning4j.nn.conf.ComputationGraphConfiguration in project deeplearning4j by deeplearning4j.
the class TestCompGraphCNN method testCnnLRN_BN.
@Test
public void testCnnLRN_BN() {
    int imageHeight = 40;
    int imageWidth = 40;
    int nChannels = 1;
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .iterations(1).seed(123)
            .graphBuilder()
            .addInputs("input")
            .setInputTypes(InputType.convolutional(imageHeight, imageWidth, nChannels))
            //Out: 39x39x64
            .addLayer("cnn1", new ConvolutionLayer.Builder(new int[] { 2, 2 }, new int[] { 1, 1 }, new int[] { 0, 0 })
                    .nIn(nChannels).nOut(64).biasInit(0.2).build(), "input")
            //Out: 38x38x64
            .addLayer("max1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX,
                    new int[] { 2, 2 }, new int[] { 1, 1 }).build(), "cnn1")
            .addLayer("lrn1", new LocalResponseNormalization.Builder(5, 1e-4, 0.75).build(), "max1")
            .addLayer("batchnorm", new BatchNormalization.Builder().nOut(64).build(), "lrn1")
            .addLayer("out", new OutputLayer.Builder().nOut(10).build(), "batchnorm")
            .setOutputs("out")
            .pretrain(false).backprop(true)
            .build();
    ComputationGraph graph = new ComputationGraph(conf);
    graph.init();
}
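The test above only builds and initialises the LRN + BatchNormalization graph; a single fit() call on random data would also exercise the forward and backward pass. A minimal sketch under that assumption (dummy data, not part of the test; DL4J convolutional input is [minibatch, channels, height, width]):

// Sketch: fit the graph constructed above on random NCHW features and one-hot labels
INDArray features = Nd4j.rand(new int[] { 8, nChannels, imageHeight, imageWidth });
INDArray labels = Nd4j.zeros(8, 10);
for (int i = 0; i < 8; i++) labels.putScalar(new int[] { i, i % 10 }, 1.0);  // one-hot labels for the 10-unit output layer
graph.fit(new INDArray[] { features }, new INDArray[] { labels });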