Use of org.nd4j.linalg.lossfunctions.LossFunctions in project deeplearning4j by deeplearning4j.
From the class CNNGradientCheckTest, method testGradientCNNL1L2MLN:
@Test
public void testGradientCNNL1L2MLN() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    String[] activFns = { "sigmoid", "tanh" };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD,
                    LossFunctions.LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "tanh" };
    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();
    //use l2vals[i] with l1vals[i]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
    for (String afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int k = 0; k < l2vals.length; k++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    String outputActivation = outputActivations[i];
                    double l2 = l2vals[k];
                    double l1 = l1vals[k];
                    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                                    .regularization(true).l2(l2).l1(l1)
                                    .l2Bias(biasL2[k]).l1Bias(biasL1[k])
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                    .seed(12345L).list()
                                    .layer(0, new ConvolutionLayer.Builder(new int[] { 1, 1 }).nIn(1).nOut(6)
                                                    .weightInit(WeightInit.XAVIER).activation(afn)
                                                    .updater(Updater.NONE).build())
                                    .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3)
                                                    .weightInit(WeightInit.XAVIER).updater(Updater.NONE).build())
                                    .pretrain(false).backprop(true)
                                    //Treat the 4 Iris features as a 1 x 4 single-channel 'image'
                                    .setInputType(InputType.convolutionalFlat(1, 4, 1));
                    MultiLayerConfiguration conf = builder.build();
                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
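                    // Reflection trick: recover the enclosing test method's name for use in
                    // assertion messages and log output below.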
                    String testName = new Object() {
                    }.getClass().getEnclosingMethod().getName();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int j = 0; j < 10; j++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = testName + " - score did not (sufficiently) decrease during learning - activationFn="
                                        + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                                        + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println(testName + " - activationFn=" + afn + ", lossFn=" + lf
                                        + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst);
                        for (int j = 0; j < mln.getnLayers(); j++)
                            System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    assertTrue(gradOK);
                }
            }
        }
    }
}
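All of these tests delegate the actual comparison to GradientCheckUtil.checkGradients, which checks the analytic (backprop) gradient of each network parameter against a numerical estimate. As a point of reference, here is a standalone sketch of that central-difference idea in plain Java; the toy function, step size, tolerance, and relative-error formula are illustrative assumptions, not code from DL4J.

public class FiniteDifferenceDemo {

    // Toy scalar 'loss': f(x) = x0^2 + 3*x1, with analytic gradient [2*x0, 3]
    static double f(double[] x) {
        return x[0] * x[0] + 3 * x[1];
    }

    static double[] analyticGrad(double[] x) {
        return new double[] { 2 * x[0], 3.0 };
    }

    public static void main(String[] args) {
        double[] x = { 1.5, -0.7 };
        double eps = 1e-6;          // finite-difference step (cf. DEFAULT_EPS)
        double maxRelError = 1e-3;  // tolerance (cf. DEFAULT_MAX_REL_ERROR)

        double[] grad = analyticGrad(x);
        for (int i = 0; i < x.length; i++) {
            // Central difference: df/dx_i ~ (f(x + eps*e_i) - f(x - eps*e_i)) / (2*eps)
            double orig = x[i];
            x[i] = orig + eps;
            double scorePlus = f(x);
            x[i] = orig - eps;
            double scoreMinus = f(x);
            x[i] = orig;
            double numeric = (scorePlus - scoreMinus) / (2 * eps);

            // One common relative-error formulation, guarding against 0/0
            double denom = Math.abs(grad[i]) + Math.abs(numeric);
            double relError = (denom == 0.0) ? 0.0 : Math.abs(grad[i] - numeric) / denom;
            System.out.printf("param %d: analytic=%.8f numeric=%.8f relError=%.3e%n", i, grad[i], numeric, relError);
            if (relError > maxRelError)
                throw new AssertionError("Gradient check failed for parameter " + i);
        }
    }
}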
Use of org.nd4j.linalg.lossfunctions.LossFunctions in project deeplearning4j by deeplearning4j.
From the class CNNGradientCheckTest, method testGradientCNNMLN:
@Test
public void testGradientCNNMLN() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    String[] activFns = { "sigmoid", "tanh" };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD,
                    LossFunctions.LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    String[] outputActivations = { "softmax", "tanh" };
    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();
    for (String afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                LossFunctions.LossFunction lf = lossFunctions[i];
                String outputActivation = outputActivations[i];
                MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                                .regularization(false)
                                .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                .updater(Updater.NONE).weightInit(WeightInit.XAVIER)
                                .seed(12345L).list()
                                .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build())
                                .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build())
                                .setInputType(InputType.convolutionalFlat(1, 4, 1))
                                .pretrain(false).backprop(true);
                MultiLayerConfiguration conf = builder.build();
                MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                mln.init();
                String name = new Object() {
                }.getClass().getEnclosingMethod().getName();
                if (doLearningFirst) {
                    //Run a number of iterations of learning
                    mln.setInput(ds.getFeatures());
                    mln.setLabels(ds.getLabels());
                    mln.computeGradientAndScore();
                    double scoreBefore = mln.score();
                    for (int j = 0; j < 10; j++)
                        mln.fit(ds);
                    mln.computeGradientAndScore();
                    double scoreAfter = mln.score();
                    //Can't test in 'characteristic mode of operation' if not learning
                    String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn
                                    + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                    + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                                    + ", scoreAfter=" + scoreAfter + ")";
                    assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                }
                if (PRINT_RESULTS) {
                    System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                                    + outputActivation + ", doLearningFirst=" + doLearningFirst);
                    for (int j = 0; j < mln.getnLayers(); j++)
                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                }
                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                assertTrue(gradOK);
            }
        }
    }
}
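When one combination from the nested loops above fails, it is convenient to reproduce it in isolation. Below is a minimal, hypothetical reduction of testGradientCNNMLN to a single configuration; it reuses only API calls that already appear above and assumes the same imports and harness constants (DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE) are in scope.

@Test
public void testGradientCnnSingleConfigSketch() {
    // Hypothetical, reduced variant of testGradientCNNMLN: one fixed combination
    // ("sigmoid" hidden activation, NLL loss with softmax output), no learning first.
    DataSet ds = new IrisDataSetIterator(150, 150).next();
    ds.normalizeZeroMeanZeroUnitVariance();
    INDArray input = ds.getFeatureMatrix();
    INDArray labels = ds.getLabels();

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .regularization(false)
                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                    .updater(Updater.NONE)      // no updater, so raw gradients are compared
                    .weightInit(WeightInit.XAVIER).seed(12345L).list()
                    .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation("sigmoid").build())
                    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                                    .activation("softmax").nOut(3).build())
                    .setInputType(InputType.convolutionalFlat(1, 4, 1))
                    .pretrain(false).backprop(true).build();

    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
    mln.init();
    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
    assertTrue(gradOK);
}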
Use of org.nd4j.linalg.lossfunctions.LossFunctions in project deeplearning4j by deeplearning4j.
From the class BNGradientCheckTest, method testGradientBNWithCNNandSubsampling:
@Test
public void testGradientBNWithCNNandSubsampling() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    // (d) l1 and l2 values
    Activation[] activFns = { Activation.SIGMOID, Activation.TANH, Activation.IDENTITY };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD,
                    LossFunctions.LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
    double[] l2vals = { 0.0, 0.1, 0.1 };
    //i.e., use l2vals[j] with l1vals[j]
    double[] l1vals = { 0.0, 0.0, 0.2 };
    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int depth = 2;
    int hw = 5;
    int nOut = 3;
    INDArray input = Nd4j.rand(new int[] { minibatch, depth, hw, hw });
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }
    DataSet ds = new DataSet(input, labels);
    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int j = 0; j < l2vals.length; j++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];
                    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder().seed(12345)
                                    .regularization(l1vals[j] > 0 || l2vals[j] > 0)
                                    .l1(l1vals[j]).l2(l2vals[j])
                                    .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                                    .updater(Updater.NONE)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                                    .seed(12345L).list()
                                    .layer(0, new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
                                                    .activation(afn).build())
                                    .layer(1, new BatchNormalization.Builder().build())
                                    .layer(2, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                                                    .kernelSize(2, 2).stride(1, 1).build())
                                    .layer(3, new BatchNormalization())
                                    .layer(4, new ActivationLayer.Builder().activation(afn).build())
                                    .layer(5, new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut).build())
                                    .setInputType(InputType.convolutional(hw, hw, depth))
                                    .pretrain(false).backprop(true);
                    MultiLayerConfiguration conf = builder.build();
                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    String name = new Object() {
                    }.getClass().getEnclosingMethod().getName();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int k = 0; k < 5; k++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn
                                        + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                                        + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                                        + outputActivation + ", doLearningFirst=" + doLearningFirst
                                        + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
                        for (int k = 0; k < mln.getnLayers(); k++)
                            System.out.println("Layer " + k + " # params: " + mln.getLayer(k).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    assertTrue(gradOK);
                }
            }
        }
    }
}
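The label setup in this and the other BN tests is the same few lines: a zeros matrix with a single 1.0 per row at a random class index. A small hypothetical helper (the name randomOneHotLabels is ours, not the project's) that captures the pattern:

// Hypothetical helper; name and placement are illustrative, not from the project.
static INDArray randomOneHotLabels(int minibatch, int nOut, long seed) {
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(seed);
    for (int i = 0; i < minibatch; i++) {
        // exactly one 1.0 per example, at a uniformly random class index
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }
    return labels;
}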
Use of org.nd4j.linalg.lossfunctions.LossFunctions in project deeplearning4j by deeplearning4j.
From the class BNGradientCheckTest, method testGradientDense:
@Test
public void testGradientDense() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    // (d) l1 and l2 values
    Activation[] activFns = { Activation.SIGMOID, Activation.TANH, Activation.IDENTITY };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD,
                    LossFunctions.LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
    double[] l2vals = { 0.0, 0.1, 0.1 };
    //i.e., use l2vals[j] with l1vals[j]
    double[] l1vals = { 0.0, 0.0, 0.2 };
    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int nIn = 5;
    int nOut = 3;
    INDArray input = Nd4j.rand(new int[] { minibatch, nIn });
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }
    DataSet ds = new DataSet(input, labels);
    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int j = 0; j < l2vals.length; j++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];
                    MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder()
                                    .regularization(l1vals[j] > 0 || l2vals[j] > 0)
                                    .l1(l1vals[j]).l2(l2vals[j])
                                    .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT)
                                    .updater(Updater.NONE)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                                    .seed(12345L).list()
                                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(4).activation(afn).build())
                                    .layer(1, new BatchNormalization.Builder().build())
                                    .layer(2, new DenseLayer.Builder().nIn(4).nOut(4).build())
                                    .layer(3, new BatchNormalization())
                                    .layer(4, new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut).build())
                                    .pretrain(false).backprop(true);
                    MultiLayerConfiguration conf = builder.build();
                    MultiLayerNetwork mln = new MultiLayerNetwork(conf);
                    mln.init();
                    String name = new Object() {
                    }.getClass().getEnclosingMethod().getName();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        mln.setInput(ds.getFeatures());
                        mln.setLabels(ds.getLabels());
                        mln.computeGradientAndScore();
                        double scoreBefore = mln.score();
                        for (int k = 0; k < 10; k++)
                            mln.fit(ds);
                        mln.computeGradientAndScore();
                        double scoreAfter = mln.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn
                                        + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                                        + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.8 * scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                                        + outputActivation + ", doLearningFirst=" + doLearningFirst
                                        + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
                        for (int k = 0; k < mln.getnLayers(); k++)
                            System.out.println("Layer " + k + " # params: " + mln.getLayer(k).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                    assertTrue(gradOK);
                }
            }
        }
    }
}
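None of these excerpts include the shared harness constants used in the assertions. For orientation, a plausible set of definitions on the test class could look like the following; the values are illustrative assumptions, not copied from the source.

// Illustrative values only - the real constants are defined on the test classes
// and are not part of these excerpts.
private static final boolean PRINT_RESULTS = true;
private static final boolean RETURN_ON_FIRST_FAILURE = false;
private static final double DEFAULT_EPS = 1e-6;            // finite-difference step size
private static final double DEFAULT_MAX_REL_ERROR = 1e-3;  // maximum allowed relative error
private static final double DEFAULT_MIN_ABS_ERROR = 1e-8;  // differences below this are ignored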
Use of org.nd4j.linalg.lossfunctions.LossFunctions in project deeplearning4j by deeplearning4j.
From the class BNGradientCheckTest, method testGradientBNWithCNNandSubsamplingCompGraph:
@Test
public void testGradientBNWithCNNandSubsamplingCompGraph() {
    //Parameterized test, testing combinations of:
    // (a) activation function
    // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation')
    // (c) Loss function (with specified output activations)
    // (d) l1 and l2 values
    Activation[] activFns = { Activation.SIGMOID, Activation.TANH, Activation.IDENTITY };
    //If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = { LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD,
                    LossFunctions.LossFunction.MSE };
    //i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
    double[] l2vals = { 0.0, 0.1, 0.1 };
    //i.e., use l2vals[j] with l1vals[j]
    double[] l1vals = { 0.0, 0.0, 0.2 };
    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int depth = 2;
    int hw = 5;
    int nOut = 3;
    INDArray input = Nd4j.rand(new int[] { minibatch, depth, hw, hw });
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }
    DataSet ds = new DataSet(input, labels);
    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int j = 0; j < l2vals.length; j++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];
                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345)
                                    .regularization(l1vals[j] > 0 || l2vals[j] > 0)
                                    .l1(l1vals[j]).l2(l2vals[j])
                                    .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                                    .updater(Updater.NONE)
                                    .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                                    .seed(12345L).graphBuilder().addInputs("in")
                                    .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3)
                                                    .activation(afn).build(), "in")
                                    .addLayer("1", new BatchNormalization.Builder().build(), "0")
                                    .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                                                    .kernelSize(2, 2).stride(1, 1).build(), "1")
                                    .addLayer("3", new BatchNormalization(), "2")
                                    .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                                    .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation)
                                                    .nOut(nOut).build(), "4")
                                    .setOutputs("5")
                                    .setInputTypes(InputType.convolutional(hw, hw, depth))
                                    .pretrain(false).backprop(true).build();
                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();
                    String name = new Object() {
                    }.getClass().getEnclosingMethod().getName();
                    if (doLearningFirst) {
                        //Run a number of iterations of learning
                        net.setInput(0, ds.getFeatures());
                        net.setLabels(ds.getLabels());
                        net.computeGradientAndScore();
                        double scoreBefore = net.score();
                        for (int k = 0; k < 5; k++)
                            net.fit(ds);
                        net.computeGradientAndScore();
                        double scoreAfter = net.score();
                        //Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn
                                        + ", lossFn=" + lf + ", outputActivation=" + outputActivation
                                        + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore
                                        + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }
                    if (PRINT_RESULTS) {
                        System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation="
                                        + outputActivation + ", doLearningFirst=" + doLearningFirst
                                        + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]);
                        for (int k = 0; k < net.getNumLayers(); k++)
                            System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
                    }
                    boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR,
                                    DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE,
                                    new INDArray[] { input }, new INDArray[] { labels });
                    assertTrue(gradOK);
                }
            }
        }
    }
}
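One practical note: gradient checks of this kind are numerically sensitive, and GradientCheckUtil expects networks to run in double precision; with the default single precision, numerical gradients are generally too noisy for thresholds like DEFAULT_MAX_REL_ERROR. In the 0.x-era ND4J API this was typically forced once per test class, roughly as below (a sketch; verify the exact call against your ND4J version):

// Run once before any gradient checks (e.g., in a static initializer on the test class).
// 0.x-era API sketch; newer ND4J versions configure this via Nd4j.setDefaultDataTypes(...).
static {
    Nd4j.setDataType(DataBuffer.Type.DOUBLE);
}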