use of org.nd4j.linalg.activations.Activation in project deeplearning4j by deeplearning4j.
the class BNGradientCheckTest method testGradientBNWithCNNandSubsamplingCompGraph.
/**
 * Gradient-checks a ComputationGraph made of a convolution layer, batch normalization,
 * max-pooling subsampling, a second batch normalization, an activation layer and an output layer.
 *
 * <p>The check is repeated for every combination of:
 * <ul>
 *   <li>(a) hidden activation function,</li>
 *   <li>(b) random initialization vs. after a few fit() steps ("characteristic mode of operation"),</li>
 *   <li>(c) loss function, paired index-wise with an output activation,</li>
 *   <li>(d) l1 value, paired index-wise with an l2 value.</li>
 * </ul>
 * Every assertion carries a message describing the failing combination.
 */
@Test
public void testGradientBNWithCNNandSubsamplingCompGraph() {
    Activation[] activFns = { Activation.SIGMOID, Activation.TANH, Activation.IDENTITY };
    // If true: run some backprop steps first
    boolean[] characteristic = { false, true };
    LossFunctions.LossFunction[] lossFunctions = {
        LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE };
    // i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };
    double[] l2vals = { 0.0, 0.1, 0.1 };
    // i.e., use l2vals[j] with l1vals[j]
    double[] l1vals = { 0.0, 0.0, 0.2 };

    Nd4j.getRandom().setSeed(12345);
    int minibatch = 10;
    int depth = 2;
    int hw = 5;
    int nOut = 3;
    INDArray input = Nd4j.rand(new int[] { minibatch, depth, hw, hw });
    // One-hot labels: exactly one randomly chosen class per example.
    INDArray labels = Nd4j.zeros(minibatch, nOut);
    Random r = new Random(12345);
    for (int i = 0; i < minibatch; i++) {
        labels.putScalar(i, r.nextInt(nOut), 1.0);
    }
    DataSet ds = new DataSet(input, labels);

    // Name of this test method, resolved once via an anonymous class; used in log and failure messages.
    String name = new Object() {
    }.getClass().getEnclosingMethod().getName();

    for (Activation afn : activFns) {
        for (boolean doLearningFirst : characteristic) {
            for (int i = 0; i < lossFunctions.length; i++) {
                for (int j = 0; j < l2vals.length; j++) {
                    LossFunctions.LossFunction lf = lossFunctions[i];
                    Activation outputActivation = outputActivations[i];

                    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                        .seed(12345)
                        .regularization(l1vals[j] > 0 || l2vals[j] > 0)
                        .l1(l1vals[j])
                        .l2(l2vals[j])
                        .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
                        .updater(Updater.NONE)
                        .weightInit(WeightInit.DISTRIBUTION)
                        .dist(new UniformDistribution(-2, 2))
                        .seed(12345L)
                        .graphBuilder()
                        .addInputs("in")
                        .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3).activation(afn).build(), "in")
                        .addLayer("1", new BatchNormalization.Builder().build(), "0")
                        .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX).kernelSize(2, 2).stride(1, 1).build(), "1")
                        .addLayer("3", new BatchNormalization(), "2")
                        .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3")
                        .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation).nOut(nOut).build(), "4")
                        .setOutputs("5")
                        .setInputTypes(InputType.convolutional(hw, hw, depth))
                        .pretrain(false)
                        .backprop(true)
                        .build();
                    ComputationGraph net = new ComputationGraph(conf);
                    net.init();

                    // Full description of the current parameter combination.
                    String testCase = name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j];

                    if (doLearningFirst) {
                        // Run a number of iterations of learning
                        net.setInput(0, ds.getFeatures());
                        net.setLabels(ds.getLabels());
                        net.computeGradientAndScore();
                        double scoreBefore = net.score();
                        for (int k = 0; k < 5; k++) {
                            net.fit(ds);
                        }
                        net.computeGradientAndScore();
                        double scoreAfter = net.score();
                        // Can't test in 'characteristic mode of operation' if not learning
                        String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
                        assertTrue(msg, scoreAfter < 0.9 * scoreBefore);
                    }

                    if (PRINT_RESULTS) {
                        System.out.println(testCase);
                        for (int k = 0; k < net.getNumLayers(); k++) {
                            System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams());
                        }
                    }

                    boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, new INDArray[] { input }, new INDArray[] { labels });
                    // Report which combination failed instead of a bare AssertionError.
                    assertTrue(testCase, gradOK);
                }
            }
        }
    }
}
use of org.nd4j.linalg.activations.Activation in project deeplearning4j by deeplearning4j.
the class GradientCheckTests method testGradientGravesBidirectionalLSTMFull.
/**
 * Gradient-checks a two-layer network (GravesBidirectionalLSTM followed by an RnnOutputLayer)
 * for every combination of hidden activation, paired loss function / output activation, and
 * index-wise paired l1 / l2 / bias-l1 / bias-l2 regularization values.
 *
 * <p>The failure message names this test and lists all regularization values, including the
 * bias terms, so that configurations sharing the same l1/l2 can be told apart.
 */
@Test
public void testGradientGravesBidirectionalLSTMFull() {
    Activation[] activFns = { Activation.TANH, Activation.SOFTSIGN };
    LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
    // i.e., lossFunctions[i] used with outputActivations[i] here
    Activation[] outputActivations = { Activation.SOFTMAX, Activation.TANH };

    int timeSeriesLength = 4;
    int nIn = 2;
    int layerSize = 2;
    int nOut = 2;
    int miniBatchSize = 3;

    Random r = new Random(12345L);
    // Inputs drawn uniformly from [-0.5, 0.5).
    INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < nIn; j++) {
            for (int k = 0; k < timeSeriesLength; k++) {
                input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
            }
        }
    }
    // One-hot labels: one randomly chosen class per example and time step.
    INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
    for (int i = 0; i < miniBatchSize; i++) {
        for (int j = 0; j < timeSeriesLength; j++) {
            int idx = r.nextInt(nOut);
            labels.putScalar(new int[] { i, idx, j }, 1.0f);
        }
    }

    // use l2vals[k] with l1vals[k], biasL2[k] and biasL1[k]
    double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
    double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
    double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
    double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };

    for (Activation afn : activFns) {
        for (int i = 0; i < lossFunctions.length; i++) {
            for (int k = 0; k < l2vals.length; k++) {
                LossFunction lf = lossFunctions[i];
                Activation outputActivation = outputActivations[i];
                double l2 = l2vals[k];
                double l1 = l1vals[k];

                // Only non-zero regularization terms are set on the builder.
                NeuralNetConfiguration.Builder conf = new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0);
                if (l1 > 0.0) {
                    conf.l1(l1);
                }
                if (l2 > 0.0) {
                    conf.l2(l2);
                }
                if (biasL2[k] > 0) {
                    conf.l2Bias(biasL2[k]);
                }
                if (biasL1[k] > 0) {
                    conf.l1Bias(biasL1[k]);
                }

                MultiLayerConfiguration mlc = conf.seed(12345L)
                    .list()
                    .layer(0, new GravesBidirectionalLSTM.Builder()
                        .nIn(nIn)
                        .nOut(layerSize)
                        .weightInit(WeightInit.DISTRIBUTION)
                        .dist(new NormalDistribution(0, 1))
                        .activation(afn)
                        .updater(Updater.NONE)
                        .build())
                    .layer(1, new RnnOutputLayer.Builder(lf)
                        .activation(outputActivation)
                        .nIn(layerSize)
                        .nOut(nOut)
                        .weightInit(WeightInit.DISTRIBUTION)
                        .dist(new NormalDistribution(0, 1))
                        .updater(Updater.NONE)
                        .build())
                    .pretrain(false)
                    .backprop(true)
                    .build();
                MultiLayerNetwork mln = new MultiLayerNetwork(mlc);
                mln.init();

                // Single description of the current case, shared by the log line and the assertion.
                String msg = "testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1 + ", l2Bias=" + biasL2[k] + ", l1Bias=" + biasL1[k];

                if (PRINT_RESULTS) {
                    System.out.println(msg);
                    for (int j = 0; j < mln.getnLayers(); j++) {
                        System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
                    }
                }

                boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
                assertTrue(msg, gradOK);
            }
        }
    }
}
use of org.nd4j.linalg.activations.Activation in project deeplearning4j by deeplearning4j.
the class TestMasking method testPerOutputMaskingMLN.
@Test
public void testPerOutputMaskingMLN() {
//Idea: for per-output masking, the contents of the masked label entries should make zero difference to either
// the score or the gradients
int nIn = 6;
int layerSize = 4;
INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0 });
INDArray mask3 = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 0, 1, 0 }, { 1, 0, 0, 1, 1 } });
INDArray[] labelMasks = new INDArray[] { mask1, mask3 };
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), // new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), // new LossMCXENT(), //Per output masking on MCXENT+Softmax: not yet supported
new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
Activation[] act = new Activation[] { //XENT
Activation.SIGMOID, //Hinge
Activation.TANH, //KLD
Activation.SIGMOID, //KLD + softmax
Activation.SOFTMAX, //L1
Activation.TANH, //L2
Activation.TANH, //MAE
Activation.TANH, //MAE + softmax
Activation.SOFTMAX, //MAPE
Activation.TANH, //MAPE + softmax
Activation.SOFTMAX, //MCXENT + sigmoid
Activation.SIGMOID, //MSE
Activation.TANH, //MSE + softmax
Activation.SOFTMAX, //MSLE - needs positive labels/activations (due to log)
Activation.SIGMOID, //MSLE + softmax
Activation.SOFTMAX, //NLL
Activation.SIGMOID, //Poisson
Activation.SIGMOID, //Squared hinge
Activation.TANH };
for (INDArray labelMask : labelMasks) {
int minibatch = labelMask.size(0);
int nOut = labelMask.size(1);
for (int i = 0; i < lossFunctions.length; i++) {
ILossFunction lf = lossFunctions[i];
Activation a = act[i];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345).list().layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build()).layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build()).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setLayerMaskArrays(null, labelMask);
INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
INDArray features = fl[0];
INDArray labels = fl[1];
net.setInput(features);
net.setLabels(labels);
net.computeGradientAndScore();
double score1 = net.score();
INDArray grad1 = net.gradient().gradient();
//Now: change the label values for the masked steps. The
INDArray maskZeroLocations = Nd4j.getExecutioner().execAndReturn(new Not(labelMask.dup()));
INDArray rand = Nd4j.rand(maskZeroLocations.shape()).muli(0.5);
//Only the masked values are changed
INDArray newLabels = labels.add(rand.muli(maskZeroLocations));
net.setLabels(newLabels);
net.computeGradientAndScore();
assertNotEquals(labels, newLabels);
double score2 = net.score();
INDArray grad2 = net.gradient().gradient();
assertEquals(score1, score2, 1e-6);
assertEquals(grad1, grad2);
//Do the same for CompGraph
ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder().updater(Updater.NONE).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).seed(12345).graphBuilder().addInputs("in").addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in").addLayer("1", new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0").setOutputs("1").build();
ComputationGraph graph = new ComputationGraph(conf2);
graph.init();
graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
graph.setInputs(features);
graph.setLabels(labels);
graph.computeGradientAndScore();
double gScore1 = graph.score();
INDArray gGrad1 = graph.gradient().gradient();
graph.setLabels(newLabels);
graph.computeGradientAndScore();
double gScore2 = graph.score();
INDArray gGrad2 = graph.gradient().gradient();
assertEquals(gScore1, gScore2, 1e-6);
assertEquals(gGrad1, gGrad2);
}
}
}
use of org.nd4j.linalg.activations.Activation in project nd4j by deeplearning4j.
the class SameDiffTests method testActivationBackprop.
@Test
public void testActivationBackprop() {
Activation[] afns = new Activation[] { Activation.TANH, Activation.SIGMOID, Activation.ELU, Activation.SOFTPLUS, Activation.SOFTSIGN, Activation.HARDTANH, // WRONG output - see issue https://github.com/deeplearning4j/nd4j/issues/2426
Activation.CUBE, // JVM crash
Activation.RELU, // JVM crash
Activation.LEAKYRELU };
for (Activation a : afns) {
SameDiff sd = SameDiff.create();
INDArray inArr = Nd4j.linspace(-3, 3, 7);
INDArray labelArr = Nd4j.linspace(-3, 3, 7).muli(0.5);
SDVariable in = sd.var("in", inArr.dup());
// System.out.println("inArr: " + inArr);
INDArray outExp;
SDVariable out;
switch(a) {
case ELU:
out = sd.elu("out", in);
outExp = Transforms.elu(inArr, true);
break;
case HARDTANH:
out = sd.hardTanh("out", in);
outExp = Transforms.hardTanh(inArr, true);
break;
case LEAKYRELU:
out = sd.leakyRelu("out", in, 0.01);
outExp = Transforms.leakyRelu(inArr, true);
break;
case RELU:
out = sd.relu("out", in, 0.0);
outExp = Transforms.relu(inArr, true);
break;
case SIGMOID:
out = sd.sigmoid("out", in);
outExp = Transforms.sigmoid(inArr, true);
break;
case SOFTPLUS:
out = sd.softplus("out", in);
outExp = Transforms.softPlus(inArr, true);
break;
case SOFTSIGN:
out = sd.softsign("out", in);
outExp = Transforms.softsign(inArr, true);
break;
case TANH:
out = sd.tanh("out", in);
outExp = Transforms.tanh(inArr, true);
break;
case CUBE:
out = sd.cube("out", in);
outExp = Transforms.pow(inArr, 3, true);
break;
default:
throw new RuntimeException(a.toString());
}
// Sum squared error loss:
SDVariable label = sd.var("label", labelArr.dup());
SDVariable diff = label.sub("diff", out);
SDVariable sqDiff = diff.mul("sqDiff", diff);
// Loss function...
SDVariable totSum = sd.sum("totSum", sqDiff, Integer.MAX_VALUE);
sd.exec();
INDArray outAct = sd.getVariable("out").getArr();
assertEquals(a.toString(), outExp, outAct);
// L = sum_i (label - out)^2
// dL/dOut = 2(out - label)
INDArray dLdOutExp = outExp.sub(labelArr).mul(2);
INDArray dLdInExp = a.getActivationFunction().backprop(inArr.dup(), dLdOutExp.dup()).getFirst();
sd.execBackwards();
SameDiff gradFn = sd.getFunction("grad");
INDArray dLdOutAct = gradFn.getVariable("out-grad").getArr();
INDArray dLdInAct = gradFn.getVariable("in-grad").getArr();
assertEquals(a.toString(), dLdOutExp, dLdOutAct);
assertEquals(a.toString(), dLdInExp, dLdInAct);
}
}
Aggregations