Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.
From the class GradientCheckTestsMasking, method testPerOutputMaskingMLP:
@Test
public void testPerOutputMaskingMLP() {
int nIn = 6;
int layerSize = 4;
INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0 });
INDArray mask3 = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 0, 1, 0 }, { 1, 0, 0, 1, 1 } });
INDArray[] labelMasks = new INDArray[] { mask1, mask3 };
ILossFunction[] lossFunctions = new ILossFunction[] {
    new LossBinaryXENT(),
    // new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
    new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(),
    new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(),
    // new LossMCXENT(), //Per output masking on MCXENT+Softmax: not yet supported
    new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
    new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
Activation[] act = new Activation[] {
    Activation.SIGMOID, //XENT
    Activation.TANH,    //Hinge
    Activation.SIGMOID, //KLD
    Activation.SOFTMAX, //KLD + softmax
    Activation.TANH,    //L1
    Activation.TANH,    //L2
    Activation.TANH,    //MAE
    Activation.SOFTMAX, //MAE + softmax
    Activation.TANH,    //MAPE
    Activation.SOFTMAX, //MAPE + softmax
    Activation.SIGMOID, //MCXENT + sigmoid
    Activation.TANH,    //MSE
    Activation.SOFTMAX, //MSE + softmax
    Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
    Activation.SOFTMAX, //MSLE + softmax
    Activation.SIGMOID, //NLL
    Activation.SIGMOID, //Poisson
    Activation.TANH };  //Squared hinge
for (INDArray labelMask : labelMasks) {
int minibatch = labelMask.size(0);
int nOut = labelMask.size(1);
for (int i = 0; i < lossFunctions.length; i++) {
ILossFunction lf = lossFunctions[i];
Activation a = act[i];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE)
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
        .seed(12345)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
        .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setLayerMaskArrays(null, labelMask);
INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
INDArray features = fl[0];
INDArray labels = fl[1];
String msg = "testPerOutputMaskingMLP(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a;
System.out.println(msg);
boolean gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, features, labels);
assertTrue(msg, gradOK);
}
}
}
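For context, per-output label masking means the mask has the same shape as the labels (here [1, nOut] or [minibatch, nOut]), and a 0 entry removes that single (example, output) pair from the loss. A minimal plain-Java sketch of that idea, with hypothetical names rather than DL4J API:

// Minimal sketch (plain Java, hypothetical names): per-output masking drops the loss
// contribution of every (example, output) entry whose mask value is 0.
public class PerOutputMaskSketch {
    static double maskedLoss(double[][] perOutputLoss, double[][] labelMask) {
        double sum = 0.0;
        for (int i = 0; i < perOutputLoss.length; i++) {
            for (int j = 0; j < perOutputLoss[i].length; j++) {
                sum += perOutputLoss[i][j] * labelMask[i][j]; // masked entries contribute nothing
            }
        }
        return sum;
    }

    public static void main(String[] args) {
        double[][] loss = { { 0.25, 0.5, 0.25 } };
        double[][] mask = { { 1, 0, 1 } };          // middle output excluded
        System.out.println(maskedLoss(loss, mask)); // 0.5
    }
}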
Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.
From the class LossFunctionGradientCheck, method lossFunctionGradientCheck:
@Test
public void lossFunctionGradientCheck() {
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(), new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL1(), new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
String[] outputActivationFn = new String[] {
    "sigmoid",      //xent
    "sigmoid",      //xent
    "tanh",         //cosine
    "tanh",         //hinge -> trying to predict 1 or -1
    "sigmoid",      //kld -> probab so should be between 0 and 1
    "softmax",      //kld + softmax
    "tanh",         //l1
    "rationaltanh", //l1
    "softmax",      //l1 + softmax
    "tanh",         //l2
    "softmax",      //l2 + softmax
    "identity",     //mae
    "softmax",      //mae + softmax
    "identity",     //mape
    "softmax",      //mape + softmax
    "softmax",      //mcxent
    "identity",     //mse
    "softmax",      //mse + softmax
    "sigmoid",      //msle - requires positive labels/activations due to log
    "softmax",      //msle + softmax
    "sigmoid",      //nll
    "softmax",      //nll + softmax
    "sigmoid",      //poisson - requires positive predictions due to log... not sure if this is the best option
    "tanh" };       //squared hinge
int[] nOut = new int[] {
    1, //xent
    3, //xent
    5, //cosine
    3, //hinge
    3, //kld
    3, //kld + softmax
    3, //l1
    3, //l1
    3, //l1 + softmax
    3, //l2
    3, //l2 + softmax
    3, //mae
    3, //mae + softmax
    3, //mape
    3, //mape + softmax
    3, //mcxent
    3, //mse
    3, //mse + softmax
    3, //msle
    3, //msle + softmax
    3, //nll
    3, //nll + softmax
    3, //poisson
    3 }; //squared hinge
int[] minibatchSizes = new int[] { 1, 3 };
// int[] minibatchSizes = new int[]{3};
List<String> passed = new ArrayList<>();
List<String> failed = new ArrayList<>();
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < minibatchSizes.length; j++) {
String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .iterations(1)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .seed(12345)
        .updater(Updater.NONE)
        .regularization(false)
        .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
        .list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
        .layer(1, new OutputLayer.Builder().lossFunction(lossFunctions[i]).activation(outputActivationFn[i]).nIn(4).nOut(nOut[i]).build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
INDArray input = inOut[0];
INDArray labels = inOut[1];
log.info(" ***** Starting test: {} *****", testName);
// System.out.println(Arrays.toString(labels.data().asDouble()));
// System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
// System.out.println(net.score(new DataSet(input,labels)));
boolean gradOK;
try {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
failed.add(testName + "\t" + "EXCEPTION");
continue;
}
if (gradOK) {
passed.add(testName);
} else {
failed.add(testName);
}
System.out.println("\n\n");
}
}
System.out.println("---- Passed ----");
for (String s : passed) {
System.out.println(s);
}
System.out.println("---- Failed ----");
for (String s : failed) {
System.out.println(s);
}
assertEquals("Tests failed", 0, failed.size());
}
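The test above relies on GradientCheckUtil comparing the analytic gradient of each parameter against a centered finite-difference estimate, accepting a parameter when the relative error is small (or when both gradients are negligibly close in absolute terms). A simplified sketch of that idea for a scalar function, with hypothetical names; GradientCheckUtil's exact criteria may differ:

// Minimal sketch (hypothetical names, not GradientCheckUtil itself): centered finite
// differences versus an analytic gradient, accepted via a relative-error threshold.
import java.util.function.DoubleUnaryOperator;

public class GradCheckSketch {
    static boolean check(DoubleUnaryOperator f, double analyticGrad, double x,
                         double eps, double maxRelError, double minAbsError) {
        double numericGrad = (f.applyAsDouble(x + eps) - f.applyAsDouble(x - eps)) / (2 * eps);
        double absError = Math.abs(numericGrad - analyticGrad);
        if (absError < minAbsError) return true; // both gradients effectively equal/zero
        double relError = absError / (Math.abs(numericGrad) + Math.abs(analyticGrad));
        return relError < maxRelError;
    }

    public static void main(String[] args) {
        // f(x) = x^2, analytic gradient at x = 3.0 is 6.0
        System.out.println(check(x -> x * x, 6.0, 3.0, 1e-6, 1e-3, 1e-8)); // true
    }
}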
Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.
From the class TestMasking, method testPerOutputMaskingMLN:
@Test
public void testPerOutputMaskingMLN() {
//Idea: for per-output masking, the contents of the masked label entries should make zero difference to either
// the score or the gradients
int nIn = 6;
int layerSize = 4;
INDArray mask1 = Nd4j.create(new double[] { 1, 0, 0, 1, 0 });
INDArray mask3 = Nd4j.create(new double[][] { { 1, 1, 1, 1, 1 }, { 0, 1, 0, 1, 0 }, { 1, 0, 0, 1, 1 } });
INDArray[] labelMasks = new INDArray[] { mask1, mask3 };
ILossFunction[] lossFunctions = new ILossFunction[] {
    new LossBinaryXENT(),
    // new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity
    new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(),
    new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(),
    // new LossMCXENT(), //Per output masking on MCXENT+Softmax: not yet supported
    new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(),
    new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
Activation[] act = new Activation[] {
    Activation.SIGMOID, //XENT
    Activation.TANH,    //Hinge
    Activation.SIGMOID, //KLD
    Activation.SOFTMAX, //KLD + softmax
    Activation.TANH,    //L1
    Activation.TANH,    //L2
    Activation.TANH,    //MAE
    Activation.SOFTMAX, //MAE + softmax
    Activation.TANH,    //MAPE
    Activation.SOFTMAX, //MAPE + softmax
    Activation.SIGMOID, //MCXENT + sigmoid
    Activation.TANH,    //MSE
    Activation.SOFTMAX, //MSE + softmax
    Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log)
    Activation.SOFTMAX, //MSLE + softmax
    Activation.SIGMOID, //NLL
    Activation.SIGMOID, //Poisson
    Activation.TANH };  //Squared hinge
for (INDArray labelMask : labelMasks) {
int minibatch = labelMask.size(0);
int nOut = labelMask.size(1);
for (int i = 0; i < lossFunctions.length; i++) {
ILossFunction lf = lossFunctions[i];
Activation a = act[i];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE)
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
        .seed(12345)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build())
        .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build())
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
net.setLayerMaskArrays(null, labelMask);
INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345);
INDArray features = fl[0];
INDArray labels = fl[1];
net.setInput(features);
net.setLabels(labels);
net.computeGradientAndScore();
double score1 = net.score();
INDArray grad1 = net.gradient().gradient();
//Now: change the label values for the masked entries. The score and gradients should not change
INDArray maskZeroLocations = Nd4j.getExecutioner().execAndReturn(new Not(labelMask.dup()));
INDArray rand = Nd4j.rand(maskZeroLocations.shape()).muli(0.5);
//Only the masked values are changed
INDArray newLabels = labels.add(rand.muli(maskZeroLocations));
net.setLabels(newLabels);
net.computeGradientAndScore();
assertNotEquals(labels, newLabels);
double score2 = net.score();
INDArray grad2 = net.gradient().gradient();
assertEquals(score1, score2, 1e-6);
assertEquals(grad1, grad2);
//Do the same for CompGraph
ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
        .updater(Updater.NONE)
        .weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1))
        .seed(12345)
        .graphBuilder()
        .addInputs("in")
        .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH).build(), "in")
        .addLayer("1", new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf).activation(a).build(), "0")
        .setOutputs("1")
        .build();
ComputationGraph graph = new ComputationGraph(conf2);
graph.init();
graph.setLayerMaskArrays(null, new INDArray[] { labelMask });
graph.setInputs(features);
graph.setLabels(labels);
graph.computeGradientAndScore();
double gScore1 = graph.score();
INDArray gGrad1 = graph.gradient().gradient();
graph.setLabels(newLabels);
graph.computeGradientAndScore();
double gScore2 = graph.score();
INDArray gGrad2 = graph.gradient().gradient();
assertEquals(gScore1, gScore2, 1e-6);
assertEquals(gGrad1, gGrad2);
}
}
}
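The key trick above is that maskZeroLocations is the elementwise NOT of the label mask, so adding rand.muli(maskZeroLocations) perturbs only the masked label entries; score and gradients must then be identical. A minimal plain-Java sketch of that perturbation, with hypothetical names:

// Minimal sketch (plain Java, hypothetical names): only entries where the mask is 0 are
// changed, so a correctly masked loss must produce the same score and gradients.
public class MaskedPerturbationSketch {
    static double[] perturbMaskedOnly(double[] labels, double[] mask, double[] noise) {
        double[] out = labels.clone();
        for (int i = 0; i < labels.length; i++) {
            double maskZero = (mask[i] == 0.0) ? 1.0 : 0.0; // elementwise "Not" of the mask
            out[i] += noise[i] * maskZero;                   // masked entries only
        }
        return out;
    }
}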
Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.
From the class CenterLossOutputLayer, method computeScore:
/** Compute score after labels and input have been set.
* @param fullNetworkL1 L1 regularization term for the entire network
* @param fullNetworkL2 L2 regularization term for the entire network
* @param training whether score should be calculated at train or test time (this affects things like application of
* dropout, etc)
* @return score (loss function)
*/
@Override
public double computeScore(double fullNetworkL1, double fullNetworkL2, boolean training) {
if (input == null || labels == null)
throw new IllegalStateException("Cannot calculate score without input and labels");
this.fullNetworkL1 = fullNetworkL1;
this.fullNetworkL2 = fullNetworkL2;
INDArray preOut = preOutput2d(training);
// center loss has two components:
// the first (the configured loss function) enforces inter-class dissimilarity; the second penalizes intra-class dissimilarity (the squared L2 norm of each example's distance from its class center)
ILossFunction interClassLoss = layerConf().getLossFn();
// calculate the intra-class score component
INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
INDArray centersForExamples = labels.mmul(centers);
// double intraClassScore = intraClassLoss.computeScore(centersForExamples, input, Activation.IDENTITY.getActivationFunction(), maskArray, false);
INDArray norm2DifferenceSquared = input.sub(centersForExamples).norm2(1);
norm2DifferenceSquared.muli(norm2DifferenceSquared);
double sum = norm2DifferenceSquared.sumNumber().doubleValue();
double lambda = layerConf().getLambda();
double intraClassScore = lambda / 2.0 * sum;
// intraClassScore = intraClassScore * layerConf().getLambda() / 2;
if (System.getenv("PRINT_CENTERLOSS") != null) {
System.out.println("Center loss is " + intraClassScore);
}
// now calculate the inter-class score component
double interClassScore = interClassLoss.computeScore(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray, false);
double score = interClassScore + intraClassScore;
score += fullNetworkL1 + fullNetworkL2;
score /= getInputMiniBatchSize();
this.score = score;
return score;
}
Use of org.nd4j.linalg.lossfunctions.ILossFunction in project deeplearning4j by deeplearning4j.
From the class RnnOutputLayer, method computeScoreForExamples:
/**Compute the score for each example individually, after labels and input have been set.
*
* @param fullNetworkL1 L1 regularization term for the entire network (or, 0.0 to not include regularization)
* @param fullNetworkL2 L2 regularization term for the entire network (or, 0.0 to not include regularization)
* @return A column INDArray of shape [numExamples,1], where entry i is the score of the ith example
*/
@Override
public INDArray computeScoreForExamples(double fullNetworkL1, double fullNetworkL2) {
if (input == null || labels == null)
throw new IllegalStateException("Cannot calculate score without input and labels");
INDArray preOut = preOutput2d(false);
ILossFunction lossFunction = layerConf().getLossFn();
INDArray scoreArray = lossFunction.computeScoreArray(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);
//scoreArray has shape [minibatch*timeSeriesLength, 1]
//Reshape it to [minibatch, timeSeriesLength], then sum over time steps
INDArray scoreArrayTs = TimeSeriesUtils.reshapeVectorToTimeSeriesMask(scoreArray, input.size(0));
INDArray summedScores = scoreArrayTs.sum(1);
double l1l2 = fullNetworkL1 + fullNetworkL2;
if (l1l2 != 0.0) {
summedScores.addi(l1l2);
}
return summedScores;
}
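In other words, the per-(example, time step) scores are regrouped by example, summed over the time dimension, and the network-wide L1/L2 term is added to each example's score. A minimal plain-Java sketch of that reduction, with hypothetical names (not the DL4J/ND4J API):

// Minimal sketch (plain Java, hypothetical names): collapse per-time-step scores into one
// score per example by summing over time, then add the L1/L2 regularization term.
public class RnnScorePerExampleSketch {
    static double[] perExampleScores(double[][] scorePerTimeStep, double l1l2) {
        double[] out = new double[scorePerTimeStep.length];   // one entry per example
        for (int i = 0; i < scorePerTimeStep.length; i++) {
            double sum = 0.0;
            for (double s : scorePerTimeStep[i]) sum += s;    // sum over timeSeriesLength
            out[i] = sum + l1l2;                              // regularization added per example
        }
        return out;
    }
}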