Use of org.deeplearning4j.nn.conf.layers.LossLayer in the deeplearning4j project (by deeplearning4j).
From the class LossFunctionGradientCheck, the method lossFunctionGradientCheckLossLayer:
@Test
public void lossFunctionGradientCheckLossLayer() {
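// Gradient-checks a LossLayer for every supported loss function / output activation pairing listed below.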
ILossFunction[] lossFunctions = new ILossFunction[] { new LossBinaryXENT(), new LossBinaryXENT(),
new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL1(),
new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), new LossMCXENT(),
new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(),
new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge() };
String[] outputActivationFn = new String[] {
"sigmoid", //xent
"sigmoid", //xent
"tanh", //cosine
"tanh", //hinge -> trying to predict 1 or -1
"sigmoid", //kld -> probab so should be between 0 and 1
"softmax", //kld + softmax
"tanh", //l1
"softmax", //l1 + softmax
"tanh", //l2
"softmax", //l2 + softmax
"identity", //mae
"softmax", //mae + softmax
"identity", //mape
"softmax", //mape + softmax
"softmax", //mcxent
"identity", //mse
"softmax", //mse + softmax
"sigmoid", //msle - requires positive labels/activations due to log
"softmax", //msle + softmax
"sigmoid", //nll
"softmax", //nll + softmax
"sigmoid", //poisson - requires positive predictions due to log... not sure if this is the best option
"tanh" //squared hinge
};
int[] nOut = new int[] {
1, //xent
3, //xent
5, //cosine
3, //hinge
3, //kld
3, //kld + softmax
3, //l1
3, //l1 + softmax
3, //l2
3, //l2 + softmax
3, //mae
3, //mae + softmax
3, //mape
3, //mape + softmax
3, //mcxent
3, //mse
3, //mse + softmax
3, //msle
3, //msle + softmax
3, //nll
3, //nll + softmax
3, //poisson
3 //squared hinge
};
int[] minibatchSizes = new int[] { 1, 3 };
// int[] minibatchSizes = new int[]{3};
List<String> passed = new ArrayList<>();
List<String> failed = new ArrayList<>();
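// Outer loop: one configuration per loss function / activation pairing; inner loop: each minibatch size.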
for (int i = 0; i < lossFunctions.length; i++) {
for (int j = 0; j < minibatchSizes.length; j++) {
String testName = lossFunctions[i] + " - " + outputActivationFn[i] + " - minibatchSize = " + minibatchSizes[j];
Nd4j.getRandom().setSeed(12345);
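// Small net: a dense tanh layer feeding the LossLayer under test; no updater or regularization, so the check sees raw gradients.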
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
.updater(Updater.NONE).regularization(false)
.weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
.list()
.layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build())
.layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i]).activation(outputActivationFn[i]).build())
.pretrain(false).backprop(true).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
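// Sanity check: the configured LossLayer actually holds the requested loss function.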
assertTrue(((LossLayer) net.getLayer(1).conf().getLayer()).getLossFn().getClass() == lossFunctions[i].getClass());
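// Fetch input features and labels sized for this loss function (helper defined elsewhere in this test class).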
INDArray[] inOut = getFeaturesAndLabels(lossFunctions[i], minibatchSizes[j], 4, nOut[i], 12345);
INDArray input = inOut[0];
INDArray labels = inOut[1];
log.info(" ***** Starting test: {} *****", testName);
// System.out.println(Arrays.toString(labels.data().asDouble()));
// System.out.println(Arrays.toString(net.output(input,false).data().asDouble()));
// System.out.println(net.score(new DataSet(input,labels)));
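// Compare analytic backprop gradients against numerical (finite-difference) gradients.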
boolean gradOK;
try {
gradOK = GradientCheckUtil.checkGradients(net, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
} catch (Exception e) {
e.printStackTrace();
failed.add(testName + "\t" + "EXCEPTION");
continue;
}
if (gradOK) {
passed.add(testName);
} else {
failed.add(testName);
}
System.out.println("\n\n");
}
}
System.out.println("---- Passed ----");
for (String s : passed) {
System.out.println(s);
}
System.out.println("---- Failed ----");
for (String s : failed) {
System.out.println(s);
}
assertEquals("Tests failed", 0, failed.size());
}
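For context, below is a minimal standalone sketch of the same LossLayer wiring outside the gradient-check harness. It is an illustration, not part of the test above: the class name LossLayerSketch is hypothetical, the loss is fixed to LossMSE with an identity output activation, and the builder calls (iterations, regularization, pretrain/backprop) assume the 0.x-era DL4J API used by this test; newer releases have removed some of them, so package paths and methods may differ in your version.

import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.distribution.UniformDistribution;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.LossLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;

public class LossLayerSketch {
    public static void main(String[] args) {
        // Dense tanh layer (4 -> 3) followed by a LossLayer applying MSE on an
        // identity output activation -- the same layout the test builds per loss function.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().iterations(1)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345)
                .updater(Updater.NONE).regularization(false)
                .weightInit(WeightInit.DISTRIBUTION).dist(new UniformDistribution(-2, 2))
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build())
                .layer(1, new LossLayer.Builder().lossFunction(new LossMSE()).activation("identity").build())
                .pretrain(false).backprop(true).build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Forward pass on a random minibatch of 3 examples with 4 features each.
        INDArray input = Nd4j.rand(3, 4);
        INDArray out = net.output(input, false);
        System.out.println("Output shape: " + java.util.Arrays.toString(out.shape()));
    }
}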