use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.
the class VaeGradientCheckTests method testVaeAsMLP.
@Test
public void testVaeAsMLP() {
//Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output
//This gradient check tests this part
//activation functions such as relu and hardtanh: may randomly fail due to discontinuities
String[] activFns = { "identity", "tanh" };
LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
//i.e., lossFunctions[i] used with outputActivations[i] here
String[] outputActivations = { "softmax", "tanh" };
//use l2vals[i] with l1vals[i]
double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
int[][] encoderLayerSizes = new int[][] { { 5 }, { 5, 6 } };
int[][] decoderLayerSizes = new int[][] { { 6 }, { 7, 8 } };
Nd4j.getRandom().setSeed(12345);
for (int minibatch : new int[] { 1, 5 }) {
INDArray input = Nd4j.rand(minibatch, 4);
INDArray labels = Nd4j.create(minibatch, 3);
for (int i = 0; i < minibatch; i++) {
labels.putScalar(i, i % 3, 1.0);
}
for (int ls = 0; ls < encoderLayerSizes.length; ls++) {
int[] encoderSizes = encoderLayerSizes[ls];
int[] decoderSizes = decoderLayerSizes[ls];
for (String afn : activFns) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
String outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1).l2Bias(biasL2[k]).l1Bias(biasL1[k]).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(1.0).seed(12345L).list().layer(0, new VariationalAutoencoder.Builder().nIn(4).nOut(3).encoderLayerSizes(encoderSizes).decoderLayerSizes(decoderSizes).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).activation(afn).updater(Updater.SGD).build()).layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nIn(3).nOut(3).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.SGD).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", encLayerSizes = " + Arrays.toString(encoderSizes) + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1;
if (PRINT_RESULTS) {
System.out.println(msg);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
assertTrue(msg, gradOK);
}
}
}
}
}
}
use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.
the class MultiNeuralNetConfLayerBuilderTest method testNeuralNetConfigAPI.
@Test
public void testNeuralNetConfigAPI() {
LossFunction newLoss = LossFunction.SQUARED_LOSS;
int newNumIn = numIn + 1;
int newNumOut = numOut + 1;
WeightInit newWeight = WeightInit.UNIFORM;
double newDrop = 0.5;
int[] newFS = new int[] { 3, 3 };
int newFD = 7;
int[] newStride = new int[] { 3, 3 };
Convolution.Type newConvType = Convolution.Type.SAME;
PoolingType newPoolType = PoolingType.AVG;
double newCorrupt = 0.5;
double newSparsity = 0.5;
HiddenUnit newHidden = HiddenUnit.BINARY;
VisibleUnit newVisible = VisibleUnit.BINARY;
MultiLayerConfiguration multiConf1 = new NeuralNetConfiguration.Builder().list().layer(0, new DenseLayer.Builder().nIn(newNumIn).nOut(newNumOut).activation(act).build()).layer(1, new DenseLayer.Builder().nIn(newNumIn + 1).nOut(newNumOut + 1).activation(act).build()).build();
NeuralNetConfiguration firstLayer = multiConf1.getConf(0);
NeuralNetConfiguration secondLayer = multiConf1.getConf(1);
assertFalse(firstLayer.equals(secondLayer));
}
use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.
the class GradientCheckTests method testRbm.
@Test
public void testRbm() {
//As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
//Need to run gradient through updater, so that L2 can be applied
RBM.HiddenUnit[] hiddenFunc = { RBM.HiddenUnit.BINARY, RBM.HiddenUnit.RECTIFIED };
//If true: run some backprop steps first
boolean[] characteristic = { false, true };
LossFunction[] lossFunctions = { LossFunction.MSE, LossFunction.KL_DIVERGENCE };
//i.e., lossFunctions[i] used with outputActivations[i] here
String[] outputActivations = { "softmax", "sigmoid" };
DataNormalization scaler = new NormalizerMinMaxScaler();
DataSetIterator iter = new IrisDataSetIterator(150, 150);
scaler.fit(iter);
iter.setPreProcessor(scaler);
DataSet ds = iter.next();
INDArray input = ds.getFeatureMatrix();
INDArray labels = ds.getLabels();
double[] l2vals = { 0.4, 0.0, 0.4 };
//i.e., use l2vals[i] with l1vals[i]
double[] l1vals = { 0.0, 0.5, 0.5 };
for (RBM.HiddenUnit hidunit : hiddenFunc) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
String outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).l2(l2).l1(l1).learningRate(1.0).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(12345L).list().layer(0, new RBM.Builder(hidunit, RBM.VisibleUnit.BINARY).nIn(4).nOut(3).weightInit(WeightInit.UNIFORM).updater(Updater.SGD).build()).layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3).weightInit(WeightInit.XAVIER).updater(Updater.SGD).activation(outputActivation).build()).pretrain(false).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++) mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradMLP2LayerIrisSimple() - activationFn=" + hidunit.toString() + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
}
}
}
}
}
use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.
the class GradientCheckTests method testAutoEncoder.
@Test
public void testAutoEncoder() {
//As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied
//Need to run gradient through updater, so that L2 can be applied
String[] activFns = { "sigmoid", "tanh" };
//If true: run some backprop steps first
boolean[] characteristic = { false, true };
LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
//i.e., lossFunctions[i] used with outputActivations[i] here
String[] outputActivations = { "softmax", "tanh" };
DataNormalization scaler = new NormalizerMinMaxScaler();
DataSetIterator iter = new IrisDataSetIterator(150, 150);
scaler.fit(iter);
iter.setPreProcessor(scaler);
DataSet ds = iter.next();
INDArray input = ds.getFeatureMatrix();
INDArray labels = ds.getLabels();
NormalizerStandardize norm = new NormalizerStandardize();
norm.fit(ds);
norm.transform(ds);
double[] l2vals = { 0.2, 0.0, 0.2 };
//i.e., use l2vals[i] with l1vals[i]
double[] l1vals = { 0.0, 0.3, 0.3 };
for (String afn : activFns) {
for (boolean doLearningFirst : characteristic) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
String outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
Nd4j.getRandom().setSeed(12345);
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().regularization(true).learningRate(1.0).l2(l2).l1(l1).optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).seed(12345L).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.SGD).list().layer(0, new AutoEncoder.Builder().nIn(4).nOut(3).activation(afn).build()).layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3).activation(outputActivation).build()).pretrain(true).backprop(true).build();
MultiLayerNetwork mln = new MultiLayerNetwork(conf);
mln.init();
if (doLearningFirst) {
//Run a number of iterations of learning
mln.setInput(ds.getFeatures());
mln.setLabels(ds.getLabels());
mln.computeGradientAndScore();
double scoreBefore = mln.score();
for (int j = 0; j < 10; j++) mln.fit(ds);
mln.computeGradientAndScore();
double scoreAfter = mln.score();
//Can't test in 'characteristic mode of operation' if not learning
String msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")";
assertTrue(msg, scoreAfter < scoreBefore);
}
if (PRINT_RESULTS) {
System.out.println("testGradientMLP2LayerIrisSimpleRandom() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
}
}
}
}
}
use of org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction in project deeplearning4j by deeplearning4j.
the class GradientCheckTests method testGradientGravesLSTMFull.
@Test
public void testGradientGravesLSTMFull() {
String[] activFns = { "tanh", "softsign" };
LossFunction[] lossFunctions = { LossFunction.MCXENT, LossFunction.MSE };
//i.e., lossFunctions[i] used with outputActivations[i] here
String[] outputActivations = { "softmax", "tanh" };
int timeSeriesLength = 8;
int nIn = 7;
int layerSize = 9;
int nOut = 4;
int miniBatchSize = 6;
Random r = new Random(12345L);
INDArray input = Nd4j.zeros(miniBatchSize, nIn, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < nIn; j++) {
for (int k = 0; k < timeSeriesLength; k++) {
input.putScalar(new int[] { i, j, k }, r.nextDouble() - 0.5);
}
}
}
INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength);
for (int i = 0; i < miniBatchSize; i++) {
for (int j = 0; j < timeSeriesLength; j++) {
int idx = r.nextInt(nOut);
labels.putScalar(new int[] { i, idx, j }, 1.0f);
}
}
//use l2vals[i] with l1vals[i]
double[] l2vals = { 0.4, 0.0, 0.4, 0.4 };
double[] l1vals = { 0.0, 0.0, 0.5, 0.0 };
double[] biasL2 = { 0.0, 0.0, 0.0, 0.2 };
double[] biasL1 = { 0.0, 0.0, 0.6, 0.0 };
for (String afn : activFns) {
for (int i = 0; i < lossFunctions.length; i++) {
for (int k = 0; k < l2vals.length; k++) {
LossFunction lf = lossFunctions[i];
String outputActivation = outputActivations[i];
double l2 = l2vals[k];
double l1 = l1vals[k];
NeuralNetConfiguration.Builder conf = new NeuralNetConfiguration.Builder().regularization(l1 > 0.0 || l2 > 0.0).seed(12345L);
if (l1 > 0.0)
conf.l1(l1);
if (l2 > 0.0)
conf.l2(l2);
if (biasL2[k] > 0)
conf.l2Bias(biasL2[k]);
if (biasL1[k] > 0)
conf.l1Bias(biasL1[k]);
NeuralNetConfiguration.ListBuilder conf2 = conf.list().layer(0, new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).activation(afn).updater(Updater.NONE).build()).layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize).nOut(nOut).weightInit(WeightInit.DISTRIBUTION).dist(new NormalDistribution(0, 1)).updater(Updater.NONE).build()).pretrain(false).backprop(true);
MultiLayerNetwork mln = new MultiLayerNetwork(conf2.build());
mln.init();
if (PRINT_RESULTS) {
System.out.println("testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1);
for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams());
}
boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels);
String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1;
assertTrue(msg, gradOK);
}
}
}
}
Aggregations