Use of org.deeplearning4j.optimize.api.ConvexOptimizer in project deeplearning4j by deeplearning4j.
Class NeuralNetConfigurationTest, method testL1L2ByParam.
/**
 * Builds a three-layer network (dense, batch normalization, output) with
 * regularization enabled and global L1/L2 coefficients, then checks the
 * per-parameter L1/L2 values reported by each layer's configuration.
 *
 * <p>Expected values asserted below: the dense layer reports the global L1
 * for {@code "W"} and zero for {@code "b"}; every batch normalization
 * parameter reports an L2 of zero even though that layer sets
 * {@code l2(0.5)}; the output layer reports the global L2 for {@code "W"}
 * and zero for {@code "b"}.
 */
@Test
public void testL1L2ByParam() {
    final double l1Coeff = 0.01;
    final double l2Coeff = 0.07;
    final int[] layerInputs = {4, 3, 3};
    final int[] layerOutputs = {3, 3, 3};
    final int previousScore = 1;
    final int currentScore = 1;
    final int iterationCount = 3;
    INDArray weightGradient = Nd4j.ones(layerInputs[0], layerOutputs[0]);

    MultiLayerConfiguration networkConf = new NeuralNetConfiguration.Builder()
            .learningRate(8)
            .regularization(true)
            .l1(l1Coeff)
            .l2(l2Coeff)
            .list()
            .layer(0, new DenseLayer.Builder()
                    .nIn(layerInputs[0])
                    .nOut(layerOutputs[0])
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .build())
            .layer(1, new BatchNormalization.Builder()
                    .nIn(layerInputs[1])
                    .nOut(layerOutputs[1])
                    .l2(0.5)
                    .build())
            .layer(2, new OutputLayer.Builder()
                    .nIn(layerInputs[2])
                    .nOut(layerOutputs[2])
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .build())
            .backprop(true)
            .pretrain(false)
            .build();

    MultiLayerNetwork network = new MultiLayerNetwork(networkConf);
    network.init();

    // The optimizer is exercised once before the assertions; its result is not inspected here.
    ConvexOptimizer optimizer = new StochasticGradientDescent(
            network.getDefaultConfiguration(), new NegativeDefaultStepFunction(), null, network);
    optimizer.checkTerminalConditions(weightGradient, previousScore, currentScore, iterationCount);

    // Layer 0 (dense): L1 on the weights only.
    assertEquals(l1Coeff, network.getLayer(0).conf().getL1ByParam("W"), 1e-4);
    assertEquals(0.0, network.getLayer(0).conf().getL1ByParam("b"), 0.0);

    // Layer 1 (batch normalization): no L2 expected on any of its parameters.
    for (String batchNormParam : new String[] {"beta", "gamma", "mean", "var"}) {
        assertEquals(0.0, network.getLayer(1).conf().getL2ByParam(batchNormParam), 0.0);
    }

    // Layer 2 (output): L2 on the weights only.
    assertEquals(l2Coeff, network.getLayer(2).conf().getL2ByParam("W"), 1e-4);
    assertEquals(0.0, network.getLayer(2).conf().getL2ByParam("b"), 0.0);
}
Use of org.deeplearning4j.optimize.api.ConvexOptimizer in project deeplearning4j by deeplearning4j.
Class NeuralNetConfigurationTest, method testLearningRateByParam.
/**
 * Builds a three-layer network (dense, batch normalization, output) with a
 * global learning rate of 0.3 and checks the per-parameter learning rates
 * reported by each layer's configuration.
 *
 * <p>The dense layer sets its own weight and bias learning rates, the batch
 * normalization layer sets its own learning rate (0.7), and the output layer
 * sets none, so it is expected to report the global value.
 */
@Test
public void testLearningRateByParam() {
    final double weightLr = 0.01;
    final double biasLearningRate = 0.02;
    final int[] layerInputs = {4, 3, 3};
    final int[] layerOutputs = {3, 3, 3};
    final int previousScore = 1;
    final int currentScore = 1;
    final int iterationCount = 3;
    INDArray weightGradient = Nd4j.ones(layerInputs[0], layerOutputs[0]);

    MultiLayerConfiguration networkConf = new NeuralNetConfiguration.Builder()
            .learningRate(0.3)
            .list()
            .layer(0, new DenseLayer.Builder()
                    .nIn(layerInputs[0])
                    .nOut(layerOutputs[0])
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .learningRate(weightLr)
                    .biasLearningRate(biasLearningRate)
                    .build())
            .layer(1, new BatchNormalization.Builder()
                    .nIn(layerInputs[1])
                    .nOut(layerOutputs[1])
                    .learningRate(0.7)
                    .build())
            .layer(2, new OutputLayer.Builder()
                    .nIn(layerInputs[2])
                    .nOut(layerOutputs[2])
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                    .build())
            .backprop(true)
            .pretrain(false)
            .build();

    MultiLayerNetwork network = new MultiLayerNetwork(networkConf);
    network.init();

    // The optimizer is exercised once before the assertions; its result is not inspected here.
    ConvexOptimizer optimizer = new StochasticGradientDescent(
            network.getDefaultConfiguration(), new NegativeDefaultStepFunction(), null, network);
    optimizer.checkTerminalConditions(weightGradient, previousScore, currentScore, iterationCount);

    // Layer 0: explicit per-layer weight and bias learning rates.
    assertEquals(weightLr, network.getLayer(0).conf().getLearningRateByParam("W"), 1e-4);
    assertEquals(biasLearningRate, network.getLayer(0).conf().getLearningRateByParam("b"), 1e-4);

    // Layer 1: the layer-level learning rate applies to gamma.
    assertEquals(0.7, network.getLayer(1).conf().getLearningRateByParam("gamma"), 1e-4);

    // Layer 2: nothing set on the layer, so both values come from the global learning rate.
    assertEquals(0.3, network.getLayer(2).conf().getLearningRateByParam("W"), 1e-4);
    assertEquals(0.3, network.getLayer(2).conf().getLearningRateByParam("b"), 1e-4);
}
Use of org.deeplearning4j.optimize.api.ConvexOptimizer in project deeplearning4j by deeplearning4j.
Class TestOptimizers, method testRastriginFnMultipleStepsHelper.
/**
 * Optimizes a fresh {@code RastriginFunctionModel} once for every iteration
 * count from 0 to {@code nOptIter} and verifies that the resulting scores
 * are finite and never increase as the iteration count grows.
 *
 * <p>Index 0 of the score array holds the score before any optimization;
 * index {@code i > 0} holds the score after running the optimizer on a model
 * configured with {@code iterations(i)}.
 *
 * @param oa                   optimization algorithm passed to {@code getOptimizer}
 * @param nOptIter             largest iteration count to test (inclusive)
 * @param maxNumLineSearchIter value passed to {@code maxNumLineSearchIterations}
 */
private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter, int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];
    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter)
                .iterations(i)
                .miniBatch(false)
                .learningRate(1e-2)
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).updater(Updater.ADAGRAD).build())
                .build();
        //Normally done by ParamInitializers, but obviously that isn't done here
        conf.addVariable("W");
        Model m = new RastriginFunctionModel(100, conf);
        int nParams = m.numParams();
        if (i == 0) {
            m.computeGradientAndScore();
            //Before optimization
            scores[0] = m.score();
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            // Give the updater a state view array sized to the model's parameter count before optimizing.
            opt.getUpdater().setStateViewArray((Layer) m, Nd4j.createUninitialized(new int[] { 1, nParams }, 'c'), true);
            opt.optimize();
            m.computeGradientAndScore();
            scores[i] = m.score();
            assertTrue("Score after " + i + " iteration(s) is not finite: " + scores[i],
                    !Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }
    if (PRINT_OPT_RESULTS) {
        System.out.println("Rastrigin: Multiple optimization iterations (" + nOptIter + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }
    // Scores must be non-increasing with the iteration count. The comparison is
    // deliberately non-strict, so an unchanged score also passes (this includes
    // the very first step, which the original code special-cased with an
    // identical assertion).
    for (int i = 1; i < scores.length; i++) {
        assertTrue("Score increased between " + (i - 1) + " and " + i + " iteration(s): "
                + scores[i - 1] + " -> " + scores[i], scores[i] <= scores[i - 1]);
    }
}
Use of org.deeplearning4j.optimize.api.ConvexOptimizer in project deeplearning4j by deeplearning4j.
Class TestOptimizers, method testSphereFnOptHelper.
/**
 * Runs one optimization of a {@code SphereFunctionModel} with the given
 * algorithm and asserts that the score is finite before and after, and
 * strictly lower afterwards.
 *
 * @param oa                optimization algorithm passed to {@code getOptimizer}
 * @param numLineSearchIter value passed to {@code maxNumLineSearchIterations}
 * @param nDimensions       first constructor argument of the sphere model
 */
public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIter, int nDimensions) {
    if (PRINT_OPT_RESULTS) {
        System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= " + nDimensions);
    }

    NeuralNetConfiguration config = new NeuralNetConfiguration.Builder()
            .maxNumLineSearchIterations(numLineSearchIter)
            .iterations(100)
            .learningRate(1e-2)
            .layer(new RBM.Builder().nIn(1).nOut(1).updater(Updater.SGD).build())
            .build();
    // ParamInitializers would normally register this variable; nothing does so here.
    config.addVariable("W");

    // Fixed seed, uniform distribution over [-10, 10] handed to the model.
    Random seededRng = new DefaultRandom(12345L);
    org.nd4j.linalg.api.rng.distribution.Distribution uniform =
            new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(seededRng, -10, 10);
    Model model = new SphereFunctionModel(nDimensions, uniform, config);

    model.computeGradientAndScore();
    double initialScore = model.score();
    assertTrue(!(Double.isNaN(initialScore) || Double.isInfinite(initialScore)));
    if (PRINT_OPT_RESULTS) {
        System.out.println("Before:");
        System.out.println(initialScore);
        System.out.println(model.params());
    }

    ConvexOptimizer optimizer = getOptimizer(oa, config, model);
    optimizer.setupSearchState(model.gradientAndScore());
    optimizer.optimize();

    model.computeGradientAndScore();
    double finalScore = model.score();
    assertTrue(!(Double.isNaN(finalScore) || Double.isInfinite(finalScore)));
    if (PRINT_OPT_RESULTS) {
        System.out.println("After:");
        System.out.println(finalScore);
        System.out.println(model.params());
    }

    // Expected after optimization:
    // (a) the score is lower than before.
    // (b) the parameters are closer to the minimum (TODO: not checked yet).
    assertTrue("Score did not improve after optimization (b= " + initialScore + " ,a= " + finalScore + ")",
            finalScore < initialScore);
}
Use of org.deeplearning4j.optimize.api.ConvexOptimizer in project deeplearning4j by deeplearning4j.
Class TestOptimizers, method testSphereFnMultipleStepsHelper.
/**
 * Optimizes a fresh, identically seeded {@code SphereFunctionModel} once for
 * every iteration count from 0 to {@code nOptIter}, then asserts that the
 * scores never increase with the iteration count and that the last score is
 * below 1.0.
 *
 * @param oa                   optimization algorithm passed to {@code getOptimizer}
 * @param nOptIter             largest iteration count to test (inclusive)
 * @param maxNumLineSearchIter value passed to {@code maxNumLineSearchIterations}
 */
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter, int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int iterCount = 0; iterCount <= nOptIter; iterCount++) {
        // Same seed on every pass; the distribution is handed to each new model.
        Random seededRng = new DefaultRandom(12345L);
        org.nd4j.linalg.api.rng.distribution.Distribution uniform =
                new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(seededRng, -10, 10);

        NeuralNetConfiguration config = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter)
                .iterations(iterCount)
                .learningRate(0.1)
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).updater(Updater.SGD).build())
                .build();
        // ParamInitializers would normally register this variable; nothing does so here.
        config.addVariable("W");

        Model model = new SphereFunctionModel(100, uniform, config);

        // Slot 0 records the score before any optimization; every other slot is
        // recorded after running the optimizer.
        boolean runOptimizer = iterCount != 0;
        if (runOptimizer) {
            ConvexOptimizer optimizer = getOptimizer(oa, config, model);
            optimizer.optimize();
        }
        model.computeGradientAndScore();
        scores[iterCount] = model.score();
        if (runOptimizer) {
            assertTrue(!(Double.isNaN(scores[iterCount]) || Double.isInfinite(scores[iterCount])));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Multiple optimization iterations (" + nOptIter + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }

    // Each score must be no worse than the one obtained with one fewer iteration.
    for (int earlier = 0, later = 1; later < scores.length; earlier++, later++) {
        assertTrue(scores[later] <= scores[earlier]);
    }

    // Very easy function: the score should be close to 0 for any reasonable number of steps.
    assertTrue(scores[scores.length - 1] < 1.0);
}
Aggregations