
Example 1 with NegativeDefaultStepFunction

Use of org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction in project deeplearning4j (by deeplearning4j).

From class NeuralNetConfigurationTest, method testL1L2ByParam.

@Test
public void testL1L2ByParam() {
    double l1 = 0.01;
    double l2 = 0.07;
    int[] nIns = { 4, 3, 3 };
    int[] nOuts = { 3, 3, 3 };
    int oldScore = 1;
    int newScore = 1;
    int iteration = 3;
    INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(8)
                    .regularization(true).l1(l1).l2(l2)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).l2(0.5).build())
                    .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2])
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .backprop(true).pretrain(false).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    ConvexOptimizer opt = new StochasticGradientDescent(net.getDefaultConfiguration(), new NegativeDefaultStepFunction(), null, net);
    opt.checkTerminalConditions(gradientW, oldScore, newScore, iteration);
    assertEquals(l1, net.getLayer(0).conf().getL1ByParam("W"), 1e-4);
    assertEquals(0.0, net.getLayer(0).conf().getL1ByParam("b"), 0.0);
    assertEquals(0.0, net.getLayer(1).conf().getL2ByParam("beta"), 0.0);
    assertEquals(0.0, net.getLayer(1).conf().getL2ByParam("gamma"), 0.0);
    assertEquals(0.0, net.getLayer(1).conf().getL2ByParam("mean"), 0.0);
    assertEquals(0.0, net.getLayer(1).conf().getL2ByParam("var"), 0.0);
    assertEquals(l2, net.getLayer(2).conf().getL2ByParam("W"), 1e-4);
    assertEquals(0.0, net.getLayer(2).conf().getL2ByParam("b"), 0.0);
}
Also used: StochasticGradientDescent (org.deeplearning4j.optimize.solvers.StochasticGradientDescent), BatchNormalization (org.deeplearning4j.nn.conf.layers.BatchNormalization), ConvexOptimizer (org.deeplearning4j.optimize.api.ConvexOptimizer), INDArray (org.nd4j.linalg.api.ndarray.INDArray), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), NegativeDefaultStepFunction (org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction), Test (org.junit.Test)

Example 2 with NegativeDefaultStepFunction

Use of org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction in project deeplearning4j (by deeplearning4j).

From class NeuralNetConfigurationTest, method testLearningRateByParam.

@Test
public void testLearningRateByParam() {
    double lr = 0.01;
    double biasLr = 0.02;
    int[] nIns = { 4, 3, 3 };
    int[] nOuts = { 3, 3, 3 };
    int oldScore = 1;
    int newScore = 1;
    int iteration = 3;
    INDArray gradientW = Nd4j.ones(nIns[0], nOuts[0]);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .learningRate(0.3)
                    .list()
                    .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                                    .learningRate(lr).biasLearningRate(biasLr).build())
                    .layer(1, new BatchNormalization.Builder().nIn(nIns[1]).nOut(nOuts[1]).learningRate(0.7).build())
                    .layer(2, new OutputLayer.Builder().nIn(nIns[2]).nOut(nOuts[2])
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .backprop(true).pretrain(false).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    ConvexOptimizer opt = new StochasticGradientDescent(net.getDefaultConfiguration(), new NegativeDefaultStepFunction(), null, net);
    opt.checkTerminalConditions(gradientW, oldScore, newScore, iteration);
    assertEquals(lr, net.getLayer(0).conf().getLearningRateByParam("W"), 1e-4);
    assertEquals(biasLr, net.getLayer(0).conf().getLearningRateByParam("b"), 1e-4);
    assertEquals(0.7, net.getLayer(1).conf().getLearningRateByParam("gamma"), 1e-4);
    //From global LR
    assertEquals(0.3, net.getLayer(2).conf().getLearningRateByParam("W"), 1e-4);
    //From global LR
    assertEquals(0.3, net.getLayer(2).conf().getLearningRateByParam("b"), 1e-4);
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), StochasticGradientDescent (org.deeplearning4j.optimize.solvers.StochasticGradientDescent), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), NegativeDefaultStepFunction (org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction), BatchNormalization (org.deeplearning4j.nn.conf.layers.BatchNormalization), ConvexOptimizer (org.deeplearning4j.optimize.api.ConvexOptimizer), Test (org.junit.Test)
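
Both configuration tests above wire NegativeDefaultStepFunction into the optimizer the same way; only the layer configuration under test differs. The snippet below is a condensed sketch of that shared construction rather than additional project code: variable names are illustrative, and it assumes an already-initialized MultiLayerNetwork named net, as in the tests.

// Shared construction pattern from Examples 1 and 2 (sketch; `net` is assumed to be
// a configured and initialized MultiLayerNetwork, as in the test setup).
ConvexOptimizer optimizer = new StochasticGradientDescent(
                net.getDefaultConfiguration(),     // the network's default NeuralNetConfiguration
                new NegativeDefaultStepFunction(), // step in the negative (descent) direction
                null,                              // third constructor argument is passed as null in these tests
                net);                              // the model whose score and gradients are consulted

// The tests only exercise the terminal-condition check, not a full optimization run:
INDArray gradientW = Nd4j.ones(4, 3);
optimizer.checkTerminalConditions(gradientW, 1, 1, 3);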

Example 3 with NegativeDefaultStepFunction

Use of org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction in project deeplearning4j (by deeplearning4j).

From class BackTrackLineSearch, method optimize.

// returns the fraction of the full step size if a good step was found
// returns 0.0 if it could not step in the given direction
// step == alam and score == f in book
/**
     * @param parameters      the parameters to optimize
     * @param gradients       the gradients of the objective with respect to the parameters
     * @param searchDirection the direction in which the line search should move
     * @return the step size to take (a fraction of the full step), or 0.0 if no acceptable step was found
     * @throws InvalidStepException if an invalid step is encountered
     */
@Override
public double optimize(INDArray parameters, INDArray gradients, INDArray searchDirection) throws InvalidStepException {
    double test, stepMin, step, step2, oldStep, tmpStep;
    double rhs1, rhs2, a, b, disc, score, scoreAtStart, score2;
    minObjectiveFunction = (stepFunction instanceof NegativeDefaultStepFunction || stepFunction instanceof NegativeGradientStepFunction);
    Level1 l1Blas = Nd4j.getBlasWrapper().level1();
    double sum = l1Blas.nrm2(searchDirection);
    double slope = -1f * Nd4j.getBlasWrapper().dot(searchDirection, gradients);
    log.debug("slope = {}", slope);
    INDArray maxOldParams = abs(parameters);
    Nd4j.getExecutioner().exec(new ScalarSetValue(maxOldParams, 1));
    INDArray testMatrix = abs(gradients).divi(maxOldParams);
    test = testMatrix.max(Integer.MAX_VALUE).getDouble(0);
    // initially, step = 1.0, i.e. take full Newton step
    step = 1.0;
    // relative convergence tolerance
    stepMin = relTolx / test;
    oldStep = 0.0;
    step2 = 0.0;
    score = score2 = scoreAtStart = layer.score();
    double bestScore = score;
    double bestStepSize = 1.0;
    if (log.isTraceEnabled()) {
        double norm1 = l1Blas.asum(searchDirection);
        int infNormIdx = l1Blas.iamax(searchDirection);
        double infNorm = FastMath.max(Float.NEGATIVE_INFINITY, searchDirection.getDouble(infNormIdx));
        log.trace("ENTERING BACKTRACK\n");
        log.trace("Entering BackTrackLineSearch, value = " + scoreAtStart + ",\ndirection.oneNorm:" + norm1 + "  direction.infNorm:" + infNorm);
    }
    if (sum > stepMax) {
        log.warn("Attempted step too big. scaling: sum= {}, stepMax= {}", sum, stepMax);
        searchDirection.muli(stepMax / sum);
    }
    //        if (slope >= 0.0) {
    //            throw new InvalidStepException("Slope " + slope + " is >= 0.0. Expect slope < 0.0 when minimizing objective function");
    //        }
    // find maximum lambda
    // converge when (delta x) / x < REL_TOLX for all coordinates.
    // the largest step size that triggers this threshold is precomputed and saved in stepMin
    // look for step size in direction given by "line"
    INDArray candidateParameters = null;
    for (int iteration = 0; iteration < maxIterations; iteration++) {
        if (log.isTraceEnabled()) {
            log.trace("BackTrack loop iteration {} : step={}, oldStep={}", iteration, step, oldStep);
            log.trace("before step, x.1norm: {} \nstep: {} \noldStep: {}", parameters.norm1(Integer.MAX_VALUE), step, oldStep);
        }
        if (step == oldStep)
            throw new IllegalArgumentException("Current step == oldStep");
        // step
        candidateParameters = parameters.dup('f');
        stepFunction.step(candidateParameters, searchDirection, step);
        oldStep = step;
        if (log.isTraceEnabled()) {
            double norm1 = l1Blas.asum(candidateParameters);
            log.trace("after step, x.1norm: " + norm1);
        }
        // check for convergence on delta x
        if ((step < stepMin) || Nd4j.getExecutioner().execAndReturn(new Eps(parameters, candidateParameters, Shape.toOffsetZeroCopy(candidateParameters, 'f'), candidateParameters.length())).sum(Integer.MAX_VALUE).getDouble(0) == candidateParameters.length()) {
            score = setScoreFor(parameters);
            log.debug("EXITING BACKTRACK: Jump too small (stepMin = {}). Exiting and using original params. Score = {}", stepMin, score);
            return 0.0;
        }
        score = setScoreFor(candidateParameters);
        log.debug("Model score after step = {}", score);
        //Score best step size for use if we terminate on maxIterations
        if ((minObjectiveFunction && score < bestScore) || (!minObjectiveFunction && score > bestScore)) {
            bestScore = score;
            bestStepSize = step;
        }
        //Sufficient decrease in cost/loss function (Wolfe condition / Armijo condition)
        if (minObjectiveFunction && score <= scoreAtStart + ALF * step * slope) {
            log.debug("Sufficient decrease (Wolfe cond.), exiting backtrack on iter {}: score={}, scoreAtStart={}", iteration, score, scoreAtStart);
            if (score > scoreAtStart)
                throw new IllegalStateException("Function did not decrease: score = " + score + " > " + scoreAtStart + " = oldScore");
            return step;
        }
        //Sufficient increase in cost/loss function (Wolfe condition / Armijo condition)
        if (!minObjectiveFunction && score >= scoreAtStart + ALF * step * slope) {
            log.debug("Sufficient increase (Wolfe cond.), exiting backtrack on iter {}: score={}, bestScore={}", iteration, score, scoreAtStart);
            if (score < scoreAtStart)
                throw new IllegalStateException("Function did not increase: score = " + score + " < " + scoreAtStart + " = scoreAtStart");
            return step;
        } else if (Double.isInfinite(score) || Double.isInfinite(score2) || Double.isNaN(score) || Double.isNaN(score2)) {
            // if the value is infinite, i.e. we've jumped into unstable territory, scale down the jump
            log.warn("Value is infinite after jump. oldStep={}. score={}, score2={}. Scaling back step size...", oldStep, score, score2);
            tmpStep = .2 * step;
            if (step < stepMin) {
                //convergence on delta x
                score = setScoreFor(parameters);
                log.warn("EXITING BACKTRACK: Jump too small (step={} < stepMin={}). Exiting and using previous parameters. Value={}", step, stepMin, score);
                return 0.0;
            }
        } else if (minObjectiveFunction) {
            // first time through
            if (step == 1.0)
                tmpStep = -slope / (2.0 * (score - scoreAtStart - slope));
            else {
                rhs1 = score - scoreAtStart - step * slope;
                rhs2 = score2 - scoreAtStart - step2 * slope;
                if (step == step2)
                    throw new IllegalStateException("FAILURE: dividing by step-step2 which equals 0. step=" + step);
                double stepSquared = step * step;
                double step2Squared = step2 * step2;
                a = (rhs1 / stepSquared - rhs2 / step2Squared) / (step - step2);
                b = (-step2 * rhs1 / stepSquared + step * rhs2 / step2Squared) / (step - step2);
                if (a == 0.0)
                    tmpStep = -slope / (2.0 * b);
                else {
                    disc = b * b - 3.0 * a * slope;
                    if (disc < 0.0) {
                        tmpStep = 0.5 * step;
                    } else if (b <= 0.0)
                        tmpStep = (-b + FastMath.sqrt(disc)) / (3.0 * a);
                    else
                        tmpStep = -slope / (b + FastMath.sqrt(disc));
                }
                if (tmpStep > 0.5 * step)
                    // lambda <= 0.5 lambda_1
                    tmpStep = 0.5 * step;
            }
        } else {
            // first time through
            if (step == 1.0)
                tmpStep = -slope / (2.0 * (scoreAtStart - score - slope));
            else {
                rhs1 = scoreAtStart - score - step * slope;
                rhs2 = scoreAtStart - score2 - step2 * slope;
                if (step == step2)
                    throw new IllegalStateException("FAILURE: dividing by step-step2 which equals 0. step=" + step);
                double stepSquared = step * step;
                double step2Squared = step2 * step2;
                a = (rhs1 / stepSquared - rhs2 / step2Squared) / (step - step2);
                b = (-step2 * rhs1 / stepSquared + step * rhs2 / step2Squared) / (step - step2);
                if (a == 0.0)
                    tmpStep = -slope / (2.0 * b);
                else {
                    disc = b * b - 3.0 * a * slope;
                    if (disc < 0.0) {
                        tmpStep = 0.5 * step;
                    } else if (b <= 0.0)
                        tmpStep = (-b + FastMath.sqrt(disc)) / (3.0 * a);
                    else
                        tmpStep = -slope / (b + FastMath.sqrt(disc));
                }
                if (tmpStep > 0.5 * step)
                    // lambda <= 0.5 lambda_1
                    tmpStep = 0.5 * step;
            }
        }
        step2 = step;
        score2 = score;
        log.debug("tmpStep: {}", tmpStep);
        // lambda >= .1*Lambda_1
        step = Math.max(tmpStep, .1f * step);
    }
    if (minObjectiveFunction && bestScore < scoreAtStart) {
        //Return best step size
        log.debug("Exited line search after maxIterations termination condition; bestStepSize={}, bestScore={}, scoreAtStart={}", bestStepSize, bestScore, scoreAtStart);
        return bestStepSize;
    } else if (!minObjectiveFunction && bestScore > scoreAtStart) {
        //Return best step size
        log.debug("Exited line search after maxIterations termination condition; bestStepSize={}, bestScore={}, scoreAtStart={}", bestStepSize, bestScore, scoreAtStart);
        return bestStepSize;
    } else {
        log.debug("Exited line search after maxIterations termination condition; score did not improve (bestScore={}, scoreAtStart={}). Resetting parameters", bestScore, scoreAtStart);
        setScoreFor(parameters);
        return 0.0;
    }
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), ScalarSetValue (org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue), Eps (org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps), Level1 (org.nd4j.linalg.api.blas.Level1), NegativeDefaultStepFunction (org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction), NegativeGradientStepFunction (org.deeplearning4j.nn.conf.stepfunctions.NegativeGradientStepFunction)
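
The comment "step == alam and score == f in book" suggests the interpolation logic above follows the standard backtracking line search with quadratic/cubic back-off (the lnsrch routine described in Numerical Recipes). As a sketch of the math the branches implement for the minimization case, in the code's own variables:

Let $m = -\,d^{\top} g$ be the slope along the search direction $d$ (the code's slope), $f(0) = \mathrm{scoreAtStart}$, and $\alpha = \mathrm{ALF}$. A candidate step $\lambda$ (step) is accepted when

$$ f(\lambda) \le f(0) + \alpha\,\lambda\,m . $$

Otherwise the step is shrunk. On the first iteration ($\lambda = 1$) the quadratic model gives

$$ \lambda_{\text{new}} = \frac{-m}{2\,\bigl(f(1) - f(0) - m\bigr)}, $$

and on later iterations a cubic through the two most recent trials $\lambda_1$ (step) and $\lambda_2$ (step2) gives

$$ r_i = f(\lambda_i) - f(0) - \lambda_i m, \qquad a = \frac{r_1/\lambda_1^2 - r_2/\lambda_2^2}{\lambda_1 - \lambda_2}, \qquad b = \frac{-\lambda_2\,r_1/\lambda_1^2 + \lambda_1\,r_2/\lambda_2^2}{\lambda_1 - \lambda_2}, $$

$$ \lambda_{\text{new}} = \frac{-b + \sqrt{b^2 - 3am}}{3a} \;\; (a \ne 0), \qquad \lambda_{\text{new}} = \frac{-m}{2b} \;\; (a = 0), $$

with the step halved if the discriminant is negative, and the result clamped to $0.1\,\lambda_1 \le \lambda_{\text{new}} \le 0.5\,\lambda_1$. The maximization branch is the same with the sign of the score differences flipped.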

Example 4 with NegativeDefaultStepFunction

Use of org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction in project deeplearning4j (by deeplearning4j).

From class BackTrackLineSearchTest, method testSingleMaxLineSearch.

@Test
public void testSingleMaxLineSearch() throws Exception {
    double score1, score2;
    OutputLayer layer = getIrisLogisticLayerConfig(Activation.SOFTMAX, 100, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD);
    int nParams = layer.numParams();
    layer.setBackpropGradientsViewArray(Nd4j.create(1, nParams));
    layer.setInput(irisData.getFeatureMatrix());
    layer.setLabels(irisData.getLabels());
    layer.computeGradientAndScore();
    score1 = layer.score();
    BackTrackLineSearch lineSearch = new BackTrackLineSearch(layer, new NegativeDefaultStepFunction(), layer.getOptimizer());
    double step = lineSearch.optimize(layer.params(), layer.gradient().gradient(), layer.gradient().gradient());
    assertEquals(1.0, step, 1e-3);
}
Also used: OutputLayer (org.deeplearning4j.nn.layers.OutputLayer), BackTrackLineSearch (org.deeplearning4j.optimize.solvers.BackTrackLineSearch), NegativeDefaultStepFunction (org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction), Test (org.junit.Test)

Example 5 with NegativeDefaultStepFunction

Use of org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction in project deeplearning4j (by deeplearning4j).

From class BackTrackLineSearchTest, method testMultMinLineSearch.

@Test
public void testMultMinLineSearch() throws Exception {
    double score1, score2;
    OutputLayer layer = getIrisLogisticLayerConfig(Activation.SOFTMAX, 100, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD);
    int nParams = layer.numParams();
    layer.setBackpropGradientsViewArray(Nd4j.create(1, nParams));
    layer.setInput(irisData.getFeatureMatrix());
    layer.setLabels(irisData.getLabels());
    layer.computeGradientAndScore();
    score1 = layer.score();
    INDArray origGradient = layer.gradient().gradient().dup();
    NegativeDefaultStepFunction sf = new NegativeDefaultStepFunction();
    BackTrackLineSearch lineSearch = new BackTrackLineSearch(layer, sf, layer.getOptimizer());
    double step = lineSearch.optimize(layer.params(), layer.gradient().gradient(), layer.gradient().gradient());
    INDArray currParams = layer.params();
    sf.step(currParams, origGradient, step);
    layer.setParams(currParams);
    layer.computeGradientAndScore();
    score2 = layer.score();
    assertTrue("score1=" + score1 + ", score2=" + score2, score1 > score2);
}
Also used: OutputLayer (org.deeplearning4j.nn.layers.OutputLayer), BackTrackLineSearch (org.deeplearning4j.optimize.solvers.BackTrackLineSearch), INDArray (org.nd4j.linalg.api.ndarray.INDArray), NegativeDefaultStepFunction (org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction), Test (org.junit.Test)
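
Both line-search tests above follow the same calling pattern: compute the gradient and score, run the line search to get a fractional step size, then apply that step with the same step function. The snippet below is a condensed sketch of that pattern rather than project code; it assumes a layer whose input, labels, gradient and score have already been set, as in the test setup.

// Condensed calling pattern from Examples 4 and 5 (sketch; `layer` is assumed to have had
// setInput/setLabels/computeGradientAndScore() called already, as in the tests).
NegativeDefaultStepFunction stepFunction = new NegativeDefaultStepFunction();
BackTrackLineSearch lineSearch = new BackTrackLineSearch(layer, stepFunction, layer.getOptimizer());

// The tests use the raw gradient itself as the search direction.
INDArray searchDirection = layer.gradient().gradient().dup();
double step = lineSearch.optimize(layer.params(), layer.gradient().gradient(), searchDirection);

// Apply the accepted step along the negative search direction and push the result back into the layer.
INDArray params = layer.params();
stepFunction.step(params, searchDirection, step);
layer.setParams(params);
layer.computeGradientAndScore();   // for a minimization objective the new score should be lower, as Example 5 asserts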

Aggregations

NegativeDefaultStepFunction (org.deeplearning4j.optimize.stepfunctions.NegativeDefaultStepFunction): 6 uses
Test (org.junit.Test): 5 uses
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 5 uses
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 3 uses
ConvexOptimizer (org.deeplearning4j.optimize.api.ConvexOptimizer): 3 uses
StochasticGradientDescent (org.deeplearning4j.optimize.solvers.StochasticGradientDescent): 3 uses
BatchNormalization (org.deeplearning4j.nn.conf.layers.BatchNormalization): 2 uses
OutputLayer (org.deeplearning4j.nn.layers.OutputLayer): 2 uses
BackTrackLineSearch (org.deeplearning4j.optimize.solvers.BackTrackLineSearch): 2 uses
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 1 use
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 1 use
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 1 use
NegativeGradientStepFunction (org.deeplearning4j.nn.conf.stepfunctions.NegativeGradientStepFunction): 1 use
Level1 (org.nd4j.linalg.api.blas.Level1): 1 use
ScalarSetValue (org.nd4j.linalg.api.ops.impl.scalar.comparison.ScalarSetValue): 1 use
Eps (org.nd4j.linalg.api.ops.impl.transforms.comparison.Eps): 1 use