
Example 31 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testPretrain.

@Test
public void testPretrain() {
    double lr = 0.05;
    gradient.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    Gradient gradientDup2 = new DefaultGradient();
    gradientDup2.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup2.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    gradientDup2.setGradientFor(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbiasGradient.dup());
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).seed(42)
                    .layer(new org.deeplearning4j.nn.conf.layers.RBM.Builder()
                                    .lossFunction(LossFunctions.LossFunction.COSINE_PROXIMITY)
                                    .activation(Activation.IDENTITY)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD)
                                    .nIn(nIn).nOut(nOut).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    boolean preTrain = true;
    conf.setPretrain(preTrain);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        INDArray gradExpected = val.mul(lr);
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
    preTrain = false;
    conf.setPretrain(preTrain);
    gradient = gradientDup;
    params = Nd4j.create(1, numParams);
    layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    updater.update(layer, gradient, -1, 1);
    for (Map.Entry<String, INDArray> entry : gradientDup2.gradientForVariable().entrySet()) {
        INDArray val = entry.getValue();
        INDArray gradExpected;
        // With pretrain disabled, the visible bias ("vb") gradient is expected to be left unmodified
        if (!"vb".equals(entry.getKey()))
            gradExpected = val.mul(lr);
        else
            gradExpected = val;
        assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
    }
    assertEquals(lr, layer.conf().getLayer().getLearningRate(), 1e-4);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
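For reference, the expectation asserted in the pretrain loop above is plain SGD scaling: each gradient, including the visible bias, is just multiplied by the learning rate. A minimal standalone sketch of that expectation (not DL4J internals; assumes Nd4j and INDArray are imported):

INDArray grad = Nd4j.create(new double[] {0.1, -0.2, 0.3});
double lr = 0.05;
// Expected post-update gradient under plain SGD: the raw gradient scaled by the learning rate
INDArray sgdExpected = grad.mul(lr);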

Example 32 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testAdaDeltaUpdate.

@Test
public void testAdaDeltaUpdate() {
    INDArray dxSquared;
    Map<String, INDArray> msg = new HashMap<>();
    Map<String, INDArray> msdx = new HashMap<>();
    double rho = 0.85;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().rho(rho)
                    .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA)
                                    .epsilon(Nd4j.EPS_THRESHOLD).build())
                    .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int updaterStateSize = updater.stateSizeForLayer(layer);
    INDArray updaterState = Nd4j.create(1, updaterStateSize);
    updater.setStateViewArray(layer, updaterState, true);
    Gradient gradientDup = new DefaultGradient();
    gradientDup.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientDup.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (int i = 0; i < 2; i++) {
        updater.update(layer, gradient, i, 1);
        for (Map.Entry<String, INDArray> entry : gradientDup.gradientForVariable().entrySet()) {
            String key = entry.getKey();
            INDArray val = entry.getValue();
            INDArray msgTmp = msg.get(key);
            INDArray msdxTmp = msdx.get(key);
            if (msgTmp == null) {
                msgTmp = Nd4j.zeros(val.shape());
                msdxTmp = Nd4j.zeros(val.shape());
            }
            msgTmp.muli(rho);
            msgTmp.addi(val.mul(val).muli(1 - rho));
            INDArray gradExpected = Transforms.sqrt(msdxTmp.add(Nd4j.EPS_THRESHOLD))
                            .divi(Transforms.sqrt(msgTmp.add(Nd4j.EPS_THRESHOLD))).muli(val);
            gradientDup.setGradientFor(key, gradExpected);
            assertEquals(gradExpected, gradient.getGradientFor(entry.getKey()));
            msdxTmp.muli(rho);
            dxSquared = gradExpected.mul(gradExpected);
            msdxTmp.addi(dxSquared.muli(1 - rho));
            msg.put(key, msgTmp);
            msdx.put(key, msdxTmp);
        }
        assertEquals(rho, layer.conf().getLayer().getRho(), 1e-4);
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)
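The expected values recomputed in the loop above follow the standard AdaDelta rule: an exponential average of squared gradients (msg), an exponential average of squared updates (msdx), and an update equal to the gradient scaled by the ratio of their square roots. A scalar sketch of one step, shown only to make the formula explicit (the test applies the same thing elementwise on INDArrays):

double rho = 0.85, eps = 1e-6, grad = 0.1;
double msg = 0.0, msdx = 0.0;                                      // running averages, initially zero
msg = rho * msg + (1 - rho) * grad * grad;                         // E[g^2]
double dx = Math.sqrt(msdx + eps) / Math.sqrt(msg + eps) * grad;   // scaled update
msdx = rho * msdx + (1 - rho) * dx * dx;                           // E[dx^2]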

Example 33 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class TestUpdaters method testEpsilon.

@Test
public void testEpsilon() {
    //Test epsilon setting - adagrad
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).list()
                    .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
                    .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build())
                    .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build())
                    .build();
    assertEquals(1e-6, conf.getConf(0).getLayer().getEpsilon(), 0.0);
    assertEquals(0.123, conf.getConf(1).getLayer().getEpsilon(), 0.0);
    assertEquals(0.456, conf.getConf(2).getLayer().getEpsilon(), 0.0);
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    MultiLayerUpdater updater = (MultiLayerUpdater) net.getUpdater();
    Updater[] updaters = updater.getLayerUpdaters();
    LayerUpdater u0 = (LayerUpdater) updaters[0];
    AdaGrad adaGrad = (AdaGrad) u0.updaterForVariable.get("W");
    assertEquals(1e-6, adaGrad.getEpsilon(), 0.0);
    LayerUpdater u1 = (LayerUpdater) updaters[1];
    AdaGrad adaGrad1 = (AdaGrad) u1.updaterForVariable.get("W");
    assertEquals(0.123, adaGrad1.getEpsilon(), 0.0);
    LayerUpdater u2 = (LayerUpdater) updaters[2];
    AdaGrad adaGrad2 = (AdaGrad) u2.updaterForVariable.get("W");
    assertEquals(0.456, adaGrad2.getEpsilon(), 0.0);
    //Test epsilon setting - adadelta
    conf = new NeuralNetConfiguration.Builder()
                    .updater(org.deeplearning4j.nn.conf.Updater.ADADELTA).list()
                    .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
                    .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).epsilon(0.123).build())
                    .layer(2, new OutputLayer.Builder().nIn(2).nOut(2).epsilon(0.456).build())
                    .build();
    assertEquals(1e-6, conf.getConf(0).getLayer().getEpsilon(), 0.0);
    assertEquals(0.123, conf.getConf(1).getLayer().getEpsilon(), 0.0);
    assertEquals(0.456, conf.getConf(2).getLayer().getEpsilon(), 0.0);
    net = new MultiLayerNetwork(conf);
    net.init();
    updater = (MultiLayerUpdater) net.getUpdater();
    updaters = updater.getLayerUpdaters();
    LayerUpdater u0_2 = (LayerUpdater) updaters[0];
    AdaDelta adaDelta = (AdaDelta) u0_2.updaterForVariable.get("W");
    assertEquals(1e-6, adaDelta.getEpsilon(), 0.0);
    LayerUpdater u1_2 = (LayerUpdater) updaters[1];
    AdaDelta adaDelta1 = (AdaDelta) u1_2.updaterForVariable.get("W");
    assertEquals(0.123, adaDelta1.getEpsilon(), 0.0);
    LayerUpdater u2_2 = (LayerUpdater) updaters[2];
    AdaDelta adaDelta2 = (AdaDelta) u2_2.updaterForVariable.get("W");
    assertEquals(0.456, adaDelta2.getEpsilon(), 0.0);
}
Also used : OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) MultiLayerConfiguration(org.deeplearning4j.nn.conf.MultiLayerConfiguration) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Updater(org.deeplearning4j.nn.api.Updater) MultiLayerNetwork(org.deeplearning4j.nn.multilayer.MultiLayerNetwork) Test(org.junit.Test)
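The epsilon asserted here is the numerical-stability constant in the denominator of the AdaGrad (and, analogously, AdaDelta) update; it only keeps the division well-defined and barely affects the update otherwise. A scalar sketch of where it enters for AdaGrad, assuming a single accumulated squared gradient:

double lr = 0.1, eps = 1e-6, grad = 0.05;
double histGrad = 0.0;                                    // accumulated squared gradients
histGrad += grad * grad;
double update = lr * grad / (Math.sqrt(histGrad) + eps);  // epsilon keeps the denominator non-zero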

Example 34 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class GradientCheckUtil method checkGradientsPretrainLayer.

/**
     * Check backprop gradients for a pretrain layer
     *
     * NOTE: gradient checking pretrain layers can be difficult...
     */
public static boolean checkGradientsPretrainLayer(Layer layer, double epsilon, double maxRelError, double minAbsoluteError, boolean print, boolean exitOnFirstError, INDArray input, int rngSeed) {
    //Basic sanity checks on input:
    if (epsilon <= 0.0 || epsilon > 0.1)
        throw new IllegalArgumentException("Invalid epsilon: expect epsilon in range (0,0.1], usually 1e-4 or so");
    if (maxRelError <= 0.0 || maxRelError > 0.25)
        throw new IllegalArgumentException("Invalid maxRelativeError: " + maxRelError);
    //Check network configuration:
    int layerCount = 0;
    layer.setInput(input);
    Nd4j.getRandom().setSeed(rngSeed);
    layer.computeGradientAndScore();
    Pair<Gradient, Double> gradAndScore = layer.gradientAndScore();
    Updater updater = UpdaterCreator.getUpdater(layer);
    updater.update(layer, gradAndScore.getFirst(), 0, layer.batchSize());
    //need dup: gradients are a *view* of the full gradient array (which will change every time backprop is done)
    INDArray gradientToCheck = gradAndScore.getFirst().gradient().dup();
    //need dup: params are a *view* of full parameters
    INDArray originalParams = layer.params().dup();
    int nParams = originalParams.length();
    Map<String, INDArray> paramTable = layer.paramTable();
    List<String> paramNames = new ArrayList<>(paramTable.keySet());
    int[] paramEnds = new int[paramNames.size()];
    paramEnds[0] = paramTable.get(paramNames.get(0)).length();
    for (int i = 1; i < paramEnds.length; i++) {
        paramEnds[i] = paramEnds[i - 1] + paramTable.get(paramNames.get(i)).length();
    }
    int totalNFailures = 0;
    double maxError = 0.0;
    int currParamNameIdx = 0;
    //Assumption here: params is a view that we can modify in-place
    INDArray params = layer.params();
    for (int i = 0; i < nParams; i++) {
        //Get param name
        if (i >= paramEnds[currParamNameIdx]) {
            currParamNameIdx++;
        }
        String paramName = paramNames.get(currParamNameIdx);
        //(w+epsilon): Do forward pass and score
        double origValue = params.getDouble(i);
        params.putScalar(i, origValue + epsilon);
        //TODO add a 'score' method that doesn't calculate gradients...
        Nd4j.getRandom().setSeed(rngSeed);
        layer.computeGradientAndScore();
        double scorePlus = layer.score();
        //(w-epsilon): Do forward pass and score
        params.putScalar(i, origValue - epsilon);
        Nd4j.getRandom().setSeed(rngSeed);
        layer.computeGradientAndScore();
        double scoreMinus = layer.score();
        //Reset original param value
        params.putScalar(i, origValue);
        //Calculate numerical parameter gradient:
        double scoreDelta = scorePlus - scoreMinus;
        double numericalGradient = scoreDelta / (2 * epsilon);
        if (Double.isNaN(numericalGradient))
            throw new IllegalStateException("Numerical gradient was NaN for parameter " + i + " of " + nParams);
        double backpropGradient = gradientToCheck.getDouble(i);
        //http://cs231n.github.io/neural-networks-3/#gradcheck
        //use mean centered
        double relError = Math.abs(backpropGradient - numericalGradient) / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
        if (backpropGradient == 0.0 && numericalGradient == 0.0)
        //Edge case: e.g., RNNs with a time series length of 1
            relError = 0.0;
        if (relError > maxError)
            maxError = relError;
        if (relError > maxRelError || Double.isNaN(relError)) {
            double absError = Math.abs(backpropGradient - numericalGradient);
            if (absError < minAbsoluteError) {
                log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + "; absolute error = " + absError + " < minAbsoluteError = " + minAbsoluteError);
            } else {
                if (print)
                    log.info("Param " + i + " (" + paramName + ") FAILED: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError + ", scorePlus=" + scorePlus + ", scoreMinus= " + scoreMinus);
                if (exitOnFirstError)
                    return false;
                totalNFailures++;
            }
        } else if (print) {
            log.info("Param " + i + " (" + paramName + ") passed: grad= " + backpropGradient + ", numericalGrad= " + numericalGradient + ", relError= " + relError);
        }
    }
    if (print) {
        int nPass = nParams - totalNFailures;
        log.info("GradientCheckUtil.checkGradients(): " + nParams + " params checked, " + nPass + " passed, " + totalNFailures + " failed. Largest relative error = " + maxError);
    }
    return totalNFailures == 0;
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) ArrayList(java.util.ArrayList) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ComputationGraphUpdater(org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) Updater(org.deeplearning4j.nn.api.Updater)
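Stripped of the DL4J plumbing, the core of the method above is a central finite difference per parameter plus the mean-centered relative error from the CS231n notes. A minimal sketch, where scoreAt is a hypothetical helper that recomputes the loss with parameter i perturbed by the given delta:

// scoreAt(i, delta) is hypothetical: recompute the loss with parameter i shifted by delta
double scorePlus = scoreAt(i, +epsilon);
double scoreMinus = scoreAt(i, -epsilon);
double numericalGradient = (scorePlus - scoreMinus) / (2 * epsilon);
double relError = Math.abs(backpropGradient - numericalGradient)
                / (Math.abs(numericalGradient) + Math.abs(backpropGradient));
boolean passed = relError <= maxRelError
                || Math.abs(backpropGradient - numericalGradient) < minAbsoluteError;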

Example 35 with Updater

use of org.deeplearning4j.nn.api.Updater in project deeplearning4j by deeplearning4j.

the class LossLayer method fit.

/**
     * Fit the model
     *
     * @param input the examples to classify (one example in each row)
     * @param labels the example labels (a binary outcome matrix)
     */
@Override
public void fit(INDArray input, INDArray labels) {
    setInput(input);
    setLabels(labels);
    applyDropOutIfNecessary(true);
    if (solver == null) {
        solver = new Solver.Builder().configure(conf()).listeners(getListeners()).model(this).build();
        //Set the updater state view array. For MLN and CG, this is done by MultiLayerUpdater and ComputationGraphUpdater respectively
        Updater updater = solver.getOptimizer().getUpdater();
        int updaterStateSize = updater.stateSizeForLayer(this);
        if (updaterStateSize > 0)
            updater.setStateViewArray(this, Nd4j.createUninitialized(new int[] { 1, updaterStateSize }, Nd4j.order()), true);
    }
    solver.optimize();
}
Also used : Solver(org.deeplearning4j.optimize.Solver) Updater(org.deeplearning4j.nn.api.Updater)
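A minimal usage sketch of fit(), assuming lossLayer is an already-initialized LossLayer instance and the array shapes match its configuration (the names and shapes here are illustrative only):

INDArray features = Nd4j.rand(10, 4);  // hypothetical batch: 10 examples, 4 inputs
INDArray labels = Nd4j.zeros(10, 4);   // hypothetical label matrix of matching shape
lossLayer.fit(features, labels);       // first call also wires up the Solver and updater state, as above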

Aggregations

Updater (org.deeplearning4j.nn.api.Updater) 37
Test (org.junit.Test) 28
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer) 27
INDArray (org.nd4j.linalg.api.ndarray.INDArray) 27
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration) 25
Gradient (org.deeplearning4j.nn.gradient.Gradient) 25
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient) 23
Layer (org.deeplearning4j.nn.api.Layer) 21
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer) 18
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork) 9
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration) 8
ComputationGraphUpdater (org.deeplearning4j.nn.updater.graph.ComputationGraphUpdater) 5
HashMap (java.util.HashMap) 4
Solver (org.deeplearning4j.optimize.Solver) 4
ArrayList (java.util.ArrayList) 2
Field (java.lang.reflect.Field) 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger) 1
ZipEntry (java.util.zip.ZipEntry) 1
ZipFile (java.util.zip.ZipFile) 1
Persistable (org.deeplearning4j.api.storage.Persistable) 1