Example 1 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class MultiLayerUpdater, the method update:

@Override
public void update(Layer layer, Gradient gradient, int iteration, int batchSize) {
    MultiLayerNetwork mln = (MultiLayerNetwork) layer;
    Gradient[] layerGradients = new Gradient[layerUpdaters.length];
    for (int i = 0; i < layerGradients.length; i++) layerGradients[i] = new DefaultGradient();
    for (Map.Entry<String, INDArray> gradientPair : gradient.gradientForVariable().entrySet()) {
        String key = gradientPair.getKey();
        int idx = key.indexOf('_');
        if (idx == -1)
            throw new IllegalStateException("Invalid key: MultiLayerNetwork Gradient key does not have layer separator: \"" + key + "\"");
        int layerIdx = Integer.parseInt(key.substring(0, idx));
        String newKey = key.substring(idx + 1);
        layerGradients[layerIdx].gradientForVariable().put(newKey, gradientPair.getValue());
    }
    for (int i = 0; i < layerUpdaters.length; i++) {
        layerUpdaters[i].update(mln.getLayer(i), layerGradients[i], iteration, batchSize);
    }
}
Also used: DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), Gradient (org.deeplearning4j.nn.gradient.Gradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Map (java.util.Map)
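
The update method above relies on MultiLayerNetwork's gradient-key convention: each variable name is prefixed with its layer index, separated by an underscore ("layerIndex_parameterName"). Below is a minimal, standalone sketch of that splitting logic; the key "2_W" is an illustrative value, not taken from the example.

public class GradientKeySketch {
    public static void main(String[] args) {
        String key = "2_W"; // hypothetical key: layer index 2, weight parameter "W"
        int idx = key.indexOf('_');
        if (idx == -1)
            throw new IllegalStateException("No layer separator in key: " + key);
        int layerIdx = Integer.parseInt(key.substring(0, idx)); // 2
        String paramKey = key.substring(idx + 1);               // "W"
        System.out.println("layer " + layerIdx + ", parameter " + paramKey);
    }
}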

Example 2 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class ComputationGraph, the method calcBackpropGradients:

/**
 * Do backprop (gradient calculation).
 *
 * @param truncatedBPTT    false: normal backprop. true: calculate gradients using truncated BPTT for RNN layers
 * @param externalEpsilons usually null (for typical supervised learning). If not null (and length > 0), assume
 *                         the user has provided errors externally, as in reinforcement learning, for example.
 */
protected void calcBackpropGradients(boolean truncatedBPTT, INDArray... externalEpsilons) {
    if (flattenedGradients == null)
        initGradientsView();
    LinkedList<Triple<String, INDArray, Character>> gradients = new LinkedList<>();
    //Do backprop according to the reverse of the topological ordering of the network
    //If true: already set epsilon for this vertex; later epsilons should be *added* to the existing one, not set
    boolean[] setVertexEpsilon = new boolean[topologicalOrder.length];
    for (int i = topologicalOrder.length - 1; i >= 0; i--) {
        GraphVertex current = vertices[topologicalOrder[i]];
        if (current.isInputVertex())
            //No op
            continue;
        //FIXME: make the frozen vertex feature extraction more flexible
        if (current.hasLayer() && current.getLayer() instanceof FrozenLayer)
            break;
        if (current.isOutputVertex()) {
            //Two reasons for a vertex to be an output vertex:
            //(a) it's an output layer (i.e., instanceof IOutputLayer), or
            //(b) it's a normal layer, but it has been marked as an output layer for use in external errors - for reinforcement learning, for example
            int thisOutputNumber = configuration.getNetworkOutputs().indexOf(current.getVertexName());
            if (current.getLayer() instanceof IOutputLayer) {
                IOutputLayer outputLayer = (IOutputLayer) current.getLayer();
                INDArray currLabels = labels[thisOutputNumber];
                outputLayer.setLabels(currLabels);
            } else {
                current.setEpsilon(externalEpsilons[thisOutputNumber]);
                setVertexEpsilon[topologicalOrder[i]] = true;
            }
        }
        Pair<Gradient, INDArray[]> pair = current.doBackward(truncatedBPTT);
        INDArray[] epsilons = pair.getSecond();
        //Inputs to the current GraphVertex:
        VertexIndices[] inputVertices = current.getInputVertices();
        //Set epsilons for the vertices that provide inputs to this vertex:
        if (inputVertices != null) {
            int j = 0;
            for (VertexIndices v : inputVertices) {
                GraphVertex gv = vertices[v.getVertexIndex()];
                if (setVertexEpsilon[gv.getVertexIndex()]) {
                    //This vertex outputs to multiple vertices, so the epsilons must be added together
                    INDArray currentEps = gv.getEpsilon();
                    //TODO: in some circumstances it may be safe to do an in-place add (but not always)
                    gv.setEpsilon(currentEps.add(epsilons[j++]));
                } else {
                    gv.setEpsilon(epsilons[j++]);
                }
                setVertexEpsilon[gv.getVertexIndex()] = true;
            }
        }
        if (pair.getFirst() != null) {
            Gradient g = pair.getFirst();
            Map<String, INDArray> map = g.gradientForVariable();
            LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
            for (Map.Entry<String, INDArray> entry : map.entrySet()) {
                String origName = entry.getKey();
                String newName = current.getVertexName() + "_" + origName;
                tempList.addFirst(new Triple<>(newName, entry.getValue(), g.flatteningOrderForVariable(origName)));
            }
            for (Triple<String, INDArray, Character> t : tempList) gradients.addFirst(t);
        }
    }
    //Now, add the gradients in the order we need them in for flattening (same as params order)
    Gradient gradient = new DefaultGradient(flattenedGradients);
    for (Triple<String, INDArray, Character> t : gradients) {
        gradient.setGradientFor(t.getFirst(), t.getSecond(), t.getThird());
    }
    this.gradient = gradient;
}
Also used: DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), Gradient (org.deeplearning4j.nn.gradient.Gradient), Triple (org.deeplearning4j.berkeley.Triple), FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer), GraphVertex (org.deeplearning4j.nn.graph.vertex.GraphVertex), INDArray (org.nd4j.linalg.api.ndarray.INDArray), VertexIndices (org.deeplearning4j.nn.graph.vertex.VertexIndices), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)
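
The setVertexEpsilon bookkeeping above encodes a simple rule: the first consumer of a vertex's output sets that vertex's epsilon, and each later consumer adds to it, because a vertex that feeds several vertices receives one error contribution per consumer. Below is a minimal sketch of that rule using plain arrays; the contribution values are hypothetical and this is not the real GraphVertex API.

import java.util.Arrays;

public class EpsilonAccumulationSketch {
    public static void main(String[] args) {
        // Hypothetical epsilon contributions flowing back from two consumers
        // of the same vertex during the reverse topological pass.
        double[][] contributions = { { 0.1, -0.2 }, { 0.3, 0.4 } };
        double[] epsilon = null;
        boolean set = false; // mirrors setVertexEpsilon[idx]
        for (double[] c : contributions) {
            if (!set) {
                epsilon = c.clone(); // first consumer: set
                set = true;
            } else {
                for (int i = 0; i < epsilon.length; i++)
                    epsilon[i] += c[i]; // later consumers: add
            }
        }
        System.out.println(Arrays.toString(epsilon)); // prints [0.4, 0.2]
    }
}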

Example 3 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class TestDecayPolicies, the method testLearningRateSigmoidDecaySingleLayer:

@Test
public void testLearningRateSigmoidDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double steps = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .learningRateDecayPolicy(LearningRatePolicy.Sigmoid)
            .lrPolicyDecayRate(decayRate)
            .lrPolicySteps(steps)
            .iterations(iterations)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcSigmoidDecay(layer.conf().getLearningRateByParam("W"), decayRate, i, steps);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Updater (org.deeplearning4j.nn.api.Updater), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Layer (org.deeplearning4j.nn.api.Layer), OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer), DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer), Test (org.junit.Test)
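
The helper calcSigmoidDecay is defined elsewhere in TestDecayPolicies and is not shown here. A plausible reading, assuming the Caffe-style sigmoid policy lr / (1 + exp(-decayRate * (iteration - steps))), is sketched below; treat the formula as an assumption about the helper, not its verified source.

public class SigmoidDecaySketch {
    // Assumed Caffe-style sigmoid learning-rate decay; not the verified
    // DL4J implementation of calcSigmoidDecay.
    static double sigmoidDecay(double lr, double decayRate, int iteration, double steps) {
        return lr / (1.0 + Math.exp(-decayRate * (iteration - steps)));
    }

    public static void main(String[] args) {
        double lr = 1e-2, decayRate = 2, steps = 3; // values from the test above
        for (int i = 0; i < 2; i++)
            System.out.printf("iteration %d: lr = %.6f%n", i, sigmoidDecay(lr, decayRate, i, steps));
    }
}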

Example 4 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class TestDecayPolicies, the method testMomentumScheduleMLN:

@Test
public void testMomentumScheduleMLN() {
    double lr = 1e-2;
    double mu = 0.6;
    Map<Integer, Double> momentumAfter = new HashMap<>();
    momentumAfter.put(1, 0.2);
    int iterations = 2;
    int[] nIns = { 4, 2 };
    int[] nOuts = { 2, 3 };
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .momentum(mu)
            .momentumAfter(momentumAfter)
            .iterations(iterations)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0])
                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
            .layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1])
                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
            .backprop(true).pretrain(false)
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    Updater updater = UpdaterCreator.getUpdater(net);
    int stateSize = updater.stateSizeForLayer(net);
    updater.setStateViewArray(net, Nd4j.create(1, stateSize), true);
    String wKey, bKey;
    Gradient gradientExpected = new DefaultGradient();
    for (int k = 0; k < net.getnLayers(); k++) {
        wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
        gradientExpected.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
        bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
        gradientExpected.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
    }
    Gradient gradientMLN = new DefaultGradient();
    for (int j = 0; j < 2; j++) {
        wKey = String.valueOf(j) + "_" + DefaultParamInitializer.WEIGHT_KEY;
        gradientMLN.setGradientFor(wKey, Nd4j.ones(nIns[j], nOuts[j]));
        bKey = String.valueOf(j) + "_" + DefaultParamInitializer.BIAS_KEY;
        gradientMLN.setGradientFor(bKey, Nd4j.ones(1, nOuts[j]));
    }
    for (int i = 0; i < 2; i++) {
        updater.update(net, gradientMLN, i, 1);
        mu = testNesterovsComputation(gradientMLN, gradientExpected, lr, mu, momentumAfter, i);
        assertEquals(mu, net.getLayer(1).conf().getLayer().getMomentum(), 1e-4);
    }
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), HashMap (java.util.HashMap), MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration), DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer), Updater (org.deeplearning4j.nn.api.Updater), MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork), Test (org.junit.Test)
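
The momentumAfter map acts as a schedule: from the listed iteration onward, the listed value replaces the configured momentum, which is why the test expects momentum to fall from 0.6 to 0.2 at iteration 1. Below is a minimal sketch of that lookup; the loop bound of 3 is illustrative.

import java.util.HashMap;
import java.util.Map;

public class MomentumScheduleSketch {
    public static void main(String[] args) {
        double mu = 0.6; // configured momentum
        Map<Integer, Double> momentumAfter = new HashMap<>();
        momentumAfter.put(1, 0.2); // from iteration 1 onward, use momentum 0.2
        for (int i = 0; i < 3; i++) {
            if (momentumAfter.containsKey(i))
                mu = momentumAfter.get(i); // schedule overrides the current value
            System.out.println("iteration " + i + ": momentum = " + mu);
        }
    }
}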

Example 5 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class TestDecayPolicies, the method testLearningRateExponentialDecaySingleLayer:

@Test
public void testLearningRateExponentialDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
            .learningRate(lr)
            .learningRateDecayPolicy(LearningRatePolicy.Exponential)
            .lrPolicyDecayRate(decayRate)
            .iterations(iterations)
            .layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut)
                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
            .build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcExponentialDecay(lr, decayRate, i);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Updater (org.deeplearning4j.nn.api.Updater), NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration), Layer (org.deeplearning4j.nn.api.Layer), OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer), DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer), Test (org.junit.Test)
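
As with the sigmoid test, calcExponentialDecay is a helper defined elsewhere in the test class. Assuming the Caffe-style exponential policy lr * decayRate^iteration, a sketch follows; again, the formula is an assumption about the helper, not its verified source.

public class ExponentialDecaySketch {
    // Assumed Caffe-style exponential learning-rate decay; not the verified
    // DL4J implementation of calcExponentialDecay.
    static double exponentialDecay(double lr, double decayRate, int iteration) {
        return lr * Math.pow(decayRate, iteration);
    }

    public static void main(String[] args) {
        double lr = 1e-2, decayRate = 2; // values from the test above
        for (int i = 0; i < 2; i++)
            System.out.printf("iteration %d: lr = %.6f%n", i, exponentialDecay(lr, decayRate, i));
    }
}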

Aggregations

DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 59 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 58 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 56 usages
Test (org.junit.Test): 26 usages
Pair (org.deeplearning4j.berkeley.Pair): 23 usages
Updater (org.deeplearning4j.nn.api.Updater): 23 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 22 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 22 usages
Layer (org.deeplearning4j.nn.api.Layer): 20 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 16 usages
HashMap (java.util.HashMap): 5 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 4 usages
Allocator (org.nd4j.jita.allocator.Allocator): 4 usages
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 4 usages
IActivation (org.nd4j.linalg.activations.IActivation): 4 usages
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 4 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 4 usages
Map (java.util.Map): 3 usages
DoublePointer (org.bytedeco.javacpp.DoublePointer): 3 usages
FloatPointer (org.bytedeco.javacpp.FloatPointer): 3 usages