
Example 51 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class VariationalAutoencoder, the method backpropGradient:

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    if (!zeroedPretrainParamGradients) {
        for (Map.Entry<String, INDArray> entry : gradientViews.entrySet()) {
            if (isPretrainParam(entry.getKey())) {
                entry.getValue().assign(0);
            }
        }
        zeroedPretrainParamGradients = true;
    }
    Gradient gradient = new DefaultGradient();
    VAEFwdHelper fwd = doForward(true, true);
    INDArray currentDelta = pzxActivationFn.backprop(fwd.pzxMeanPreOut, epsilon).getFirst();
    //Gradients for the p(z|x) mean transform (PZX_MEAN_W, PZX_MEAN_B):
    INDArray meanW = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);
    //'f'-order view into the flattened gradients (Nd4j.gemm requires an 'f'-order result array)
    INDArray dLdMeanW = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);
    INDArray lastEncoderActivation = fwd.encoderActivations[fwd.encoderActivations.length - 1];
    Nd4j.gemm(lastEncoderActivation, currentDelta, dLdMeanW, true, false, 1.0, 0.0);
    INDArray dLdMeanB = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_B);
    //TODO: do this without the assign
    dLdMeanB.assign(currentDelta.sum(0));
    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_W, dLdMeanW);
    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_B, dLdMeanB);
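    //Propagate the error to the last encoder layer: epsilon = (W * delta^T)^T = delta * W^T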
    epsilon = meanW.mmul(currentDelta.transpose()).transpose();
    int nEncoderLayers = encoderLayerSizes.length;
    IActivation afn = conf().getLayer().getActivationFn();
    for (int i = nEncoderLayers - 1; i >= 0; i--) {
        String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
        String bKey = "e" + i + BIAS_KEY_SUFFIX;
        INDArray weights = params.get(wKey);
        INDArray dLdW = gradientViews.get(wKey);
        INDArray dLdB = gradientViews.get(bKey);
        INDArray preOut = fwd.encoderPreOuts[i];
        currentDelta = afn.backprop(preOut, epsilon).getFirst();
        INDArray actInput;
        if (i == 0) {
            actInput = input;
        } else {
            actInput = fwd.encoderActivations[i - 1];
        }
        Nd4j.gemm(actInput, currentDelta, dLdW, true, false, 1.0, 0.0);
        //TODO: do this without the assign
        dLdB.assign(currentDelta.sum(0));
        gradient.gradientForVariable().put(wKey, dLdW);
        gradient.gradientForVariable().put(bKey, dLdB);
        epsilon = weights.mmul(currentDelta.transpose()).transpose();
    }
    return new Pair<>(gradient, epsilon);
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), IActivation (org.nd4j.linalg.activations.IActivation), Pair (org.deeplearning4j.berkeley.Pair)
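
The recurring pattern in this method is worth isolating: each weight gradient is computed by Nd4j.gemm directly into a pre-allocated 'f'-order view (beta = 0.0 overwrites the view in place), and the view is then registered in a DefaultGradient. Below is a minimal, standalone sketch of that pattern; the names and shapes are illustrative, not taken from the layer above.

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class GradientViewSketch {
    public static void main(String[] args) {
        int miniBatch = 4, nIn = 3, nOut = 2;
        INDArray activations = Nd4j.rand(miniBatch, nIn); // input to the layer
        INDArray delta = Nd4j.rand(miniBatch, nOut);      // backpropagated error signal
        // Stand-in for a gradientViews entry: gemm needs an 'f'-order result array
        INDArray dLdW = Nd4j.create(new int[] {nIn, nOut}, 'f');
        // dL/dW = activations^T * delta, written straight into the view (no temporary copy)
        Nd4j.gemm(activations, delta, dLdW, true, false, 1.0, 0.0);

        Gradient gradient = new DefaultGradient();
        gradient.setGradientFor("W", dLdW);
        System.out.println(gradient.gradientForVariable().keySet()); // [W]
    }
}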

Example 52 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class GravesBidirectionalLSTM, the method backpropGradientHelper:

private Pair<Gradient, INDArray> backpropGradientHelper(final INDArray epsilon, final boolean truncatedBPTT, final int tbpttBackwardLength) {
    if (truncatedBPTT) {
        throw new UnsupportedOperationException("Truncated BPTT is not supported for bidirectional RNNs: the full sequence must be processed in a single pass");
    }
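    //Forward pass through the forwards-direction half (the final flag selects the direction; cf. backPass below)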
    final FwdPassReturn fwdPass = activateHelperDirectional(true, null, null, true, true);
    final Pair<Gradient, INDArray> forwardsGradient = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS), epsilon, truncatedBPTT, tbpttBackwardLength, fwdPass, true, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS, GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS, GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS, gradientViews, maskArray);
    final FwdPassReturn backPass = activateHelperDirectional(true, null, null, true, false);
    final Pair<Gradient, INDArray> backwardsGradient = LSTMHelpers.backpropGradientHelper(this.conf, this.layerConf().getGateActivationFn(), this.input, getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS), getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS), epsilon, truncatedBPTT, tbpttBackwardLength, backPass, false, GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS, GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS, GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS, gradientViews, maskArray);
    //Merge the two gradients (maps of variable name -> INDArray);
    //the forwards and backwards parameter keys are distinct, so nothing collides
    final Gradient combinedGradient = new DefaultGradient();
    for (Map.Entry<String, INDArray> entry : forwardsGradient.getFirst().gradientForVariable().entrySet()) {
        combinedGradient.setGradientFor(entry.getKey(), entry.getValue());
    }
    for (Map.Entry<String, INDArray> entry : backwardsGradient.getFirst().gradientForVariable().entrySet()) {
        combinedGradient.setGradientFor(entry.getKey(), entry.getValue());
    }
    final Gradient correctOrderedGradient = new DefaultGradient();
    for (final String key : params.keySet()) {
        correctOrderedGradient.setGradientFor(key, combinedGradient.getGradientFor(key));
    }
    final INDArray forwardEpsilon = forwardsGradient.getSecond();
    final INDArray backwardsEpsilon = backwardsGradient.getSecond();
    final INDArray combinedEpsilon = forwardEpsilon.addi(backwardsEpsilon);
    //sum the errors that were back-propagated
    return new Pair<>(correctOrderedGradient, combinedEpsilon);
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Map (java.util.Map), Pair (org.deeplearning4j.berkeley.Pair)
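
The merge step relies only on DefaultGradient's insertion-ordered backing map. Here is a hedged sketch of the same merge, using hypothetical keys "W_f" and "W_b" in place of the real forwards/backwards parameter names:

import java.util.Map;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class MergeGradientsSketch {
    public static void main(String[] args) {
        Gradient forwards = new DefaultGradient();
        forwards.setGradientFor("W_f", Nd4j.rand(3, 2)); // hypothetical forwards-direction key
        Gradient backwards = new DefaultGradient();
        backwards.setGradientFor("W_b", Nd4j.rand(3, 2)); // hypothetical backwards-direction key

        // Keys are distinct, so copying both maps into one gradient loses nothing
        Gradient combined = new DefaultGradient();
        for (Map.Entry<String, INDArray> e : forwards.gradientForVariable().entrySet())
            combined.setGradientFor(e.getKey(), e.getValue());
        for (Map.Entry<String, INDArray> e : backwards.gradientForVariable().entrySet())
            combined.setGradientFor(e.getKey(), e.getValue());
        System.out.println(combined.gradientForVariable().keySet()); // [W_f, W_b]
    }
}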

Example 53 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class CenterLossOutputLayer, the method getGradientsAndDelta:

/** Returns the pair {Gradient, Delta} for the given preOut activations. */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d();
    if (labels2d.size(1) != preOut.size(1)) {
        throw new DL4JInvalidInputException("Labels array numColumns (size(1) = " + labels2d.size(1) + ") does not match output layer" + " number of outputs (nOut = " + preOut.size(1) + ")");
    }
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);
    Gradient gradient = new DefaultGradient();
    INDArray weightGradView = gradientViews.get(CenterLossParamInitializer.WEIGHT_KEY);
    INDArray biasGradView = gradientViews.get(CenterLossParamInitializer.BIAS_KEY);
    INDArray centersGradView = gradientViews.get(CenterLossParamInitializer.CENTER_KEY);
    // centers delta
    double alpha = layerConf().getAlpha();
    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray centersForExamples = labels.mmul(centers);
    INDArray diff = centersForExamples.sub(input).muli(alpha);
    INDArray numerator = labels.transpose().mmul(diff);
    INDArray denominator = labels.sum(0).addi(1.0).transpose();
    INDArray deltaC;
    if (layerConf().getGradientCheck()) {
        double lambda = layerConf().getLambda();
        //For gradient checks: need to multiply dLc/dcj by lambda to get dL/dcj
        deltaC = numerator.muli(lambda);
    } else {
        deltaC = numerator.diviColumnVector(denominator);
    }
    centersGradView.assign(deltaC);
    // other standard calculations
    //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0);
    biasGradView.assign(delta.sum(0));
    gradient.gradientForVariable().put(CenterLossParamInitializer.WEIGHT_KEY, weightGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.BIAS_KEY, biasGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.CENTER_KEY, centersGradView);
    return new Pair<>(gradient, delta);
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), INDArray (org.nd4j.linalg.api.ndarray.INDArray), ILossFunction (org.nd4j.linalg.lossfunctions.ILossFunction), DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException), Pair (org.deeplearning4j.berkeley.Pair)
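
The centers update above is per-class averaging of alpha-scaled (center - example) differences: deltaC_j = alpha * sum over examples i of class j of (c_j - x_i), divided by (n_j + 1). A small numeric sketch of that arithmetic, assuming one-hot labels (all names and values here are illustrative):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class CenterDeltaSketch {
    public static void main(String[] args) {
        double alpha = 0.1;
        // Two examples, two classes (one-hot labels), two features
        INDArray labels = Nd4j.create(new double[][] {{1, 0}, {0, 1}});
        INDArray input = Nd4j.create(new double[][] {{1.0, 2.0}, {3.0, 4.0}});
        INDArray centers = Nd4j.create(new double[][] {{0.5, 0.5}, {2.0, 2.0}});

        INDArray centersForExamples = labels.mmul(centers);          // row i: center of example i's class
        INDArray diff = centersForExamples.sub(input).muli(alpha);   // alpha * (c_y - x)
        INDArray numerator = labels.transpose().mmul(diff);          // per-class sum of differences
        INDArray denominator = labels.sum(0).addi(1.0).transpose();  // per-class example count + 1
        INDArray deltaC = numerator.diviColumnVector(denominator);
        System.out.println(deltaC);
    }
}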

Example 54 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class MultiLayerNetwork, the method truncatedBPTTGradient:

/** Equivalent to backprop(), but calculates gradient for truncated BPTT instead. */
protected void truncatedBPTTGradient() {
    if (flattenedGradients == null)
        initGradientsView();
    String multiGradientKey;
    gradient = new DefaultGradient();
    Layer currLayer;
    if (!(getOutputLayer() instanceof IOutputLayer)) {
        log.warn("Warning: final layer isn't output layer. You cannot use backprop (truncated BPTT) without an output layer.");
        return;
    }
    IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
    if (labels == null)
        throw new IllegalStateException("No labels found");
    if (outputLayer.conf().getLayer().getWeightInit() == WeightInit.ZERO) {
        throw new IllegalStateException("Output layer weights cannot be initialized to zero when using backprop.");
    }
    outputLayer.setLabels(labels);
    //calculate and apply the backward gradient for every layer
    int numLayers = getnLayers();
    //Store gradients in a list to preserve iteration order in the DefaultGradient linked hash map (layer 0 first rather than the output layer)
    LinkedList<Pair<String, INDArray>> gradientList = new LinkedList<>();
    Pair<Gradient, INDArray> currPair = outputLayer.backpropGradient(null);
    for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
        multiGradientKey = String.valueOf(numLayers - 1) + "_" + entry.getKey();
        gradientList.addLast(new Pair<>(multiGradientKey, entry.getValue()));
    }
    if (getLayerWiseConfigurations().getInputPreProcess(numLayers - 1) != null)
        currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(numLayers - 1).backprop(currPair.getSecond(), getInputMiniBatchSize()));
    // Calculate gradients for the remaining layers, working backwards (the output layer was handled above)
    for (int j = numLayers - 2; j >= 0; j--) {
        currLayer = getLayer(j);
        if (currLayer instanceof RecurrentLayer) {
            currPair = ((RecurrentLayer) currLayer).tbpttBackpropGradient(currPair.getSecond(), layerWiseConfigurations.getTbpttBackLength());
        } else {
            currPair = currLayer.backpropGradient(currPair.getSecond());
        }
        LinkedList<Pair<String, INDArray>> tempList = new LinkedList<>();
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            multiGradientKey = String.valueOf(j) + "_" + entry.getKey();
            tempList.addFirst(new Pair<>(multiGradientKey, entry.getValue()));
        }
        for (Pair<String, INDArray> pair : tempList) gradientList.addFirst(pair);
        //Pass epsilon through input processor before passing to next layer (if applicable)
        if (getLayerWiseConfigurations().getInputPreProcess(j) != null)
            currPair = new Pair<>(currPair.getFirst(), getLayerWiseConfigurations().getInputPreProcess(j).backprop(currPair.getSecond(), getInputMiniBatchSize()));
    }
    //Add gradients to Gradients, in correct order
    for (Pair<String, INDArray> pair : gradientList) gradient.setGradientFor(pair.getFirst(), pair.getSecond());
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer), FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer), RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Pair (org.deeplearning4j.berkeley.Pair)
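
The LinkedList bookkeeping exists only because DefaultGradient stores gradientForVariable() in a linked hash map, so insertion order is iteration order. A minimal sketch of the addFirst trick that puts layer 0 ahead of the output layer, with illustrative keys:

import java.util.LinkedList;
import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.factory.Nd4j;

public class GradientOrderSketch {
    public static void main(String[] args) {
        LinkedList<String> keys = new LinkedList<>();
        // Backprop visits layers from the output backwards; addFirst restores forward order
        for (int j = 2; j >= 0; j--) keys.addFirst(j + "_W");

        Gradient gradient = new DefaultGradient();
        for (String key : keys) gradient.setGradientFor(key, Nd4j.zeros(1, 1));
        System.out.println(gradient.gradientForVariable().keySet()); // [0_W, 1_W, 2_W]
    }
}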

Example 55 with DefaultGradient

Use of org.deeplearning4j.nn.gradient.DefaultGradient in the deeplearning4j project.

From the class MultiLayerNetwork, the method calcBackpropGradients:

/** Calculate gradients and errors. Used in two places:
     * (a) backprop (for standard multi layer network learning)
     * (b) backpropGradient (layer method, for when MultiLayerNetwork is used as a layer)
     * @param epsilon Errors (technically errors .* activations). Not used if withOutputLayer = true
     * @param withOutputLayer if true: assume last layer is output layer, and calculate errors based on labels. In this
     *                        case, the epsilon input is not used (may/should be null).
     *                        If false: calculate backprop gradients
     * @return Gradients and the error (epsilon) at the input
     */
protected Pair<Gradient, INDArray> calcBackpropGradients(INDArray epsilon, boolean withOutputLayer) {
    if (flattenedGradients == null)
        initGradientsView();
    String multiGradientKey;
    Gradient gradient = new DefaultGradient(flattenedGradients);
    Layer currLayer;
    //calculate and apply the backward gradient for every layer
    /**
         * Skip the output layer for the indexing and just loop backwards, updating the coefficients for each layer
         * (when withOutputLayer == true).
         *
         * Activate applies the activation function for each layer and sets that as the input for the following layer.
         *
         * The typical textbook form of the error calculation is epsilon = W^T * delta; this implementation
         * transposes a few things to handle mini-batches, because ND4J organizes parameters as rows vs. columns.
         */
    int numLayers = getnLayers();
    //Store gradients in a list to preserve iteration order in the DefaultGradient linked hash map (layer 0 first rather than the output layer)
    LinkedList<Triple<String, INDArray, Character>> gradientList = new LinkedList<>();
    int layerFrom;
    Pair<Gradient, INDArray> currPair;
    if (withOutputLayer) {
        if (!(getOutputLayer() instanceof IOutputLayer)) {
            log.warn("Warning: final layer isn't output layer. You cannot use backprop without an output layer.");
            return null;
        }
        IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
        if (labels == null)
            throw new IllegalStateException("No labels found");
        outputLayer.setLabels(labels);
        currPair = outputLayer.backpropGradient(null);
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            String origName = entry.getKey();
            multiGradientKey = String.valueOf(numLayers - 1) + "_" + origName;
            gradientList.addLast(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
        }
        if (getLayerWiseConfigurations().getInputPreProcess(numLayers - 1) != null)
            currPair = new Pair<>(currPair.getFirst(), this.layerWiseConfigurations.getInputPreProcess(numLayers - 1).backprop(currPair.getSecond(), getInputMiniBatchSize()));
        layerFrom = numLayers - 2;
    } else {
        currPair = new Pair<>(null, epsilon);
        layerFrom = numLayers - 1;
    }
    // Calculate gradients for the remaining layers, working backwards (the output layer was handled above)
    for (int j = layerFrom; j >= 0; j--) {
        currLayer = getLayer(j);
        if (currLayer instanceof FrozenLayer)
            break;
        currPair = currLayer.backpropGradient(currPair.getSecond());
        LinkedList<Triple<String, INDArray, Character>> tempList = new LinkedList<>();
        for (Map.Entry<String, INDArray> entry : currPair.getFirst().gradientForVariable().entrySet()) {
            String origName = entry.getKey();
            multiGradientKey = String.valueOf(j) + "_" + origName;
            tempList.addFirst(new Triple<>(multiGradientKey, entry.getValue(), currPair.getFirst().flatteningOrderForVariable(origName)));
        }
        for (Triple<String, INDArray, Character> triple : tempList) gradientList.addFirst(triple);
        //Pass epsilon through input processor before passing to next layer (if applicable)
        if (getLayerWiseConfigurations().getInputPreProcess(j) != null)
            currPair = new Pair<>(currPair.getFirst(), getLayerWiseConfigurations().getInputPreProcess(j).backprop(currPair.getSecond(), getInputMiniBatchSize()));
    }
    //Add gradients to Gradients (map), in correct order
    for (Triple<String, INDArray, Character> triple : gradientList) {
        gradient.setGradientFor(triple.getFirst(), triple.getSecond(), triple.getThird());
    }
    return new Pair<>(gradient, currPair.getSecond());
}
Also used: Gradient (org.deeplearning4j.nn.gradient.Gradient), DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient), FeedForwardLayer (org.deeplearning4j.nn.conf.layers.FeedForwardLayer), FrozenLayer (org.deeplearning4j.nn.layers.FrozenLayer), IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer), RecurrentLayer (org.deeplearning4j.nn.api.layers.RecurrentLayer), Triple (org.deeplearning4j.berkeley.Triple), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Pair (org.deeplearning4j.berkeley.Pair)
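
Unlike the previous example, this one constructs the DefaultGradient around the pre-flattened gradient array and registers each variable together with its flattening order ('c' or 'f'). A hedged sketch of that constructor and the three-argument setGradientFor; note that in the real network the per-variable arrays are views into the flattened buffer, which the Nd4j.rand stand-ins below are not:

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class FlattenedGradientSketch {
    public static void main(String[] args) {
        // One buffer holding all gradients, as initGradientsView() sets up in the network
        INDArray flattened = Nd4j.zeros(1, 6);
        Gradient gradient = new DefaultGradient(flattened);
        // Record each variable's flattening order alongside its gradient
        gradient.setGradientFor("0_W", Nd4j.rand(2, 2), 'f');
        gradient.setGradientFor("0_b", Nd4j.rand(1, 2), 'f');
        // With a backing array present, gradient() can return it directly rather than
        // flattening and concatenating the per-variable arrays
        System.out.println(gradient.gradient());
    }
}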

Aggregations

DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 59 usages
Gradient (org.deeplearning4j.nn.gradient.Gradient): 58 usages
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 56 usages
Test (org.junit.Test): 26 usages
Pair (org.deeplearning4j.berkeley.Pair): 23 usages
Updater (org.deeplearning4j.nn.api.Updater): 23 usages
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 22 usages
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 22 usages
Layer (org.deeplearning4j.nn.api.Layer): 20 usages
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 16 usages
HashMap (java.util.HashMap): 5 usages
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 4 usages
Allocator (org.nd4j.jita.allocator.Allocator): 4 usages
AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator): 4 usages
IActivation (org.nd4j.linalg.activations.IActivation): 4 usages
GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner): 4 usages
CudaContext (org.nd4j.linalg.jcublas.context.CudaContext): 4 usages
Map (java.util.Map): 3 usages
DoublePointer (org.bytedeco.javacpp.DoublePointer): 3 usages
FloatPointer (org.bytedeco.javacpp.FloatPointer): 3 usages