Search in sources :

Example 91 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class VariationalAutoencoder method backpropGradient.

/**
 * Backpropagates the supplied epsilon through the p(z|x) mean layer and then through
 * every encoder layer, from the last encoder layer down to the first.
 *
 * <p>Weight and bias gradients are written into the arrays held in {@code gradientViews}
 * and registered in the returned {@link Gradient} under the same parameter keys.
 * On the first call, the gradient views of all pretrain-only parameters are set to zero.
 *
 * @param epsilon error signal arriving at this layer's p(z|x) mean output
 * @return pair of (parameter gradients, epsilon to hand to the layer below)
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    // One-time reset of the gradient views belonging to pretrain-only parameters.
    if (!zeroedPretrainParamGradients) {
        for (Map.Entry<String, INDArray> view : gradientViews.entrySet()) {
            if (!isPretrainParam(view.getKey())) {
                continue;
            }
            view.getValue().assign(0);
        }
        zeroedPretrainParamGradients = true;
    }

    Gradient result = new DefaultGradient();
    VAEFwdHelper forward = doForward(true, true);

    // ---- p(z|x) mean layer ----
    INDArray delta = pzxActivationFn.backprop(forward.pzxMeanPreOut, epsilon).getFirst();
    INDArray pzxMeanWeights = params.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);
    // Original note: "f order" (presumably the expected ordering of this view - TODO confirm)
    INDArray pzxMeanWeightGrad = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);
    INDArray topEncoderActivation = forward.encoderActivations[forward.encoderActivations.length - 1];
    // Writes transpose(topEncoderActivation) x delta directly into the weight gradient view.
    Nd4j.gemm(topEncoderActivation, delta, pzxMeanWeightGrad, true, false, 1.0, 0.0);
    INDArray pzxMeanBiasGrad = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_B);
    //TODO: do this without the assign
    pzxMeanBiasGrad.assign(delta.sum(0));
    result.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_W, pzxMeanWeightGrad);
    result.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_B, pzxMeanBiasGrad);
    epsilon = pzxMeanWeights.mmul(delta.transpose()).transpose();

    // ---- encoder layers, last to first ----
    int encoderCount = encoderLayerSizes.length;
    IActivation encoderActivation = conf().getLayer().getActivationFn();
    for (int layer = encoderCount - 1; layer >= 0; layer--) {
        String weightKey = "e" + layer + WEIGHT_KEY_SUFFIX;
        String biasKey = "e" + layer + BIAS_KEY_SUFFIX;
        INDArray layerWeights = params.get(weightKey);
        INDArray weightGrad = gradientViews.get(weightKey);
        INDArray biasGrad = gradientViews.get(biasKey);

        delta = encoderActivation.backprop(forward.encoderPreOuts[layer], epsilon).getFirst();

        // The first encoder layer is fed by the layer input; the others by the previous activation.
        INDArray layerInput = (layer == 0) ? input : forward.encoderActivations[layer - 1];
        Nd4j.gemm(layerInput, delta, weightGrad, true, false, 1.0, 0.0);
        //TODO: do this without the assign
        biasGrad.assign(delta.sum(0));

        result.gradientForVariable().put(weightKey, weightGrad);
        result.gradientForVariable().put(biasKey, biasGrad);

        epsilon = layerWeights.mmul(delta.transpose()).transpose();
    }
    return new Pair<>(result, epsilon);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) IActivation(org.nd4j.linalg.activations.IActivation) Pair(org.deeplearning4j.berkeley.Pair)

Example 92 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class GravesBidirectionalLSTM method backpropGradientHelper.

/**
 * Runs backprop for both directions of the bidirectional LSTM and merges the results.
 *
 * <p>The forward-direction and backward-direction gradients are combined into one
 * {@link Gradient} whose entries follow the iteration order of {@code params}; the two
 * epsilons are summed (in place, into the forward-direction epsilon).
 *
 * @param epsilon             error signal from the layer above
 * @param truncatedBPTT       must be {@code false}; truncated BPTT is rejected
 * @param tbpttBackwardLength passed through to the LSTM helper
 * @return pair of (ordered combined gradient, summed epsilon)
 * @throws UnsupportedOperationException if {@code truncatedBPTT} is {@code true}
 */
private Pair<Gradient, INDArray> backpropGradientHelper(final INDArray epsilon, final boolean truncatedBPTT, final int tbpttBackwardLength) {
    if (truncatedBPTT) {
        throw new UnsupportedOperationException("you can not time step a bidirectional RNN, it has to run on a batch of data all at once");
    }

    // Forwards direction: forward pass, then backprop through it.
    final FwdPassReturn forwardsPass = activateHelperDirectional(true, null, null, true, true);
    final Pair<Gradient, INDArray> forwardsResult = LSTMHelpers.backpropGradientHelper(
            this.conf,
            this.layerConf().getGateActivationFn(),
            this.input,
            getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS),
            getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS),
            epsilon,
            truncatedBPTT,
            tbpttBackwardLength,
            forwardsPass,
            true,
            GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_FORWARDS,
            GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_FORWARDS,
            GravesBidirectionalLSTMParamInitializer.BIAS_KEY_FORWARDS,
            gradientViews,
            maskArray);

    // Backwards direction: same again with the direction flag flipped.
    final FwdPassReturn backwardsPass = activateHelperDirectional(true, null, null, true, false);
    final Pair<Gradient, INDArray> backwardsResult = LSTMHelpers.backpropGradientHelper(
            this.conf,
            this.layerConf().getGateActivationFn(),
            this.input,
            getParam(GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS),
            getParam(GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS),
            epsilon,
            truncatedBPTT,
            tbpttBackwardLength,
            backwardsPass,
            false,
            GravesBidirectionalLSTMParamInitializer.INPUT_WEIGHT_KEY_BACKWARDS,
            GravesBidirectionalLSTMParamInitializer.RECURRENT_WEIGHT_KEY_BACKWARDS,
            GravesBidirectionalLSTMParamInitializer.BIAS_KEY_BACKWARDS,
            gradientViews,
            maskArray);

    // Merge the per-direction gradients (String -> INDArray); forwards first, then backwards.
    // The keys for forwards and backwards should be different.
    final Gradient merged = new DefaultGradient();
    final Gradient[] perDirection = {forwardsResult.getFirst(), backwardsResult.getFirst()};
    for (final Gradient directional : perDirection) {
        for (Map.Entry<String, INDArray> variable : directional.gradientForVariable().entrySet()) {
            merged.setGradientFor(variable.getKey(), variable.getValue());
        }
    }

    // Re-insert the gradients following the iteration order of the parameter map.
    final Gradient ordered = new DefaultGradient();
    for (final String paramKey : params.keySet()) {
        ordered.setGradientFor(paramKey, merged.getGradientFor(paramKey));
    }

    // Sum the errors that were back-propagated (addi accumulates into the forwards epsilon).
    final INDArray summedEpsilon = forwardsResult.getSecond().addi(backwardsResult.getSecond());
    return new Pair<>(ordered, summedEpsilon);
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Map(java.util.Map) Pair(org.deeplearning4j.berkeley.Pair)

Example 93 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class CenterLossOutputLayer method backpropGradient.

/**
 * Computes this output layer's parameter gradients and the epsilon to pass to the layer
 * below, including the center-loss contribution.
 *
 * <p>The incoming {@code epsilon} argument is not read by this method.
 *
 * @param epsilon unused
 * @return pair of (parameter gradients, epsilon for the layer below)
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    // getGradientsAndDelta yields Gradient and delta^(this), not Gradient and epsilon^(this-1)
    Pair<Gradient, INDArray> gradientAndDelta = getGradientsAndDelta(preOutput2d(true));
    INDArray delta = gradientAndDelta.getSecond();

    // Standard term: weights x delta^T, transposed back.
    INDArray weights = params.get(CenterLossParamInitializer.WEIGHT_KEY);
    INDArray epsilonNext = weights.mmul(delta.transpose()).transpose();

    // Center-loss term: lambda * (input - center assigned to each example's label).
    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray centerLossTerm = input.sub(labels.mmul(centers)).muli(layerConf().getLambda());
    epsilonNext.addi(centerLossTerm);

    return new Pair<>(gradientAndDelta.getFirst(), epsilonNext);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Pair(org.deeplearning4j.berkeley.Pair)

Example 94 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class CenterLossOutputLayer method getGradientsAndDelta.

/**
 * Computes the weight, bias and center gradients plus the output delta for the given
 * pre-activation output.
 *
 * <p>The gradients are written into the arrays held in {@code gradientViews} and registered
 * in the returned {@link Gradient} under {@code WEIGHT_KEY}, {@code BIAS_KEY} and
 * {@code CENTER_KEY}.
 *
 * @param preOut pre-activation output of this layer; its column count must match the labels
 * @return pair of (gradient, delta) - note that the output itself is NOT part of the result
 * @throws DL4JInvalidInputException if the label column count differs from {@code preOut}'s
 */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d();
    if (labels2d.size(1) != preOut.size(1)) {
        throw new DL4JInvalidInputException("Labels array numColumns (size(1) = " + labels2d.size(1) + ") does not match output layer" + " number of outputs (nOut = " + preOut.size(1) + ")");
    }
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);
    Gradient gradient = new DefaultGradient();
    INDArray weightGradView = gradientViews.get(CenterLossParamInitializer.WEIGHT_KEY);
    INDArray biasGradView = gradientViews.get(CenterLossParamInitializer.BIAS_KEY);
    INDArray centersGradView = gradientViews.get(CenterLossParamInitializer.CENTER_KEY);

    // centers delta
    double alpha = layerConf().getAlpha();
    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    // One row per example: the label row multiplied by the center matrix
    // (presumably selects the example's class center for one-hot labels - TODO confirm).
    INDArray centersForExamples = labels.mmul(centers);
    INDArray diff = centersForExamples.sub(input).muli(alpha);
    INDArray numerator = labels.transpose().mmul(diff);
    INDArray deltaC;
    if (layerConf().getGradientCheck()) {
        double lambda = layerConf().getLambda();
        //For gradient checks: need to multiply dLc/dcj by lambda to get dL/dcj
        deltaC = numerator.muli(lambda);
    } else {
        // Only this branch needs the normaliser, so it is built here rather than unconditionally:
        // sum of the labels along dimension 0, plus one, as a column vector.
        INDArray denominator = labels.sum(0).addi(1.0).transpose();
        deltaC = numerator.diviColumnVector(denominator);
    }
    centersGradView.assign(deltaC);

    // other standard calculations
    //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0);
    biasGradView.assign(delta.sum(0));
    gradient.gradientForVariable().put(CenterLossParamInitializer.WEIGHT_KEY, weightGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.BIAS_KEY, biasGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.CENTER_KEY, centersGradView);
    return new Pair<>(gradient, delta);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ILossFunction(org.nd4j.linalg.lossfunctions.ILossFunction) DL4JInvalidInputException(org.deeplearning4j.exception.DL4JInvalidInputException) Pair(org.deeplearning4j.berkeley.Pair)

Example 95 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class MultiLayerNetwork method truncatedBPTTGradient.

/**
 * Equivalent to backprop(), but calculates gradient for truncated BPTT instead.
 *
 * <p>Starts at the output layer and walks down to layer 0. Recurrent layers are asked for
 * their truncated-BPTT gradient, all other layers for their ordinary backprop gradient.
 * The per-layer gradients are stored in the {@code gradient} field under keys of the form
 * {@code "<layerIndex>_<paramKey>"}, ordered from layer 0 up to the output layer.
 *
 * <p>If the final layer is not an {@link IOutputLayer}, a warning is logged and the method
 * returns with {@code gradient} left as an empty {@link DefaultGradient}.
 *
 * @throws IllegalStateException if no labels are set, or if the output layer uses
 *                               {@code WeightInit.ZERO}
 */
protected void truncatedBPTTGradient() {
    if (flattenedGradients == null) {
        initGradientsView();
    }
    gradient = new DefaultGradient();

    if (!(getOutputLayer() instanceof IOutputLayer)) {
        log.warn("Warning: final layer isn't output layer. You cannot use backprop (truncated BPTT) without an output layer.");
        return;
    }
    IOutputLayer outputLayer = (IOutputLayer) getOutputLayer();
    if (labels == null) {
        throw new IllegalStateException("No labels found");
    }
    if (outputLayer.conf().getLayer().getWeightInit() == WeightInit.ZERO) {
        throw new IllegalStateException("Output layer weights cannot be initialized to zero when using backprop.");
    }
    outputLayer.setLabels(labels);

    // Calculate and collect the backward gradient for every layer.
    final int outputIdx = getnLayers() - 1;
    // Gradients are staged in a list to control iteration order in DefaultGradient's
    // linked hash map, i.e. layer 0 first instead of the output layer.
    LinkedList<Pair<String, INDArray>> orderedGradients = new LinkedList<>();

    Pair<Gradient, INDArray> current = outputLayer.backpropGradient(null);
    for (Map.Entry<String, INDArray> variable : current.getFirst().gradientForVariable().entrySet()) {
        orderedGradients.addLast(new Pair<>(outputIdx + "_" + variable.getKey(), variable.getValue()));
    }
    if (getLayerWiseConfigurations().getInputPreProcess(outputIdx) != null) {
        current = new Pair<>(current.getFirst(), this.layerWiseConfigurations.getInputPreProcess(outputIdx).backprop(current.getSecond(), getInputMiniBatchSize()));
    }

    // Remaining layers, from the one below the output layer down to layer 0.
    for (int layerIdx = outputIdx - 1; layerIdx >= 0; layerIdx--) {
        Layer layer = getLayer(layerIdx);
        current = (layer instanceof RecurrentLayer)
                ? ((RecurrentLayer) layer).tbpttBackpropGradient(current.getSecond(), layerWiseConfigurations.getTbpttBackLength())
                : layer.backpropGradient(current.getSecond());

        // Keep this layer's entries in their own order, and place them ahead of
        // everything gathered from the layers above.
        LinkedList<Pair<String, INDArray>> layerGradients = new LinkedList<>();
        for (Map.Entry<String, INDArray> variable : current.getFirst().gradientForVariable().entrySet()) {
            layerGradients.addLast(new Pair<>(layerIdx + "_" + variable.getKey(), variable.getValue()));
        }
        orderedGradients.addAll(0, layerGradients);

        //Pass epsilon through input processor before passing to next layer (if applicable)
        if (getLayerWiseConfigurations().getInputPreProcess(layerIdx) != null) {
            current = new Pair<>(current.getFirst(), getLayerWiseConfigurations().getInputPreProcess(layerIdx).backprop(current.getSecond(), getInputMiniBatchSize()));
        }
    }

    //Add gradients to Gradients, in correct order
    for (Pair<String, INDArray> keyed : orderedGradients) {
        gradient.setGradientFor(keyed.getFirst(), keyed.getSecond());
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) FeedForwardLayer(org.deeplearning4j.nn.conf.layers.FeedForwardLayer) FrozenLayer(org.deeplearning4j.nn.layers.FrozenLayer) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) IOutputLayer(org.deeplearning4j.nn.api.layers.IOutputLayer) RecurrentLayer(org.deeplearning4j.nn.api.layers.RecurrentLayer) Pair(org.deeplearning4j.berkeley.Pair)

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient)105 INDArray (org.nd4j.linalg.api.ndarray.INDArray)100 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)72 Test (org.junit.Test)52 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)35 Pair (org.deeplearning4j.berkeley.Pair)28 Layer (org.deeplearning4j.nn.api.Layer)28 Updater (org.deeplearning4j.nn.api.Updater)25 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)24 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)21 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)9 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)8 IActivation (org.nd4j.linalg.activations.IActivation)6 HashMap (java.util.HashMap)5 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)5 ArrayList (java.util.ArrayList)4 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)4 DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException)4 IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)4 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)4