Search in sources :

Example 56 with DefaultGradient

use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

the class ComputationGraphUpdater method update.

/**
 * Update the gradients for the given ComputationGraph.
 * <p>
 * The keys of the incoming gradient map are split at their last {@code '_'} into a layer name and a
 * parameter name, and the entries are regrouped into one {@link Gradient} per layer. Each per-layer gradient
 * is then handed to the updater registered for that layer, and whatever the per-layer gradient holds
 * afterwards is written back into {@code gradient} under the original {@code layerName_paramName} key.
 *
 * @param graph     graph whose layers are passed to the individual layer updaters
 * @param gradient  gradient keyed by {@code layerName_paramName}; its entries are overwritten with the
 *                  post-update values
 * @param iteration passed through unchanged to each layer updater
 * @param batchSize passed through unchanged to each layer updater
 * @throws IllegalStateException if a gradient key contains no {@code '_'} separator, or if no updater is
 *                               registered for a layer name derived from a key
 */
public void update(ComputationGraph graph, Gradient gradient, int iteration, int batchSize) {
    // Regroup the flat "layerName_paramName" entries into one Gradient per layer
    Map<String, Gradient> layerGradients = new HashMap<>();
    for (Map.Entry<String, INDArray> gradientPair : gradient.gradientForVariable().entrySet()) {
        String key = gradientPair.getKey();
        int idx = key.lastIndexOf('_');
        if (idx == -1)
            throw new IllegalStateException("Invalid key: ComputationGraph Gradient key does not have layer separator: \"" + key + "\"");
        String layerName = key.substring(0, idx);
        Gradient g = layerGradients.get(layerName);
        if (g == null) {
            g = new DefaultGradient();
            layerGradients.put(layerName, g);
        }
        String newKey = key.substring(idx + 1);
        g.setGradientFor(newKey, gradientPair.getValue());
    }
    for (Map.Entry<String, Gradient> entry : layerGradients.entrySet()) {
        String layerName = entry.getKey();
        Gradient layerGradient = entry.getValue();
        // Look up as Integer: unboxing a missing entry straight into an int would fail with a bare
        // NullPointerException that does not say which layer was the problem
        Integer updaterIdx = layerUpdatersMap.get(layerName);
        if (updaterIdx == null)
            throw new IllegalStateException("Invalid key: no updater found for ComputationGraph layer \"" + layerName + "\"");
        layerUpdaters[updaterIdx].update(graph.getLayer(layerName), layerGradient, iteration, batchSize);
        //Gradients may be replaced by BaseUpdater.update(), so copy them back under their original keys
        for (Map.Entry<String, INDArray> entry2 : layerGradient.gradientForVariable().entrySet()) {
            gradient.setGradientFor(layerName + "_" + entry2.getKey(), entry2.getValue());
        }
    }
}
Also used : DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) HashMap(java.util.HashMap) Map(java.util.Map) HashMap(java.util.HashMap)

Example 57 with DefaultGradient

use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

the class RBM method backpropGradient.

/**
 * Backpropagates {@code epsilon} through this layer.
 * <p>
 * The weight and hidden-bias gradients are written into the corresponding gradient views; the visible-bias
 * gradient view is returned as it currently is. Note that {@code epsilon} is modified in place.
 *
 * @param epsilon error signal from the layer above; multiplied in place by the activation derivative
 * @return the gradient (weight, bias and visible-bias entries) paired with the epsilon for the layer below
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    //For layer L, the incoming epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray derivative = propUpDerivative(preOutput(input, true));
    // In-place multiply: delta and epsilon refer to the same array afterwards
    INDArray delta = epsilon.muli(derivative);
    if (maskArray != null) {
        delta.muliColumnVector(maskArray);
    }

    // Weight gradient: gemm writes input^T * delta directly into the (f order) gradient view
    INDArray wGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, wGrad, true, false, 1.0, 0.0);

    // Hidden bias gradient: sum of delta along dimension 0, copied into its view
    INDArray hiddenBiasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
    hiddenBiasGrad.assign(delta.sum(0));

    // Visible bias gradient: the view is passed along without being written here
    INDArray visibleBiasGrad = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);

    Gradient result = new DefaultGradient();
    result.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, wGrad);
    result.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, hiddenBiasGrad);
    result.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, visibleBiasGrad);

    // Epsilon for the layer below: (W * delta^T)^T
    INDArray weights = params.get(DefaultParamInitializer.WEIGHT_KEY);
    INDArray epsilonNext = weights.mmul(delta.transpose()).transpose();
    return new Pair<>(result, epsilonNext);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Pair(org.deeplearning4j.berkeley.Pair)

Example 58 with DefaultGradient

use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

the class GlobalPoolingLayer method backpropGradient.

/**
 * Backpropagates {@code epsilon} through the global pooling operation.
 * <p>
 * This layer has no parameters, so the returned gradient is empty; only the epsilon for the layer below is
 * computed, either over the full array or, when a mask array is set, via the masked reduction utilities.
 *
 * @param epsilon error signal from the layer above
 * @return an empty gradient paired with the epsilon for the layer below
 * @throws UnsupportedOperationException if a mask array is set and the input is neither rank 3 nor rank 4
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    if (!collapseDimensions && epsilon.rank() != 2) {
        //Dimensions were not collapsed: error should be [minibatch, vectorSize, 1] or [minibatch, depth, 1, 1]
        //Drop the trailing 1s by reshaping to 2d
        int[] epsShape = epsilon.shape();
        epsilon = epsilon.reshape(epsilon.ordering(), epsShape[0], epsShape[1]);
    }

    final int inputRank = input.rank();

    //Configured pooling dimensions win; otherwise fall back to the default for the input rank
    int[] poolDim;
    switch (inputRank) {
        case 3:
            //Time series activations
            poolDim = (poolingDimensions != null) ? poolingDimensions : DEFAULT_TIMESERIES_POOL_DIMS;
            break;
        case 4:
            //CNN activations
            poolDim = (poolingDimensions != null) ? poolingDimensions : DEFAULT_CNN_POOL_DIMS;
            break;
        default:
            poolDim = null;
            break;
    }

    INDArray epsilonNd;
    if (maskArray == null) {
        //Standard 'full array' global pooling op
        epsilonNd = epsilonHelperFullArray(input, epsilon, poolDim);
    } else if (inputRank == 3) {
        epsilonNd = MaskedReductionUtil.maskedPoolingEpsilonTimeSeries(poolingType, input, maskArray, epsilon, pNorm);
    } else if (inputRank == 4) {
        //The mask runs along the height when its size(1) equals the input's size(2)
        boolean maskAlongHeight = (input.size(2) == maskArray.size(1));
        epsilonNd = MaskedReductionUtil.maskedPoolingEpsilonCnn(poolingType, input, maskArray, epsilon, maskAlongHeight, pNorm);
    } else {
        throw new UnsupportedOperationException();
    }

    //Empty: no params
    Gradient emptyGradient = new DefaultGradient();
    return new Pair<>(emptyGradient, epsilonNd);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Pair(org.deeplearning4j.berkeley.Pair)

Example 59 with DefaultGradient

use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.

the class ConvolutionLayer method backpropGradient.

/**
 * Backpropagates {@code epsilon} through this convolution layer.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>Work out padding and output size (the output size call also validates the configuration).</li>
 *   <li>Compute delta by running the activation function's backprop on the 4d pre-output and epsilon.</li>
 *   <li>If a helper is set and the global data type is not HALF, delegate to it and return its result
 *       when that result is non-null.</li>
 *   <li>Otherwise compute the weight gradient with im2col + gemm (written into the weight gradient view),
 *       the epsilon for the layer below with mmul + col2im, and the bias gradient as a sum over delta
 *       (assigned into the bias gradient view).</li>
 * </ol>
 *
 * @param epsilon error signal from the layer above (presumably [miniBatch,outDepth,outH,outW], given the
 *                permute applied to delta below - TODO confirm)
 * @return the gradient (bias and weight entries, backed by the gradient views) paired with the epsilon for
 *         the layer below; or whatever the helper returned, if the helper handled the call
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    INDArray weights = getParam(ConvolutionParamInitializer.WEIGHT_KEY);
    // Dimensions are read from the input and weight array sizes
    int miniBatch = input.size(0);
    int inH = input.size(2);
    int inW = input.size(3);
    int outDepth = weights.size(0);
    int inDepth = weights.size(1);
    int kH = weights.size(2);
    int kW = weights.size(3);
    int[] kernel = layerConf().getKernelSize();
    int[] strides = layerConf().getStride();
    int[] pad;
    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        // Same mode: padding is not taken from the configuration but derived from the output size
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode);
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { inH, inW }, kernel, strides);
    } else {
        pad = layerConf().getPadding();
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode);
    }
    int outH = outSize[0];
    int outW = outSize[1];
    INDArray biasGradView = gradientViews.get(ConvolutionParamInitializer.BIAS_KEY);
    //4d, c order. Shape: [outDepth,inDepth,kH,kW]
    INDArray weightGradView = gradientViews.get(ConvolutionParamInitializer.WEIGHT_KEY);
    // 2d transposed view of the weight gradient: gemm below writes into this, and thereby into weightGradView
    INDArray weightGradView2df = Shape.newShapeNoCopy(weightGradView, new int[] { outDepth, inDepth * kH * kW }, false).transpose();
    INDArray delta;
    // afn is only passed on to the helper; the backprop call below fetches the activation function again
    IActivation afn = conf.getLayer().getActivationFn();
    //TODO handle activation function params
    delta = conf().getLayer().getActivationFn().backprop(preOutput4d(true), epsilon).getFirst();
    // Helper path: skipped when the global data type is HALF; a null result falls through to the code below
    if (helper != null && Nd4j.dataType() != DataBuffer.Type.HALF) {
        Pair<Gradient, INDArray> ret = helper.backpropGradient(input, weights, delta, kernel, strides, pad, biasGradView, weightGradView, afn, layerConf().getCudnnAlgoMode(), convolutionMode);
        if (ret != null) {
            return ret;
        }
    }
    //To shape: [outDepth,miniBatch,outH,outW]
    delta = delta.permute(1, 0, 2, 3);
    //Note: due to the permute in preOut, and the fact that we essentially do a preOut.muli(epsilon), this reshape
    // should be zero-copy; only possible exception being sometimes with the "identity" activation case
    //Shape.newShapeNoCopy(delta,new int[]{outDepth,miniBatch*outH*outW},false);
    INDArray delta2d = delta.reshape('c', new int[] { outDepth, miniBatch * outH * outW });
    //Do im2col, but with order [miniB,outH,outW,depthIn,kH,kW]; but need to input [miniBatch,depth,kH,kW,outH,outW] given the current im2col implementation
    //To get this: create an array of the order we want, permute it to the order required by im2col implementation, and then do im2col on that
    //to get old order from required order: permute(0,3,4,5,1,2)
    INDArray col = Nd4j.createUninitialized(new int[] { miniBatch, outH, outW, inDepth, kH, kW }, 'c');
    // col2 is a permuted view of col: im2col writes through col2, and col is then read in its own order
    INDArray col2 = col.permute(0, 3, 4, 5, 1, 2);
    Convolution.im2col(input, kH, kW, strides[0], strides[1], pad[0], pad[1], convolutionMode == ConvolutionMode.Same, col2);
    //Shape im2col to 2d. Due to the permuting above, this should be a zero-copy reshape
    INDArray im2col2d = col.reshape('c', miniBatch * outH * outW, inDepth * kH * kW);
    //Calculate weight gradients, using cc->c mmul.
    //weightGradView2df is f order, but this is because it's transposed from c order
    //Here, we are using the fact that AB = (B^T A^T)^T; output here (post transpose) is in c order, not usual f order
    Nd4j.gemm(im2col2d, delta2d, weightGradView2df, true, true, 1.0, 0.0);
    //Flatten 4d weights to 2d... this again is a zero-copy op (unless weights are not originally in c order for some reason)
    //Start with c order weights, switch order to f order
    INDArray wPermuted = weights.permute(3, 2, 1, 0);
    INDArray w2d = wPermuted.reshape('f', inDepth * kH * kW, outDepth);
    //Calculate epsilons for layer below, in 2d format (note: this is in 'image patch' format before col2im reduction)
    //Note: cc -> f mmul here, then reshape to 6d in f order
    INDArray epsNext2d = w2d.mmul(delta2d);
    INDArray eps6d = Shape.newShapeNoCopy(epsNext2d, new int[] { kW, kH, inDepth, outW, outH, miniBatch }, true);
    //Calculate epsilonNext by doing col2im reduction.
    //Current col2im implementation expects input with order: [miniBatch,depth,kH,kW,outH,outW]
    //currently have [kW,kH,inDepth,outW,outH,miniBatch] -> permute first
    eps6d = eps6d.permute(5, 2, 1, 0, 4, 3);
    INDArray epsNextOrig = Nd4j.create(new int[] { inDepth, miniBatch, inH, inW }, 'c');
    //Note: we execute col2im in a way that the output array should be used in a stride 1 muli in the layer below... (same strides as zs/activations)
    // epsNext is a permuted view of epsNextOrig, with dimensions ordered [miniBatch,inDepth,inH,inW]
    INDArray epsNext = epsNextOrig.permute(1, 0, 2, 3);
    Convolution.col2im(eps6d, epsNext, strides[0], strides[1], pad[0], pad[1], inH, inW);
    Gradient retGradient = new DefaultGradient();
    // Bias gradient: sum of delta2d along dimension 1, leaving one value per output depth
    INDArray biasGradTemp = delta2d.sum(1);
    //TODO do this properly, without the assign
    biasGradView.assign(biasGradTemp);
    retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
    retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
    return new Pair<>(retGradient, epsNext);
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) IActivation(org.nd4j.linalg.activations.IActivation) Pair(org.deeplearning4j.berkeley.Pair)

Aggregations

DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)59 Gradient (org.deeplearning4j.nn.gradient.Gradient)58 INDArray (org.nd4j.linalg.api.ndarray.INDArray)56 Test (org.junit.Test)26 Pair (org.deeplearning4j.berkeley.Pair)23 Updater (org.deeplearning4j.nn.api.Updater)23 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)22 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)22 Layer (org.deeplearning4j.nn.api.Layer)20 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)16 HashMap (java.util.HashMap)5 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)4 Allocator (org.nd4j.jita.allocator.Allocator)4 AtomicAllocator (org.nd4j.jita.allocator.impl.AtomicAllocator)4 IActivation (org.nd4j.linalg.activations.IActivation)4 GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner)4 CudaContext (org.nd4j.linalg.jcublas.context.CudaContext)4 Map (java.util.Map)3 DoublePointer (org.bytedeco.javacpp.DoublePointer)3 FloatPointer (org.bytedeco.javacpp.FloatPointer)3