Search in sources :

Example 66 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestDecayPolicies method testMomentumScheduleSingleLayer.

@Test
public void testMomentumScheduleSingleLayer() {
    double lr = 1e-2;
    double mu = 0.6;
    Map<Integer, Double> momentumAfter = new HashMap<>();
    momentumAfter.put(1, 0.2);
    int iterations = 2;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).momentum(mu).momentumAfter(momentumAfter).iterations(iterations).layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    int stateSize = updater.stateSizeForLayer(layer);
    updater.setStateViewArray(layer, Nd4j.create(1, stateSize), true);
    Gradient gradientExpected = new DefaultGradient();
    gradientExpected.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient.dup());
    gradientExpected.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient.dup());
    for (int i = 0; i < 2; i++) {
        updater.update(layer, gradientSingle, i, 1);
        mu = testNesterovsComputation(gradientSingle, gradientExpected, lr, mu, momentumAfter, i);
        assertEquals(mu, layer.conf().getLayer().getMomentum(), 1e-4);
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) HashMap(java.util.HashMap) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) Test(org.junit.Test)

Example 67 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class TestDecayPolicies method testLearningRateInverseDecaySingleLayer.

@Test
public void testLearningRateInverseDecaySingleLayer() {
    int iterations = 2;
    double lr = 1e-2;
    double decayRate = 2;
    double power = 3;
    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(decayRate).lrPolicyPower(power).iterations(iterations).layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build()).build();
    int numParams = conf.getLayer().initializer().numParams(conf);
    INDArray params = Nd4j.create(1, numParams);
    Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
    Updater updater = UpdaterCreator.getUpdater(layer);
    Gradient gradientActual = new DefaultGradient();
    gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
    gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
    for (int i = 0; i < iterations; i++) {
        updater.update(layer, gradientActual, i, 1);
        double expectedLr = calcInverseDecay(lr, decayRate, i, power);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
        assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
    }
}
Also used : Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Updater(org.deeplearning4j.nn.api.Updater) NeuralNetConfiguration(org.deeplearning4j.nn.conf.NeuralNetConfiguration) Layer(org.deeplearning4j.nn.api.Layer) OutputLayer(org.deeplearning4j.nn.conf.layers.OutputLayer) DenseLayer(org.deeplearning4j.nn.conf.layers.DenseLayer) Test(org.junit.Test)

Example 68 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class CudnnConvolutionHelper method backpropGradient.

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray weights, INDArray delta, int[] kernel, int[] strides, int[] pad, INDArray biasGradView, INDArray weightGradView, IActivation afn, AlgoMode mode, ConvolutionMode convolutionMode) {
    int miniBatch = input.size(0);
    int inH = input.size(2);
    int inW = input.size(3);
    int outDepth = weights.size(0);
    int inDepth = weights.size(1);
    int kH = weights.size(2);
    int kW = weights.size(3);
    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode);
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { input.size(2), input.size(3) }, kernel, strides);
    } else {
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode);
    }
    int outH = outSize[0];
    int outW = outSize[1];
    if (!Shape.strideDescendingCAscendingF(delta)) {
        // apparently not supported by cuDNN
        delta = delta.dup();
    }
    int[] srcStride = input.stride();
    int[] deltaStride = delta.stride();
    int[] algo1 = new int[1];
    int[] algo2 = new int[1];
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, outDepth, outH, outW, deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
    checkCudnn(cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], 1, 1, CUDNN_CROSS_CORRELATION));
    checkCudnn(cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, tensorFormat, outDepth, inDepth, kH, kW));
    checkCudnn(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE : CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, 0, algo1));
    checkCudnn(cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext, cudnnContext.filterDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.srcTensorDesc, mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE : CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, 0, algo2));
    INDArray epsNext = Nd4j.create(new int[] { miniBatch, inDepth, inH, inW }, 'c');
    int[] dstStride = epsNext.stride();
    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, weights, weightGradView, biasGradView, delta, epsNext);
    Pointer srcData = allocator.getPointer(input, context);
    Pointer filterData = allocator.getPointer(weights, context);
    Pointer filterGradData = allocator.getPointer(weightGradView, context);
    Pointer biasGradData = allocator.getPointer(biasGradView, context);
    Pointer deltaData = allocator.getPointer(delta, context);
    Pointer dstData = allocator.getPointer(epsNext, context);
    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
    checkCudnn(cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, algo1[0], sizeInBytes));
    long sizeInBytes1 = sizeInBytes.get(0);
    checkCudnn(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnContext, cudnnContext.filterDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo2[0], sizeInBytes));
    long sizeInBytes2 = sizeInBytes.get(0);
    if (sizeInBytes1 > workSpace.capacity() || sizeInBytes2 > workSpace.capacity()) {
        workSpace.deallocate();
        workSpace = new WorkSpace(Math.max(sizeInBytes1, sizeInBytes2));
    }
    checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.biasTensorDesc, tensorFormat, dataType, 1, outDepth, 1, 1));
    checkCudnn(cudnnConvolutionBackwardBias(cudnnContext, alpha, cudnnContext.deltaTensorDesc, deltaData, beta, cudnnContext.biasTensorDesc, biasGradData));
    checkCudnn(cudnnConvolutionBackwardFilter(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData, cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo1[0], workSpace, workSpace.capacity(), beta, cudnnContext.filterDesc, filterGradData));
    checkCudnn(cudnnConvolutionBackwardData(cudnnContext, alpha, cudnnContext.filterDesc, filterData, cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo2[0], workSpace, workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData));
    allocator.getFlowController().registerActionAllWrite(context, input, weights, weightGradView, biasGradView, delta, epsNext);
    Gradient retGradient = new DefaultGradient();
    retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
    retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
    return new Pair<>(retGradient, epsNext);
}
Also used : AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) Allocator(org.nd4j.jita.allocator.Allocator) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Pair(org.deeplearning4j.berkeley.Pair)

Example 69 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class CudnnSubsamplingHelper method backpropGradient.

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, int[] kernel, int[] strides, int[] pad, PoolingType poolingType, ConvolutionMode convolutionMode) {
    int miniBatch = input.size(0);
    int depth = input.size(1);
    int inH = input.size(2);
    int inW = input.size(3);
    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode);
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { input.size(2), input.size(3) }, kernel, strides);
    } else {
        //Also performs validation
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode);
    }
    int outH = outSize[0];
    int outW = outSize[1];
    //subsampling doesn't have weights and thus gradients are not calculated for this layer
    //only scale and reshape epsilon
    Gradient retGradient = new DefaultGradient();
    //Epsilons in shape: [miniBatch, depth, outH, outW]
    //Epsilons out shape: [miniBatch, depth, inH, inW]
    int poolingMode;
    switch(poolingType) {
        case AVG:
            poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
            break;
        case MAX:
            poolingMode = CUDNN_POOLING_MAX;
            break;
        case NONE:
            return new Pair<>(retGradient, epsilon);
        default:
            return null;
    }
    if (!Shape.strideDescendingCAscendingF(epsilon)) {
        // apparently not supported by cuDNN
        epsilon = epsilon.dup();
    }
    int[] srcStride = input.stride();
    int[] deltaStride = epsilon.stride();
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, depth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, depth, outH, outW, deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
    checkCudnn(cudnnSetPooling2dDescriptor(cudnnContext.poolingDesc, poolingMode, CUDNN_PROPAGATE_NAN, kernel[0], kernel[1], pad[0], pad[1], strides[0], strides[1]));
    INDArray outEpsilon = Nd4j.create(new int[] { miniBatch, depth, inH, inW }, 'c');
    int[] dstStride = outEpsilon.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(input, epsilon, reduced, outEpsilon);
    Pointer srcData = allocator.getPointer(input, context);
    Pointer epsData = allocator.getPointer(epsilon, context);
    Pointer zData = allocator.getPointer(reduced, context);
    Pointer dstData = allocator.getPointer(outEpsilon, context);
    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
    checkCudnn(cudnnPoolingBackward(cudnnContext, cudnnContext.poolingDesc, alpha, cudnnContext.deltaTensorDesc, zData, cudnnContext.deltaTensorDesc, epsData, cudnnContext.srcTensorDesc, srcData, beta, cudnnContext.dstTensorDesc, dstData));
    allocator.registerAction(context, input, epsilon, reduced, outEpsilon);
    return new Pair<>(retGradient, outEpsilon);
}
Also used : AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) Allocator(org.nd4j.jita.allocator.Allocator) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) DoublePointer(org.bytedeco.javacpp.DoublePointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) ShortPointer(org.bytedeco.javacpp.ShortPointer) Pointer(org.bytedeco.javacpp.Pointer) Pair(org.deeplearning4j.berkeley.Pair)

Example 70 with Gradient

use of org.deeplearning4j.nn.gradient.Gradient in project deeplearning4j by deeplearning4j.

the class CudnnLocalResponseNormalizationHelper method backpropGradient.

@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta) {
    if (n < CUDNN_LRN_MIN_N) {
        throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
    }
    if (n > CUDNN_LRN_MAX_N) {
        throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
    }
    if (k < CUDNN_LRN_MIN_K) {
        throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
    }
    if (beta < CUDNN_LRN_MIN_BETA) {
        throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
    }
    int miniBatch = input.size(0);
    int depth = input.size(1);
    int inH = input.size(2);
    int inW = input.size(3);
    Gradient retGradient = new DefaultGradient();
    if (!Shape.strideDescendingCAscendingF(epsilon)) {
        // apparently not supported by cuDNN
        epsilon = epsilon.dup();
    }
    int[] srcStride = input.stride();
    int[] deltaStride = epsilon.stride();
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, depth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, depth, inH, inW, deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
    checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));
    INDArray nextEpsilon = Nd4j.createUninitialized(new int[] { miniBatch, depth, inH, inW }, 'c');
    int[] dstStride = nextEpsilon.stride();
    checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, epsilon, activations, nextEpsilon);
    Pointer srcData = allocator.getPointer(input, context);
    Pointer epsData = allocator.getPointer(epsilon, context);
    Pointer zData = allocator.getPointer(activations, context);
    Pointer dstData = allocator.getPointer(nextEpsilon, context);
    checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
    checkCudnn(cudnnLRNCrossChannelBackward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1, this.alpha, cudnnContext.deltaTensorDesc, zData, cudnnContext.deltaTensorDesc, epsData, cudnnContext.srcTensorDesc, srcData, this.beta, cudnnContext.dstTensorDesc, dstData));
    allocator.getFlowController().registerActionAllWrite(context, input, epsilon, activations, nextEpsilon);
    return new Pair<>(retGradient, nextEpsilon);
}
Also used : AtomicAllocator(org.nd4j.jita.allocator.impl.AtomicAllocator) Allocator(org.nd4j.jita.allocator.Allocator) Gradient(org.deeplearning4j.nn.gradient.Gradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) DefaultGradient(org.deeplearning4j.nn.gradient.DefaultGradient) GridExecutioner(org.nd4j.linalg.api.ops.executioner.GridExecutioner) INDArray(org.nd4j.linalg.api.ndarray.INDArray) CudaContext(org.nd4j.linalg.jcublas.context.CudaContext) DoublePointer(org.bytedeco.javacpp.DoublePointer) FloatPointer(org.bytedeco.javacpp.FloatPointer) ShortPointer(org.bytedeco.javacpp.ShortPointer) Pointer(org.bytedeco.javacpp.Pointer) Pair(org.deeplearning4j.berkeley.Pair)

Aggregations

Gradient (org.deeplearning4j.nn.gradient.Gradient)105 INDArray (org.nd4j.linalg.api.ndarray.INDArray)100 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)72 Test (org.junit.Test)52 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)35 Pair (org.deeplearning4j.berkeley.Pair)28 Layer (org.deeplearning4j.nn.api.Layer)28 Updater (org.deeplearning4j.nn.api.Updater)25 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)24 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)21 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)9 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)8 IActivation (org.nd4j.linalg.activations.IActivation)6 HashMap (java.util.HashMap)5 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)5 ArrayList (java.util.ArrayList)4 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)4 DL4JInvalidInputException (org.deeplearning4j.exception.DL4JInvalidInputException)4 IOutputLayer (org.deeplearning4j.nn.api.layers.IOutputLayer)4 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)4