use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
the class TestDecayPolicies method testLearningRateInverseDecaySingleLayer.
@Test
public void testLearningRateInverseDecaySingleLayer() {
int iterations = 2;
double lr = 1e-2;
double decayRate = 2;
double power = 3;
NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).learningRateDecayPolicy(LearningRatePolicy.Inverse).lrPolicyDecayRate(decayRate).lrPolicyPower(power).iterations(iterations).layer(new DenseLayer.Builder().nIn(nIn).nOut(nOut).updater(org.deeplearning4j.nn.conf.Updater.SGD).build()).build();
int numParams = conf.getLayer().initializer().numParams(conf);
INDArray params = Nd4j.create(1, numParams);
Layer layer = conf.getLayer().instantiate(conf, null, 0, params, true);
Updater updater = UpdaterCreator.getUpdater(layer);
Gradient gradientActual = new DefaultGradient();
gradientActual.setGradientFor(DefaultParamInitializer.WEIGHT_KEY, weightGradient);
gradientActual.setGradientFor(DefaultParamInitializer.BIAS_KEY, biasGradient);
for (int i = 0; i < iterations; i++) {
updater.update(layer, gradientActual, i, 1);
double expectedLr = calcInverseDecay(lr, decayRate, i, power);
assertEquals(expectedLr, layer.conf().getLearningRateByParam("W"), 1e-4);
assertEquals(expectedLr, layer.conf().getLearningRateByParam("b"), 1e-4);
}
}
use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
the class CudnnConvolutionHelper method backpropGradient.
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray weights, INDArray delta, int[] kernel, int[] strides, int[] pad, INDArray biasGradView, INDArray weightGradView, IActivation afn, AlgoMode mode, ConvolutionMode convolutionMode) {
int miniBatch = input.size(0);
int inH = input.size(2);
int inW = input.size(3);
int outDepth = weights.size(0);
int inDepth = weights.size(1);
int kH = weights.size(2);
int kW = weights.size(3);
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
//Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode);
pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { input.size(2), input.size(3) }, kernel, strides);
} else {
//Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode);
}
int outH = outSize[0];
int outW = outSize[1];
if (!Shape.strideDescendingCAscendingF(delta)) {
// apparently not supported by cuDNN
delta = delta.dup();
}
int[] srcStride = input.stride();
int[] deltaStride = delta.stride();
int[] algo1 = new int[1];
int[] algo2 = new int[1];
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, outDepth, outH, outW, deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
checkCudnn(cudnnSetConvolution2dDescriptor(cudnnContext.convDesc, pad[0], pad[1], strides[0], strides[1], 1, 1, CUDNN_CROSS_CORRELATION));
checkCudnn(cudnnSetFilter4dDescriptor(cudnnContext.filterDesc, dataType, tensorFormat, outDepth, inDepth, kH, kW));
checkCudnn(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE : CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, 0, algo1));
checkCudnn(cudnnGetConvolutionBackwardDataAlgorithm(cudnnContext, cudnnContext.filterDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.srcTensorDesc, mode == AlgoMode.NO_WORKSPACE ? CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE : CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, 0, algo2));
INDArray epsNext = Nd4j.create(new int[] { miniBatch, inDepth, inH, inW }, 'c');
int[] dstStride = epsNext.stride();
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, weights, weightGradView, biasGradView, delta, epsNext);
Pointer srcData = allocator.getPointer(input, context);
Pointer filterData = allocator.getPointer(weights, context);
Pointer filterGradData = allocator.getPointer(weightGradView, context);
Pointer biasGradData = allocator.getPointer(biasGradView, context);
Pointer deltaData = allocator.getPointer(delta, context);
Pointer dstData = allocator.getPointer(epsNext, context);
checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
checkCudnn(cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnContext, cudnnContext.srcTensorDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.filterDesc, algo1[0], sizeInBytes));
long sizeInBytes1 = sizeInBytes.get(0);
checkCudnn(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnContext, cudnnContext.filterDesc, cudnnContext.deltaTensorDesc, cudnnContext.convDesc, cudnnContext.dstTensorDesc, algo2[0], sizeInBytes));
long sizeInBytes2 = sizeInBytes.get(0);
if (sizeInBytes1 > workSpace.capacity() || sizeInBytes2 > workSpace.capacity()) {
workSpace.deallocate();
workSpace = new WorkSpace(Math.max(sizeInBytes1, sizeInBytes2));
}
checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.biasTensorDesc, tensorFormat, dataType, 1, outDepth, 1, 1));
checkCudnn(cudnnConvolutionBackwardBias(cudnnContext, alpha, cudnnContext.deltaTensorDesc, deltaData, beta, cudnnContext.biasTensorDesc, biasGradData));
checkCudnn(cudnnConvolutionBackwardFilter(cudnnContext, alpha, cudnnContext.srcTensorDesc, srcData, cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo1[0], workSpace, workSpace.capacity(), beta, cudnnContext.filterDesc, filterGradData));
checkCudnn(cudnnConvolutionBackwardData(cudnnContext, alpha, cudnnContext.filterDesc, filterData, cudnnContext.deltaTensorDesc, deltaData, cudnnContext.convDesc, algo2[0], workSpace, workSpace.capacity(), beta, cudnnContext.dstTensorDesc, dstData));
allocator.getFlowController().registerActionAllWrite(context, input, weights, weightGradView, biasGradView, delta, epsNext);
Gradient retGradient = new DefaultGradient();
retGradient.setGradientFor(ConvolutionParamInitializer.BIAS_KEY, biasGradView);
retGradient.setGradientFor(ConvolutionParamInitializer.WEIGHT_KEY, weightGradView, 'c');
return new Pair<>(retGradient, epsNext);
}
use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
the class CudnnSubsamplingHelper method backpropGradient.
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, int[] kernel, int[] strides, int[] pad, PoolingType poolingType, ConvolutionMode convolutionMode) {
int miniBatch = input.size(0);
int depth = input.size(1);
int inH = input.size(2);
int inW = input.size(3);
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
//Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode);
pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { input.size(2), input.size(3) }, kernel, strides);
} else {
//Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode);
}
int outH = outSize[0];
int outW = outSize[1];
//subsampling doesn't have weights and thus gradients are not calculated for this layer
//only scale and reshape epsilon
Gradient retGradient = new DefaultGradient();
//Epsilons in shape: [miniBatch, depth, outH, outW]
//Epsilons out shape: [miniBatch, depth, inH, inW]
int poolingMode;
switch(poolingType) {
case AVG:
poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
break;
case MAX:
poolingMode = CUDNN_POOLING_MAX;
break;
case NONE:
return new Pair<>(retGradient, epsilon);
default:
return null;
}
if (!Shape.strideDescendingCAscendingF(epsilon)) {
// apparently not supported by cuDNN
epsilon = epsilon.dup();
}
int[] srcStride = input.stride();
int[] deltaStride = epsilon.stride();
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, depth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, depth, outH, outW, deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
checkCudnn(cudnnSetPooling2dDescriptor(cudnnContext.poolingDesc, poolingMode, CUDNN_PROPAGATE_NAN, kernel[0], kernel[1], pad[0], pad[1], strides[0], strides[1]));
INDArray outEpsilon = Nd4j.create(new int[] { miniBatch, depth, inH, inW }, 'c');
int[] dstStride = outEpsilon.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareAction(input, epsilon, reduced, outEpsilon);
Pointer srcData = allocator.getPointer(input, context);
Pointer epsData = allocator.getPointer(epsilon, context);
Pointer zData = allocator.getPointer(reduced, context);
Pointer dstData = allocator.getPointer(outEpsilon, context);
checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
checkCudnn(cudnnPoolingBackward(cudnnContext, cudnnContext.poolingDesc, alpha, cudnnContext.deltaTensorDesc, zData, cudnnContext.deltaTensorDesc, epsData, cudnnContext.srcTensorDesc, srcData, beta, cudnnContext.dstTensorDesc, dstData));
allocator.registerAction(context, input, epsilon, reduced, outEpsilon);
return new Pair<>(retGradient, outEpsilon);
}
use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
the class CudnnLocalResponseNormalizationHelper method backpropGradient.
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta) {
if (n < CUDNN_LRN_MIN_N) {
throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
}
if (n > CUDNN_LRN_MAX_N) {
throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
}
if (k < CUDNN_LRN_MIN_K) {
throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
}
if (beta < CUDNN_LRN_MIN_BETA) {
throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
}
int miniBatch = input.size(0);
int depth = input.size(1);
int inH = input.size(2);
int inW = input.size(3);
Gradient retGradient = new DefaultGradient();
if (!Shape.strideDescendingCAscendingF(epsilon)) {
// apparently not supported by cuDNN
epsilon = epsilon.dup();
}
int[] srcStride = input.stride();
int[] deltaStride = epsilon.stride();
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, depth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, depth, inH, inW, deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));
INDArray nextEpsilon = Nd4j.createUninitialized(new int[] { miniBatch, depth, inH, inW }, 'c');
int[] dstStride = nextEpsilon.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, epsilon, activations, nextEpsilon);
Pointer srcData = allocator.getPointer(input, context);
Pointer epsData = allocator.getPointer(epsilon, context);
Pointer zData = allocator.getPointer(activations, context);
Pointer dstData = allocator.getPointer(nextEpsilon, context);
checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
checkCudnn(cudnnLRNCrossChannelBackward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1, this.alpha, cudnnContext.deltaTensorDesc, zData, cudnnContext.deltaTensorDesc, epsData, cudnnContext.srcTensorDesc, srcData, this.beta, cudnnContext.dstTensorDesc, dstData));
allocator.getFlowController().registerActionAllWrite(context, input, epsilon, activations, nextEpsilon);
return new Pair<>(retGradient, nextEpsilon);
}
use of org.deeplearning4j.nn.gradient.DefaultGradient in project deeplearning4j by deeplearning4j.
the class TestDecayPolicies method testLearningRateScheduleMLN.
@Test
public void testLearningRateScheduleMLN() {
Map<Integer, Double> learningRateAfter = new HashMap<>();
learningRateAfter.put(1, 0.2);
int iterations = 2;
int[] nIns = { 4, 2 };
int[] nOuts = { 2, 3 };
for (org.deeplearning4j.nn.conf.Updater updaterFunc : updaters) {
double lr = 1e-2;
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().learningRate(lr).learningRateDecayPolicy(LearningRatePolicy.Schedule).learningRateSchedule(learningRateAfter).iterations(iterations).updater(updaterFunc).list().layer(0, new DenseLayer.Builder().nIn(nIns[0]).nOut(nOuts[0]).build()).layer(1, new OutputLayer.Builder().nIn(nIns[1]).nOut(nOuts[1]).build()).backprop(true).pretrain(false).build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
Updater updater = UpdaterCreator.getUpdater(net);
String wKey, bKey;
for (int i = 0; i < 2; i++) {
Gradient gradientActual = new DefaultGradient();
Gradient gradientExpected = new DefaultGradient();
for (int k = 0; k < net.getnLayers(); k++) {
wKey = String.valueOf(k) + "_" + DefaultParamInitializer.WEIGHT_KEY;
gradientActual.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
gradientExpected.setGradientFor(wKey, Nd4j.ones(nIns[k], nOuts[k]));
bKey = String.valueOf(k) + "_" + DefaultParamInitializer.BIAS_KEY;
gradientActual.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
gradientExpected.setGradientFor(bKey, Nd4j.ones(1, nOuts[k]));
}
updater.update(net, gradientActual, i, 1);
if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.SGD))
lr = testSGDComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAGRAD))
lr = testAdaGradComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.ADAM))
lr = testAdamComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
else if (updaterFunc.equals(org.deeplearning4j.nn.conf.Updater.RMSPROP))
lr = testRMSPropComputation(gradientActual, gradientExpected, lr, learningRateAfter, i);
if (i == 0)
assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), lr);
else
assertEquals(lr, net.getLayer(1).conf().getLearningRateByParam("W"), learningRateAfter.get(1));
}
}
}
Aggregations