use of org.nd4j.linalg.api.ops.executioner.GridExecutioner in project deeplearning4j by deeplearning4j.
the class CudnnSubsamplingHelper method activate.
@Override
public INDArray activate(INDArray input, boolean training, int[] kernel, int[] strides, int[] pad, PoolingType poolingType, ConvolutionMode convolutionMode) {
int miniBatch = input.size(0);
int inDepth = input.size(1);
int inH = input.size(2);
int inW = input.size(3);
int[] outSize;
if (convolutionMode == ConvolutionMode.Same) {
//Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode);
pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] { input.size(2), input.size(3) }, kernel, strides);
} else {
//Also performs validation
outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode);
}
int outH = outSize[0];
int outW = outSize[1];
int poolingMode;
switch(poolingType) {
case AVG:
poolingMode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
break;
case MAX:
poolingMode = CUDNN_POOLING_MAX;
break;
case NONE:
return input;
default:
return null;
}
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
int[] srcStride = input.stride();
checkCudnn(cudnnSetPooling2dDescriptor(cudnnContext.poolingDesc, poolingMode, CUDNN_PROPAGATE_NAN, kernel[0], kernel[1], pad[0], pad[1], strides[0], strides[1]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
reduced = Nd4j.createUninitialized(new int[] { miniBatch, inDepth, outH, outW }, 'c');
int[] dstStride = reduced.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, outH, outW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareAction(input, reduced);
Pointer srcData = allocator.getPointer(input, context);
Pointer dstData = allocator.getPointer(reduced, context);
checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
checkCudnn(cudnnPoolingForward(cudnnContext, cudnnContext.poolingDesc, alpha, cudnnContext.srcTensorDesc, srcData, beta, cudnnContext.dstTensorDesc, dstData));
allocator.registerAction(context, input, reduced);
return reduced;
}
use of org.nd4j.linalg.api.ops.executioner.GridExecutioner in project deeplearning4j by deeplearning4j.
the class CudnnBatchNormalizationHelper method preOutput.
@Override
public INDArray preOutput(INDArray x, boolean training, int[] shape, INDArray gamma, INDArray beta, INDArray mean, INDArray var, double decay, double eps) {
if (eps < CUDNN_BN_MIN_EPSILON) {
throw new IllegalArgumentException("Error: eps < CUDNN_BN_MIN_EPSILON (" + eps + " < " + CUDNN_BN_MIN_EPSILON + ")");
}
int miniBatch = x.size(0);
int inDepth = x.size(1);
int inH = x.size(2);
int inW = x.size(3);
int[] srcStride = x.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
INDArray activations = Nd4j.createUninitialized(new int[] { miniBatch, inDepth, inH, inW }, 'c');
int[] dstStride = activations.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
int[] gammaStride = gamma.stride();
checkCudnn(cudnnSetTensor4dDescriptor(cudnnContext.gammaBetaTensorDesc, tensorFormat, dataType, shape[0], shape[1], shape.length > 2 ? shape[2] : 1, shape.length > 3 ? shape[3] : 1));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareActionAllWrite(x, activations, gamma, beta, mean, var);
Pointer srcData = allocator.getPointer(x, context);
Pointer dstData = allocator.getPointer(activations, context);
Pointer gammaData = allocator.getPointer(gamma, context);
Pointer betaData = allocator.getPointer(beta, context);
Pointer meanData = allocator.getPointer(mean, context);
Pointer varData = allocator.getPointer(var, context);
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
if (training) {
if (meanCache.capacity() < mean.data().length() * mean.data().getElementSize()) {
meanCache.deallocate();
meanCache = new Cache(mean.data().length() * mean.data().getElementSize());
}
if (varCache.capacity() < var.data().length() * mean.data().getElementSize()) {
varCache.deallocate();
varCache = new Cache(var.data().length() * mean.data().getElementSize());
}
checkCudnn(cudnnBatchNormalizationForwardTraining(cudnnContext, batchNormMode, this.alpha, this.beta, cudnnContext.srcTensorDesc, srcData, cudnnContext.dstTensorDesc, dstData, cudnnContext.gammaBetaTensorDesc, gammaData, betaData, decay, meanData, varData, eps, meanCache, varCache));
} else {
checkCudnn(cudnnBatchNormalizationForwardInference(cudnnContext, batchNormMode, this.alpha, this.beta, cudnnContext.srcTensorDesc, srcData, cudnnContext.dstTensorDesc, dstData, cudnnContext.gammaBetaTensorDesc, gammaData, betaData, meanData, varData, eps));
}
allocator.getFlowController().registerActionAllWrite(context, x, activations, gamma, beta, mean, var);
return activations;
}
use of org.nd4j.linalg.api.ops.executioner.GridExecutioner in project deeplearning4j by deeplearning4j.
the class CudnnLocalResponseNormalizationHelper method activate.
@Override
public INDArray activate(INDArray input, boolean training, double k, double n, double alpha, double beta) {
if (n < CUDNN_LRN_MIN_N) {
throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
}
if (n > CUDNN_LRN_MAX_N) {
throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
}
if (k < CUDNN_LRN_MIN_K) {
throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
}
if (beta < CUDNN_LRN_MIN_BETA) {
throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
}
int miniBatch = input.size(0);
int inDepth = input.size(1);
int inH = input.size(2);
int inW = input.size(3);
int[] srcStride = input.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, inDepth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
activations = Nd4j.createUninitialized(new int[] { miniBatch, inDepth, inH, inW }, 'c');
int[] dstStride = activations.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, inDepth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, activations);
Pointer srcData = allocator.getPointer(input, context);
Pointer dstData = allocator.getPointer(activations, context);
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
checkCudnn(cudnnLRNCrossChannelForward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1, this.alpha, cudnnContext.srcTensorDesc, srcData, this.beta, cudnnContext.dstTensorDesc, dstData));
allocator.getFlowController().registerActionAllWrite(context, input, activations);
return activations;
}
use of org.nd4j.linalg.api.ops.executioner.GridExecutioner in project deeplearning4j by deeplearning4j.
the class CudnnLocalResponseNormalizationHelper method backpropGradient.
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta) {
if (n < CUDNN_LRN_MIN_N) {
throw new IllegalArgumentException("Error: n < CUDNN_LRN_MIN_N (" + n + " < " + CUDNN_LRN_MIN_N + ")");
}
if (n > CUDNN_LRN_MAX_N) {
throw new IllegalArgumentException("Error: n > CUDNN_LRN_MAX_N (" + n + " > " + CUDNN_LRN_MAX_N + ")");
}
if (k < CUDNN_LRN_MIN_K) {
throw new IllegalArgumentException("Error: k < CUDNN_LRN_MIN_K (" + k + " < " + CUDNN_LRN_MIN_K + ")");
}
if (beta < CUDNN_LRN_MIN_BETA) {
throw new IllegalArgumentException("Error: beta < CUDNN_LRN_MIN_BETA (" + beta + " < " + CUDNN_LRN_MIN_BETA + ")");
}
int miniBatch = input.size(0);
int depth = input.size(1);
int inH = input.size(2);
int inW = input.size(3);
Gradient retGradient = new DefaultGradient();
if (!Shape.strideDescendingCAscendingF(epsilon)) {
// apparently not supported by cuDNN
epsilon = epsilon.dup();
}
int[] srcStride = input.stride();
int[] deltaStride = epsilon.stride();
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueue();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.srcTensorDesc, dataType, miniBatch, depth, inH, inW, srcStride[0], srcStride[1], srcStride[2], srcStride[3]));
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.deltaTensorDesc, dataType, miniBatch, depth, inH, inW, deltaStride[0], deltaStride[1], deltaStride[2], deltaStride[3]));
checkCudnn(cudnnSetLRNDescriptor(cudnnContext.lrnDesc, (int) n, alpha, beta, k));
INDArray nextEpsilon = Nd4j.createUninitialized(new int[] { miniBatch, depth, inH, inW }, 'c');
int[] dstStride = nextEpsilon.stride();
checkCudnn(cudnnSetTensor4dDescriptorEx(cudnnContext.dstTensorDesc, dataType, miniBatch, depth, inH, inW, dstStride[0], dstStride[1], dstStride[2], dstStride[3]));
Allocator allocator = AtomicAllocator.getInstance();
CudaContext context = allocator.getFlowController().prepareActionAllWrite(input, epsilon, activations, nextEpsilon);
Pointer srcData = allocator.getPointer(input, context);
Pointer epsData = allocator.getPointer(epsilon, context);
Pointer zData = allocator.getPointer(activations, context);
Pointer dstData = allocator.getPointer(nextEpsilon, context);
checkCudnn(cudnnSetStream(cudnnContext, new CUstream_st(context.getOldStream())));
checkCudnn(cudnnLRNCrossChannelBackward(cudnnContext, cudnnContext.lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1, this.alpha, cudnnContext.deltaTensorDesc, zData, cudnnContext.deltaTensorDesc, epsData, cudnnContext.srcTensorDesc, srcData, this.beta, cudnnContext.dstTensorDesc, dstData));
allocator.getFlowController().registerActionAllWrite(context, input, epsilon, activations, nextEpsilon);
return new Pair<>(retGradient, nextEpsilon);
}
use of org.nd4j.linalg.api.ops.executioner.GridExecutioner in project deeplearning4j by deeplearning4j.
the class FeedForwardWithKeyFunctionAdapter method call.
@Override
public Iterable<Tuple2<K, INDArray>> call(Iterator<Tuple2<K, INDArray>> iterator) throws Exception {
if (!iterator.hasNext()) {
return Collections.emptyList();
}
MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(jsonConfig.getValue()));
network.init();
INDArray val = params.value().unsafeDuplication();
if (val.length() != network.numParams(false))
throw new IllegalStateException("Network did not have same number of parameters as the broadcasted set parameters");
network.setParameters(val);
//Issue: for 2d data (MLPs etc) we can just stack the examples.
//But: for 3d and 4d: in principle the data sizes could be different
//We could handle that with mask arrays - but it gets messy. The approach used here is simpler but less efficient
List<INDArray> featuresList = new ArrayList<>(batchSize);
List<K> keyList = new ArrayList<>(batchSize);
List<Integer> origSizeList = new ArrayList<>();
int[] firstShape = null;
boolean sizesDiffer = false;
int tupleCount = 0;
while (iterator.hasNext()) {
Tuple2<K, INDArray> t2 = iterator.next();
if (firstShape == null) {
firstShape = t2._2().shape();
} else if (!sizesDiffer) {
for (int i = 1; i < firstShape.length; i++) {
if (firstShape[i] != featuresList.get(tupleCount - 1).size(i)) {
sizesDiffer = true;
break;
}
}
}
featuresList.add(t2._2());
keyList.add(t2._1());
origSizeList.add(t2._2().size(0));
tupleCount++;
}
if (tupleCount == 0) {
return Collections.emptyList();
}
List<Tuple2<K, INDArray>> output = new ArrayList<>(tupleCount);
int currentArrayIndex = 0;
while (currentArrayIndex < featuresList.size()) {
int firstIdx = currentArrayIndex;
int nextIdx = currentArrayIndex;
int examplesInBatch = 0;
List<INDArray> toMerge = new ArrayList<>();
firstShape = null;
while (nextIdx < featuresList.size() && examplesInBatch < batchSize) {
if (firstShape == null) {
firstShape = featuresList.get(nextIdx).shape();
} else if (sizesDiffer) {
boolean breakWhile = false;
for (int i = 1; i < firstShape.length; i++) {
if (firstShape[i] != featuresList.get(nextIdx).size(i)) {
//Next example has a different size. So: don't add it to the current batch, just process what we have
breakWhile = true;
break;
}
}
if (breakWhile) {
break;
}
}
INDArray f = featuresList.get(nextIdx++);
toMerge.add(f);
examplesInBatch += f.size(0);
}
INDArray batchFeatures = Nd4j.concat(0, toMerge.toArray(new INDArray[toMerge.size()]));
INDArray out = network.output(batchFeatures, false);
examplesInBatch = 0;
for (int i = firstIdx; i < nextIdx; i++) {
int numExamples = origSizeList.get(i);
INDArray outputSubset = getSubset(examplesInBatch, examplesInBatch + numExamples, out);
examplesInBatch += numExamples;
output.add(new Tuple2<>(keyList.get(i), outputSubset));
}
currentArrayIndex += (nextIdx - firstIdx);
}
if (Nd4j.getExecutioner() instanceof GridExecutioner)
((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
return output;
}
Aggregations