Example usage of jcuda.Pointer in the Apache incubator-systemml project: class LibMatrixCUDA, method conv2d.
/**
 * Performs a 2D convolution on the GPU. Resolves the dense device pointers of the
 * input image, filter and output matrices, then delegates to the pointer-based
 * conv2d overload with the same geometry arguments.
 *
 * @param gCtx a valid GPU context
 * @param instName the invoking instruction's name, used for statistics recording
 * @param image input image as a matrix object
 * @param filter convolution filter as a matrix object
 * @param outputBlock output matrix object
 * @param N batch size
 * @param C number of channels
 * @param H height of image
 * @param W width of image
 * @param K number of filters
 * @param R height of filter
 * @param S width of filter
 * @param pad_h vertical padding
 * @param pad_w horizontal padding
 * @param stride_h stride along the height dimension
 * @param stride_w stride along the width dimension
 * @param P output height
 * @param Q output width
 * @throws DMLRuntimeException if an error occurs on the GPU
 */
public static void conv2d(GPUContext gCtx, String instName, MatrixObject image, MatrixObject filter, MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q) throws DMLRuntimeException {
	// Fetch the underlying dense device pointers for all three operands,
	// then hand off to the pointer-level implementation.
	Pointer input = getDensePointer(gCtx, image, true, instName);
	Pointer weights = getDensePointer(gCtx, filter, true, instName);
	Pointer output = getDensePointer(gCtx, outputBlock, true, instName);
	conv2d(gCtx, instName, input, weights, output, N, C, H, W, K, R, S, pad_h, pad_w, stride_h, stride_w, P, Q);
}
Example usage of jcuda.Pointer in the Apache incubator-systemml project: class LibMatrixCUDA, method batchNormalizationBackward.
/**
 * This method computes the backpropagation errors for image, scale and bias of a batch normalization layer.
 * Uses cuDNN's cudnnBatchNormalizationBackward in spatial mode (one mean/variance per channel).
 * @param gCtx a valid {@link GPUContext}
 * @param instName name of the instruction
 * @param image input image
 * @param dout input errors of shape C, H, W
 * @param scale scale (as per CuDNN) and gamma as per original paper: shape [1, C, 1, 1]
 * @param ret (output) backpropagation errors for previous layer
 * @param retScale backpropagation error for scale
 * @param retBias backpropagation error for bias
 * @param epsilon epsilon value used in the batch normalization formula
 * @throws DMLRuntimeException if error occurs
 */
public static void batchNormalizationBackward(GPUContext gCtx, String instName, MatrixObject image, MatrixObject dout, MatrixObject scale, MatrixObject ret, MatrixObject retScale, MatrixObject retBias, double epsilon) throws DMLRuntimeException {
LOG.trace("GPU : batchNormalizationBackward" + ", GPUContext=" + gCtx);
// CUDNN_BATCHNORM_SPATIAL normalizes per channel across N, H, W
int mode = cudnnBatchNormMode.CUDNN_BATCHNORM_SPATIAL;
// N = batch size (rows of the image matrix); C = channel count (columns of the scale vector)
int N = (int) image.getNumRows();
int C = (int) scale.getNumColumns();
// CHW = flattened per-example feature size; assumes image is laid out [N, C*H*W] — TODO confirm against callers
long CHW = image.getNumColumns();
// Allocate descriptors
// image, dout and ret all share the same NCHW tensor descriptor
cudnnTensorDescriptor nCHWDescriptor = allocateNCHWDescriptors(gCtx, N, C, CHW, new MatrixObject[] { image, dout }, new MatrixObject[] { ret });
cudnnTensorDescriptor scaleTensorDesc = allocateTensorDescriptor(gCtx, scale, 1, C, 1, 1);
// Get underlying dense pointer
Pointer imagePtr = getDensePointer(gCtx, image, true, instName);
Pointer doutPtr = getDensePointer(gCtx, dout, true, instName);
Pointer scalePtr = getDensePointer(gCtx, scale, true, instName);
Pointer retPtr = getDensePointer(gCtx, ret, true, instName);
Pointer retScalePtr = getDensePointer(gCtx, retScale, true, instName);
Pointer retBiasPtr = getDensePointer(gCtx, retBias, true, instName);
// ignoring resultSaveMean and resultSaveVariance as it requires state management;
// the two trailing `new Pointer()` arguments are empty (null-equivalent) pointers,
// so cuDNN recomputes the saved mean/variance instead of reusing cached values
checkStatus(cudnnBatchNormalizationBackward(getCudnnHandle(gCtx), mode, one(), zero(), one(), zero(), nCHWDescriptor, imagePtr, nCHWDescriptor, doutPtr, nCHWDescriptor, retPtr, scaleTensorDesc, scalePtr, retScalePtr, retBiasPtr, epsilon, new Pointer(), new Pointer()));
}
Example usage of jcuda.Pointer in the Apache incubator-systemml project: class LibMatrixCUDA, method performMaxpooling.
/**
 * Performs max pooling via cudnnPoolingForward on an already-resolved input device pointer.
 * Allocates the output tensor and pooling descriptors, runs the forward pass, and
 * always destroys the pooling descriptor in the finally block.
 *
 * @param gCtx a valid {@link GPUContext}
 * @param instName the invoking instruction's name, used for statistics recording
 * @param x dense device pointer to the input image
 * @param xDesc tensor descriptor describing x
 * @param outputBlock output matrix object
 * @param N batch size
 * @param C number of channels
 * @param H height of image
 * @param W width of image
 * @param K number of filters
 * @param R height of pooling window
 * @param S width of pooling window
 * @param pad_h vertical padding
 * @param pad_w horizontal padding
 * @param stride_h stride along the height dimension
 * @param stride_w stride along the width dimension
 * @param P output height
 * @param Q output width
 * @throws DMLRuntimeException if the cuDNN call fails or a CUDA error occurs
 */
public static void performMaxpooling(GPUContext gCtx, String instName, Pointer x, cudnnTensorDescriptor xDesc, MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q) throws DMLRuntimeException {
	LOG.trace("GPU : performMaxpooling" + ", GPUContext=" + gCtx);
	Pointer y = getDensePointer(gCtx, outputBlock, true, instName);
	cudnnPoolingDescriptor poolingDesc = null;
	try {
		long t1 = 0, t2 = 0;
		if (GPUStatistics.DISPLAY_STATISTICS)
			t1 = System.nanoTime();
		// Allocate descriptors for the output tensor and the pooling operation
		cudnnTensorDescriptor yDesc = allocateTensorDescriptor(gCtx, outputBlock, N, C, P, Q);
		poolingDesc = allocatePoolingDescriptor(R, S, pad_h, pad_w, stride_h, stride_w);
		if (GPUStatistics.DISPLAY_STATISTICS)
			GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_INIT, System.nanoTime() - t1);
		if (GPUStatistics.DISPLAY_STATISTICS)
			t2 = System.nanoTime();
		int status = cudnnPoolingForward(getCudnnHandle(gCtx), poolingDesc, one(), xDesc, x, zero(), yDesc, y);
		if (GPUStatistics.DISPLAY_STATISTICS)
			GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_MAXPOOLING_FORWARD_LIB, System.nanoTime() - t2);
		if (status != jcuda.jcudnn.cudnnStatus.CUDNN_STATUS_SUCCESS) {
			// FIX: corrected grammar (was "Could not executed")
			throw new DMLRuntimeException("Could not execute cudnnPoolingForward: " + jcuda.jcudnn.cudnnStatus.stringFor(status));
		}
	} catch (CudaException e) {
		// FIX: message previously said "Error in conv2d" — a copy-paste from the conv2d path
		throw new DMLRuntimeException("Error in performMaxpooling in GPUContext " + gCtx.toString() + " from Thread " + Thread.currentThread().toString(), e);
	} finally {
		long t3 = 0;
		if (GPUStatistics.DISPLAY_STATISTICS)
			t3 = System.nanoTime();
		// Always release the pooling descriptor, even on failure
		if (poolingDesc != null)
			cudnnDestroyPoolingDescriptor(poolingDesc);
		if (GPUStatistics.DISPLAY_STATISTICS)
			GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDNN_CLEANUP, System.nanoTime() - t3);
	}
}
Example usage of jcuda.Pointer in the Apache incubator-systemml project: class LibMatrixCUDA, method deviceCopy.
/**
 * Makes a deep device-to-device copy of a matrix on the GPU.
 *
 * @param ec execution context
 * @param gCtx a valid GPU context
 * @param instName the invoking instruction's name for record {@link Statistics}.
 * @param src source matrix
 * @param outputName destination variable name
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private static void deviceCopy(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject src, String outputName) throws DMLRuntimeException {
	// TODO: FIXME: Implement sparse kernel
	MatrixObject dest = ec.getMatrixObject(outputName);
	// The dense output buffer must be allocated before its pointer can be fetched
	getDenseMatrixOutputForGPUInstruction(ec, instName, outputName);
	Pointer from = getDensePointer(gCtx, src, instName);
	Pointer to = getDensePointer(gCtx, dest, instName);
	deviceCopy(instName, from, to, (int) src.getNumRows(), (int) src.getNumColumns());
}
Example usage of jcuda.Pointer in the Apache incubator-systemml project: class LibMatrixCUDA, method maxpooling.
/**
 * Performs maxpooling on GPU by exploiting cudnnPoolingForward(...).
 * Resolves the image's dense device pointer and tensor descriptor, then
 * delegates to {@code performMaxpooling}.
 * @param gCtx a valid {@link GPUContext}
 * @param instName the invoking instruction's name for record {@link Statistics}.
 * @param image image as matrix object
 * @param outputBlock output matrix
 * @param N batch size
 * @param C number of channels
 * @param H height of image
 * @param W width of image
 * @param K number of filters
 * @param R height of filter
 * @param S width of filter
 * @param pad_h vertical padding
 * @param pad_w horizontal padding
 * @param stride_h vertical stride (along the height dimension)
 * @param stride_w horizontal stride (along the width dimension)
 * @param P (H - R + 1 + 2*pad_h)/stride_h
 * @param Q (W - S + 1 + 2*pad_w)/stride_w
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
public static void maxpooling(GPUContext gCtx, String instName, MatrixObject image, MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q) throws DMLRuntimeException {
	// FIX (doc only): stride_h/stride_w javadoc was swapped — stride_h pairs with
	// pad_h and the H dimension (vertical), stride_w with pad_w and W (horizontal).
	Pointer x = getDensePointer(gCtx, image, true, instName);
	cudnnTensorDescriptor xDesc = allocateTensorDescriptor(gCtx, image, N, C, H, W);
	performMaxpooling(gCtx, instName, x, xDesc, outputBlock, N, C, H, W, K, R, S, pad_h, pad_w, stride_h, stride_w, P, Q);
}
Aggregations