Use of jcuda.driver.CUfunction in the project incubator-systemml by Apache.
The class JCudaKernels, method launchKernel.
/**
 * Sets up the kernel parameters and launches the kernel using the cuLaunchKernel API.
 * The X, Y and Z grid and block dimensions, the shared memory size and the stream are
 * all taken from {@code config} and forwarded unchanged.
 *
 * The function handle for {@code name} is looked up in the {@code kernels} map first;
 * on a miss it is resolved from the loaded module and stored in the map so that later
 * launches of the same kernel skip the module lookup.
 *
 * After the launch, {@code JCuda.cudaDeviceSynchronize()} is called before returning.
 *
 * @param name name of the kernel
 * @param config execution configuration
 * @param arguments can be of type Pointer, long, double, float and int
 * @throws DMLRuntimeException if an argument is null or of an unsupported type
 *         (and presumably if checkResult rejects a CUDA driver return code)
 */
public void launchKernel(String name, ExecutionConfig config, Object... arguments) throws DMLRuntimeException {
	CUfunction function = kernels.get(name);
	if (function == null) {
		// caching functions into hashmap reduces the lookup overhead
		function = new CUfunction();
		checkResult(cuModuleGetFunction(function, module, name));
		// Store the handle only after the lookup succeeded, so a failed lookup is never cached.
		// NOTE(review): assumes kernels is a Map<String, CUfunction> that is not accessed
		// concurrently from several threads -- confirm against the field declaration.
		kernels.put(name, function);
	}

	// Setup parameters: cuLaunchKernel expects a pointer to an array of pointers,
	// each pointing at the storage that holds one argument value.
	Pointer[] kernelParams = new Pointer[arguments.length];
	for (int i = 0; i < arguments.length; i++) {
		if (arguments[i] == null) {
			throw new DMLRuntimeException("The argument to the kernel cannot be null.");
		} else if (arguments[i] instanceof Pointer) {
			kernelParams[i] = Pointer.to((Pointer) arguments[i]);
		} else if (arguments[i] instanceof Integer) {
			kernelParams[i] = Pointer.to(new int[] { (Integer) arguments[i] });
		} else if (arguments[i] instanceof Double) {
			kernelParams[i] = Pointer.to(new double[] { (Double) arguments[i] });
		} else if (arguments[i] instanceof Long) {
			kernelParams[i] = Pointer.to(new long[] { (Long) arguments[i] });
		} else if (arguments[i] instanceof Float) {
			kernelParams[i] = Pointer.to(new float[] { (Float) arguments[i] });
		} else {
			throw new DMLRuntimeException("The argument of type " + arguments[i].getClass() + " is not supported.");
		}
	}

	// Launches the kernel using CUDA's driver API.
	checkResult(cuLaunchKernel(function,
			config.gridDimX, config.gridDimY, config.gridDimZ,
			config.blockDimX, config.blockDimY, config.blockDimZ,
			config.sharedMemBytes, config.stream,
			Pointer.to(kernelParams), null));
	// NOTE(review): the return code of cudaDeviceSynchronize() is ignored here, as in the original.
	JCuda.cudaDeviceSynchronize();
}
Aggregations