Search in sources :

Example 41 with Pointer

use of jcuda.Pointer in project incubator-systemml by apache.

the class GPUObject method denseColumnMajorToRowMajor.

/**
 * Convenience method. Converts the dense matrix referenced by this object from
 * column-major to row-major layout.
 * <p>
 * A transposed copy is created via {@code transpose(...)}; afterwards the previous
 * dense device pointer is released with {@code cudaFreeHelper} and replaced by the
 * pointer to the transposed copy.
 *
 * @throws DMLRuntimeException if the dense data is not allocated, or if an error occurs
 */
public void denseColumnMajorToRowMajor() throws DMLRuntimeException {
    // The trace message previously claimed "row-major -> col-major", which is the
    // opposite of what this method does.
    LOG.trace("GPU : dense Ptr col-major -> row-major on " + this + ", GPUContext=" + getGPUContext());
    // Note the naming: n holds the row count and m the column count.
    int n = toIntExact(mat.getNumRows());
    int m = toIntExact(mat.getNumColumns());
    // Leading dimension of the input (lda) and of the output (ldc) passed to transpose.
    int lda = n;
    int ldc = m;
    if (!isAllocated()) {
        throw new DMLRuntimeException("Error in converting column major to row major : data is not allocated");
    }
    Pointer tmp = transpose(getGPUContext(), getJcudaDenseMatrixPtr(), m, n, lda, ldc);
    // Release the old buffer only after the transposed copy exists, then swap the pointer.
    cudaFreeHelper(getJcudaDenseMatrixPtr());
    setDenseMatrixCudaPointer(tmp);
}
Also used : Pointer(jcuda.Pointer) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 42 with Pointer

use of jcuda.Pointer in project incubator-systemml by apache.

the class JCudaKernels method launchKernel.

/**
 * Sets up the kernel parameters and launches the kernel using the cuLaunchKernel API.
 * <p>
 * The original documentation states that this function currently supports two
 * dimensional grids and blocks; note that the Z dimensions of {@code config} are
 * forwarded to the launch call as well.
 * <p>
 * The function handle for {@code name} is looked up in {@code kernels}; if absent it is
 * resolved from {@code module} and stored so that later launches can reuse it.
 * After the launch, {@code JCuda.cudaDeviceSynchronize()} is called.
 *
 * @param name name of the kernel
 * @param config execution configuration
 * @param arguments can be of type Pointer, long, double, float and int
 * @throws DMLRuntimeException if an argument is null or of an unsupported type,
 *         or if DMLRuntimeException occurs otherwise
 */
public void launchKernel(String name, ExecutionConfig config, Object... arguments) throws DMLRuntimeException {
    CUfunction function = kernels.get(name);
    if (function == null) {
        // caching functions into hashmap reduces the lookup overhead
        function = new CUfunction();
        checkResult(cuModuleGetFunction(function, module, name));
        // Store the resolved handle; previously it was never put into the cache,
        // so every launch repeated the module lookup.
        kernels.put(name, function);
    }
    // Setup parameters: each argument is wrapped so that the driver receives
    // a pointer to its value.
    Pointer[] kernelParams = new Pointer[arguments.length];
    for (int i = 0; i < arguments.length; i++) {
        Object argument = arguments[i];
        if (argument == null) {
            throw new DMLRuntimeException("The argument to the kernel cannot be null.");
        } else if (argument instanceof Pointer) {
            kernelParams[i] = Pointer.to((Pointer) argument);
        } else if (argument instanceof Integer) {
            kernelParams[i] = Pointer.to(new int[] { (Integer) argument });
        } else if (argument instanceof Double) {
            kernelParams[i] = Pointer.to(new double[] { (Double) argument });
        } else if (argument instanceof Long) {
            kernelParams[i] = Pointer.to(new long[] { (Long) argument });
        } else if (argument instanceof Float) {
            kernelParams[i] = Pointer.to(new float[] { (Float) argument });
        } else {
            throw new DMLRuntimeException("The argument of type " + argument.getClass() + " is not supported.");
        }
    }
    // Launches the kernel using CUDA's driver API.
    checkResult(cuLaunchKernel(function, config.gridDimX, config.gridDimY, config.gridDimZ, config.blockDimX, config.blockDimY, config.blockDimZ, config.sharedMemBytes, config.stream, Pointer.to(kernelParams), null));
    JCuda.cudaDeviceSynchronize();
}
Also used : CUfunction(jcuda.driver.CUfunction) Pointer(jcuda.Pointer) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 43 with Pointer

use of jcuda.Pointer in project incubator-systemml by apache.

the class GPUObject method allocateDenseMatrixOnDevice.

/**
 * Allocates device memory for a dense matrix with the dimensions reported by
 * {@code mat} and registers the resulting pointer as this object's dense pointer.
 * <p>
 * Preconditions (checked via assertions only): this object is not yet allocated and
 * both dimensions are positive.
 *
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
void allocateDenseMatrixOnDevice() throws DMLRuntimeException {
    LOG.trace("GPU : allocateDenseMatrixOnDevice, on " + this + ", GPUContext=" + getGPUContext());
    assert !isAllocated() : "Internal error - trying to allocated dense matrix to a GPUObject that is already allocated";
    final long numRows = mat.getNumRows();
    final long numCols = mat.getNumColumns();
    assert numRows > 0 : "Internal error - invalid number of rows when allocating dense matrix";
    assert numCols > 0 : "Internal error - invalid number of columns when allocating dense matrix;";
    // Size the buffer for numRows * numCols doubles, allocate it and record the pointer.
    final long numBytes = getDoubleSizeOf(numRows * numCols);
    setDenseMatrixCudaPointer(allocate(numBytes));
}
Also used : Pointer(jcuda.Pointer)

Example 44 with Pointer

use of jcuda.Pointer in project incubator-systemml by apache.

the class GPUObject method columnMajorDenseToRowMajorSparse.

/**
 * Convenience method to convert a dense matrix on the GPU to a CSR (compressed sparse
 * row) matrix.
 * Since the allocated matrix is temporary, bookkeeping is not updated.
 * Also note that the input dense matrix is expected to be in COLUMN MAJOR FORMAT.
 * Caller is responsible for deallocating the returned CSR memory on the GPU; the
 * temporary nnz buffers used internally are released by this method, also when an
 * exception is thrown.
 *
 * @param gCtx   a valid {@link GPUContext}
 * @param cusparseHandle handle to cusparse library
 * @param densePtr [in] dense matrix pointer on the GPU in column major
 * @param rows number of rows
 * @param cols number of columns
 * @return CSR (compressed sparse row) pointer
 * @throws DMLRuntimeException if the number of non-zeros could not be determined,
 *         or if DMLRuntimeException occurs otherwise
 */
public static CSRPointer columnMajorDenseToRowMajorSparse(GPUContext gCtx, cusparseHandle cusparseHandle, Pointer densePtr, int rows, int cols) throws DMLRuntimeException {
    cusparseMatDescr matDescr = CSRPointer.getDefaultCuSparseMatrixDescriptor();
    Pointer nnzPerRowPtr = null;
    Pointer nnzTotalDevHostPtr = null;
    try {
        // Reserve room for the per-row counts (rows ints) plus the total count (1 int).
        gCtx.ensureFreeSpace(getIntSizeOf(rows + 1));
        nnzPerRowPtr = gCtx.allocate(getIntSizeOf(rows));
        nnzTotalDevHostPtr = gCtx.allocate(getIntSizeOf(1));
        // Count the non-zeros per row and in total; the dense input uses `rows` as
        // its leading dimension.
        cusparseDnnz(cusparseHandle, cusparseDirection.CUSPARSE_DIRECTION_ROW, rows, cols, matDescr, densePtr, rows, nnzPerRowPtr, nnzTotalDevHostPtr);
        // Sentinel value: stays -1 if the copy below does not deliver a count.
        int[] nnzC = { -1 };
        long t2 = 0;
        if (DMLScript.STATISTICS)
            t2 = System.nanoTime();
        cudaMemcpy(Pointer.to(nnzC), nnzTotalDevHostPtr, getIntSizeOf(1), cudaMemcpyDeviceToHost);
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaFromDevTime.addAndGet(System.nanoTime() - t2);
            GPUStatistics.cudaFromDevCount.addAndGet(1);
        }
        if (nnzC[0] == -1) {
            throw new DMLRuntimeException("cusparseDnnz did not calculate the correct number of nnz for the dense matrix on the GPU");
        }
        LOG.trace("GPU : col-major dense size[" + rows + "," + cols + "] to row-major sparse of with nnz = " + nnzC[0] + ", GPUContext=" + gCtx);
        CSRPointer C = CSRPointer.allocateEmpty(gCtx, nnzC[0], rows);
        // Fill the CSR arrays from the dense input using the per-row counts.
        cusparseDdense2csr(cusparseHandle, rows, cols, matDescr, densePtr, rows, nnzPerRowPtr, C.val, C.rowPtr, C.colInd);
        return C;
    } finally {
        // Release the temporary buffers on every path; previously they leaked
        // whenever an exception was thrown before the end of the method.
        if (nnzPerRowPtr != null) {
            gCtx.cudaFreeHelper(nnzPerRowPtr);
        }
        if (nnzTotalDevHostPtr != null) {
            gCtx.cudaFreeHelper(nnzTotalDevHostPtr);
        }
    }
}
Also used : jcuda.jcusparse.cusparseMatDescr(jcuda.jcusparse.cusparseMatDescr) Pointer(jcuda.Pointer) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

Pointer (jcuda.Pointer)44 CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer)33 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)26 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)12 jcuda.jcudnn.cudnnTensorDescriptor (jcuda.jcudnn.cudnnTensorDescriptor)11 CudaException (jcuda.CudaException)6 jcuda.jcudnn.cudnnConvolutionDescriptor (jcuda.jcudnn.cudnnConvolutionDescriptor)3 jcuda.jcudnn.cudnnFilterDescriptor (jcuda.jcudnn.cudnnFilterDescriptor)3 HashMap (java.util.HashMap)2 jcuda.jcudnn.cudnnPoolingDescriptor (jcuda.jcudnn.cudnnPoolingDescriptor)2 GPUObject (org.apache.sysml.runtime.instructions.gpu.context.GPUObject)2 LeftScalarOperator (org.apache.sysml.runtime.matrix.operators.LeftScalarOperator)2 RightScalarOperator (org.apache.sysml.runtime.matrix.operators.RightScalarOperator)2 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 CUfunction (jcuda.driver.CUfunction)1 jcuda.jcudnn.cudnnActivationDescriptor (jcuda.jcudnn.cudnnActivationDescriptor)1 jcuda.jcusparse.cusparseMatDescr (jcuda.jcusparse.cusparseMatDescr)1 Builtin (org.apache.sysml.runtime.functionobjects.Builtin)1 CM (org.apache.sysml.runtime.functionobjects.CM)1