Search in sources :

Example 6 with CSRPointer

use of org.apache.sysml.runtime.instructions.gpu.context.CSRPointer in project incubator-systemml by apache.

the class LibMatrixCUDA method sparseMatrixVectorMult.

/**
	 * Does a sparse matrix-vector multiply.
	 * C = op(A) x B, A is a sparse matrix, B is a sparse vector with numCols = 1.
	 * <p>
	 * B is first converted to a temporary dense vector on the device, the work is then
	 * delegated to {@code sparseMatrixDenseVectorMult}, and the temporary is released
	 * before returning.
	 * @param gCtx   a valid {@link GPUContext}
	 * @param instName      the invoking instruction's name for record {@link Statistics}.
	 * @param output        allocated output object C to which the GPU output matrix will be attached
	 * @param isATranposed  if A is to be transposed or not (the op in op(A))
	 * @param m             number of rows in A (not op(A))
	 * @param n             number of cols in A (not op(A)); not read by this method
	 * @param k             number of rows in B, (cols in B is assumed to be 1)
	 * @param A             left sparse matrix on GPU
	 * @param B             right sparse vector on GPU
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
private static void sparseMatrixVectorMult(GPUContext gCtx, String instName, MatrixObject output, boolean isATranposed, int m, int n, int k, CSRPointer A, CSRPointer B) throws DMLRuntimeException {
    long t0 = 0;
    if (GPUStatistics.DISPLAY_STATISTICS)
        t0 = System.nanoTime();
    // Materialize B as a dense k x 1 vector; this allocates a temporary device buffer.
    Pointer BDenseVector = B.toColumnMajorDenseMatrix(getCusparseHandle(gCtx), getCublasHandle(gCtx), k, 1);
    if (GPUStatistics.DISPLAY_STATISTICS)
        GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_TO_DENSE, System.nanoTime() - t0);
    // NOTE(review): sparseDenseMatmult passes A's (rows, cols) to this helper, whereas (m, k)
    // is passed here; the two agree only when k == n, i.e. when A is not transposed — confirm.
    sparseMatrixDenseVectorMult(gCtx, instName, output, A, BDenseVector, isATranposed, m, k);
    // Release the temporary dense copy of B; it was previously leaked on every call.
    gCtx.cudaFreeHelper(instName, BDenseVector);
}
Also used : CSRPointer(org.apache.sysml.runtime.instructions.gpu.context.CSRPointer) Pointer(jcuda.Pointer)

Example 7 with CSRPointer

use of org.apache.sysml.runtime.instructions.gpu.context.CSRPointer in project incubator-systemml by apache.

the class LibMatrixCUDA method sparseDenseMatmult.

/**
	 * C = op(A) * op(B) where A is sparse and B is dense.
	 * <ul>
	 * <li>If {@code n == 1}, the product is handed to {@code sparseMatrixDenseVectorMult}.</li>
	 * <li>Otherwise, if A is ultrasparse, B is converted to a sparse (CSR) matrix and
	 * {@code sparseSparseMatmult} is invoked.</li>
	 * <li>Otherwise A is converted to a dense matrix and {@code denseDenseMatmult} is invoked.</li>
	 * </ul>
	 * Temporary device buffers created for the conversions are freed before returning.
	 * @param gCtx   a valid {@link GPUContext}
	 * @param instName the invoking instruction's name for record {@link Statistics}.
	 * @param output the output matrix object
	 * @param left matrix A
	 * @param right matrix B
	 * @param isLeftTransposed if A needs to be transposed
	 * @param isRightTransposed if B needs to be transposed
	 * @param m used with {@code k} for the ultra-sparsity test on A and forwarded to
	 *          {@code sparseSparseMatmult}; presumably the number of rows of op(A) - TODO confirm
	 * @param n {@code n == 1} selects the matrix-vector path; presumably the number of
	 *          columns of op(B) - TODO confirm
	 * @param k used with {@code m} for the ultra-sparsity test on A and forwarded to
	 *          {@code sparseSparseMatmult}; presumably the shared inner dimension - TODO confirm
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
private static void sparseDenseMatmult(GPUContext gCtx, String instName, MatrixObject output, MatrixObject left, MatrixObject right, boolean isLeftTransposed, boolean isRightTransposed, int m, int n, int k) throws DMLRuntimeException {
    CSRPointer A = left.getGPUObject(gCtx).getJcudaSparseMatrixPtr();
    Pointer BDense = getDensePointer(gCtx, right, instName);
    if (n == 1) {
        // Sparse Matrix - Dense Vector multiply
        sparseMatrixDenseVectorMult(gCtx, instName, output, A, BDense, isLeftTransposed, (int) left.getNumRows(), (int) left.getNumColumns());
        return;
    }

    // t0 feeds both the global (DMLScript.STATISTICS) and the per-instruction
    // (GPUStatistics.DISPLAY_STATISTICS) timers, so it must be captured when either is on.
    final boolean needsT0 = DMLScript.STATISTICS || GPUStatistics.DISPLAY_STATISTICS;
    long t0 = 0, t1 = 0, t2 = 0;

    // Sparse Matrix Dense Matrix multiply
    if (A.isUltraSparse(m, k)) {
        LOG.trace(" GPU : Convert sp M %*% d M --> sp M %*% sp M" + ", GPUContext=" + gCtx);
        // Convert right to CSR and do cuSparse matmul
        int rowsB = (int) right.getNumRows();
        int colsB = (int) right.getNumColumns();
        if (needsT0)
            t0 = System.nanoTime();
        Pointer BT = GPUObject.transpose(gCtx, BDense, rowsB, colsB, colsB, rowsB);
        if (GPUStatistics.DISPLAY_STATISTICS) {
            GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_TRANSPOSE_LIB, System.nanoTime() - t0);
            t1 = System.nanoTime();
        }
        CSRPointer B = GPUObject.columnMajorDenseToRowMajorSparse(gCtx, getCusparseHandle(gCtx), BT, rowsB, colsB);
        if (GPUStatistics.DISPLAY_STATISTICS)
            GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DENSE_TO_SPARSE, System.nanoTime() - t1);
        if (DMLScript.STATISTICS) {
            // Measured from t0, so the global timer covers the transpose plus the CSR conversion.
            GPUStatistics.cudaDenseToSparseTime.getAndAdd(System.nanoTime() - t0);
            GPUStatistics.cudaDenseToSparseCount.getAndAdd(1);
        }
        sparseSparseMatmult(gCtx, instName, A, B, output, isLeftTransposed, isRightTransposed, m, n, k);
        if (GPUStatistics.DISPLAY_STATISTICS)
            t2 = System.nanoTime();
        // Release both temporaries: the CSR copy of B and the transposed dense buffer.
        B.deallocate();
        gCtx.cudaFreeHelper(BT);
        if (GPUStatistics.DISPLAY_STATISTICS)
            GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_CUDA_FREE, System.nanoTime() - t2, 2);
    } else {
        LOG.trace(" GPU : Convert sp M %*% d M --> d M %*% d M" + ", GPUContext=" + gCtx);
        // Convert left to dense and do a dense-dense matmul. The dense copy of A is produced
        // in column-major layout and is treated as A transposed below (hence the name); the
        // arguments to denseDenseMatmult accommodate for this: A's dimensions are passed
        // swapped and the left-transpose flag is negated.
        if (needsT0)
            t0 = System.nanoTime();
        Pointer ADenseTransposed = A.toColumnMajorDenseMatrix(getCusparseHandle(gCtx), getCublasHandle(gCtx), (int) left.getNumRows(), (int) left.getNumColumns());
        if (GPUStatistics.DISPLAY_STATISTICS)
            GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_SPARSE_TO_DENSE, System.nanoTime() - t0);
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaSparseToDenseTime.getAndAdd(System.nanoTime() - t0);
            // One conversion happened: bump the counter by 1, not by the elapsed time.
            GPUStatistics.cudaSparseToDenseCount.getAndAdd(1);
        }
        if (GPUStatistics.DISPLAY_STATISTICS)
            t1 = System.nanoTime();
        // To allocate the dense matrix
        boolean allocated = output.getGPUObject(gCtx).acquireDeviceModifyDense();
        if (allocated && GPUStatistics.DISPLAY_STATISTICS)
            GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_ALLOCATE_DENSE_OUTPUT, System.nanoTime() - t1);
        Pointer C = getDensePointer(gCtx, output, instName);
        denseDenseMatmult(gCtx, instName, C, (int) left.getNumColumns(), (int) left.getNumRows(), (int) right.getNumRows(), (int) right.getNumColumns(), !isLeftTransposed, isRightTransposed, ADenseTransposed, BDense);
        // Release the temporary dense copy of A.
        gCtx.cudaFreeHelper(instName, ADenseTransposed);
    }
}
Also used : CSRPointer(org.apache.sysml.runtime.instructions.gpu.context.CSRPointer) Pointer(jcuda.Pointer) CSRPointer(org.apache.sysml.runtime.instructions.gpu.context.CSRPointer)

Aggregations

CSRPointer (org.apache.sysml.runtime.instructions.gpu.context.CSRPointer)7 Pointer (jcuda.Pointer)5 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)2 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)1