Search in sources :

Example 11 with SparseBlockCOO

Use of org.apache.sysml.runtime.matrix.data.SparseBlockCOO in the Apache project incubator-systemml.

The method copyFromHostToDevice of the class GPUObject.

void copyFromHostToDevice() throws DMLRuntimeException {
    LOG.trace("GPU : copyFromHostToDevice, on " + this + ", GPUContext=" + getGPUContext());
    long start = 0;
    if (DMLScript.STATISTICS)
        start = System.nanoTime();
    MatrixBlock tmp = mat.acquireRead();
    if (tmp.isInSparseFormat()) {
        int[] rowPtr = null;
        int[] colInd = null;
        double[] values = null;
        tmp.recomputeNonZeros();
        long nnz = tmp.getNonZeros();
        mat.getMatrixCharacteristics().setNonZeros(nnz);
        SparseBlock block = tmp.getSparseBlock();
        boolean copyToDevice = true;
        if (block == null && tmp.getNonZeros() == 0) {
            //				// Allocate empty block --> not necessary
            //				// To reproduce this, see org.apache.sysml.test.integration.applications.dml.ID3DMLTest
            //				rowPtr = new int[0];
            //				colInd = new int[0];
            //				values = new double[0];
            copyToDevice = false;
        } else if (block == null && tmp.getNonZeros() != 0) {
            throw new DMLRuntimeException("Expected CP sparse block to be not null.");
        } else {
            // CSR is the preferred format for cuSparse GEMM
            // Converts MCSR and COO to CSR
            SparseBlockCSR csrBlock = null;
            long t0 = 0;
            if (block instanceof SparseBlockCSR) {
                csrBlock = (SparseBlockCSR) block;
            } else if (block instanceof SparseBlockCOO) {
                // TODO - should we do this on the GPU using cusparse<t>coo2csr() ?
                if (DMLScript.STATISTICS)
                    t0 = System.nanoTime();
                SparseBlockCOO cooBlock = (SparseBlockCOO) block;
                csrBlock = new SparseBlockCSR(toIntExact(mat.getNumRows()), cooBlock.rowIndexes(), cooBlock.indexes(), cooBlock.values());
                if (DMLScript.STATISTICS)
                    GPUStatistics.cudaSparseConversionTime.addAndGet(System.nanoTime() - t0);
                if (DMLScript.STATISTICS)
                    GPUStatistics.cudaSparseConversionCount.incrementAndGet();
            } else if (block instanceof SparseBlockMCSR) {
                if (DMLScript.STATISTICS)
                    t0 = System.nanoTime();
                SparseBlockMCSR mcsrBlock = (SparseBlockMCSR) block;
                csrBlock = new SparseBlockCSR(mcsrBlock.getRows(), toIntExact(mcsrBlock.size()));
                if (DMLScript.STATISTICS)
                    GPUStatistics.cudaSparseConversionTime.addAndGet(System.nanoTime() - t0);
                if (DMLScript.STATISTICS)
                    GPUStatistics.cudaSparseConversionCount.incrementAndGet();
            } else {
                throw new DMLRuntimeException("Unsupported sparse matrix format for CUDA operations");
            }
            rowPtr = csrBlock.rowPointers();
            colInd = csrBlock.indexes();
            values = csrBlock.values();
        }
        allocateSparseMatrixOnDevice();
        if (copyToDevice) {
            CSRPointer.copyToDevice(getJcudaSparseMatrixPtr(), tmp.getNumRows(), tmp.getNonZeros(), rowPtr, colInd, values);
        }
    } else {
        double[] data = tmp.getDenseBlock();
        if (data == null && tmp.getSparseBlock() != null)
            throw new DMLRuntimeException("Incorrect sparsity calculation");
        else if (data == null && tmp.getNonZeros() != 0)
            throw new DMLRuntimeException("MatrixBlock is not allocated");
        else if (tmp.getNonZeros() == 0)
            data = new double[tmp.getNumRows() * tmp.getNumColumns()];
        // Copy dense block
        allocateDenseMatrixOnDevice();
        cudaMemcpy(getJcudaDenseMatrixPtr(), Pointer.to(data), getDoubleSizeOf(mat.getNumRows() * mat.getNumColumns()), cudaMemcpyHostToDevice);
    }
    mat.release();
    if (DMLScript.STATISTICS)
        GPUStatistics.cudaToDevTime.addAndGet(System.nanoTime() - start);
    if (DMLScript.STATISTICS)
        GPUStatistics.cudaToDevCount.addAndGet(1);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SparseBlockMCSR(org.apache.sysml.runtime.matrix.data.SparseBlockMCSR) SparseBlockCSR(org.apache.sysml.runtime.matrix.data.SparseBlockCSR) SparseBlock(org.apache.sysml.runtime.matrix.data.SparseBlock) SparseBlockCOO(org.apache.sysml.runtime.matrix.data.SparseBlockCOO) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

SparseBlock (org.apache.sysml.runtime.matrix.data.SparseBlock)11 SparseBlockCOO (org.apache.sysml.runtime.matrix.data.SparseBlockCOO)11 SparseBlockCSR (org.apache.sysml.runtime.matrix.data.SparseBlockCSR)11 SparseBlockMCSR (org.apache.sysml.runtime.matrix.data.SparseBlockMCSR)11 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)10 IJV (org.apache.sysml.runtime.matrix.data.IJV)3 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)2 LongLongDoubleHashMap (org.apache.sysml.runtime.util.LongLongDoubleHashMap)2 ADoubleEntry (org.apache.sysml.runtime.util.LongLongDoubleHashMap.ADoubleEntry)2 Iterator (java.util.Iterator)1