Use of org.apache.sysml.runtime.matrix.data.SparseBlockCOO in the project incubator-systemml by Apache.
The method copyFromHostToDevice of the class GPUObject.
/**
 * Copies the host-side {@code MatrixBlock} of {@code mat} to the device.
 * <p>
 * Sparse format: the block is uploaded in CSR layout. A CSR block is used as-is; COO and MCSR
 * blocks are first converted to CSR on the host, and the conversion time/count is recorded when
 * statistics are enabled. If the block has no sparse data and zero non-zeros, only the device
 * allocation is performed and nothing is copied.
 * <p>
 * Dense format: the dense array is copied with a single {@code cudaMemcpy}; when the block
 * reports zero non-zeros a freshly allocated (zero-filled) host array is copied instead.
 * <p>
 * The read acquired on {@code mat} is released even when the copy fails.
 *
 * @throws DMLRuntimeException if a sparse-format block with non-zeros has no sparse block, if the
 *         sparse block is of an unsupported type, if a dense-format block carries a sparse block,
 *         or if a dense-format block with non-zeros is not allocated
 */
void copyFromHostToDevice() throws DMLRuntimeException {
    LOG.trace("GPU : copyFromHostToDevice, on " + this + ", GPUContext=" + getGPUContext());
    long start = 0;
    if (DMLScript.STATISTICS)
        start = System.nanoTime();

    MatrixBlock tmp = mat.acquireRead();
    try {
        if (tmp.isInSparseFormat()) {
            int[] rowPtr = null;
            int[] colInd = null;
            double[] values = null;

            tmp.recomputeNonZeros();
            long nnz = tmp.getNonZeros();
            mat.getMatrixCharacteristics().setNonZeros(nnz);

            SparseBlock block = tmp.getSparseBlock();
            boolean copyToDevice = true;
            if (block == null) {
                if (nnz != 0)
                    throw new DMLRuntimeException("Expected CP sparse block to be not null.");
                // Empty block: no host arrays are needed, only the device allocation below.
                // To reproduce this case, see
                // org.apache.sysml.test.integration.applications.dml.ID3DMLTest
                copyToDevice = false;
            } else {
                // CSR is the preferred format for cuSparse GEMM
                // Converts MCSR and COO to CSR
                SparseBlockCSR csrBlock = null;
                long t0 = 0;
                if (block instanceof SparseBlockCSR) {
                    csrBlock = (SparseBlockCSR) block;
                } else if (block instanceof SparseBlockCOO) {
                    // TODO - should we do this on the GPU using cusparse<t>coo2csr() ?
                    if (DMLScript.STATISTICS)
                        t0 = System.nanoTime();
                    SparseBlockCOO cooBlock = (SparseBlockCOO) block;
                    csrBlock = new SparseBlockCSR(toIntExact(mat.getNumRows()), cooBlock.rowIndexes(), cooBlock.indexes(), cooBlock.values());
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionTime.addAndGet(System.nanoTime() - t0);
                        GPUStatistics.cudaSparseConversionCount.incrementAndGet();
                    }
                } else if (block instanceof SparseBlockMCSR) {
                    if (DMLScript.STATISTICS)
                        t0 = System.nanoTime();
                    SparseBlockMCSR mcsrBlock = (SparseBlockMCSR) block;
                    csrBlock = new SparseBlockCSR(mcsrBlock.getRows(), toIntExact(mcsrBlock.size()));
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionTime.addAndGet(System.nanoTime() - t0);
                        GPUStatistics.cudaSparseConversionCount.incrementAndGet();
                    }
                } else {
                    throw new DMLRuntimeException("Unsupported sparse matrix format for CUDA operations");
                }
                rowPtr = csrBlock.rowPointers();
                colInd = csrBlock.indexes();
                values = csrBlock.values();
            }

            allocateSparseMatrixOnDevice();
            if (copyToDevice) {
                CSRPointer.copyToDevice(getJcudaSparseMatrixPtr(), tmp.getNumRows(), nnz, rowPtr, colInd, values);
            }
        } else {
            double[] data = tmp.getDenseBlock();
            if (data == null && tmp.getSparseBlock() != null)
                throw new DMLRuntimeException("Incorrect sparsity calculation");
            else if (data == null && tmp.getNonZeros() != 0)
                throw new DMLRuntimeException("MatrixBlock is not allocated");
            else if (tmp.getNonZeros() == 0)
                // multiplyExact fails loudly instead of silently wrapping the int cell count
                data = new double[Math.multiplyExact(tmp.getNumRows(), tmp.getNumColumns())];

            // Copy dense block
            allocateDenseMatrixOnDevice();
            cudaMemcpy(getJcudaDenseMatrixPtr(), Pointer.to(data), getDoubleSizeOf(mat.getNumRows() * mat.getNumColumns()), cudaMemcpyHostToDevice);
        }
    } finally {
        // Always pair acquireRead() with release(), including on the error paths above.
        mat.release();
    }

    if (DMLScript.STATISTICS) {
        GPUStatistics.cudaToDevTime.addAndGet(System.nanoTime() - start);
        GPUStatistics.cudaToDevCount.addAndGet(1);
    }
}
Aggregations