Example usage of org.apache.sysml.runtime.matrix.data.SparseBlockCSR in the apache/incubator-systemml project.
From class SparseBlockIterator, method runSparseBlockIteratorTest:
/**
 * Runs a sparse-block iterator test for the given block type and sparsity:
 * generates a random matrix, converts it into the requested sparse block
 * format, and validates size(), isEmpty(row), and the (optionally partial)
 * cell iterator against the dense reference data.
 *
 * @param btype    sparse block type under test (MCSR, CSR, or COO)
 * @param sparsity sparsity of the generated input matrix
 * @param partial  if true, iterate only rows [rlPartial, rows) instead of all rows
 */
private void runSparseBlockIteratorTest(SparseBlock.Type btype, double sparsity, boolean partial) {
	try {
		// data generation (fixed seed for reproducibility)
		double[][] A = getRandomMatrix(rows, cols, -10, 10, sparsity, 8765432);

		// init sparse block in the requested format from the default representation
		MatrixBlock mbtmp = DataConverter.convertToMatrixBlock(A);
		SparseBlock srtmp = mbtmp.getSparseBlock();
		SparseBlock sblock = null;
		switch (btype) {
			case MCSR:
				sblock = new SparseBlockMCSR(srtmp);
				break;
			case CSR:
				sblock = new SparseBlockCSR(srtmp);
				break;
			case COO:
				sblock = new SparseBlockCOO(srtmp);
				break;
			default:
				// fail fast instead of a NullPointerException further below
				throw new RuntimeException("Unsupported sparse block type: " + btype);
		}

		// compute reference per-row and total non-zero counts over the tested row range
		int[] rnnz = new int[rows];
		int nnz = 0;
		int rl = partial ? rlPartial : 0;
		for (int i = rl; i < rows; i++) {
			for (int j = 0; j < cols; j++)
				rnnz[i] += (A[i][j] != 0) ? 1 : 0;
			nnz += rnnz[i];
		}

		// check for correct number of non-zeros (only meaningful for full range)
		if (!partial && nnz != sblock.size())
			Assert.fail("Wrong number of non-zeros: " + sblock.size() + ", expected: " + nnz);

		// check correct isEmpty return per row
		for (int i = rl; i < rows; i++)
			if (sblock.isEmpty(i) != (rnnz[i] == 0))
				Assert.fail("Wrong isEmpty(row) result for row nnz: " + rnnz[i]);

		// check correct values and count via (full or partial-range) iterator
		Iterator<IJV> iter = !partial ? sblock.getIterator() : sblock.getIterator(rl, rows);
		int count = 0;
		while (iter.hasNext()) {
			IJV cell = iter.next();
			if (cell.getV() != A[cell.getI()][cell.getJ()])
				Assert.fail("Wrong value returned by iterator: " + cell.getV() + ", expected: " + A[cell.getI()][cell.getJ()]);
			count++;
		}
		if (count != nnz)
			Assert.fail("Wrong number of values returned by iterator: " + count + ", expected: " + nnz);
	}
	catch (Exception ex) {
		ex.printStackTrace();
		throw new RuntimeException(ex);
	}
}
Example usage of org.apache.sysml.runtime.matrix.data.SparseBlockCSR in the apache/incubator-systemml project.
From class GPUObject, method copyFromDeviceToHost:
/**
 * Copies this object's matrix data from device (GPU) memory back into the
 * host-side matrix object {@code mat}, as a dense block or a CSR sparse
 * block depending on which device pointer is allocated. Clears the dirty
 * flag on success.
 *
 * @param instName   name of the invoking instruction (used for statistics)
 * @param isEviction whether this copy is triggered by GPU memory eviction
 * @throws DMLRuntimeException if both or neither device pointer is allocated,
 *         or if the host/device sparsity states are inconsistent
 */
protected void copyFromDeviceToHost(String instName, boolean isEviction) {
if (LOG.isTraceEnabled()) {
LOG.trace("GPU : copyFromDeviceToHost, on " + this + ", GPUContext=" + getGPUContext());
}
// invariant: exactly one of the dense/sparse device pointers may be allocated
if (getJcudaDenseMatrixPtr() != null && getJcudaSparseMatrixPtr() != null) {
throw new DMLRuntimeException("Invalid state : JCuda dense/sparse pointer are both allocated");
}
if (getJcudaDenseMatrixPtr() != null) {
// dense path: allocate a host dense block and copy the device buffer into it
long start = 0;
if (DMLScript.STATISTICS)
start = System.nanoTime();
MatrixBlock tmp = new MatrixBlock(toIntExact(mat.getNumRows()), toIntExact(mat.getNumColumns()), false);
tmp.allocateDenseBlock();
LibMatrixCUDA.cudaSupportFunctions.deviceToHost(getGPUContext(), getJcudaDenseMatrixPtr(), tmp.getDenseBlockValues(), instName, isEviction);
// nnz is unknown after a raw device copy; recompute before handing off
tmp.recomputeNonZeros();
mat.acquireModify(tmp);
mat.release();
if (DMLScript.STATISTICS)
GPUStatistics.cudaFromDevTime.add(System.nanoTime() - start);
if (DMLScript.STATISTICS)
GPUStatistics.cudaFromDevCount.add(1);
} else if (getJcudaSparseMatrixPtr() != null) {
// sparse path: host metadata must agree that this block is sparse
if (!LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat))
throw new DMLRuntimeException("Block not in sparse format on host yet the device sparse matrix pointer is not null");
if (this.isSparseAndEmpty()) {
// Empty Block: no device copy needed, install an empty host block
MatrixBlock tmp = new MatrixBlock((int) mat.getNumRows(), (int) mat.getNumColumns(), 0l);
mat.acquireModify(tmp);
mat.release();
} else {
long start = 0;
if (DMLScript.STATISTICS)
start = System.nanoTime();
int rows = toIntExact(mat.getNumRows());
int cols = toIntExact(mat.getNumColumns());
int nnz = toIntExact(getJcudaSparseMatrixPtr().nnz);
// copy the CSR value array, then the row-pointer/column-index arrays
double[] values = new double[nnz];
LibMatrixCUDA.cudaSupportFunctions.deviceToHost(getGPUContext(), getJcudaSparseMatrixPtr().val, values, instName, isEviction);
int[] rowPtr = new int[rows + 1];
int[] colInd = new int[nnz];
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
CSRPointer.copyPtrToHost(getJcudaSparseMatrixPtr(), rows, nnz, rowPtr, colInd);
// NOTE(review): the t0 segment is also contained in the overall (start-based)
// measurement added below, so cudaFromDevTime double-counts the pointer copy
// and cudaFromDevCount records 3 + 1 transfers — confirm this is intended.
if (DMLScript.STATISTICS)
GPUStatistics.cudaFromDevTime.add(System.nanoTime() - t0);
if (DMLScript.STATISTICS)
GPUStatistics.cudaFromDevCount.add(3);
// assemble the host-side CSR block directly from the copied arrays (no re-conversion)
SparseBlockCSR sparseBlock = new SparseBlockCSR(rowPtr, colInd, values, nnz);
MatrixBlock tmp = new MatrixBlock(rows, cols, nnz, sparseBlock);
mat.acquireModify(tmp);
mat.release();
if (DMLScript.STATISTICS)
GPUStatistics.cudaFromDevTime.add(System.nanoTime() - start);
if (DMLScript.STATISTICS)
GPUStatistics.cudaFromDevCount.add(1);
}
} else {
throw new DMLRuntimeException("Cannot copy from device to host as JCuda dense/sparse pointer is not allocated");
}
// host copy is now up to date with the device
dirty = false;
}
Example usage of org.apache.sysml.runtime.matrix.data.SparseBlockCSR in the apache/incubator-systemml project.
From class GPUObject, method copyFromHostToDevice (overload with opcode, fine-grained statistics):
/**
 * Copies the host-side matrix block of {@code mat} to device (GPU) memory,
 * converting sparse blocks to CSR first (CSR is the preferred format for
 * cuSparse GEMM). Dense blocks are copied as-is; empty blocks are memset
 * to zero on the device without a host-to-device transfer.
 *
 * @param opcode instruction opcode, used to attribute fine-grained statistics
 */
void copyFromHostToDevice(String opcode) {
if (LOG.isTraceEnabled()) {
LOG.trace("GPU : copyFromHostToDevice, on " + this + ", GPUContext=" + getGPUContext());
}
long start = 0;
if (DMLScript.STATISTICS)
start = System.nanoTime();
long acqrTime = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
// pin the host matrix block for reading for the duration of the copy
MatrixBlock tmp = mat.acquireRead();
if (DMLScript.FINEGRAINED_STATISTICS) {
if (tmp.isInSparseFormat())
GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_GET_SPARSE_MB, System.nanoTime() - acqrTime);
else
GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_GET_DENSE_MB, System.nanoTime() - acqrTime);
}
if (tmp.isInSparseFormat()) {
// sparse path: obtain CSR arrays (rowPtr/colInd/values) to ship to the device
int[] rowPtr = null;
int[] colInd = null;
double[] values = null;
// Only recompute non-zero if unknown, else this will incur huge penalty !!
if (tmp.getNonZeros() < 0) {
tmp.recomputeNonZeros();
}
long nnz = tmp.getNonZeros();
mat.getMatrixCharacteristics().setNonZeros(nnz);
SparseBlock block = tmp.getSparseBlock();
boolean copyToDevice = true;
if (block == null && tmp.getNonZeros() == 0) {
// // Allocate empty block --> not necessary
// // To reproduce this, see org.apache.sysml.test.integration.applications.dml.ID3DMLTest
// rowPtr = new int[0];
// colInd = new int[0];
// values = new double[0];
copyToDevice = false;
} else if (block == null && tmp.getNonZeros() != 0) {
// inconsistent host state: nnz > 0 but no backing sparse block
throw new DMLRuntimeException("Expected CP sparse block to be not null.");
} else {
// CSR is the preferred format for cuSparse GEMM
// Converts MCSR and COO to CSR
SparseBlockCSR csrBlock = null;
long t0 = 0;
if (block instanceof SparseBlockCSR) {
// already CSR: use the arrays directly, no conversion cost
csrBlock = (SparseBlockCSR) block;
} else if (block instanceof SparseBlockCOO) {
// TODO - should we do this on the GPU using cusparse<t>coo2csr() ?
if (DMLScript.STATISTICS)
t0 = System.nanoTime();
SparseBlockCOO cooBlock = (SparseBlockCOO) block;
csrBlock = new SparseBlockCSR(toIntExact(mat.getNumRows()), cooBlock.rowIndexes(), cooBlock.indexes(), cooBlock.values());
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionTime.add(System.nanoTime() - t0);
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionCount.increment();
} else if (block instanceof SparseBlockMCSR) {
if (DMLScript.STATISTICS)
t0 = System.nanoTime();
SparseBlockMCSR mcsrBlock = (SparseBlockMCSR) block;
// presumably the (SparseRow[], nnz) constructor, which copies the MCSR
// rows into CSR form — TODO confirm against SparseBlockCSR overloads
csrBlock = new SparseBlockCSR(mcsrBlock.getRows(), toIntExact(mcsrBlock.size()));
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionTime.add(System.nanoTime() - t0);
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionCount.increment();
} else {
throw new DMLRuntimeException("Unsupported sparse matrix format for CUDA operations");
}
rowPtr = csrBlock.rowPointers();
colInd = csrBlock.indexes();
values = csrBlock.values();
}
allocateSparseMatrixOnDevice();
if (copyToDevice) {
long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
CSRPointer.copyToDevice(getGPUContext(), getJcudaSparseMatrixPtr(), tmp.getNumRows(), tmp.getNonZeros(), rowPtr, colInd, values);
if (DMLScript.FINEGRAINED_STATISTICS)
GPUStatistics.maintainCPMiscTimes(opcode, GPUInstruction.MISC_TIMER_HOST_TO_DEVICE, System.nanoTime() - t1);
}
} else {
// dense path
double[] data = tmp.getDenseBlockValues();
if (data == null && tmp.getSparseBlock() != null)
throw new DMLRuntimeException("Incorrect sparsity calculation");
else if (data == null && tmp.getNonZeros() != 0)
throw new DMLRuntimeException("MatrixBlock is not allocated");
allocateDenseMatrixOnDevice();
if (tmp.getNonZeros() == 0) {
// Minor optimization: No need to allocate empty error for CPU
// data = new double[tmp.getNumRows() * tmp.getNumColumns()];
// zero-fill directly on the device instead of transferring a zero buffer
long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
cudaMemset(getJcudaDenseMatrixPtr(), 0, getDatatypeSizeOf(mat.getNumRows() * mat.getNumColumns()));
if (DMLScript.FINEGRAINED_STATISTICS)
GPUStatistics.maintainCPMiscTimes(opcode, GPUInstruction.MISC_TIMER_SET_ZERO, System.nanoTime() - t1);
} else {
// Copy dense block
// H2D now only measures the time taken to do
LibMatrixCUDA.cudaSupportFunctions.hostToDevice(getGPUContext(), data, getJcudaDenseMatrixPtr(), opcode);
}
}
// release the read lock acquired above before recording overall statistics
mat.release();
if (DMLScript.STATISTICS)
GPUStatistics.cudaToDevTime.add(System.nanoTime() - start);
if (DMLScript.STATISTICS)
GPUStatistics.cudaToDevCount.add(1);
}
Example usage of org.apache.sysml.runtime.matrix.data.SparseBlockCSR in the apache/incubator-systemml project.
From class GPUObject, method copyFromHostToDevice (no-argument overload, older API revision):
/**
 * Copies the host-side matrix block of {@code mat} to device (GPU) memory,
 * converting sparse blocks to CSR first (CSR is the preferred format for
 * cuSparse GEMM). Older variant of the copy routine: it unconditionally
 * recomputes non-zeros and, for empty dense blocks, allocates and transfers
 * a zero-filled host buffer instead of a device-side memset.
 *
 * @throws DMLRuntimeException on inconsistent host block state or
 *         unsupported sparse formats
 */
void copyFromHostToDevice() throws DMLRuntimeException {
LOG.trace("GPU : copyFromHostToDevice, on " + this + ", GPUContext=" + getGPUContext());
long start = 0;
if (DMLScript.STATISTICS)
start = System.nanoTime();
// pin the host matrix block for reading for the duration of the copy
MatrixBlock tmp = mat.acquireRead();
if (tmp.isInSparseFormat()) {
// sparse path: obtain CSR arrays (rowPtr/colInd/values) to ship to the device
int[] rowPtr = null;
int[] colInd = null;
double[] values = null;
// NOTE(review): recomputed unconditionally here; the newer overload only
// recomputes when nnz is unknown (<0) to avoid a large penalty
tmp.recomputeNonZeros();
long nnz = tmp.getNonZeros();
mat.getMatrixCharacteristics().setNonZeros(nnz);
SparseBlock block = tmp.getSparseBlock();
boolean copyToDevice = true;
if (block == null && tmp.getNonZeros() == 0) {
// // Allocate empty block --> not necessary
// // To reproduce this, see org.apache.sysml.test.integration.applications.dml.ID3DMLTest
// rowPtr = new int[0];
// colInd = new int[0];
// values = new double[0];
copyToDevice = false;
} else if (block == null && tmp.getNonZeros() != 0) {
// inconsistent host state: nnz > 0 but no backing sparse block
throw new DMLRuntimeException("Expected CP sparse block to be not null.");
} else {
// CSR is the preferred format for cuSparse GEMM
// Converts MCSR and COO to CSR
SparseBlockCSR csrBlock = null;
long t0 = 0;
if (block instanceof SparseBlockCSR) {
// already CSR: use the arrays directly, no conversion cost
csrBlock = (SparseBlockCSR) block;
} else if (block instanceof SparseBlockCOO) {
// TODO - should we do this on the GPU using cusparse<t>coo2csr() ?
if (DMLScript.STATISTICS)
t0 = System.nanoTime();
SparseBlockCOO cooBlock = (SparseBlockCOO) block;
csrBlock = new SparseBlockCSR(toIntExact(mat.getNumRows()), cooBlock.rowIndexes(), cooBlock.indexes(), cooBlock.values());
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionTime.addAndGet(System.nanoTime() - t0);
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionCount.incrementAndGet();
} else if (block instanceof SparseBlockMCSR) {
if (DMLScript.STATISTICS)
t0 = System.nanoTime();
SparseBlockMCSR mcsrBlock = (SparseBlockMCSR) block;
// presumably the (SparseRow[], nnz) constructor, which copies the MCSR
// rows into CSR form — TODO confirm against SparseBlockCSR overloads
csrBlock = new SparseBlockCSR(mcsrBlock.getRows(), toIntExact(mcsrBlock.size()));
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionTime.addAndGet(System.nanoTime() - t0);
if (DMLScript.STATISTICS)
GPUStatistics.cudaSparseConversionCount.incrementAndGet();
} else {
throw new DMLRuntimeException("Unsupported sparse matrix format for CUDA operations");
}
rowPtr = csrBlock.rowPointers();
colInd = csrBlock.indexes();
values = csrBlock.values();
}
allocateSparseMatrixOnDevice();
if (copyToDevice) {
CSRPointer.copyToDevice(getJcudaSparseMatrixPtr(), tmp.getNumRows(), tmp.getNonZeros(), rowPtr, colInd, values);
}
} else {
// dense path
double[] data = tmp.getDenseBlock();
if (data == null && tmp.getSparseBlock() != null)
throw new DMLRuntimeException("Incorrect sparsity calculation");
else if (data == null && tmp.getNonZeros() != 0)
throw new DMLRuntimeException("MatrixBlock is not allocated");
else if (tmp.getNonZeros() == 0)
// empty dense block: transfer a freshly zeroed host buffer
// (the newer overload avoids this allocation via device-side memset)
data = new double[tmp.getNumRows() * tmp.getNumColumns()];
// Copy dense block
allocateDenseMatrixOnDevice();
cudaMemcpy(getJcudaDenseMatrixPtr(), Pointer.to(data), getDoubleSizeOf(mat.getNumRows() * mat.getNumColumns()), cudaMemcpyHostToDevice);
}
// release the read lock acquired above before recording overall statistics
mat.release();
if (DMLScript.STATISTICS)
GPUStatistics.cudaToDevTime.addAndGet(System.nanoTime() - start);
if (DMLScript.STATISTICS)
GPUStatistics.cudaToDevCount.addAndGet(1);
}
Aggregations