use of org.apache.sysml.runtime.compress.CompressedMatrixBlock in project incubator-systemml by apache.
the class SpoofOperator method prepInputMatricesDense.
protected double[][] prepInputMatricesDense(ArrayList<MatrixBlock> inputs, int offset, int len) throws DMLRuntimeException {
double[][] b = new double[len][];
for (int i = offset; i < offset + len; i++) {
// decompress if necessary
if (inputs.get(i) instanceof CompressedMatrixBlock)
inputs.set(i, ((CompressedMatrixBlock) inputs.get(i)).decompress());
// convert sparse to a dense temporary block (note: we don't do
// this in place because this block might be used by multiple threads)
if (inputs.get(i).isInSparseFormat() && inputs.get(i).isAllocated()) {
MatrixBlock tmp = inputs.get(i);
b[i - offset] = DataConverter.convertToDoubleVector(tmp);
LOG.warn(getClass().getName() + ": Converted " + tmp.getNumRows() + "x" + tmp.getNumColumns() + ", nnz=" + tmp.getNonZeros() + " sideways input matrix from sparse to dense.");
} else { // use existing dense block
b[i - offset] = inputs.get(i).getDenseBlock();
}
}
return b;
}
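For orientation, a minimal standalone sketch of the same decompress-then-densify pattern applied to a single block (the mb variable and its source are hypothetical; only calls already used in the method above are assumed):
MatrixBlock mb = inputs.get(0); // hypothetical single side input
// decompress compressed blocks before reading raw values
if (mb instanceof CompressedMatrixBlock)
mb = ((CompressedMatrixBlock) mb).decompress();
// sparse inputs are copied into a fresh dense vector; dense inputs are reused as-is
double[] vals = (mb.isInSparseFormat() && mb.isAllocated())
? DataConverter.convertToDoubleVector(mb) : mb.getDenseBlock();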
use of org.apache.sysml.runtime.compress.CompressedMatrixBlock in project incubator-systemml by apache.
the class SparkExecutionContext method toMatrixBlock.
/**
* Utility method for creating a single matrix block out of a binary block RDD.
* Note that this collect call might trigger execution of any pending transformations.
*
* NOTE: This is an unguarded utility function, which requires memory for both the output matrix
* and its collected, blocked representation.
*
* @param rdd JavaPairRDD for matrix block
* @param rlen number of rows
* @param clen number of columns
* @param brlen number of rows in a block
* @param bclen number of columns in a block
* @param nnz number of non-zeros
* @return matrix block
*/
public static MatrixBlock toMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> rdd, int rlen, int clen, int brlen, int bclen, long nnz) {
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
MatrixBlock out = null;
if (rlen <= brlen && clen <= bclen) { // SINGLE BLOCK
// special case without copy and nnz maintenance
List<Tuple2<MatrixIndexes, MatrixBlock>> list = rdd.collect();
if (list.size() > 1)
throw new DMLRuntimeException("Expecting no more than one result block.");
else if (list.size() == 1)
out = list.get(0)._2();
else
// empty (e.g., after ops w/ outputEmpty=false)
out = new MatrixBlock(rlen, clen, true);
out.examSparsity();
} else { // MULTIPLE BLOCKS
// determine target sparse/dense representation
long lnnz = (nnz >= 0) ? nnz : (long) rlen * clen;
boolean sparse = MatrixBlock.evalSparseFormatInMemory(rlen, clen, lnnz);
// create output matrix block (w/ lazy allocation)
out = new MatrixBlock(rlen, clen, sparse, lnnz);
List<Tuple2<MatrixIndexes, MatrixBlock>> list = rdd.collect();
// copy blocks one-at-a-time into output matrix block
long aNnz = 0;
for (Tuple2<MatrixIndexes, MatrixBlock> keyval : list) {
// unpack index-block pair
MatrixIndexes ix = keyval._1();
MatrixBlock block = keyval._2();
// compute row/column block offsets
int row_offset = (int) (ix.getRowIndex() - 1) * brlen;
int col_offset = (int) (ix.getColumnIndex() - 1) * bclen;
int rows = block.getNumRows();
int cols = block.getNumColumns();
// handle compressed blocks (decompress for robustness)
if (block instanceof CompressedMatrixBlock)
block = ((CompressedMatrixBlock) block).decompress();
// append block
if (sparse) {
// SPARSE OUTPUT
// append block to sparse target in order to avoid shifting, where
// we use a shallow row copy in case of MCSR and single column blocks
// note: this append requires, for multiple column blocks, a final sort
out.appendToSparse(block, row_offset, col_offset, clen > bclen);
} else {
// DENSE OUTPUT
out.copy(row_offset, row_offset + rows - 1, col_offset, col_offset + cols - 1, block, false);
}
// incremental nnz maintenance
aNnz += block.getNonZeros();
}
// post-processing output matrix
if (sparse && clen > bclen)
out.sortSparseRows();
out.setNonZeros(aNnz);
out.examSparsity();
}
if (DMLScript.STATISTICS) {
Statistics.accSparkCollectTime(System.nanoTime() - t0);
Statistics.incSparkCollectCount(1);
}
return out;
}
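A hedged usage sketch of this utility (the wrapper method and the 10000 x 100 dimensions are hypothetical; passing nnz = -1 signals an unknown non-zero count, so the worst case rlen * clen is assumed above):
// collect a blocked 10000 x 100 matrix (1000 x 1000 blocking) into one in-memory block
public static MatrixBlock collectToSingleBlock(JavaPairRDD<MatrixIndexes, MatrixBlock> blocks) {
return SparkExecutionContext.toMatrixBlock(blocks, 10000, 100, 1000, 1000, -1);
}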
use of org.apache.sysml.runtime.compress.CompressedMatrixBlock in project incubator-systemml by apache.
the class SpoofMultiAggregate method execute.
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
// sanity check
if (inputs == null || inputs.size() < 1)
throw new RuntimeException("Invalid input arguments.");
long inputSize = isSparseSafe() ? getTotalInputNnz(inputs) : getTotalInputSize(inputs);
if (inputSize < PAR_NUMCELL_THRESHOLD) {
// serial execution
k = 1;
}
// result allocation and preparations
out.reset(1, _aggOps.length, false); // 1 x <num_agg>
out.allocateDenseBlock();
double[] c = out.getDenseBlockValues();
setInitialOutputValues(c);
// input preparation
SideInput[] b = prepInputMatrices(inputs);
double[] scalars = prepInputScalars(scalarObjects);
final int m = inputs.get(0).getNumRows();
final int n = inputs.get(0).getNumColumns();
boolean sparseSafe = isSparseSafe();
if (k <= 1) { // SINGLE-THREADED
if (inputs.get(0) instanceof CompressedMatrixBlock)
executeCompressed((CompressedMatrixBlock) inputs.get(0), b, scalars, c, m, n, 0, m);
else if (!inputs.get(0).isInSparseFormat())
executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m);
else
executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m);
} else { // MULTI-THREADED
try {
ExecutorService pool = CommonThreadPool.get(k);
ArrayList<ParAggTask> tasks = new ArrayList<>();
int nk = UtilFunctions.roundToNext(Math.min(8 * k, m / 32), k);
int blklen = (int) (Math.ceil((double) m / nk));
for (int i = 0; i < nk & i * blklen < m; i++) tasks.add(new ParAggTask(inputs.get(0), b, scalars, m, n, sparseSafe, i * blklen, Math.min((i + 1) * blklen, m)));
// execute tasks
List<Future<double[]>> taskret = pool.invokeAll(tasks);
pool.shutdown();
// aggregate partial results
ArrayList<double[]> pret = new ArrayList<>();
for (Future<double[]> task : taskret) pret.add(task.get());
aggregatePartialResults(c, pret);
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
}
// post-processing
out.recomputeNonZeros();
out.examSparsity();
return out;
}
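A hedged call-site sketch for this method (the generated subclass GeneratedMultiAgg and the input block X are hypothetical; the argument list matches the signature above):
ArrayList<MatrixBlock> inputs = new ArrayList<>();
inputs.add(X); // main input, possibly a CompressedMatrixBlock
SpoofMultiAggregate op = new GeneratedMultiAgg(); // hypothetical codegen-generated operator
// k = 4 requests up to four threads; inputs below PAR_NUMCELL_THRESHOLD fall back to serial execution
MatrixBlock aggs = op.execute(inputs, new ArrayList<ScalarObject>(), new MatrixBlock(), 4);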
use of org.apache.sysml.runtime.compress.CompressedMatrixBlock in project incubator-systemml by apache.
the class SpoofOperator method prepInputMatrices.
protected SideInput[] prepInputMatrices(ArrayList<MatrixBlock> inputs, int offset, int len, boolean denseOnly, boolean tB1) {
SideInput[] b = new SideInput[len];
for (int i = offset; i < offset + len; i++) {
// decompress if necessary
if (inputs.get(i) instanceof CompressedMatrixBlock)
inputs.set(i, ((CompressedMatrixBlock) inputs.get(i)).decompress());
// transpose if necessary
int clen = inputs.get(i).getNumColumns();
MatrixBlock in = (tB1 && i == 1) ? LibMatrixReorg.transpose(inputs.get(i), new MatrixBlock(clen, inputs.get(i).getNumRows(), false)) : inputs.get(i);
// create side input
if (denseOnly && (in.isInSparseFormat() || !in.isAllocated())) {
// convert empty or sparse to a dense temporary block (note: we don't do
// this in place because this block might be used by multiple threads)
if (in.getNumColumns() == 1 && in.isEmptyBlock(false)) // dense empty
b[i - offset] = new SideInput(null, null, clen);
else {
b[i - offset] = new SideInput(DataConverter.convertToDenseBlock(in, false), null, clen);
LOG.warn(getClass().getName() + ": Converted " + in.getNumRows() + "x" + in.getNumColumns() + ", nnz=" + in.getNonZeros() + " sideways input matrix from sparse to dense.");
}
} else if (in.isInSparseFormat() || !in.isAllocated()) {
b[i - offset] = new SideInput(null, in, clen);
} else {
b[i - offset] = new SideInput(in.getDenseBlock(), null, clen);
}
}
return b;
}
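The tB1 flag above triggers an out-of-place transpose of side input 1; a hedged standalone sketch of that step (variable names are hypothetical; the transpose call mirrors the one used above):
MatrixBlock in1 = inputs.get(1); // hypothetical side input B1
// allocate a clen x rlen target and transpose out of place, leaving the shared input untouched
MatrixBlock tIn1 = LibMatrixReorg.transpose(in1,
new MatrixBlock(in1.getNumColumns(), in1.getNumRows(), false));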
use of org.apache.sysml.runtime.compress.CompressedMatrixBlock in project incubator-systemml by apache.
the class SpoofOuterProduct method execute.
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int numThreads) {
// sanity check
if (inputs == null || inputs.size() < 3 || out == null)
throw new RuntimeException("Invalid input arguments.");
// check empty result
if ((_outerProductType == OutProdType.LEFT_OUTER_PRODUCT && inputs.get(1).isEmptyBlock(false)) || // U is empty
(_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT && inputs.get(2).isEmptyBlock(false)) || // V is empty
inputs.get(0).isEmptyBlock(false)) { // X is empty
// turn empty dense into sparse
out.examSparsity();
return out;
}
// input preparation and result allocation (Allocate the output that is set by Sigma2CPInstruction)
if (_outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
// match the dimensions and sparse representation of the major input matrix
out.reset(inputs.get(0).getNumRows(), inputs.get(0).getNumColumns(), inputs.get(0).isInSparseFormat());
out.allocateBlock();
} else {
// if the left outer product gives a value of k*n instead of n*k, change it back to n*k and then transpose the output
if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT)
out.reset(inputs.get(0).getNumColumns(), inputs.get(1).getNumColumns(), false); // n*k
else if (_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT)
out.reset(inputs.get(0).getNumRows(), inputs.get(1).getNumColumns(), false); // m*k
out.allocateDenseBlock();
}
if (2 * inputs.get(0).getNonZeros() * inputs.get(1).getNumColumns() < PAR_MINFLOP_THRESHOLD)
return execute(inputs, scalarObjects, out); // sequential
// input preparation
DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
SideInput[] b = prepInputMatrices(inputs, 3, false);
double[] scalars = prepInputScalars(scalarObjects);
// core parallel execute
final int m = inputs.get(0).getNumRows();
final int n = inputs.get(0).getNumColumns();
final int k = inputs.get(1).getNumColumns(); // rank
final long nnz = inputs.get(0).getNonZeros();
MatrixBlock a = inputs.get(0);
try {
ExecutorService pool = CommonThreadPool.get(numThreads);
ArrayList<ParExecTask> tasks = new ArrayList<>();
if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT) {
if (a instanceof CompressedMatrixBlock) {
// parallelize over column groups
int numCG = ((CompressedMatrixBlock) a).getNumColGroups();
int blklen = (int) (Math.ceil((double) numCG / numThreads));
for (int j = 0; j < numThreads & j * blklen < numCG; j++) tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j * blklen, Math.min((j + 1) * blklen, numCG)));
} else {
// parallelize over column partitions
int blklen = (int) (Math.ceil((double) n / numThreads));
for (int j = 0; j < numThreads & j * blklen < n; j++) tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j * blklen, Math.min((j + 1) * blklen, n)));
}
} else {
// right or cell-wise
// parallelize over row partitions
int numThreads2 = getPreferredNumberOfTasks(m, n, nnz, k, numThreads);
int blklen = (int) (Math.ceil((double) m / numThreads2));
for (int i = 0; i < numThreads2 & i * blklen < m; i++) tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, i * blklen, Math.min((i + 1) * blklen, m), 0, n));
}
List<Future<Long>> taskret = pool.invokeAll(tasks);
pool.shutdown();
for (Future<Long> task : taskret) out.setNonZeros(out.getNonZeros() + task.get());
} catch (Exception e) {
throw new DMLRuntimeException(e);
}
// post-processing
if (a instanceof CompressedMatrixBlock) {
if (out.isInSparseFormat() && _outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT)
out.sortSparseRows();
else if (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT)
out.recomputeNonZeros();
}
out.examSparsity();
return out;
}
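A hedged call-site sketch for this method (the generated subclass GeneratedOuterProduct and the blocks X, U, V are hypothetical; the input order follows the empty-block checks above, i.e. X at index 0, U at index 1, V at index 2):
ArrayList<MatrixBlock> inputs = new ArrayList<>(Arrays.asList(X, U, V));
SpoofOuterProduct op = new GeneratedOuterProduct(); // hypothetical codegen-generated operator
// request up to 8 threads; problems below PAR_MINFLOP_THRESHOLD fall back to the sequential execute
MatrixBlock res = op.execute(inputs, new ArrayList<ScalarObject>(), new MatrixBlock(), 8);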