use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.
the class CompressedMatrixBlock method aggregateUnaryOperations.
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) {
// call uncompressed matrix mult if necessary
if (!isCompressed()) {
return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
}
// check for supported operations
if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
}
Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
// prepare output dimensions
CellIndex tempCellIndex = new CellIndex(-1, -1);
op.indexFn.computeDimension(rlen, clen, tempCellIndex);
if (op.aggOp.correctionExists) {
switch(op.aggOp.correctionLocation) {
case LASTROW:
tempCellIndex.row++;
break;
case LASTCOLUMN:
tempCellIndex.column++;
break;
case LASTTWOROWS:
tempCellIndex.row += 2;
break;
case LASTTWOCOLUMNS:
tempCellIndex.column += 2;
break;
default:
throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
}
}
// initialize and allocate the result
if (result == null)
result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
else
result.reset(tempCellIndex.row, tempCellIndex.column, false);
MatrixBlock ret = (MatrixBlock) result;
ret.allocateDenseBlock();
// special handling init value for rowmins/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
double val = (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
ret.getDenseBlock().set(val);
}
// core unary aggregate
if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
// multi-threaded execution of all groups
ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
ColGroupUncompressed uc = getUncompressedColGroup();
try {
// compute uncompressed column group in parallel (otherwise bottleneck)
if (uc != null)
uc.unaryAggregateOperations(op, ret);
// compute all compressed column groups
ExecutorService pool = CommonThreadPool.get(op.getNumThreads());
ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
for (int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++) tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
} else
for (ArrayList<ColGroup> grp : grpParts) tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
pool.shutdown();
// aggregate partial results
if (op.indexFn instanceof ReduceAll) {
if (op.aggOp.increOp.fn instanceof KahanFunction) {
KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
}
ret.quickSetValue(0, 0, kbuff._sum);
} else {
double val = ret.quickGetValue(0, 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
val = op.aggOp.increOp.fn.execute(val, tmp);
}
ret.quickSetValue(0, 0, val);
}
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
} else {
// process UC column group
for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
grp.unaryAggregateOperations(op, ret);
// process OLE/RLE column groups
aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
}
// special handling zeros for rowmins/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
int[] rnnz = new int[rlen];
for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
Builtin builtin = (Builtin) op.aggOp.increOp.fn;
for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
}
// drop correction if necessary
if (op.aggOp.correctionExists && inCP)
ret.dropLastRowsOrColumns(op.aggOp.correctionLocation);
// post-processing
ret.recomputeNonZeros();
if (LOG.isDebugEnabled())
LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
return ret;
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.
the class CompressedMatrixBlock method decompress.
/**
* Decompress block.
*
* @param k degree of parallelism
* @return a new uncompressed matrix block containing the contents
* of this block
*/
public MatrixBlock decompress(int k) {
// early abort for not yet compressed blocks
if (!isCompressed())
return new MatrixBlock(this);
if (k <= 1)
return decompress();
Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
MatrixBlock ret = new MatrixBlock(rlen, clen, sparse, nonZeros).allocateBlock();
// multi-threaded decompression
try {
ExecutorService pool = CommonThreadPool.get(k);
int rlen = getNumRows();
int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / k)));
ArrayList<DecompressTask> tasks = new ArrayList<>();
for (int i = 0; i < k & i * blklen < getNumRows(); i++) tasks.add(new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, rlen)));
List<Future<Object>> rtasks = pool.invokeAll(tasks);
pool.shutdown();
for (Future<Object> rt : rtasks) // error handling
rt.get();
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
// post-processing
ret.setNonZeros(nonZeros);
if (LOG.isDebugEnabled())
LOG.debug("decompressed block w/ k=" + k + " in " + time.stop() + "ms.");
return ret;
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.
the class CompressedMatrixBlock method transposeSelfMatrixMultOperations.
@Override
public MatrixBlock transposeSelfMatrixMultOperations(MatrixBlock out, MMTSJType tstype) {
// call uncompressed matrix mult if necessary
if (!isCompressed()) {
return super.transposeSelfMatrixMultOperations(out, tstype);
}
// single-threaded tsmm of single uncompressed colgroup
if (isSingleUncompressedGroup()) {
return ((ColGroupUncompressed) _colGroups.get(0)).getData().transposeSelfMatrixMultOperations(out, tstype);
}
Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
// check for transpose type
if (// right not supported yet
tstype != MMTSJType.LEFT)
throw new DMLRuntimeException("Invalid MMTSJ type '" + tstype.toString() + "'.");
// create output matrix block
if (out == null)
out = new MatrixBlock(clen, clen, false);
else
out.reset(clen, clen, false);
out.allocateDenseBlock();
if (!isEmptyBlock(false)) {
// compute matrix mult
leftMultByTransposeSelf(_colGroups, out, 0, _colGroups.size());
// post-processing
out.setNonZeros(LinearAlgebraUtils.copyUpperToLowerTriangle(out));
}
if (LOG.isDebugEnabled())
LOG.debug("Compressed TSMM in " + time.stop());
return out;
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project systemml by apache.
the class CompressedMatrixBlock method decompress.
/**
* Decompress block.
*
* @return a new uncompressed matrix block containing the contents of this
* block
*/
public MatrixBlock decompress() {
// early abort for not yet compressed blocks
if (!isCompressed())
return new MatrixBlock(this);
Timing time = new Timing(true);
// preallocation sparse rows to avoid repeated reallocations
MatrixBlock ret = new MatrixBlock(getNumRows(), getNumColumns(), isInSparseFormat(), getNonZeros());
if (ret.isInSparseFormat()) {
int[] rnnz = new int[rlen];
for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
ret.allocateSparseRowsBlock();
SparseBlock rows = ret.getSparseBlock();
for (int i = 0; i < rlen; i++) rows.allocate(i, rnnz[i]);
}
// core decompression (append if sparse)
for (ColGroup grp : _colGroups) grp.decompressToBlock(ret, 0, rlen);
// post-processing (for append in decompress)
ret.setNonZeros(nonZeros);
if (ret.isInSparseFormat())
ret.sortSparseRows();
if (LOG.isDebugEnabled())
LOG.debug("decompressed block in " + time.stop() + "ms.");
return ret;
}
use of org.apache.sysml.runtime.controlprogram.parfor.stat.Timing in project incubator-systemml by apache.
the class SortUtils method main.
public static void main(String[] args) {
int n = 10000000;
int[] indexes = new int[n];
double[] values = new double[n];
Random rand = new Random();
for (int i = 0; i < n; i++) {
indexes[i] = rand.nextInt();
values[i] = rand.nextDouble();
}
System.out.println("Running quicksort test ...");
Timing time = new Timing();
time.start();
SortUtils.sortByIndex(0, indexes.length, indexes, values);
System.out.println("quicksort n=" + n + " in " + time.stop() + "ms.");
time.start();
boolean flag = SortUtils.isSorted(0, indexes.length, indexes);
System.out.println("check sorted n=" + n + " in " + time.stop() + "ms, " + flag + ".");
}
Aggregations