Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.
In class CompressedMatrixBlock, method aggregateUnaryOperations.
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) {
// fall back to the uncompressed unary aggregate if the block is not compressed
if (!isCompressed()) {
return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
}
// check for supported operations
if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
}
Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
// prepare output dimensions
CellIndex tempCellIndex = new CellIndex(-1, -1);
op.indexFn.computeDimension(rlen, clen, tempCellIndex);
if (op.aggOp.correctionExists) {
switch(op.aggOp.correctionLocation) {
case LASTROW:
tempCellIndex.row++;
break;
case LASTCOLUMN:
tempCellIndex.column++;
break;
case LASTTWOROWS:
tempCellIndex.row += 2;
break;
case LASTTWOCOLUMNS:
tempCellIndex.column += 2;
break;
default:
throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
}
}
// initialize and allocate the result
if (result == null)
result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
else
result.reset(tempCellIndex.row, tempCellIndex.column, false);
MatrixBlock ret = (MatrixBlock) result;
ret.allocateDenseBlock();
// special handling of the init value for rowmin/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
double val = (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
ret.getDenseBlock().set(val);
}
// core unary aggregate
if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
// multi-threaded execution of all groups
ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
ColGroupUncompressed uc = getUncompressedColGroup();
try {
// compute uncompressed column group in parallel (otherwise bottleneck)
if (uc != null)
uc.unaryAggregateOperations(op, ret);
// compute all compressed column groups
ExecutorService pool = CommonThreadPool.get(op.getNumThreads());
ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
for (int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++) tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
} else
for (ArrayList<ColGroup> grp : grpParts) tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
pool.shutdown();
// aggregate partial results
if (op.indexFn instanceof ReduceAll) {
if (op.aggOp.increOp.fn instanceof KahanFunction) {
KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
}
ret.quickSetValue(0, 0, kbuff._sum);
} else {
double val = ret.quickGetValue(0, 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
val = op.aggOp.increOp.fn.execute(val, tmp);
}
ret.quickSetValue(0, 0, val);
}
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
} else {
// process UC column group
for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
grp.unaryAggregateOperations(op, ret);
// process OLE/RLE column groups
aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
}
// special handling of zeros for rowmin/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
int[] rnnz = new int[rlen];
for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
Builtin builtin = (Builtin) op.aggOp.increOp.fn;
for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
}
// drop correction if necessary
if (op.aggOp.correctionExists && inCP)
ret.dropLastRowsOrColumns(op.aggOp.correctionLocation);
// post-processing
ret.recomputeNonZeros();
if (LOG.isDebugEnabled())
LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
return ret;
}
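All of the usages on this page funnel through the same compensated-addition step that KahanPlus.execute2 applies to a KahanObject's _sum/_correction pair. Below is a minimal, self-contained sketch of that scheme in plain Java; KahanAcc, kahanAdd, and KahanSumSketch are illustrative names, not SystemML API, and the formulation is one common variant of Kahan summation consistent with how _sum and _correction are read and written above.
public class KahanSumSketch {
    // Illustrative accumulator, standing in for KahanObject (_sum / _correction).
    static final class KahanAcc {
        double sum;
        double correction;
    }
    // One compensated addition step, standing in for KahanPlus.execute2.
    static void kahanAdd(KahanAcc acc, double value) {
        double corrected = value + acc.correction;       // fold in the carried error
        double newSum = acc.sum + corrected;             // plain add, may lose low-order bits
        acc.correction = corrected - (newSum - acc.sum); // recover what was lost
        acc.sum = newSum;
    }
    public static void main(String[] args) {
        // Sum ten million copies of 0.1 both ways; the compensated total stays
        // much closer to the exact value 1,000,000 than the naive loop.
        KahanAcc acc = new KahanAcc();
        double naive = 0;
        for (int i = 0; i < 10_000_000; i++) {
            kahanAdd(acc, 0.1);
            naive += 0.1;
        }
        System.out.println("kahan = " + acc.sum + ", naive = " + naive);
    }
}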
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.
In class LibMatrixAgg, method aggregateBinaryMatrixLastRowSparseGeneric.
private static void aggregateBinaryMatrixLastRowSparseGeneric(MatrixBlock in, MatrixBlock aggVal) {
// sparse-safe operation
if (in.isEmptyBlock(false))
return;
SparseBlock a = in.getSparseBlock();
KahanObject buffer1 = new KahanObject(0, 0);
KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
final int m = in.rlen;
final int rlen = Math.min(a.numRows(), m);
for (int i = 0; i < rlen - 1; i++) {
if (!a.isEmpty(i)) {
int apos = a.pos(i);
int alen = a.size(i);
int[] aix = a.indexes(i);
double[] avals = a.values(i);
for (int j = apos; j < apos + alen; j++) {
int jix = aix[j];
double corr = in.quickGetValue(m - 1, jix);
buffer1._sum = aggVal.quickGetValue(i, jix);
buffer1._correction = aggVal.quickGetValue(m - 1, jix);
akplus.execute(buffer1, avals[j], corr);
aggVal.quickSetValue(i, jix, buffer1._sum);
aggVal.quickSetValue(m - 1, jix, buffer1._correction);
}
}
}
// note: nnz of aggVal/aggCorr maintained internally
aggVal.examSparsity();
}
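The loop above keeps the running sums in the body of aggVal and their compensation terms in its last row (the LASTROW correction layout). The following is a hedged sketch of that merge pattern, reduced to a single cell and ignoring the incoming block's own correction row for brevity; it reuses the KahanObject/KahanPlus API already shown, but the class and method names here are illustrative only, and the import paths are assumed from the SystemML codebase.
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;

public class LastRowCorrectionSketch {
    // Compensated update of aggVal(i, j), with the correction kept in row lastRow.
    public static void add(MatrixBlock aggVal, int i, int j, int lastRow, double val) {
        KahanObject buf = new KahanObject(aggVal.quickGetValue(i, j),
                aggVal.quickGetValue(lastRow, j));
        KahanPlus.getKahanPlusFnObject().execute2(buf, val); // compensated add
        aggVal.quickSetValue(i, j, buf._sum);
        aggVal.quickSetValue(lastRow, j, buf._correction);
    }
}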
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.
In class LibMatrixAgg, method cumaggregateUnaryMatrixDense.
private static void cumaggregateUnaryMatrixDense(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, double[] agg, int rl, int ru) {
final int n = in.clen;
DenseBlock da = in.getDenseBlock();
DenseBlock dc = out.getDenseBlock();
double[] a = in.getDenseBlockValues();
double[] c = out.getDenseBlockValues();
switch(optype) {
case CUM_KAHAN_SUM:
{
// CUMSUM
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
d_ucumkp(da, agg, dc, n, kbuff, kplus, rl, ru);
break;
}
case CUM_PROD:
{
// CUMPROD
d_ucumm(a, agg, c, n, rl, ru);
break;
}
case CUM_MIN:
case CUM_MAX:
{
double init = (optype == AggType.CUM_MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
d_ucummxx(a, agg, c, n, init, (Builtin) vFn, rl, ru);
break;
}
default:
throw new DMLRuntimeException("Unsupported cumulative aggregation type: " + optype);
}
}
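The CUM_KAHAN_SUM branch computes a column-wise running (cumulative) sum with compensated addition. Below is a hedged, simplified sketch of that idea over a plain row-major double[] rather than the DenseBlock-based library kernel; CumsumSketch and cumsumColumns are illustrative names, and the import paths are assumed from the SystemML codebase.
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;

public class CumsumSketch {
    // Column-wise cumulative sum of an m x n row-major matrix with compensated addition.
    public static double[] cumsumColumns(double[] a, int m, int n) {
        double[] c = new double[m * n];
        KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
        KahanObject[] acc = new KahanObject[n];
        for (int j = 0; j < n; j++)
            acc[j] = new KahanObject(0, 0);
        for (int i = 0; i < m; i++)
            for (int j = 0; j < n; j++) {
                kplus.execute2(acc[j], a[i * n + j]); // compensated running sum per column
                c[i * n + j] = acc[j]._sum;           // emit the prefix sum
            }
        return c;
    }
}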
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.
In class LibMatrixAgg, method aggregateUnaryMatrixSparse.
private static void aggregateUnaryMatrixSparse(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru) {
final int m = in.rlen;
final int n = in.clen;
// note: due to corrections, even the output might be a large dense block
SparseBlock a = in.getSparseBlock();
DenseBlock c = out.getDenseBlock();
switch(optype) {
case KAHAN_SUM:
{
// SUM via k+
KahanObject kbuff = new KahanObject(0, 0);
if (ixFn instanceof ReduceAll) // SUM
s_uakp(a, c, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (ixFn instanceof ReduceCol) // ROWSUM
s_uarkp(a, c, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (ixFn instanceof ReduceRow) // COLSUM
s_uackp(a, c, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (ixFn instanceof ReduceDiag) // TRACE
s_uakptrace(a, c, n, kbuff, (KahanPlus) vFn, rl, ru);
break;
}
case KAHAN_SUM_SQ:
{
// SUM_SQ via k+
KahanObject kbuff = new KahanObject(0, 0);
if (ixFn instanceof ReduceAll) // SUM_SQ
s_uasqkp(a, c, n, kbuff, (KahanPlusSq) vFn, rl, ru);
else if (ixFn instanceof ReduceCol) // ROWSUM_SQ
s_uarsqkp(a, c, n, kbuff, (KahanPlusSq) vFn, rl, ru);
else if (ixFn instanceof ReduceRow) // COLSUM_SQ
s_uacsqkp(a, c, n, kbuff, (KahanPlusSq) vFn, rl, ru);
break;
}
case CUM_KAHAN_SUM:
{
// CUMSUM
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
s_ucumkp(a, null, out.getDenseBlock(), m, n, kbuff, kplus, rl, ru);
break;
}
case CUM_PROD:
{
// CUMPROD
s_ucumm(a, null, out.getDenseBlockValues(), n, rl, ru);
break;
}
case CUM_MIN:
case CUM_MAX:
{
double init = (optype == AggType.CUM_MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
s_ucummxx(a, null, out.getDenseBlockValues(), n, init, (Builtin) vFn, rl, ru);
break;
}
case MIN:
case MAX:
{
// MAX/MIN
double init = (optype == AggType.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
if (ixFn instanceof ReduceAll) // MIN/MAX
s_uamxx(a, c, n, init, (Builtin) vFn, rl, ru);
else if (ixFn instanceof ReduceCol) // ROWMIN/ROWMAX
s_uarmxx(a, c, n, init, (Builtin) vFn, rl, ru);
else if (ixFn instanceof ReduceRow) // COLMIN/COLMAX
s_uacmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MAX_INDEX:
{
double init = Double.NEGATIVE_INFINITY;
if (ixFn instanceof ReduceCol) // ROWINDEXMAX
s_uarimxx(a, c, n, init, (Builtin) vFn, rl, ru);
break;
}
case MIN_INDEX:
{
double init = Double.POSITIVE_INFINITY;
if (ixFn instanceof ReduceCol) // ROWINDEXMIN
s_uarimin(a, c, n, init, (Builtin) vFn, rl, ru);
break;
}
case MEAN:
{
KahanObject kbuff = new KahanObject(0, 0);
if (ixFn instanceof ReduceAll) // MEAN
s_uamean(a, c, n, kbuff, (Mean) vFn, rl, ru);
else if (ixFn instanceof ReduceCol) // ROWMEAN
s_uarmean(a, c, n, kbuff, (Mean) vFn, rl, ru);
else if (ixFn instanceof ReduceRow) // COLMEAN
s_uacmean(a, c, n, kbuff, (Mean) vFn, rl, ru);
break;
}
case VAR:
{
// VAR
CM_COV_Object cbuff = new CM_COV_Object();
if (ixFn instanceof ReduceAll) // VAR
s_uavar(a, c, n, cbuff, (CM) vFn, rl, ru);
else if (ixFn instanceof ReduceCol) // ROWVAR
s_uarvar(a, c, n, cbuff, (CM) vFn, rl, ru);
else if (ixFn instanceof ReduceRow) // COLVAR
s_uacvar(a, c, n, cbuff, (CM) vFn, rl, ru);
break;
}
case PROD:
{
// PROD
if (ixFn instanceof ReduceAll) // PROD
s_uam(a, c, n, rl, ru);
break;
}
default:
throw new DMLRuntimeException("Unsupported aggregation type: " + optype);
}
}
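As one concrete example of the sparse kernels dispatched above, a Kahan-compensated row sum over a single SparseBlock row looks roughly like the following. This is a hedged sketch of the idea behind the ROWSUM branch, not the s_uarkp implementation itself; SparseRowSumSketch and rowSum are illustrative names, and the import paths are assumed from the SystemML codebase.
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;
import org.apache.sysml.runtime.matrix.data.SparseBlock;

public class SparseRowSumSketch {
    // Compensated sum over the nonzeros of row i (sparse-safe: zeros contribute nothing).
    public static double rowSum(SparseBlock a, int i) {
        if (a.isEmpty(i))
            return 0;
        KahanObject kbuff = new KahanObject(0, 0);
        KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
        int apos = a.pos(i), alen = a.size(i);
        double[] avals = a.values(i);
        for (int k = apos; k < apos + alen; k++)
            kplus.execute2(kbuff, avals[k]); // compensated add of each nonzero
        return kbuff._sum;
    }
}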
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by apache.
In class LibMatrixAgg, method aggregateTernaryDense.
private static void aggregateTernaryDense(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, MatrixBlock ret, IndexFunction ixFn, int rl, int ru) {
// compute block operations
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
double[] a = in1.getDenseBlockValues();
double[] b1 = in2.getDenseBlockValues();
// if null, literal 1
double[] b2 = (in3 != null) ? in3.getDenseBlockValues() : null;
final int n = in1.clen;
if (ixFn instanceof ReduceAll) { // tak+*
for (int i = rl, ix = rl * n; i < ru; i++) for (int j = 0; j < n; j++, ix++) {
double b2val = (b2 != null) ? b2[ix] : 1;
double val = a[ix] * b1[ix] * b2val;
kplus.execute2(kbuff, val);
}
ret.quickSetValue(0, 0, kbuff._sum);
ret.quickSetValue(0, 1, kbuff._correction);
} else { // tack+*
double[] c = ret.getDenseBlockValues();
for (int i = rl, ix = rl * n; i < ru; i++) for (int j = 0; j < n; j++, ix++) {
double b2val = (b2 != null) ? b2[ix] : 1;
double val = a[ix] * b1[ix] * b2val;
kbuff._sum = c[j];
kbuff._correction = c[j + n];
kplus.execute2(kbuff, val);
c[j] = kbuff._sum;
c[j + n] = kbuff._correction;
}
}
}
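The ReduceAll (tak+*) branch above is essentially sum(A * B1 * B2) with a compensated accumulator, where a null third input stands for the literal 1. Below is a hedged sketch of that reduction over plain arrays; TernaryAggSketch and takPlusTimes are illustrative names, not SystemML API, and the import paths are assumed from the SystemML codebase.
import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;

public class TernaryAggSketch {
    // Sum over i of a[i] * b1[i] * b2[i], with b2 == null treated as all ones.
    public static double takPlusTimes(double[] a, double[] b1, double[] b2) {
        KahanObject kbuff = new KahanObject(0, 0);
        KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
        for (int i = 0; i < a.length; i++) {
            double v = a[i] * b1[i] * ((b2 != null) ? b2[i] : 1);
            kplus.execute2(kbuff, v); // compensated accumulation
        }
        return kbuff._sum;
    }
}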