use of org.apache.sysml.runtime.functionobjects.KahanFunction in project systemml by apache.
the class LibMatrixAgg method getAggType.
private static AggType getAggType(AggregateUnaryOperator op) {
ValueFunction vfn = op.aggOp.increOp.fn;
IndexFunction ifn = op.indexFn;
// (kahan) sum / sum squared / trace (for ReduceDiag)
if (vfn instanceof KahanFunction && (op.aggOp.correctionLocation == CorrectionLocationType.LASTCOLUMN || op.aggOp.correctionLocation == CorrectionLocationType.LASTROW) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow || ifn instanceof ReduceDiag)) {
if (vfn instanceof KahanPlus)
return AggType.KAHAN_SUM;
else if (vfn instanceof KahanPlusSq)
return AggType.KAHAN_SUM_SQ;
}
// mean
if (vfn instanceof Mean && (op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOCOLUMNS || op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOROWS) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
return AggType.MEAN;
}
// variance
if (vfn instanceof CM && ((CM) vfn).getAggOpType() == AggregateOperationTypes.VARIANCE && (op.aggOp.correctionLocation == CorrectionLocationType.LASTFOURCOLUMNS || op.aggOp.correctionLocation == CorrectionLocationType.LASTFOURROWS) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
return AggType.VAR;
}
// prod
if (vfn instanceof Multiply && ifn instanceof ReduceAll) {
return AggType.PROD;
}
// min / max
if (vfn instanceof Builtin && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
BuiltinCode bfcode = ((Builtin) vfn).bFunc;
switch(bfcode) {
case MAX:
return AggType.MAX;
case MIN:
return AggType.MIN;
case MAXINDEX:
return AggType.MAX_INDEX;
case MININDEX:
return AggType.MIN_INDEX;
// do nothing
default:
}
}
return AggType.INVALID;
}
use of org.apache.sysml.runtime.functionobjects.KahanFunction in project systemml by apache.
the class CompressedMatrixBlock method aggregateUnaryOperations.
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) {
// call uncompressed matrix mult if necessary
if (!isCompressed()) {
return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
}
// check for supported operations
if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
}
Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
// prepare output dimensions
CellIndex tempCellIndex = new CellIndex(-1, -1);
op.indexFn.computeDimension(rlen, clen, tempCellIndex);
if (op.aggOp.correctionExists) {
switch(op.aggOp.correctionLocation) {
case LASTROW:
tempCellIndex.row++;
break;
case LASTCOLUMN:
tempCellIndex.column++;
break;
case LASTTWOROWS:
tempCellIndex.row += 2;
break;
case LASTTWOCOLUMNS:
tempCellIndex.column += 2;
break;
default:
throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
}
}
// initialize and allocate the result
if (result == null)
result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
else
result.reset(tempCellIndex.row, tempCellIndex.column, false);
MatrixBlock ret = (MatrixBlock) result;
ret.allocateDenseBlock();
// special handling init value for rowmins/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
double val = (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
ret.getDenseBlock().set(val);
}
// core unary aggregate
if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
// multi-threaded execution of all groups
ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
ColGroupUncompressed uc = getUncompressedColGroup();
try {
// compute uncompressed column group in parallel (otherwise bottleneck)
if (uc != null)
uc.unaryAggregateOperations(op, ret);
// compute all compressed column groups
ExecutorService pool = CommonThreadPool.get(op.getNumThreads());
ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
for (int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++) tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
} else
for (ArrayList<ColGroup> grp : grpParts) tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
pool.shutdown();
// aggregate partial results
if (op.indexFn instanceof ReduceAll) {
if (op.aggOp.increOp.fn instanceof KahanFunction) {
KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
}
ret.quickSetValue(0, 0, kbuff._sum);
} else {
double val = ret.quickGetValue(0, 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
val = op.aggOp.increOp.fn.execute(val, tmp);
}
ret.quickSetValue(0, 0, val);
}
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
} else {
// process UC column group
for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
grp.unaryAggregateOperations(op, ret);
// process OLE/RLE column groups
aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
}
// special handling zeros for rowmins/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
int[] rnnz = new int[rlen];
for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
Builtin builtin = (Builtin) op.aggOp.increOp.fn;
for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
}
// drop correction if necessary
if (op.aggOp.correctionExists && inCP)
ret.dropLastRowsOrColumns(op.aggOp.correctionLocation);
// post-processing
ret.recomputeNonZeros();
if (LOG.isDebugEnabled())
LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
return ret;
}
use of org.apache.sysml.runtime.functionobjects.KahanFunction in project incubator-systemml by apache.
the class SpoofCellwise method execute.
@Override
public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int k) {
// sanity check
if (inputs == null || inputs.size() < 1)
throw new RuntimeException("Invalid input arguments.");
// input preparation
MatrixBlock a = inputs.get(0);
SideInput[] b = prepInputMatrices(inputs);
double[] scalars = prepInputScalars(scalarObjects);
final int m = a.getNumRows();
final int n = a.getNumColumns();
// sparse safe check
boolean sparseSafe = isSparseSafe() || (b.length == 0 && genexec(0, b, scalars, m, n, 0, 0) == 0);
long inputSize = sparseSafe ? getTotalInputNnz(inputs) : getTotalInputSize(inputs);
if (inputSize < PAR_NUMCELL_THRESHOLD) {
// serial execution
k = 1;
}
double ret = 0;
if (// SINGLE-THREADED
k <= 1) {
if (inputs.get(0) instanceof CompressedMatrixBlock)
ret = executeCompressedAndAgg((CompressedMatrixBlock) a, b, scalars, m, n, sparseSafe, 0, m);
else if (!inputs.get(0).isInSparseFormat())
ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m);
else
ret = executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m);
} else // MULTI-THREADED
{
try {
ExecutorService pool = CommonThreadPool.get(k);
ArrayList<ParAggTask> tasks = new ArrayList<>();
int nk = (a instanceof CompressedMatrixBlock) ? k : UtilFunctions.roundToNext(Math.min(8 * k, m / 32), k);
int blklen = (int) (Math.ceil((double) m / nk));
if (a instanceof CompressedMatrixBlock)
blklen = BitmapEncoder.getAlignedBlocksize(blklen);
for (int i = 0; i < nk & i * blklen < m; i++) tasks.add(new ParAggTask(a, b, scalars, m, n, sparseSafe, i * blklen, Math.min((i + 1) * blklen, m)));
// execute tasks
List<Future<Double>> taskret = pool.invokeAll(tasks);
pool.shutdown();
// aggregate partial results
ValueFunction vfun = getAggFunction();
if (vfun instanceof KahanFunction) {
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
for (Future<Double> task : taskret) kplus.execute2(kbuff, task.get());
ret = kbuff._sum;
} else {
for (Future<Double> task : taskret) ret = vfun.execute(ret, task.get());
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
}
// correction for min/max
if ((_aggOp == AggOp.MIN || _aggOp == AggOp.MAX) && sparseSafe && a.getNonZeros() < a.getNumRows() * a.getNumColumns())
// unseen 0 might be max or min value
ret = getAggFunction().execute(ret, 0);
return new DoubleObject(ret);
}
use of org.apache.sysml.runtime.functionobjects.KahanFunction in project incubator-systemml by apache.
the class SpoofCellwise method execute.
@Override
public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
// sanity check
if (inputs == null || inputs.size() < 1 || out == null)
throw new RuntimeException("Invalid input arguments.");
// input preparation
MatrixBlock a = inputs.get(0);
SideInput[] b = prepInputMatrices(inputs);
double[] scalars = prepInputScalars(scalarObjects);
final int m = a.getNumRows();
final int n = a.getNumColumns();
// sparse safe check
boolean sparseSafe = isSparseSafe() || (b.length == 0 && genexec(0, b, scalars, m, n, 0, 0) == 0);
long inputSize = sparseSafe ? getTotalInputNnz(inputs) : getTotalInputSize(inputs);
if (inputSize < PAR_NUMCELL_THRESHOLD) {
// serial execution
k = 1;
}
// result allocation and preparations
boolean sparseOut = _type == CellType.NO_AGG && sparseSafe && a.isInSparseFormat();
switch(_type) {
case NO_AGG:
out.reset(m, n, sparseOut);
break;
case ROW_AGG:
out.reset(m, 1, false);
break;
case COL_AGG:
out.reset(1, n, false);
break;
default:
throw new DMLRuntimeException("Invalid cell type: " + _type);
}
out.allocateBlock();
long lnnz = 0;
if (// SINGLE-THREADED
k <= 1) {
if (inputs.get(0) instanceof CompressedMatrixBlock)
lnnz = executeCompressed((CompressedMatrixBlock) a, b, scalars, out, m, n, sparseSafe, 0, m);
else if (!inputs.get(0).isInSparseFormat())
lnnz = executeDense(a.getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m);
else
lnnz = executeSparse(a.getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m);
} else // MULTI-THREADED
{
try {
ExecutorService pool = CommonThreadPool.get(k);
ArrayList<ParExecTask> tasks = new ArrayList<>();
int nk = UtilFunctions.roundToNext(Math.min(8 * k, m / 32), k);
int blklen = (int) (Math.ceil((double) m / nk));
if (a instanceof CompressedMatrixBlock)
blklen = BitmapEncoder.getAlignedBlocksize(blklen);
for (int i = 0; i < nk & i * blklen < m; i++) tasks.add(new ParExecTask(a, b, scalars, out, m, n, sparseSafe, i * blklen, Math.min((i + 1) * blklen, m)));
// execute tasks
List<Future<Long>> taskret = pool.invokeAll(tasks);
pool.shutdown();
// aggregate nnz and error handling
for (Future<Long> task : taskret) lnnz += task.get();
if (_type == CellType.COL_AGG) {
// aggregate partial results
double[] c = out.getDenseBlockValues();
ValueFunction vfun = getAggFunction();
if (vfun instanceof KahanFunction) {
for (ParExecTask task : tasks) LibMatrixMult.vectAdd(task.getResult().getDenseBlockValues(), c, 0, 0, n);
} else {
for (ParExecTask task : tasks) {
double[] tmp = task.getResult().getDenseBlockValues();
for (int j = 0; j < n; j++) c[j] = vfun.execute(c[j], tmp[j]);
}
}
lnnz = out.recomputeNonZeros();
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
}
// post-processing
out.setNonZeros(lnnz);
out.examSparsity();
return out;
}
use of org.apache.sysml.runtime.functionobjects.KahanFunction in project incubator-systemml by apache.
the class SpoofCellwise method executeSparseColAggSum.
private long executeSparseColAggSum(SparseBlock sblock, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) {
KahanFunction kplus = (KahanFunction) getAggFunction();
KahanObject kbuff = new KahanObject(0, 0);
double[] corr = new double[n];
// note: sequential scan algorithm for both sparse-safe and -unsafe
// in order to avoid binary search for sparse-unsafe
double[] c = out.getDenseBlockValues();
for (int i = rl; i < ru; i++) {
kbuff.set(0, 0);
int lastj = -1;
// handle non-empty rows
if (sblock != null && !sblock.isEmpty(i)) {
int apos = sblock.pos(i);
int alen = sblock.size(i);
int[] aix = sblock.indexes(i);
double[] avals = sblock.values(i);
for (int k = apos; k < apos + alen; k++) {
// process zeros before current non-zero
if (!sparseSafe)
for (int j = lastj + 1; j < aix[k]; j++) {
kbuff.set(c[j], corr[j]);
kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
c[j] = kbuff._sum;
corr[j] = kbuff._correction;
}
// process current non-zero
lastj = aix[k];
kbuff.set(c[aix[k]], corr[aix[k]]);
kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj));
c[aix[k]] = kbuff._sum;
corr[aix[k]] = kbuff._correction;
}
}
// process empty rows or remaining zeros
if (!sparseSafe)
for (int j = lastj + 1; j < n; j++) {
kbuff.set(c[j], corr[j]);
kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
c[j] = kbuff._sum;
corr[j] = kbuff._correction;
}
}
return -1;
}
Aggregations