Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
The class LibMatrixAgg, method aggregateBinaryMatrixSparseGeneric.
private static void aggregateBinaryMatrixSparseGeneric(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr) throws DMLRuntimeException {
    if (in.isEmptyBlock(false))
        return;
    SparseBlock a = in.getSparseBlock();
    KahanObject buffer1 = new KahanObject(0, 0);
    KahanPlus akplus = KahanPlus.getKahanPlusFnObject();
    final int m = in.rlen;
    final int rlen = Math.min(a.numRows(), m);
    for (int i = 0; i < rlen; i++) {
        if (!a.isEmpty(i)) {
            int apos = a.pos(i);
            int alen = a.size(i);
            int[] aix = a.indexes(i);
            double[] avals = a.values(i);
            for (int j = apos; j < apos + alen; j++) {
                int jix = aix[j];
                buffer1._sum = aggVal.quickGetValue(i, jix);
                buffer1._correction = aggCorr.quickGetValue(i, jix);
                akplus.execute2(buffer1, avals[j]);
                aggVal.quickSetValue(i, jix, buffer1._sum);
                aggCorr.quickSetValue(i, jix, buffer1._correction);
            }
        }
    }
    //note: nnz of aggVal/aggCorr maintained internally
    aggVal.examSparsity();
    aggCorr.examSparsity();
}
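For context, the per-cell update above is plain Kahan (compensated) summation: the current aggregate and its correction term are loaded into the KahanObject, one value is folded in, and both parts are written back. A minimal standalone sketch of that step in plain Java, with no SystemML dependency (the KahanAccumulator class and its names are illustrative only):

// Sketch of the compensated add performed per non-zero cell above.
// Plain Java; KahanAccumulator is a made-up name, not a SystemML class.
public class KahanAccumulator {
    private double sum;        // running sum, analogous to KahanObject._sum
    private double correction; // lost low-order bits, analogous to KahanObject._correction

    public void add(double value) {
        double corrected = value + correction;   // fold the old correction into the new value
        double newSum = sum + corrected;
        correction = corrected - (newSum - sum); // whatever the add just rounded away
        sum = newSum;
    }

    public double getSum() { return sum; }

    public static void main(String[] args) {
        KahanAccumulator acc = new KahanAccumulator();
        acc.add(1.0);
        for (int i = 0; i < 1_000_000; i++)
            acc.add(1e-16);                      // individually below the rounding threshold of 1.0
        System.out.println(acc.getSum());        // ~1.0000000001, where a naive += stays at 1.0
    }
}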
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
The class LibMatrixAgg, method cumaggregateUnaryMatrixDense.
private static void cumaggregateUnaryMatrixDense(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, double[] agg, int rl, int ru) throws DMLRuntimeException {
    final int m = in.rlen;
    final int n = in.clen;
    double[] a = in.getDenseBlock();
    double[] c = out.getDenseBlock();
    switch (optype) {
        case CUM_KAHAN_SUM: //CUMSUM
        {
            KahanObject kbuff = new KahanObject(0, 0);
            KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
            d_ucumkp(a, agg, c, m, n, kbuff, kplus, rl, ru);
            break;
        }
        case CUM_PROD: //CUMPROD
        {
            d_ucumm(a, agg, c, m, n, rl, ru);
            break;
        }
        case CUM_MIN:
        case CUM_MAX:
        {
            double init = Double.MAX_VALUE * ((optype == AggType.CUM_MAX) ? -1 : 1);
            d_ucummxx(a, agg, c, m, n, init, (Builtin) vFn, rl, ru);
            break;
        }
        default:
            throw new DMLRuntimeException("Unsupported cumulative aggregation type: " + optype);
    }
}
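The CUM_KAHAN_SUM branch delegates to the internal helper d_ucumkp, which carries a Kahan-corrected running sum down each column of the dense block. A rough standalone sketch of that idea for a single column, without the SystemML helpers (cumulativeKahanSum is a hypothetical name, not the d_ucumkp implementation):

// Sketch: prefix (cumulative) sum of one dense column with Kahan compensation.
// Plain Java; illustrative only.
public class CumSumSketch {
    static double[] cumulativeKahanSum(double[] col) {
        double[] out = new double[col.length];
        double sum = 0, correction = 0;
        for (int i = 0; i < col.length; i++) {
            double corrected = col[i] + correction;
            double newSum = sum + corrected;
            correction = corrected - (newSum - sum); // rounding error carried to the next row
            sum = newSum;
            out[i] = sum;                            // running total up to and including row i
        }
        return out;
    }

    public static void main(String[] args) {
        double[] c = cumulativeKahanSum(new double[] { 0.1, 0.2, 0.3 });
        System.out.println(java.util.Arrays.toString(c)); // prints the three running totals
    }
}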
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
The class CompressedMatrixBlock, method aggregateUnaryOperations.
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) throws DMLRuntimeException {
    //call uncompressed matrix mult if necessary
    if (!isCompressed()) {
        return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
    }
    //check for supported operations
    if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq
        || (op.aggOp.increOp.fn instanceof Builtin
            && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN
                || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
        throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    //prepare output dimensions
    CellIndex tempCellIndex = new CellIndex(-1, -1);
    op.indexFn.computeDimension(rlen, clen, tempCellIndex);
    if (op.aggOp.correctionExists) {
        switch (op.aggOp.correctionLocation) {
            case LASTROW:
                tempCellIndex.row++;
                break;
            case LASTCOLUMN:
                tempCellIndex.column++;
                break;
            case LASTTWOROWS:
                tempCellIndex.row += 2;
                break;
            case LASTTWOCOLUMNS:
                tempCellIndex.column += 2;
                break;
            default:
                throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
        }
    }
    // initialize and allocate the result
    if (result == null)
        result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
    else
        result.reset(tempCellIndex.row, tempCellIndex.column, false);
    MatrixBlock ret = (MatrixBlock) result;
    ret.allocateDenseBlock();
    //special handling init value for rowmins/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        double val = Double.MAX_VALUE * ((((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? -1 : 1);
        Arrays.fill(ret.getDenseBlock(), val);
    }
    //core unary aggregate
    if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
        //multi-threaded execution of all groups
        ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
        ColGroupUncompressed uc = getUncompressedColGroup();
        try {
            //compute uncompressed column group in parallel (otherwise bottleneck)
            if (uc != null)
                ret = (MatrixBlock) uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);
            //compute all compressed column groups
            ExecutorService pool = Executors.newFixedThreadPool(op.getNumThreads());
            ArrayList<UnaryAggregateTask> tasks = new ArrayList<UnaryAggregateTask>();
            if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
                int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
                for (int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++)
                    tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
            } else
                for (ArrayList<ColGroup> grp : grpParts)
                    tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
            List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
            pool.shutdown();
            //aggregate partial results
            if (op.indexFn instanceof ReduceAll) {
                if (op.aggOp.increOp.fn instanceof KahanFunction) {
                    KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        ((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
                    }
                    ret.quickSetValue(0, 0, kbuff._sum);
                } else {
                    double val = ret.quickGetValue(0, 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        val = op.aggOp.increOp.fn.execute(val, tmp);
                    }
                    ret.quickSetValue(0, 0, val);
                }
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    } else {
        //process UC column group
        for (ColGroup grp : _colGroups)
            if (grp instanceof ColGroupUncompressed)
                grp.unaryAggregateOperations(op, ret);
        //process OLE/RLE column groups
        aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
    }
    //special handling zeros for rowmins/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        int[] rnnz = new int[rlen];
        for (ColGroup grp : _colGroups)
            grp.countNonZerosPerRow(rnnz, 0, rlen);
        Builtin builtin = (Builtin) op.aggOp.increOp.fn;
        for (int i = 0; i < rlen; i++)
            if (rnnz[i] < clen)
                ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
    }
    //drop correction if necessary
    if (op.aggOp.correctionExists && inCP)
        ret.dropLastRowsOrColums(op.aggOp.correctionLocation);
    //post-processing
    ret.recomputeNonZeros();
    if (LOG.isDebugEnabled())
        LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
    return ret;
}
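In the multi-threaded ReduceAll path, every task returns its partial sum in cell (0, 0) of a small MatrixBlock, and the partials are then merged through one KahanObject via execute2. A stripped-down sketch of that merge step using the same KahanObject/KahanPlus API shown above; it assumes the incubator-systemml jar on the classpath and the usual org.apache.sysml.runtime.functionobjects package for KahanPlus, and the partial values are made up:

import org.apache.sysml.runtime.functionobjects.KahanPlus;
import org.apache.sysml.runtime.instructions.cp.KahanObject;

public class MergePartialsSketch {
    public static void main(String[] args) throws Exception {
        // one partial sum per worker task (illustrative values)
        double[] partials = { 1e10, 3.14159, -1e10, 2.71828 };
        KahanObject kbuff = new KahanObject(0, 0);
        KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
        for (double p : partials)
            kplus.execute2(kbuff, p); // compensated add of each partial into the buffer
        System.out.println(kbuff._sum); // ~5.85987
    }
}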
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
The class ColGroupRLE, method computeColSums.
@Override
protected final void computeColSums(MatrixBlock result, KahanFunction kplus) {
    KahanObject kbuff = new KahanObject(0, 0);
    final int numCols = getNumCols();
    final int numVals = getNumValues();
    for (int k = 0; k < numVals; k++) {
        int boff = _ptr[k];
        int blen = len(k);
        int valOff = k * numCols;
        int curRunEnd = 0;
        int count = 0;
        for (int bix = 0; bix < blen; bix += 2) {
            int curRunStartOff = curRunEnd + _data[boff + bix];
            curRunEnd = curRunStartOff + _data[boff + bix + 1];
            count += curRunEnd - curRunStartOff;
        }
        //scale counts by all values
        for (int j = 0; j < numCols; j++) {
            kbuff.set(result.quickGetValue(0, _colIndexes[j]), result.quickGetValue(1, _colIndexes[j]));
            kplus.execute3(kbuff, _values[valOff + j], count);
            result.quickSetValue(0, _colIndexes[j], kbuff._sum);
            result.quickSetValue(1, _colIndexes[j], kbuff._correction);
        }
    }
}
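computeColSums exploits the RLE layout: each distinct value needs to be added only once per column, scaled by its total run-length count, which is conceptually what kplus.execute3(kbuff, value, count) does with compensation. A standalone plain-Java sketch of such a scaled compensated add (KahanState and addScaled are illustrative names, not SystemML API):

// Sketch: compensated add of (value * count), the per-column step in computeColSums above.
public class KahanState {
    double sum, correction;

    void addScaled(double value, int count) {
        double corrected = value * count + correction; // fold in the whole run at once
        double newSum = sum + corrected;
        correction = corrected - (newSum - sum);       // keep what the add dropped
        sum = newSum;
    }

    public static void main(String[] args) {
        KahanState col = new KahanState();
        col.addScaled(0.1, 7); // a run of seven cells holding 0.1
        col.addScaled(2.5, 3); // a run of three cells holding 2.5
        System.out.println(col.sum); // ~8.2
    }
}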
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
The class GroupedAggMRCombiner, method reduce.
@Override
public void reduce(TaggedMatrixIndexes key, Iterator<WeightedCell> values, OutputCollector<TaggedMatrixIndexes, WeightedCell> out, Reporter reporter) throws IOException {
    long start = System.currentTimeMillis();
    //get aggregate operator
    GroupedAggregateInstruction ins = grpaggInstructions.get(key.getTag());
    Operator op = ins.getOperator();
    boolean isPartialAgg = true;
    //combine iterator to single value
    try {
        if (op instanceof CMOperator) { //everything except sum
            if (((CMOperator) op).isPartialAggregateOperator()) {
                cmObj.reset();
                CM lcmFn = cmFn.get(key.getTag());
                //partial aggregate cm operator
                while (values.hasNext()) {
                    WeightedCell value = values.next();
                    lcmFn.execute(cmObj, value.getValue(), value.getWeight());
                }
                outCell.setValue(cmObj.getRequiredPartialResult(op));
                outCell.setWeight(cmObj.getWeight());
            } else { //forward tuples to reducer
                isPartialAgg = false;
                while (values.hasNext())
                    out.collect(key, values.next());
            }
        } else if (op instanceof AggregateOperator) { //sum
            AggregateOperator aggop = (AggregateOperator) op;
            if (aggop.correctionExists) {
                KahanObject buffer = new KahanObject(aggop.initialValue, 0);
                KahanPlus.getKahanPlusFnObject();
                //partial aggregate with correction
                while (values.hasNext()) {
                    WeightedCell value = values.next();
                    aggop.increOp.fn.execute(buffer, value.getValue() * value.getWeight());
                }
                outCell.setValue(buffer._sum);
                outCell.setWeight(1);
            } else { //no correction
                double v = aggop.initialValue;
                //partial aggregate without correction
                while (values.hasNext()) {
                    WeightedCell value = values.next();
                    v = aggop.increOp.fn.execute(v, value.getValue() * value.getWeight());
                }
                outCell.setValue(v);
                outCell.setWeight(1);
            }
        } else
            throw new IOException("Unsupported operator in instruction: " + ins);
    } catch (Exception ex) {
        throw new IOException(ex);
    }
    //collect the output (to reducer)
    if (isPartialAgg)
        out.collect(key, outCell);
    reporter.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}
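The sum branch of the combiner accumulates value * weight per weighted cell, keeping a KahanObject correction whenever the aggregate operator declares one. A minimal dependency-free sketch of that weighted compensated sum (class name and input arrays are made up for illustration):

// Sketch: weighted compensated sum, as in the correction-bearing branch of reduce() above.
public class WeightedSumSketch {
    public static void main(String[] args) {
        double[] values  = { 4.0, 2.0, 10.0 };
        double[] weights = { 1.0, 3.0, 0.5 }; // each cell contributes value * weight
        double sum = 0, correction = 0;
        for (int i = 0; i < values.length; i++) {
            double corrected = values[i] * weights[i] + correction;
            double newSum = sum + corrected;
            correction = corrected - (newSum - sum); // carry the rounding error forward
            sum = newSum;
        }
        System.out.println(sum); // 15.0
    }
}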