Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project systemml by apache.
The class CompressedMatrixBlock, method aggregateUnaryOperations.
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) {
    // fall back to the uncompressed unary aggregate if necessary
    if (!isCompressed()) {
        return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
    }
    // check for supported operations
    if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq
        || (op.aggOp.increOp.fn instanceof Builtin
            && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN
                || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
        throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
    }
    Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
    // prepare output dimensions
    CellIndex tempCellIndex = new CellIndex(-1, -1);
    op.indexFn.computeDimension(rlen, clen, tempCellIndex);
    if (op.aggOp.correctionExists) {
        switch (op.aggOp.correctionLocation) {
            case LASTROW:
                tempCellIndex.row++;
                break;
            case LASTCOLUMN:
                tempCellIndex.column++;
                break;
            case LASTTWOROWS:
                tempCellIndex.row += 2;
                break;
            case LASTTWOCOLUMNS:
                tempCellIndex.column += 2;
                break;
            default:
                throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
        }
    }
    // initialize and allocate the result
    if (result == null)
        result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
    else
        result.reset(tempCellIndex.row, tempCellIndex.column, false);
    MatrixBlock ret = (MatrixBlock) result;
    ret.allocateDenseBlock();
    // special handling of init values for rowmin/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        double val = (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
        ret.getDenseBlock().set(val);
    }
    // core unary aggregate
    if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
        // multi-threaded execution of all groups
        ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning(
            (op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
        ColGroupUncompressed uc = getUncompressedColGroup();
        try {
            // compute uncompressed column group in parallel (otherwise bottleneck)
            if (uc != null)
                uc.unaryAggregateOperations(op, ret);
            // compute all compressed column groups
            ExecutorService pool = CommonThreadPool.get(op.getNumThreads());
            ArrayList<UnaryAggregateTask> tasks = new ArrayList<>();
            if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
                int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
                for (int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++)
                    tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
            }
            else
                for (ArrayList<ColGroup> grp : grpParts)
                    tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
            List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
            pool.shutdown();
            // aggregate partial results
            if (op.indexFn instanceof ReduceAll) {
                if (op.aggOp.increOp.fn instanceof KahanFunction) {
                    KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        ((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
                    }
                    ret.quickSetValue(0, 0, kbuff._sum);
                }
                else {
                    double val = ret.quickGetValue(0, 0);
                    for (Future<MatrixBlock> rtask : rtasks) {
                        double tmp = rtask.get().quickGetValue(0, 0);
                        val = op.aggOp.increOp.fn.execute(val, tmp);
                    }
                    ret.quickSetValue(0, 0, val);
                }
            }
        }
        catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }
    else {
        // process the uncompressed column group
        for (ColGroup grp : _colGroups)
            if (grp instanceof ColGroupUncompressed)
                grp.unaryAggregateOperations(op, ret);
        // process OLE/RLE column groups
        aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
    }
    // special handling of zeros for rowmin/rowmax
    if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
        int[] rnnz = new int[rlen];
        for (ColGroup grp : _colGroups)
            grp.countNonZerosPerRow(rnnz, 0, rlen);
        Builtin builtin = (Builtin) op.aggOp.increOp.fn;
        for (int i = 0; i < rlen; i++)
            if (rnnz[i] < clen)
                ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
    }
    // drop correction if necessary
    if (op.aggOp.correctionExists && inCP)
        ret.dropLastRowsOrColumns(op.aggOp.correctionLocation);
    // post-processing
    ret.recomputeNonZeros();
    if (LOG.isDebugEnabled())
        LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
    return ret;
}
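For reduce-all aggregates, the branch above merges per-task partial results through a KahanObject, so the combine step stays compensated rather than accumulating naive round-off. As a dependency-free illustration, the following sketch performs the same (sum, correction) update that KahanPlus.execute2 applies to a KahanObject; mergePartials and its inputs are invented for the example:

// Minimal sketch of a compensated merge of per-task partial sums;
// (sum, corr) plays the role of KahanObject._sum / _correction.
static double mergePartials(double[] partials) {
    double sum = 0, corr = 0;
    for (double p : partials) {
        double in = p + corr;   // fold the carried correction into the addend
        double t = sum + in;
        corr = in - (t - sum);  // recover low-order bits lost by the addition
        sum = t;
    }
    return sum;                 // the reported value, like kbuff._sum above
}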
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project systemml by apache.
The class PerformGroupByAggInCombiner, method call.
@Override
public WeightedCell call(WeightedCell value1, WeightedCell value2) throws Exception {
    WeightedCell outCell = new WeightedCell();
    CM_COV_Object cmObj = new CM_COV_Object();
    if (_op instanceof CMOperator) { // everything except sum
        if (((CMOperator) _op).isPartialAggregateOperator()) {
            cmObj.reset();
            CM lcmFn = CM.getCMFnObject(((CMOperator) _op).aggOpType); // cmFn.get(key.getTag());
            // partial aggregate cm operator
            lcmFn.execute(cmObj, value1.getValue(), value1.getWeight());
            lcmFn.execute(cmObj, value2.getValue(), value2.getWeight());
            outCell.setValue(cmObj.getRequiredPartialResult(_op));
            outCell.setWeight(cmObj.getWeight());
        }
        else { // forward tuples to reducer
            throw new DMLRuntimeException("Incorrect usage, should have used PerformGroupByAggInReducer");
        }
    }
    else if (_op instanceof AggregateOperator) { // sum
        AggregateOperator aggop = (AggregateOperator) _op;
        if (aggop.correctionExists) {
            KahanObject buffer = new KahanObject(aggop.initialValue, 0);
            KahanPlus.getKahanPlusFnObject();
            // partial aggregate with correction
            aggop.increOp.fn.execute(buffer, value1.getValue() * value1.getWeight());
            aggop.increOp.fn.execute(buffer, value2.getValue() * value2.getWeight());
            outCell.setValue(buffer._sum);
            outCell.setWeight(1);
        }
        else { // no correction
            double v = aggop.initialValue;
            // partial aggregate without correction
            v = aggop.increOp.fn.execute(v, value1.getValue() * value1.getWeight());
            v = aggop.increOp.fn.execute(v, value2.getValue() * value2.getWeight());
            outCell.setValue(v);
            outCell.setWeight(1);
        }
    }
    else
        throw new DMLRuntimeException("Unsupported operator in grouped aggregate instruction: " + _op);
    return outCell;
}
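Because WeightedCell and the operator hierarchy are SystemML-internal, here is a hedged, self-contained rendering of the sum-with-correction branch: each cell contributes value * weight, both cells are folded into a compensated sum, and the emitted cell carries weight 1, as with outCell.setWeight(1) above. The WCell record is a hypothetical stand-in for WeightedCell:

// Hypothetical stand-in for WeightedCell (illustration only).
record WCell(double value, double weight) {}

// Combine two weighted cells into one Kahan-compensated partial sum,
// mirroring the correctionExists branch of the combiner above.
static WCell combineSum(WCell c1, WCell c2, double initialValue) {
    double sum = initialValue, corr = 0;
    for (WCell c : new WCell[] { c1, c2 }) {
        double in = c.value() * c.weight() + corr;
        double t = sum + in;
        corr = in - (t - sum);
        sum = t;
    }
    return new WCell(sum, 1); // the correction is dropped on emit, as in outCell
}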
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project systemml by apache.
The class PerformGroupByAggInReducer, method call.
@Override
public WeightedCell call(Iterable<WeightedCell> kv) throws Exception {
    WeightedCell outCell = new WeightedCell();
    CM_COV_Object cmObj = new CM_COV_Object();
    if (op instanceof CMOperator) { // everything except sum
        cmObj.reset();
        CM lcmFn = CM.getCMFnObject(((CMOperator) op).aggOpType); // cmFn.get(key.getTag());
        if (((CMOperator) op).isPartialAggregateOperator()) {
            throw new DMLRuntimeException("Incorrect usage, should have used PerformGroupByAggInCombiner");
        }
        else { // aggregate all forwarded tuples
            for (WeightedCell value : kv)
                lcmFn.execute(cmObj, value.getValue(), value.getWeight());
            outCell.setValue(cmObj.getRequiredResult(op));
            outCell.setWeight(1);
        }
    }
    else if (op instanceof AggregateOperator) { // sum
        AggregateOperator aggop = (AggregateOperator) op;
        if (aggop.correctionExists) {
            KahanObject buffer = new KahanObject(aggop.initialValue, 0);
            KahanPlus.getKahanPlusFnObject();
            // partial aggregate with correction
            for (WeightedCell value : kv)
                aggop.increOp.fn.execute(buffer, value.getValue() * value.getWeight());
            outCell.setValue(buffer._sum);
            outCell.setWeight(1);
        }
        else { // no correction
            double v = aggop.initialValue;
            // partial aggregate without correction
            for (WeightedCell value : kv)
                v = aggop.increOp.fn.execute(v, value.getValue() * value.getWeight());
            outCell.setValue(v);
            outCell.setWeight(1);
        }
    }
    else
        throw new DMLRuntimeException("Unsupported operator in grouped aggregate instruction: " + op);
    return outCell;
}
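The reducer differs from the combiner only in folding an arbitrary Iterable of cells instead of exactly two; the same sketch extends directly (reusing the hypothetical WCell record from the combiner example):

// Fold any number of weighted cells into one compensated sum, mirroring
// the correctionExists loop of the reducer above.
static WCell reduceSum(Iterable<WCell> cells, double initialValue) {
    double sum = initialValue, corr = 0;
    for (WCell c : cells) {
        double in = c.value() * c.weight() + corr;
        double t = sum + in;
        corr = in - (t - sum);
        sum = t;
    }
    return new WCell(sum, 1);
}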
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
The class CM, method execute.
/**
 * Combining stats from two partitions of the data.
 */
@Override
public Data execute(Data in1, Data in2) {
    CM_COV_Object cm1 = (CM_COV_Object) in1;
    CM_COV_Object cm2 = (CM_COV_Object) in2;
    if (cm1.isCMAllZeros()) {
        cm1.w = cm2.w;
        cm1.mean.set(cm2.mean);
        cm1.m2.set(cm2.m2);
        cm1.m3.set(cm2.m3);
        cm1.m4.set(cm2.m4);
        return cm1;
    }
    if (cm2.isCMAllZeros())
        return cm1;
    switch (_type) {
        case COUNT: {
            cm1.w = Math.round(cm1.w + cm2.w);
            break;
        }
        case MEAN: {
            double w = cm1.w + cm2.w;
            double d = cm2.mean._sum - cm1.mean._sum;
            cm1.mean = (KahanObject) _plus.execute(cm1.mean, cm2.w * d / w);
            cm1.w = w;
            break;
        }
        case CM2: {
            double w = cm1.w + cm2.w;
            double d = cm2.mean._sum - cm1.mean._sum;
            cm1.mean = (KahanObject) _plus.execute(cm1.mean, cm2.w * d / w);
            double t1 = cm1.w * cm2.w / w * d;
            double lt1 = t1 * d;
            _buff2.set(cm1.m2);
            _buff2 = (KahanObject) _plus.execute(_buff2, cm2.m2._sum, cm2.m2._correction);
            _buff2 = (KahanObject) _plus.execute(_buff2, lt1);
            cm1.m2.set(_buff2);
            cm1.w = w;
            break;
        }
        case CM3: {
            double w = cm1.w + cm2.w;
            double d = cm2.mean._sum - cm1.mean._sum;
            cm1.mean = (KahanObject) _plus.execute(cm1.mean, cm2.w * d / w);
            double t1 = cm1.w * cm2.w / w * d;
            double t2 = -1 / cm1.w;
            double lt1 = t1 * d;
            double lt2 = Math.pow(t1, 3) * (1 / Math.pow(cm2.w, 2) - Math.pow(t2, 2));
            double f1 = cm1.w / w;
            double f2 = cm2.w / w;
            _buff2.set(cm1.m2);
            _buff2 = (KahanObject) _plus.execute(_buff2, cm2.m2._sum, cm2.m2._correction);
            _buff2 = (KahanObject) _plus.execute(_buff2, lt1);
            _buff3.set(cm1.m3);
            _buff3 = (KahanObject) _plus.execute(_buff3, cm2.m3._sum, cm2.m3._correction);
            _buff3 = (KahanObject) _plus.execute(_buff3, 3 * (-f2 * cm1.m2._sum + f1 * cm2.m2._sum) * d + lt2);
            cm1.m2.set(_buff2);
            cm1.m3.set(_buff3);
            cm1.w = w;
            break;
        }
        case CM4: {
            double w = cm1.w + cm2.w;
            double d = cm2.mean._sum - cm1.mean._sum;
            cm1.mean = (KahanObject) _plus.execute(cm1.mean, cm2.w * d / w);
            double t1 = cm1.w * cm2.w / w * d;
            double t2 = -1 / cm1.w;
            double lt1 = t1 * d;
            double lt2 = Math.pow(t1, 3) * (1 / Math.pow(cm2.w, 2) - Math.pow(t2, 2));
            double lt3 = Math.pow(t1, 4) * (1 / Math.pow(cm2.w, 3) - Math.pow(t2, 3));
            double f1 = cm1.w / w;
            double f2 = cm2.w / w;
            _buff2.set(cm1.m2);
            _buff2 = (KahanObject) _plus.execute(_buff2, cm2.m2._sum, cm2.m2._correction);
            _buff2 = (KahanObject) _plus.execute(_buff2, lt1);
            _buff3.set(cm1.m3);
            _buff3 = (KahanObject) _plus.execute(_buff3, cm2.m3._sum, cm2.m3._correction);
            _buff3 = (KahanObject) _plus.execute(_buff3, 3 * (-f2 * cm1.m2._sum + f1 * cm2.m2._sum) * d + lt2);
            cm1.m4 = (KahanObject) _plus.execute(cm1.m4, cm2.m4._sum, cm2.m4._correction);
            cm1.m4 = (KahanObject) _plus.execute(cm1.m4, 4 * (-f2 * cm1.m3._sum + f1 * cm2.m3._sum) * d
                + 6 * (Math.pow(-f2, 2) * cm1.m2._sum + Math.pow(f1, 2) * cm2.m2._sum) * Math.pow(d, 2) + lt3);
            cm1.m2.set(_buff2);
            cm1.m3.set(_buff3);
            cm1.w = w;
            break;
        }
        case VARIANCE: {
            double w = cm1.w + cm2.w;
            double d = cm2.mean._sum - cm1.mean._sum;
            cm1.mean = (KahanObject) _plus.execute(cm1.mean, cm2.w * d / w);
            double t1 = cm1.w * cm2.w / w * d;
            double lt1 = t1 * d;
            cm1.m2 = (KahanObject) _plus.execute(cm1.m2, cm2.m2._sum, cm2.m2._correction);
            cm1.m2 = (KahanObject) _plus.execute(cm1.m2, lt1);
            cm1.w = w;
            break;
        }
        default:
            throw new DMLRuntimeException("Unsupported operation type: " + _type);
    }
    return cm1;
}
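The MEAN and VARIANCE cases are the classic pairwise-merge formulas for running moments (Chan et al.), with the KahanObject buffers absorbing round-off in the m2/m3/m4 updates. Below is a dependency-free sketch of the VARIANCE case with the Kahan compensation omitted so the arithmetic stays visible; the Moments record is a hypothetical stand-in for CM_COV_Object:

// Hypothetical (weight, mean, m2) summary of one data partition.
record Moments(double w, double mean, double m2) {}

// Merge two partition summaries, following the same update as the
// VARIANCE case above: t1 = w1*w2/w * d and lt1 = t1 * d.
static Moments merge(Moments a, Moments b) {
    double w = a.w() + b.w();
    double d = b.mean() - a.mean();
    double mean = a.mean() + b.w() * d / w;  // cm1.mean update
    double m2 = a.m2() + b.m2()
        + a.w() * b.w() / w * d * d;         // m2a + m2b + lt1
    return new Moments(w, mean, m2);
}

From the merged summary, the variance follows as m2 / w (population) or m2 / (w - 1) (sample).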
Use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
The class SpoofCellwise, method execute.
@Override
public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int k) {
    // sanity check
    if (inputs == null || inputs.size() < 1)
        throw new RuntimeException("Invalid input arguments.");
    // input preparation
    MatrixBlock a = inputs.get(0);
    SideInput[] b = prepInputMatrices(inputs);
    double[] scalars = prepInputScalars(scalarObjects);
    final int m = a.getNumRows();
    final int n = a.getNumColumns();
    // sparse safe check
    boolean sparseSafe = isSparseSafe() || (b.length == 0 && genexec(0, b, scalars, m, n, 0, 0) == 0);
    long inputSize = sparseSafe ? getTotalInputNnz(inputs) : getTotalInputSize(inputs);
    if (inputSize < PAR_NUMCELL_THRESHOLD) {
        // serial execution
        k = 1;
    }
    double ret = 0;
    if (k <= 1) { // SINGLE-THREADED
        if (inputs.get(0) instanceof CompressedMatrixBlock)
            ret = executeCompressedAndAgg((CompressedMatrixBlock) a, b, scalars, m, n, sparseSafe, 0, m);
        else if (!inputs.get(0).isInSparseFormat())
            ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m);
        else
            ret = executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m);
    }
    else { // MULTI-THREADED
        try {
            ExecutorService pool = CommonThreadPool.get(k);
            ArrayList<ParAggTask> tasks = new ArrayList<>();
            int nk = (a instanceof CompressedMatrixBlock) ? k :
                UtilFunctions.roundToNext(Math.min(8 * k, m / 32), k);
            int blklen = (int) (Math.ceil((double) m / nk));
            if (a instanceof CompressedMatrixBlock)
                blklen = BitmapEncoder.getAlignedBlocksize(blklen);
            for (int i = 0; i < nk & i * blklen < m; i++)
                tasks.add(new ParAggTask(a, b, scalars, m, n, sparseSafe, i * blklen, Math.min((i + 1) * blklen, m)));
            // execute tasks
            List<Future<Double>> taskret = pool.invokeAll(tasks);
            pool.shutdown();
            // aggregate partial results
            ValueFunction vfun = getAggFunction();
            if (vfun instanceof KahanFunction) {
                KahanObject kbuff = new KahanObject(0, 0);
                KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
                for (Future<Double> task : taskret)
                    kplus.execute2(kbuff, task.get());
                ret = kbuff._sum;
            }
            else {
                for (Future<Double> task : taskret)
                    ret = vfun.execute(ret, task.get());
            }
        }
        catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
    }
    // correction for min/max
    if ((_aggOp == AggOp.MIN || _aggOp == AggOp.MAX) && sparseSafe
        && a.getNonZeros() < a.getNumRows() * a.getNumColumns())
        // unseen 0 might be max or min value
        ret = getAggFunction().execute(ret, 0);
    return new DoubleObject(ret);
}
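The multi-threaded path above is the same pattern end to end: partition the rows into aligned blocks, sum each block in its own task, and fold only the short list of partial results through KahanPlus. The following runnable, dependency-free sketch reproduces that structure; the class name, thread count, and input data are invented for the illustration:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

public class ParAggSketch {
    public static void main(String[] args) throws Exception {
        double[][] a = new double[1000][64]; // stand-in for the dense input block
        for (double[] row : a) java.util.Arrays.fill(row, 0.1);
        int m = a.length, k = 4, blklen = (int) Math.ceil((double) m / k);

        // one partial-sum task per row block
        ExecutorService pool = Executors.newFixedThreadPool(k);
        List<Future<Double>> taskret = new ArrayList<>();
        for (int i = 0; i < k && i * blklen < m; i++) {
            int rl = i * blklen, ru = Math.min((i + 1) * blklen, m);
            taskret.add(pool.submit(() -> {
                double s = 0;
                for (int r = rl; r < ru; r++)
                    for (double v : a[r]) s += v;
                return s;
            }));
        }
        pool.shutdown();

        // compensated combine of the partials, as in the KahanFunction branch
        double sum = 0, corr = 0;
        for (Future<Double> t : taskret) {
            double in = t.get() + corr;
            double tmp = sum + in;
            corr = in - (tmp - sum);
            sum = tmp;
        }
        System.out.println(sum); // ~6400.0 (1000 rows * 64 cols * 0.1)
    }
}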