Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by Apache.
Class ColGroupOLE, method computeRowSums.
/**
 * Adds this column group's per-value sums into the row-sum output for the
 * row range bounded by {@code rl} and {@code ru}.
 *
 * <p>For every target row the running sum is read from the row's position in
 * the dense output and its correction term from the cell directly after it;
 * both are written back after a Kahan addition of the value sum.
 *
 * @param result output block whose dense block receives (sum, correction) pairs
 * @param kplus  Kahan function used to pre-aggregate the values of each distinct tuple
 * @param rl     lower row bound (scan start)
 * @param ru     upper row bound (exclusive scan limit)
 */
@Override
protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    // note: due to corrections the output might be a large dense block
    final DenseBlock out = result.getDenseBlock();
    final KahanObject kbuff = new KahanObject(0, 0);
    final KahanPlus adder = KahanPlus.getKahanPlusFnObject();
    final int segSize = BitmapEncoder.BITMAP_BLOCK_SZ;
    final int numVals = getNumValues();

    final boolean cacheConscious = ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT
        && numVals > 1 && _numRows > segSize;

    if (cacheConscious) {
        final int stripe = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;

        // step 1: one scan cursor and one pre-aggregated sum per distinct value
        final int[] cursor = skipScan(numVals, rl);
        final double[] valueSums = sumAllValues(kplus, kbuff, false);

        // step 2: walk the row range stripe by stripe, visiting every value's
        // bitmap within a stripe before moving on to the next stripe
        int stripeStart = rl;
        while (stripeStart < ru) {
            final int stripeEnd = Math.min(stripeStart + stripe, ru);

            for (int k = 0; k < numVals; k++) {
                final int base = _ptr[k];
                final int total = len(k);
                final double val = valueSums[k];
                int bix = cursor[k];

                int segStart = stripeStart;
                while (segStart < stripeEnd && bix < total) {
                    // a segment is stored as its entry count followed by the row offsets
                    final int cnt = _data[base + bix];
                    final int first = base + bix + 1;
                    for (int i = 0; i < cnt; i++) {
                        kahanAccumulateRow(out, segStart + _data[first + i], val, kbuff, adder);
                    }
                    bix += cnt + 1;
                    segStart += segSize;
                }

                // remember where the next stripe resumes for this value
                cursor[k] = bix;
            }
            stripeStart += stripe;
        }
        return;
    }

    // fallback: handle each value's entire bitmap in turn
    for (int k = 0; k < numVals; k++) {
        final int base = _ptr[k];
        final int total = len(k);
        final double val = sumValues(k, kplus, kbuff);

        // a zero contribution leaves every row sum untouched
        if (val == 0) {
            continue;
        }

        int bix = skipScanVal(k, rl);
        // NOTE(review): the start offset is derived from rl + 1 rather than rl; both
        // give the same result when rl is a multiple of the segment size (> 1) --
        // confirm callers always pass segment-aligned rl.
        int off = ((rl + 1) / segSize) * segSize;
        while (bix < total && off < ru) {
            final int cnt = _data[base + bix];
            for (int i = 1; i <= cnt; i++) {
                kahanAccumulateRow(out, off + _data[base + bix + i], val, kbuff, adder);
            }
            bix += cnt + 1;
            off += segSize;
        }
    }
}

/** Kahan-adds {@code val} to the (sum, correction) pair stored for row {@code rix} in {@code out}. */
private static void kahanAccumulateRow(DenseBlock out, int rix, double val, KahanObject kbuff, KahanPlus adder) {
    final double[] cvals = out.values(rix);
    final int cix = out.pos(rix);
    kbuff.set(cvals[cix], cvals[cix + 1]);
    adder.execute2(kbuff, val);
    cvals[cix] = kbuff._sum;
    cvals[cix + 1] = kbuff._correction;
}
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by Apache.
Class LibMatrixAgg, method aggregateBinaryMatrixDenseGeneric.
/**
 * Adds every cell of the dense input {@code in} onto the matching cell of
 * {@code aggVal} via {@link KahanPlus}, keeping the per-cell correction terms
 * in {@code aggCorr}.
 *
 * @param in      input block; the call is a no-op if it has no dense block or is empty
 * @param aggVal  running sums, updated in place
 * @param aggCorr running corrections, updated in place
 */
private static void aggregateBinaryMatrixDenseGeneric(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr) {
    // nothing to aggregate for an unallocated or empty input
    if (in.denseBlock == null || in.isEmptyBlock(false)) {
        return;
    }

    final int rows = in.rlen;
    final int cols = in.clen;
    final double[] inVals = in.getDenseBlockValues();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus adder = KahanPlus.getKahanPlusFnObject();

    // cell-wise update through quickGet/quickSet (incl implicit nnz maintenance);
    // 'ix' walks the input values linearly in row-major order
    int ix = 0;
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            acc._sum = aggVal.quickGetValue(r, c);
            acc._correction = aggCorr.quickGetValue(r, c);
            adder.execute(acc, inVals[ix]);
            aggVal.quickSetValue(r, c, acc._sum);
            aggCorr.quickSetValue(r, c, acc._correction);
            ix++;
        }
    }

    // note: nnz of aggVal/aggCorr maintained internally; only sparse targets
    // are re-examined, values first and corrections second
    for (MatrixBlock target : new MatrixBlock[] { aggVal, aggCorr }) {
        if (target.sparse) {
            target.examSparsity(false);
        }
    }
}
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by Apache.
Class LibMatrixAgg, method getAggType.
/**
 * Maps a unary aggregate operator onto the matching {@code AggType}, based on
 * its value function, its index function and the location of its correction.
 *
 * @param op the unary aggregate operator to classify
 * @return the recognized aggregation type, or {@code AggType.INVALID} if no rule matches
 */
private static AggType getAggType(AggregateUnaryOperator op) {
    final ValueFunction vfn = op.aggOp.increOp.fn;
    final IndexFunction ifn = op.indexFn;
    final CorrectionLocationType corr = op.aggOp.correctionLocation;

    // full, row-wise or column-wise reduction (shared by most rules below)
    final boolean allRowOrCol = ifn instanceof ReduceAll
        || ifn instanceof ReduceCol
        || ifn instanceof ReduceRow;

    // (kahan) sum / sum squared / trace (for ReduceDiag)
    final boolean singleCorrection = corr == CorrectionLocationType.LASTCOLUMN
        || corr == CorrectionLocationType.LASTROW;
    if (vfn instanceof KahanFunction && singleCorrection && (allRowOrCol || ifn instanceof ReduceDiag)) {
        if (vfn instanceof KahanPlus) {
            return AggType.KAHAN_SUM;
        }
        if (vfn instanceof KahanPlusSq) {
            return AggType.KAHAN_SUM_SQ;
        }
        // any other Kahan function falls through to the remaining rules
    }

    // mean
    final boolean doubleCorrection = corr == CorrectionLocationType.LASTTWOCOLUMNS
        || corr == CorrectionLocationType.LASTTWOROWS;
    if (vfn instanceof Mean && doubleCorrection && allRowOrCol) {
        return AggType.MEAN;
    }

    // variance
    final boolean quadCorrection = corr == CorrectionLocationType.LASTFOURCOLUMNS
        || corr == CorrectionLocationType.LASTFOURROWS;
    if (vfn instanceof CM
        && ((CM) vfn).getAggOpType() == AggregateOperationTypes.VARIANCE
        && quadCorrection && allRowOrCol) {
        return AggType.VAR;
    }

    // prod
    if (vfn instanceof Multiply && ifn instanceof ReduceAll) {
        return AggType.PROD;
    }

    // min / max
    if (vfn instanceof Builtin && allRowOrCol) {
        switch (((Builtin) vfn).bFunc) {
            case MAX:
                return AggType.MAX;
            case MIN:
                return AggType.MIN;
            case MAXINDEX:
                return AggType.MAX_INDEX;
            case MININDEX:
                return AggType.MIN_INDEX;
            default:
                // other builtin codes are not aggregation types
                break;
        }
    }

    return AggType.INVALID;
}
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by Apache.
Class LibMatrixAgg, method aggregateBinaryMatrixLastRowDenseGeneric.
/**
 * Adds all but the last row of the dense input {@code in} onto {@code aggVal}
 * via {@link KahanPlus}. The correction for column {@code j} is read from and
 * written back to row {@code in.rlen - 1} of {@code aggVal}; the input's own
 * last-row entry for that column is handed to the adder as its third argument
 * (presumably the input-side correction).
 *
 * @param in     input block; the call is a no-op if it has no dense block or is empty
 * @param aggVal running sums with the correction row last, updated in place
 */
private static void aggregateBinaryMatrixLastRowDenseGeneric(MatrixBlock in, MatrixBlock aggVal) {
    // nothing to aggregate for an unallocated or empty input
    if (in.denseBlock == null || in.isEmptyBlock(false)) {
        return;
    }

    final int corrRow = in.rlen - 1;
    final int cols = in.clen;
    // linear offset of the input's last row within its dense values
    final int corrBase = corrRow * cols;
    final double[] inVals = in.getDenseBlockValues();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus adder = KahanPlus.getKahanPlusFnObject();

    // cell-wise update through quickGet/quickSet (incl implicit nnz maintenance)
    int ix = 0;
    for (int r = 0; r < corrRow; r++) {
        for (int c = 0; c < cols; c++) {
            acc._sum = aggVal.quickGetValue(r, c);
            acc._correction = aggVal.quickGetValue(corrRow, c);
            adder.execute(acc, inVals[ix], inVals[corrBase + c]);
            aggVal.quickSetValue(r, c, acc._sum);
            aggVal.quickSetValue(corrRow, c, acc._correction);
            ix++;
        }
    }

    // note: nnz of aggVal maintained internally
    aggVal.examSparsity();
}
Use of org.apache.sysml.runtime.functionobjects.KahanPlus in project incubator-systemml by Apache.
Class LibMatrixAgg, method aggregateTernaryGeneric.
/**
 * Kahan-sums the cell-wise product of up to three blocks over rows
 * {@code [rl, ru)}, iterating only over the non-zeros of the input that sorts
 * first (sparse inputs ordered by non-zero count).
 *
 * <p>For {@code ReduceAll} the total and its correction are written to cells
 * (0,0) and (0,1) of {@code ret}. Otherwise per-column sums are accumulated in
 * the dense values of {@code ret}, with the correction for column {@code c}
 * stored at offset {@code c + in1.clen}.
 *
 * @param in1  first input
 * @param in2  second input
 * @param in3  third input, may be {@code null}
 * @param ret  output block
 * @param ixFn index function selecting full vs. column-wise aggregation
 * @param rl   first row (inclusive)
 * @param ru   last row (exclusive)
 */
private static void aggregateTernaryGeneric(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, MatrixBlock ret, IndexFunction ixFn, int rl, int ru) {
    // compute block operations
    final KahanObject kbuff = new KahanObject(0, 0);
    final KahanPlus kplus = KahanPlus.getKahanPlusFnObject();

    // guaranteed to have at least one sparse input, sort by nnz, assume num cells if
    // (potentially incorrect) in dense representation, keep null at end via stable sort
    final MatrixBlock[] ordered = { in1, in2, in3 };
    Arrays.sort(ordered, new Comparator<MatrixBlock>() {
        @Override
        public int compare(MatrixBlock left, MatrixBlock right) {
            return Long.compare(sortKey(left), sortKey(right));
        }

        // missing and non-sparse blocks share the largest key so they sort last
        private long sortKey(MatrixBlock mb) {
            if (mb == null || !mb.sparse) {
                return Long.MAX_VALUE;
            }
            return mb.nonZeros;
        }
    });
    final MatrixBlock driver = ordered[0];
    final MatrixBlock second = ordered[1];
    final MatrixBlock third = ordered[2];

    final SparseBlock a = driver.sparseBlock;
    final int n = in1.clen;

    if (ixFn instanceof ReduceAll) {
        // tak+*: a single running sum over all visited cells
        for (int i = rl; i < ru; i++) {
            if (a.isEmpty(i)) {
                continue;
            }
            final int start = a.pos(i);
            final int end = start + a.size(i);
            final int[] aix = a.indexes(i);
            final double[] avals = a.values(i);
            for (int j = start; j < end; j++) {
                kplus.execute2(kbuff, ternaryCellProduct(avals[j], second, third, i, aix[j]));
            }
        }
        ret.quickSetValue(0, 0, kbuff._sum);
        ret.quickSetValue(0, 1, kbuff._correction);
        return;
    }

    // tack+*: one running (sum, correction) pair per column
    final double[] c = ret.getDenseBlockValues();
    for (int i = rl; i < ru; i++) {
        if (a.isEmpty(i)) {
            continue;
        }
        final int start = a.pos(i);
        final int end = start + a.size(i);
        final int[] aix = a.indexes(i);
        final double[] avals = a.values(i);
        for (int j = start; j < end; j++) {
            final int col = aix[j];
            final double val = ternaryCellProduct(avals[j], second, third, i, col);
            kbuff._sum = c[col];
            kbuff._correction = c[col + n];
            kplus.execute2(kbuff, val);
            c[col] = kbuff._sum;
            c[col + n] = kbuff._correction;
        }
    }
}

/** Multiplies {@code val1} by cell (row, col) of {@code lin2} and, if the product is non-zero and {@code lin3} is present, of {@code lin3}. */
private static double ternaryCellProduct(double val1, MatrixBlock lin2, MatrixBlock lin3, int row, int col) {
    double val = val1 * lin2.quickGetValue(row, col);
    if (val != 0 && lin3 != null) {
        val *= lin3.quickGetValue(row, col);
    }
    return val;
}
Aggregations