use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
the class ColGroupOLE method computeSum.
/**
 * Folds the sum over all cells of this column group into the running Kahan
 * aggregate stored in {@code result}.
 *
 * <p>The partial sum is read from cell (0,0) of {@code result} and its
 * correction term from cell (0,1); both cells are overwritten with the
 * updated aggregate on exit.
 *
 * <p>For every distinct value tuple, the number of occurrences is obtained by
 * walking the tuple's segment of {@code _data}: each bitmap block starts with
 * a length entry followed by that many entries, so only the length entries
 * are visited. Each column value of the tuple is then added, weighted by the
 * occurrence count, via {@code kplus.execute3}.
 *
 * @param result matrix block holding the running sum and correction
 * @param kplus  Kahan function used to add the count-scaled values
 */
@Override
protected final void computeSum(MatrixBlock result, KahanFunction kplus) {
    // resume from the partial aggregate already present in the result
    final KahanObject acc = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));

    final int nVals = getNumValues();
    final int nCols = getNumCols();

    for (int v = 0; v < nVals; v++) {
        final int base = _ptr[v];
        final int segLen = len(v);
        final int tupleStart = v * nCols;

        // hop from length entry to length entry, accumulating the block sizes
        int occurrences = 0;
        int pos = 0;
        while (pos < segLen) {
            final int entries = _data[base + pos];
            occurrences += entries;
            pos += entries + 1;
        }

        // every column value of this tuple contributes value * occurrences
        for (int j = 0; j < nCols; j++) {
            kplus.execute3(acc, _values[tupleStart + j], occurrences);
        }
    }

    result.quickSetValue(0, 0, acc._sum);
    result.quickSetValue(0, 1, acc._correction);
}
use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
the class ColGroupRLE method computeSum.
/**
 * Folds the sum over all cells of this column group into the running Kahan
 * aggregate stored in {@code result}.
 *
 * <p>The partial sum is read from cell (0,0) of {@code result} and its
 * correction term from cell (0,1); both cells are overwritten with the
 * updated aggregate on exit.
 *
 * <p>Each distinct value tuple owns a segment of {@code _data} made of
 * (start offset, run length) pairs. Only the number of covered rows matters
 * for a full sum, so the run lengths are added up and each column value of
 * the tuple is then added, weighted by that count, via
 * {@code kplus.execute3}.
 *
 * @param result matrix block holding the running sum and correction
 * @param kplus  Kahan function used to add the count-scaled values
 */
@Override
protected final void computeSum(MatrixBlock result, KahanFunction kplus) {
    // resume from the partial aggregate already present in the result
    final KahanObject acc = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1));

    final int nCols = getNumCols();
    final int nVals = getNumValues();

    for (int v = 0; v < nVals; v++) {
        final int base = _ptr[v];
        final int segLen = len(v);
        final int tupleStart = v * nCols;

        // the second entry of every pair is the run length; the absolute run
        // position is irrelevant when summing over all rows
        int occurrences = 0;
        for (int pos = 0; pos < segLen; pos += 2) {
            occurrences += _data[base + pos + 1];
        }

        // every column value of this tuple contributes value * occurrences
        for (int j = 0; j < nCols; j++) {
            kplus.execute3(acc, _values[tupleStart + j], occurrences);
        }
    }

    result.quickSetValue(0, 0, acc._sum);
    result.quickSetValue(0, 1, acc._correction);
}
use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
the class ColGroupRLE method computeRowSums.
/**
 * Adds this column group's contribution to the row sums of rows
 * {@code [rl, ru)}.
 *
 * <p>The dense block of {@code result} is used with two entries per row: the
 * running sum at index {@code 2*row} and its Kahan correction at
 * {@code 2*row + 1}. For every row covered by a run of a value tuple, the
 * tuple's summed value is added to that row's aggregate.
 *
 * <p>Two strategies are used:
 * <ul>
 *   <li>a blocked scan that processes the row range in chunks of
 *       {@code ColGroupOffset.WRITE_CACHE_BLKSZ / 2} rows and keeps a resumable
 *       scan position per value tuple, taken when the optimization flags are
 *       set, there is more than one value tuple and {@code _numRows} exceeds
 *       {@code BitmapEncoder.BITMAP_BLOCK_SZ};</li>
 *   <li>otherwise a plain tuple-by-tuple scan over the runs, skipping tuples
 *       whose summed value is zero.</li>
 * </ul>
 *
 * @param result matrix block whose dense block receives the row aggregates
 * @param kplus  Kahan function handed to the tuple-sum helpers
 * @param rl     first row (inclusive)
 * @param ru     last row (exclusive)
 */
@Override
protected final void computeRowSums(MatrixBlock result, KahanFunction kplus, int rl, int ru) {
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus plus = KahanPlus.getKahanPlusFnObject();
    final int nVals = getNumValues();
    final double[] out = result.getDenseBlock();

    final boolean blockedScan = ALLOW_CACHE_CONSCIOUS_ROWSUMS && LOW_LEVEL_OPT
            && nVals > 1 && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ;

    if (!blockedScan) {
        // sequential path: handle one value tuple after the other
        for (int v = 0; v < nVals; v++) {
            final int base = _ptr[v];
            final int segLen = len(v);
            final double tupleSum = sumValues(v, kplus, acc);
            if (tupleSum == 0.0) {
                // adding zero would not change any row aggregate
                continue;
            }

            // presumably yields the first pair index and row offset relevant for rl
            final Pair<Integer, Integer> skip = skipScanVal(v, rl);
            int pos = skip.getKey();
            int runEnd = skip.getValue();
            while (pos < segLen && runEnd < ru) {
                // pairs are (offset from previous run end, run length)
                final int runStart = runEnd + _data[base + pos];
                runEnd = runStart + _data[base + pos + 1];
                final int stop = Math.min(runEnd, ru);
                for (int row = runStart; row < stop; row++) {
                    final int cell = 2 * row;
                    acc.set(out[cell], out[cell + 1]);
                    plus.execute2(acc, tupleSum);
                    out[cell] = acc._sum;
                    out[cell + 1] = acc._correction;
                }
                pos += 2;
            }
        }
        return;
    }

    // blocked path, step 1: per-tuple scan state and summed tuple values
    final int blockRows = ColGroupOffset.WRITE_CACHE_BLKSZ / 2;
    final int[] rowOffsets = new int[nVals];
    // presumably advances every tuple's scan to rl and records its row offset
    final int[] pairPos = skipScan(nVals, rl, rowOffsets);
    final double[] tupleSums = sumAllValues(kplus, acc, false);

    // blocked path, step 2: sweep the row range block by block
    for (int blockStart = rl; blockStart < ru; blockStart += blockRows) {
        final int blockEnd = Math.min(blockStart + blockRows, ru);

        for (int v = 0; v < nVals; v++) {
            final int base = _ptr[v];
            final int segLen = len(v);
            final double tupleSum = tupleSums[v];
            int pos = pairPos[v];
            int offset = rowOffsets[v];

            while (pos < segLen) {
                final int runStart = offset + _data[base + pos];
                final int runEnd = runStart + _data[base + pos + 1];

                // clip the run to the current row block
                final int lo = Math.max(blockStart, runStart);
                final int hi = Math.min(runEnd, blockEnd);
                for (int row = lo; row < hi; row++) {
                    final int cell = 2 * row;
                    acc.set(out[cell], out[cell + 1]);
                    plus.execute2(acc, tupleSum);
                    out[cell] = acc._sum;
                    out[cell + 1] = acc._correction;
                }

                // a run reaching the block end is revisited for the next block,
                // so the scan state must not move past it
                if (runEnd >= blockEnd) {
                    break;
                }
                offset = runEnd;
                pos += 2;
            }

            // remember where to resume for the next row block
            pairPos[v] = pos;
            rowOffsets[v] = offset;
        }
    }
}
use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
the class LibMatrixAgg method aggregateTernaryDense.
/**
 * Aggregates the cell-wise product {@code in1 * in2 * in3} over rows
 * {@code [rl, ru)} of dense inputs using Kahan addition.
 *
 * <p>If {@code in3} is {@code null}, the third factor is the literal 1.
 *
 * <p>With a {@code ReduceAll} index function (tak+*), all products are added
 * into one aggregate that is written to cells (0,0) (sum) and (0,1)
 * (correction) of {@code ret}. Otherwise (tack+*), products are aggregated
 * per column directly in the dense block of {@code ret}, with the sum of
 * column {@code j} at index {@code j} and its correction at {@code j + n},
 * where {@code n} is the column count of {@code in1}.
 *
 * @param in1  first dense input
 * @param in2  second dense input
 * @param in3  third dense input, or {@code null} for a constant factor of 1
 * @param ret  output block receiving sums and corrections
 * @param ixFn index function selecting full or per-column aggregation
 * @param rl   first row (inclusive)
 * @param ru   last row (exclusive)
 */
private static void aggregateTernaryDense(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, MatrixBlock ret, IndexFunction ixFn, int rl, int ru) {
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus plus = KahanPlus.getKahanPlusFnObject();

    final double[] left = in1.denseBlock;
    final double[] right = in2.denseBlock;
    // null means: use the literal 1 as third factor
    final double[] weights = (in3 != null) ? in3.denseBlock : null;
    final int n = in1.clen;

    if (!(ixFn instanceof ReduceAll)) {
        // tack+*: one running aggregate per column, kept in the output block
        final double[] out = ret.getDenseBlock();
        for (int i = rl; i < ru; i++) {
            final int rowOff = i * n;
            for (int j = 0; j < n; j++) {
                final int ix = rowOff + j;
                double w = 1;
                if (weights != null) {
                    w = weights[ix];
                }
                final double prod = left[ix] * right[ix] * w;
                acc._sum = out[j];
                acc._correction = out[j + n];
                plus.execute2(acc, prod);
                out[j] = acc._sum;
                out[j + n] = acc._correction;
            }
        }
        return;
    }

    // tak+*: a single aggregate over all cells of the row range
    for (int i = rl; i < ru; i++) {
        final int rowOff = i * n;
        for (int j = 0; j < n; j++) {
            final int ix = rowOff + j;
            double w = 1;
            if (weights != null) {
                w = weights[ix];
            }
            plus.execute2(acc, left[ix] * right[ix] * w);
        }
    }
    ret.quickSetValue(0, 0, acc._sum);
    ret.quickSetValue(0, 1, acc._correction);
}
use of org.apache.sysml.runtime.instructions.cp.KahanObject in project incubator-systemml by apache.
the class LibMatrixAgg method aggregateBinaryMatrixLastRowSparseGeneric.
/**
 * Adds the non-zero cells of a sparse input block into {@code aggVal} with
 * Kahan addition, where the last row (index {@code in.rlen - 1}) of both
 * blocks carries the per-column correction terms.
 *
 * <p>Only rows before the last one are aggregated. For each non-zero cell
 * (i, j) of {@code in}, the current sum {@code aggVal(i, j)} and correction
 * {@code aggVal(last, j)} are combined with the cell value and the input's
 * correction {@code in(last, j)}, and both results are written back to
 * {@code aggVal}. An empty input leaves {@code aggVal} untouched.
 *
 * @param in     sparse input block; its last row holds corrections
 * @param aggVal aggregate block updated in place; its last row holds corrections
 * @throws DMLRuntimeException declared by the original signature
 */
private static void aggregateBinaryMatrixLastRowSparseGeneric(MatrixBlock in, MatrixBlock aggVal) throws DMLRuntimeException {
    // sparse-safe: nothing to add for an empty input
    if (in.isEmptyBlock(false)) {
        return;
    }

    final SparseBlock rows = in.getSparseBlock();
    final KahanObject acc = new KahanObject(0, 0);
    final KahanPlus plus = KahanPlus.getKahanPlusFnObject();

    final int m = in.rlen;
    final int corrRow = m - 1;
    // the final row is the correction row and is never aggregated as data
    final int dataRows = Math.min(rows.numRows(), m) - 1;

    for (int r = 0; r < dataRows; r++) {
        if (rows.isEmpty(r)) {
            continue;
        }

        final int first = rows.pos(r);
        final int end = first + rows.size(r);
        final int[] cols = rows.indexes(r);
        final double[] vals = rows.values(r);

        for (int p = first; p < end; p++) {
            final int col = cols[p];
            final double inCorr = in.quickGetValue(corrRow, col);

            // load current aggregate, add value plus incoming correction, store back
            acc._sum = aggVal.quickGetValue(r, col);
            acc._correction = aggVal.quickGetValue(corrRow, col);
            plus.execute(acc, vals[p], inCorr);
            aggVal.quickSetValue(r, col, acc._sum);
            aggVal.quickSetValue(corrRow, col, acc._correction);
        }
    }

    // note: nnz of aggVal/aggCorr maintained internally
    aggVal.examSparsity();
}
Aggregations