Usage of org.apache.sysml.runtime.instructions.cp.KahanObject in the Apache SystemML project.
From class LibMatrixAgg, method aggregateBinaryMatrixAllDense.
private static void aggregateBinaryMatrixAllDense(MatrixBlock in, MatrixBlock aggVal, MatrixBlock aggCorr) {
    // sparse-unsafe guard: an empty input block contributes nothing
    if (in.denseBlock == null || in.isEmptyBlock(false))
        return;
    // allocate output arrays if required (both should always stay dense)
    aggVal.allocateDenseBlock();
    aggCorr.allocateDenseBlock();
    final double[] vals = in.getDenseBlockValues();
    final double[] sums = aggVal.getDenseBlockValues();
    final double[] corrs = aggCorr.getDenseBlockValues();
    final KahanObject kbuff = new KahanObject(0, 0);
    final KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
    // guard against a physical array larger than the logical cell count
    final int limit = Math.min(vals.length, in.rlen * in.clen);
    int nnzSum = 0, nnzCorr = 0;
    for (int i = 0; i < limit; i++) {
        // fold the input cell into the running (sum, correction) pair
        kbuff._sum = sums[i];
        kbuff._correction = corrs[i];
        kplus.execute2(kbuff, vals[i]);
        sums[i] = kbuff._sum;
        corrs[i] = kbuff._correction;
        // recount nnz of both outputs on the fly
        if (kbuff._sum != 0)
            nnzSum++;
        if (kbuff._correction != 0)
            nnzCorr++;
    }
    // maintain nnz metadata of value and correction blocks
    aggVal.nonZeros = nnzSum;
    aggCorr.nonZeros = nnzCorr;
}
Usage of org.apache.sysml.runtime.instructions.cp.KahanObject in the Apache SystemML project.
From class LibMatrixAgg, method aggregateUnaryMatrixDense.
private static void aggregateUnaryMatrixDense(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru) {
    final int n = in.clen;
    // note: due to corrections, even the output might be a large dense block
    DenseBlock a = in.getDenseBlock();
    DenseBlock c = out.getDenseBlock();
    // dispatch on the aggregation type, then on the index function that
    // encodes the reduction direction (all cells, per row, per column, diagonal)
    switch (optype) {
        case KAHAN_SUM: { // SUM/TRACE via k+
            KahanObject buff = new KahanObject(0, 0);
            if (ixFn instanceof ReduceAll) // SUM
                d_uakp(a, c, n, buff, (KahanPlus) vFn, rl, ru);
            else if (ixFn instanceof ReduceCol) // ROWSUM
                d_uarkp(a, c, n, buff, (KahanPlus) vFn, rl, ru);
            else if (ixFn instanceof ReduceRow) // COLSUM
                d_uackp(a, c, n, buff, (KahanPlus) vFn, rl, ru);
            else if (ixFn instanceof ReduceDiag) // TRACE
                d_uakptrace(a, c, n, buff, (KahanPlus) vFn, rl, ru);
            break;
        }
        case KAHAN_SUM_SQ: { // SUM_SQ via k+
            KahanObject buff = new KahanObject(0, 0);
            if (ixFn instanceof ReduceAll) // SUM_SQ
                d_uasqkp(a, c, n, buff, (KahanPlusSq) vFn, rl, ru);
            else if (ixFn instanceof ReduceCol) // ROWSUM_SQ
                d_uarsqkp(a, c, n, buff, (KahanPlusSq) vFn, rl, ru);
            else if (ixFn instanceof ReduceRow) // COLSUM_SQ
                d_uacsqkp(a, c, n, buff, (KahanPlusSq) vFn, rl, ru);
            break;
        }
        case CUM_KAHAN_SUM: { // CUMSUM
            KahanObject buff = new KahanObject(0, 0);
            KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
            d_ucumkp(in.getDenseBlock(), null, out.getDenseBlock(), n, buff, kplus, rl, ru);
            break;
        }
        case CUM_PROD: { // CUMPROD
            d_ucumm(in.getDenseBlockValues(), null, out.getDenseBlockValues(), n, rl, ru);
            break;
        }
        case CUM_MIN:
        case CUM_MAX: { // CUMMIN/CUMMAX with matching neutral element
            double init = (optype == AggType.CUM_MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
            d_ucummxx(in.getDenseBlockValues(), null, out.getDenseBlockValues(), n, init, (Builtin) vFn, rl, ru);
            break;
        }
        case MIN:
        case MAX: { // MAX/MIN with matching neutral element
            double init = (optype == AggType.MAX) ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
            if (ixFn instanceof ReduceAll) // MIN/MAX
                d_uamxx(a, c, n, init, (Builtin) vFn, rl, ru);
            else if (ixFn instanceof ReduceCol) // ROWMIN/ROWMAX
                d_uarmxx(a, c, n, init, (Builtin) vFn, rl, ru);
            else if (ixFn instanceof ReduceRow) // COLMIN/COLMAX
                d_uacmxx(a, c, n, init, (Builtin) vFn, rl, ru);
            break;
        }
        case MAX_INDEX: { // row-wise only
            double init = Double.NEGATIVE_INFINITY;
            if (ixFn instanceof ReduceCol) // ROWINDEXMAX
                d_uarimxx(a, c, n, init, (Builtin) vFn, rl, ru);
            break;
        }
        case MIN_INDEX: { // row-wise only
            double init = Double.POSITIVE_INFINITY;
            if (ixFn instanceof ReduceCol) // ROWINDEXMIN
                d_uarimin(a, c, n, init, (Builtin) vFn, rl, ru);
            break;
        }
        case MEAN: { // MEAN
            KahanObject buff = new KahanObject(0, 0);
            if (ixFn instanceof ReduceAll) // MEAN
                d_uamean(a, c, n, buff, (Mean) vFn, rl, ru);
            else if (ixFn instanceof ReduceCol) // ROWMEAN
                d_uarmean(a, c, n, buff, (Mean) vFn, rl, ru);
            else if (ixFn instanceof ReduceRow) // COLMEAN
                d_uacmean(a, c, n, buff, (Mean) vFn, rl, ru);
            break;
        }
        case VAR: { // VAR
            CM_COV_Object cbuff = new CM_COV_Object();
            if (ixFn instanceof ReduceAll) // VAR
                d_uavar(a, c, n, cbuff, (CM) vFn, rl, ru);
            else if (ixFn instanceof ReduceCol) // ROWVAR
                d_uarvar(a, c, n, cbuff, (CM) vFn, rl, ru);
            else if (ixFn instanceof ReduceRow) // COLVAR
                d_uacvar(a, c, n, cbuff, (CM) vFn, rl, ru);
            break;
        }
        case PROD: { // PROD (full aggregate only)
            if (ixFn instanceof ReduceAll)
                d_uam(a, c, n, rl, ru);
            break;
        }
        default:
            throw new DMLRuntimeException("Unsupported aggregation type: " + optype);
    }
}
Usage of org.apache.sysml.runtime.instructions.cp.KahanObject in the Apache SystemML project.
From class LibMatrixAgg, method groupedAggregateKahanPlus.
/**
 * This is a specific implementation for aggregate(fn="sum"), where we use KahanPlus for numerical
 * stability. In contrast to other functions of aggregate, this implementation supports row and column
 * vectors for target and exploits sparse representations since KahanPlus is sparse-safe.
 *
 * @param groups matrix block of 1-based group ids (one id per row of target, or per column if target is a row vector)
 * @param target matrix block target (row vector, column vector, or matrix)
 * @param weights matrix block weights; null means implicit weight 1 for every value
 * @param result matrix block result; one output row per group
 * @param numGroups number of groups (ids larger than this are silently skipped)
 * @param aggop aggregate operator whose increment function is applied per KahanObject buffer cell
 * @param cl column lower index (inclusive)
 * @param cu column upper index (exclusive)
 */
private static void groupedAggregateKahanPlus(MatrixBlock groups, MatrixBlock target, MatrixBlock weights, MatrixBlock result, int numGroups, AggregateOperator aggop, int cl, int cu) {
    boolean rowVector = (target.getNumRows() == 1 && target.getNumColumns() > 1);
    // default weight, overwritten per value when a weights block is given
    double w = 1;
    // skip empty blocks (sparse-safe operation)
    if (target.isEmptyBlock(false))
        return;
    // init group buffers: one Kahan accumulator per (group, column-in-[cl,cu)) pair
    int numCols2 = cu - cl;
    KahanObject[][] buffer = new KahanObject[numGroups][numCols2];
    for (int i = 0; i < numGroups; i++) for (int j = 0; j < numCols2; j++) buffer[i][j] = new KahanObject(aggop.initialValue, 0);
    // target is rowvector: group ids are indexed by column position
    if (rowVector) {
        // SPARSE target
        if (target.sparse) {
            if (!target.sparseBlock.isEmpty(0)) {
                int pos = target.sparseBlock.pos(0);
                int len = target.sparseBlock.size(0);
                int[] aix = target.sparseBlock.indexes(0);
                double[] avals = target.sparseBlock.values(0);
                // for each nnz of the single sparse row
                for (int j = pos; j < pos + len; j++) {
                    int g = (int) groups.quickGetValue(aix[j], 0);
                    // NOTE(review): only g > numGroups is guarded; g <= 0 would index out of
                    // bounds below — presumably group ids are guaranteed >= 1 by the caller.
                    if (g > numGroups)
                        continue;
                    if (weights != null)
                        w = weights.quickGetValue(aix[j], 0);
                    // buffer is 0-indexed, whereas range of values for g = [1,numGroups]
                    aggop.increOp.fn.execute(buffer[g - 1][0], avals[j] * w);
                }
            }
        } else // DENSE target
        {
            double[] a = target.getDenseBlockValues();
            for (int i = 0; i < target.getNumColumns(); i++) {
                double d = a[i];
                // sparse-safe: zeros do not change a Kahan sum, so skip them
                if (d != 0) {
                    int g = (int) groups.quickGetValue(i, 0);
                    if (g > numGroups)
                        continue;
                    if (weights != null)
                        w = weights.quickGetValue(i, 0);
                    // buffer is 0-indexed, whereas range of values for g = [1,numGroups]
                    aggop.increOp.fn.execute(buffer[g - 1][0], d * w);
                }
            }
        }
    } else // column vector or matrix: group ids are indexed by row position
    {
        // SPARSE target
        if (target.sparse) {
            SparseBlock a = target.sparseBlock;
            for (int i = 0; i < groups.getNumRows(); i++) {
                int g = (int) groups.quickGetValue(i, 0);
                if (g > numGroups)
                    continue;
                if (!a.isEmpty(i)) {
                    int pos = a.pos(i);
                    int len = a.size(i);
                    int[] aix = a.indexes(i);
                    double[] avals = a.values(i);
                    // seek to the first nnz with column index >= cl
                    // (posFIndexGTE returns -1 if none exists -> empty iteration)
                    int j = (cl == 0) ? 0 : a.posFIndexGTE(i, cl);
                    j = (j >= 0) ? pos + j : pos + len;
                    // for each nnz within the column range [cl,cu)
                    for (; j < pos + len && aix[j] < cu; j++) {
                        if (weights != null)
                            w = weights.quickGetValue(aix[j], 0);
                        aggop.increOp.fn.execute(buffer[g - 1][aix[j] - cl], avals[j] * w);
                    }
                }
            }
        } else // DENSE target
        {
            DenseBlock a = target.getDenseBlock();
            for (int i = 0; i < groups.getNumRows(); i++) {
                int g = (int) groups.quickGetValue(i, 0);
                if (g > numGroups)
                    continue;
                double[] avals = a.values(i);
                int aix = a.pos(i);
                for (int j = cl; j < cu; j++) {
                    double d = avals[aix + j];
                    // sparse-safe: zeros do not change a Kahan sum, so skip them
                    if (d != 0) {
                        if (weights != null)
                            w = weights.quickGetValue(i, 0);
                        // buffer is 0-indexed, whereas range of values for g = [1,numGroups]
                        aggop.increOp.fn.execute(buffer[g - 1][j - cl], d * w);
                    }
                }
            }
        }
    }
    // extract the results from group buffers into the result block
    for (int i = 0; i < numGroups; i++) for (int j = 0; j < numCols2; j++) result.appendValue(i, j + cl, buffer[i][j]._sum);
}
Usage of org.apache.sysml.runtime.instructions.cp.KahanObject in the Apache SystemML project.
From class LibMatrixAgg, method aggregateTernaryDense.
private static void aggregateTernaryDense(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, MatrixBlock ret, IndexFunction ixFn, int rl, int ru) {
    // compute block operations; single Kahan state reused across all cells
    KahanObject kbuff = new KahanObject(0, 0);
    KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
    final double[] a = in1.getDenseBlockValues();
    final double[] b1 = in2.getDenseBlockValues();
    // a null third input stands for the literal 1
    final double[] b2 = (in3 != null) ? in3.getDenseBlockValues() : null;
    final int n = in1.clen;
    if (ixFn instanceof ReduceAll) { // tak+* : full aggregate into scalar sum + correction
        int ix = rl * n;
        for (int i = rl; i < ru; i++) {
            for (int j = 0; j < n; j++, ix++) {
                final double prod = a[ix] * b1[ix] * ((b2 != null) ? b2[ix] : 1);
                kplus.execute2(kbuff, prod);
            }
        }
        ret.quickSetValue(0, 0, kbuff._sum);
        ret.quickSetValue(0, 1, kbuff._correction);
    }
    else { // tack+* : column aggregates; corrections kept in the second output row
        final double[] c = ret.getDenseBlockValues();
        int ix = rl * n;
        for (int i = rl; i < ru; i++) {
            for (int j = 0; j < n; j++, ix++) {
                final double prod = a[ix] * b1[ix] * ((b2 != null) ? b2[ix] : 1);
                kbuff._sum = c[j];
                kbuff._correction = c[j + n];
                kplus.execute2(kbuff, prod);
                c[j] = kbuff._sum;
                c[j + n] = kbuff._correction;
            }
        }
    }
}
Usage of org.apache.sysml.runtime.instructions.cp.KahanObject in the Apache SystemML project.
From class LibMatrixAgg, method aggregateBinaryMatrixLastColDenseGeneric.
private static void aggregateBinaryMatrixLastColDenseGeneric(MatrixBlock in, MatrixBlock aggVal) {
    // sparse-unsafe guard: an empty input block contributes nothing
    if (in.denseBlock == null || in.isEmptyBlock(false))
        return;
    final int m = in.rlen;
    final int n = in.clen;
    final double[] a = in.getDenseBlockValues();
    final KahanObject kbuff = new KahanObject(0, 0);
    final KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
    // incl implicit nnz maintenance (quickSetValue updates nnz of aggVal)
    for (int i = 0, aix = 0; i < m; i++, aix += n) {
        // all columns except the last, which carries the correction
        for (int j = 0; j < n - 1; j++) {
            kbuff._sum = aggVal.quickGetValue(i, j);
            kbuff._correction = aggVal.quickGetValue(i, n - 1);
            kplus.execute(kbuff, a[aix + j], a[aix + j + 1]);
            aggVal.quickSetValue(i, j, kbuff._sum);
            aggVal.quickSetValue(i, n - 1, kbuff._correction);
        }
    }
    // note: nnz of aggVal maintained internally
    aggVal.examSparsity();
}
Aggregations