use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.
the class LibMatrixAgg method aggregateUnaryMatrixDense.
private static void aggregateUnaryMatrixDense(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru) throws DMLRuntimeException {
final int m = in.rlen;
final int n = in.clen;
double[] a = in.getDenseBlock();
double[] c = out.getDenseBlock();
switch(optype) {
case //SUM/TRACE via k+,
KAHAN_SUM:
{
KahanObject kbuff = new KahanObject(0, 0);
if (// SUM
ixFn instanceof ReduceAll)
d_uakp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (//ROWSUM
ixFn instanceof ReduceCol)
d_uarkp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (//COLSUM
ixFn instanceof ReduceRow)
d_uackp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (//TRACE
ixFn instanceof ReduceDiag)
d_uakptrace(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
break;
}
case //SUM_SQ via k+,
KAHAN_SUM_SQ:
{
KahanObject kbuff = new KahanObject(0, 0);
if (//SUM_SQ
ixFn instanceof ReduceAll)
d_uasqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
else if (//ROWSUM_SQ
ixFn instanceof ReduceCol)
d_uarsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
else if (//COLSUM_SQ
ixFn instanceof ReduceRow)
d_uacsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
break;
}
case //CUMSUM
CUM_KAHAN_SUM:
{
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
d_ucumkp(a, null, c, m, n, kbuff, kplus, rl, ru);
break;
}
case //CUMPROD
CUM_PROD:
{
d_ucumm(a, null, c, m, n, rl, ru);
break;
}
case CUM_MIN:
case CUM_MAX:
{
double init = Double.MAX_VALUE * ((optype == AggType.CUM_MAX) ? -1 : 1);
d_ucummxx(a, null, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MIN:
case //MAX/MIN
MAX:
{
double init = Double.MAX_VALUE * ((optype == AggType.MAX) ? -1 : 1);
if (// MIN/MAX
ixFn instanceof ReduceAll)
d_uamxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
else if (//ROWMIN/ROWMAX
ixFn instanceof ReduceCol)
d_uarmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
else if (//COLMIN/COLMAX
ixFn instanceof ReduceRow)
d_uacmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MAX_INDEX:
{
double init = -Double.MAX_VALUE;
if (//ROWINDEXMAX
ixFn instanceof ReduceCol)
d_uarimxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MIN_INDEX:
{
double init = Double.MAX_VALUE;
if (//ROWINDEXMIN
ixFn instanceof ReduceCol)
d_uarimin(a, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case //MEAN
MEAN:
{
KahanObject kbuff = new KahanObject(0, 0);
if (// MEAN
ixFn instanceof ReduceAll)
d_uamean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
else if (//ROWMEAN
ixFn instanceof ReduceCol)
d_uarmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
else if (//COLMEAN
ixFn instanceof ReduceRow)
d_uacmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
break;
}
case //VAR
VAR:
{
CM_COV_Object cbuff = new CM_COV_Object();
if (//VAR
ixFn instanceof ReduceAll)
d_uavar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
else if (//ROWVAR
ixFn instanceof ReduceCol)
d_uarvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
else if (//COLVAR
ixFn instanceof ReduceRow)
d_uacvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
break;
}
case //PROD
PROD:
{
if (// PROD
ixFn instanceof ReduceAll)
d_uam(a, c, m, n, rl, ru);
break;
}
default:
throw new DMLRuntimeException("Unsupported aggregation type: " + optype);
}
}
use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.
the class LibMatrixAgg method aggregateUnaryMatrixSparse.
private static void aggregateUnaryMatrixSparse(MatrixBlock in, MatrixBlock out, AggType optype, ValueFunction vFn, IndexFunction ixFn, int rl, int ru) throws DMLRuntimeException {
final int m = in.rlen;
final int n = in.clen;
SparseBlock a = in.getSparseBlock();
double[] c = out.getDenseBlock();
switch(optype) {
case //SUM via k+
KAHAN_SUM:
{
KahanObject kbuff = new KahanObject(0, 0);
if (// SUM
ixFn instanceof ReduceAll)
s_uakp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (//ROWSUM
ixFn instanceof ReduceCol)
s_uarkp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (//COLSUM
ixFn instanceof ReduceRow)
s_uackp(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
else if (//TRACE
ixFn instanceof ReduceDiag)
s_uakptrace(a, c, m, n, kbuff, (KahanPlus) vFn, rl, ru);
break;
}
case //SUM_SQ via k+
KAHAN_SUM_SQ:
{
KahanObject kbuff = new KahanObject(0, 0);
if (//SUM_SQ
ixFn instanceof ReduceAll)
s_uasqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
else if (//ROWSUM_SQ
ixFn instanceof ReduceCol)
s_uarsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
else if (//COLSUM_SQ
ixFn instanceof ReduceRow)
s_uacsqkp(a, c, m, n, kbuff, (KahanPlusSq) vFn, rl, ru);
break;
}
case //CUMSUM
CUM_KAHAN_SUM:
{
KahanObject kbuff = new KahanObject(0, 0);
KahanPlus kplus = KahanPlus.getKahanPlusFnObject();
s_ucumkp(a, null, c, m, n, kbuff, kplus, rl, ru);
break;
}
case //CUMPROD
CUM_PROD:
{
s_ucumm(a, null, c, m, n, rl, ru);
break;
}
case CUM_MIN:
case CUM_MAX:
{
double init = Double.MAX_VALUE * ((optype == AggType.CUM_MAX) ? -1 : 1);
s_ucummxx(a, null, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MIN:
case //MAX/MIN
MAX:
{
double init = Double.MAX_VALUE * ((optype == AggType.MAX) ? -1 : 1);
if (// MIN/MAX
ixFn instanceof ReduceAll)
s_uamxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
else if (//ROWMIN/ROWMAX
ixFn instanceof ReduceCol)
s_uarmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
else if (//COLMIN/COLMAX
ixFn instanceof ReduceRow)
s_uacmxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MAX_INDEX:
{
double init = -Double.MAX_VALUE;
if (//ROWINDEXMAX
ixFn instanceof ReduceCol)
s_uarimxx(a, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MIN_INDEX:
{
double init = Double.MAX_VALUE;
if (//ROWINDEXMAX
ixFn instanceof ReduceCol)
s_uarimin(a, c, m, n, init, (Builtin) vFn, rl, ru);
break;
}
case MEAN:
{
KahanObject kbuff = new KahanObject(0, 0);
if (// MEAN
ixFn instanceof ReduceAll)
s_uamean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
else if (//ROWMEAN
ixFn instanceof ReduceCol)
s_uarmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
else if (//COLMEAN
ixFn instanceof ReduceRow)
s_uacmean(a, c, m, n, kbuff, (Mean) vFn, rl, ru);
break;
}
case //VAR
VAR:
{
CM_COV_Object cbuff = new CM_COV_Object();
if (//VAR
ixFn instanceof ReduceAll)
s_uavar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
else if (//ROWVAR
ixFn instanceof ReduceCol)
s_uarvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
else if (//COLVAR
ixFn instanceof ReduceRow)
s_uacvar(a, c, m, n, cbuff, (CM) vFn, rl, ru);
break;
}
case //PROD
PROD:
{
if (// PROD
ixFn instanceof ReduceAll)
s_uam(a, c, m, n, rl, ru);
break;
}
default:
throw new DMLRuntimeException("Unsupported aggregation type: " + optype);
}
}
use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.
the class LibMatrixCUDA method unaryAggregate.
//********************************************************************/
//***************** END OF MATRIX MULTIPLY Functions *****************/
//********************************************************************/
//********************************************************************/
//**************** UNARY AGGREGATE Functions ************************/
//********************************************************************/
/**
* Entry point to perform Unary aggregate operations on the GPU.
* The execution context object is used to allocate memory for the GPU.
* @param ec Instance of {@link ExecutionContext}, from which the output variable will be allocated
* @param gCtx a valid {@link GPUContext}
* @param instName name of the invoking instruction to record{@link Statistics}.
* @param in1 input matrix
* @param output output matrix/scalar name
* @param op Instance of {@link AggregateUnaryOperator} which encapsulates the direction of reduction/aggregation and the reduction operation.
* @throws DMLRuntimeException if {@link DMLRuntimeException} occurs
*/
public static void unaryAggregate(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String output, AggregateUnaryOperator op) throws DMLRuntimeException {
if (ec.getGPUContext() != gCtx)
throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
LOG.trace("GPU : unaryAggregate" + ", GPUContext=" + gCtx);
final int REDUCTION_ALL = 1;
final int REDUCTION_ROW = 2;
final int REDUCTION_COL = 3;
final int REDUCTION_DIAG = 4;
// A kahan sum implemention is not provided. is a "uak+" or other kahan operator is encountered,
// it just does regular summation reduction.
final int OP_PLUS = 1;
final int OP_PLUS_SQ = 2;
final int OP_MEAN = 3;
final int OP_VARIANCE = 4;
final int OP_MULTIPLY = 5;
final int OP_MAX = 6;
final int OP_MIN = 7;
final int OP_MAXINDEX = 8;
final int OP_MININDEX = 9;
// Sanity Checks
if (!in1.getGPUObject(gCtx).isAllocated())
throw new DMLRuntimeException("Internal Error - The input is not allocated for a GPU Aggregate Unary:" + in1.getGPUObject(gCtx).isAllocated());
boolean isSparse = in1.getGPUObject(gCtx).isSparse();
IndexFunction indexFn = op.indexFn;
AggregateOperator aggOp = op.aggOp;
// Convert Reduction direction to a number to pass to CUDA kernel
int reductionDirection = -1;
if (indexFn instanceof ReduceAll) {
reductionDirection = REDUCTION_ALL;
} else if (indexFn instanceof ReduceRow) {
reductionDirection = REDUCTION_ROW;
} else if (indexFn instanceof ReduceCol) {
reductionDirection = REDUCTION_COL;
} else if (indexFn instanceof ReduceDiag) {
reductionDirection = REDUCTION_DIAG;
} else {
throw new DMLRuntimeException("Internal Error - Invalid index function type, only reducing along rows, columns, diagonals or all elements is supported in Aggregate Unary operations");
}
assert reductionDirection != -1 : "Internal Error - Incorrect type of reduction direction set for aggregate unary GPU instruction";
// Convert function type to a number to pass to the CUDA Kernel
int opIndex = -1;
if (aggOp.increOp.fn instanceof KahanPlus) {
opIndex = OP_PLUS;
} else if (aggOp.increOp.fn instanceof KahanPlusSq) {
opIndex = OP_PLUS_SQ;
} else if (aggOp.increOp.fn instanceof Mean) {
opIndex = OP_MEAN;
} else if (aggOp.increOp.fn instanceof CM) {
assert ((CM) aggOp.increOp.fn).getAggOpType() == CMOperator.AggregateOperationTypes.VARIANCE : "Internal Error - Invalid Type of CM operator for Aggregate Unary operation on GPU";
opIndex = OP_VARIANCE;
} else if (aggOp.increOp.fn instanceof Plus) {
opIndex = OP_PLUS;
} else if (aggOp.increOp.fn instanceof Multiply) {
opIndex = OP_MULTIPLY;
} else if (aggOp.increOp.fn instanceof Builtin) {
Builtin b = (Builtin) aggOp.increOp.fn;
switch(b.bFunc) {
case MAX:
opIndex = OP_MAX;
break;
case MIN:
opIndex = OP_MIN;
break;
case MAXINDEX:
opIndex = OP_MAXINDEX;
break;
case MININDEX:
opIndex = OP_MININDEX;
break;
default:
new DMLRuntimeException("Internal Error - Unsupported Builtin Function for Aggregate unary being done on GPU");
}
} else {
throw new DMLRuntimeException("Internal Error - Aggregate operator has invalid Value function");
}
assert opIndex != -1 : "Internal Error - Incorrect type of operation set for aggregate unary GPU instruction";
int rlen = (int) in1.getNumRows();
int clen = (int) in1.getNumColumns();
if (isSparse) {
// The strategy for the time being is to convert sparse to dense
// until a sparse specific kernel is written.
in1.getGPUObject(gCtx).sparseToDense(instName);
// long nnz = in1.getNnz();
// assert nnz > 0 : "Internal Error - number of non zeroes set to " + nnz + " in Aggregate Binary for GPU";
// MatrixObject out = ec.getSparseMatrixOutputForGPUInstruction(output, nnz);
// throw new DMLRuntimeException("Internal Error - Not implemented");
}
Pointer out = null;
if (reductionDirection == REDUCTION_COL || reductionDirection == REDUCTION_ROW) {
// Matrix output
MatrixObject out1 = getDenseMatrixOutputForGPUInstruction(ec, instName, output);
out = getDensePointer(gCtx, out1, instName);
}
Pointer in = getDensePointer(gCtx, in1, instName);
int size = rlen * clen;
// For scalars, set the scalar output in the Execution Context object
switch(opIndex) {
case OP_PLUS:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
// The names are a bit misleading, REDUCTION_COL refers to the direction (reduce all elements in a column)
reduceRow(gCtx, instName, "reduce_row_sum", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_sum", in, out, rlen, clen);
break;
}
case REDUCTION_DIAG:
throw new DMLRuntimeException("Internal Error - Row, Column and Diag summation not implemented yet");
}
break;
}
case OP_PLUS_SQ:
{
// Calculate the squares in a temporary object tmp
Pointer tmp = gCtx.allocate(instName, size * Sizeof.DOUBLE);
squareMatrix(gCtx, instName, in, tmp, rlen, clen);
// Then do the sum on the temporary object and free it
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", tmp, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
// The names are a bit misleading, REDUCTION_COL refers to the direction (reduce all elements in a column)
reduceRow(gCtx, instName, "reduce_row_sum", tmp, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_sum", tmp, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for summation squared");
}
gCtx.cudaFreeHelper(instName, tmp);
break;
}
case OP_MEAN:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
double mean = result / size;
ec.setScalarOutput(output, new DoubleObject(mean));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_mean", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_mean", in, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for mean");
}
break;
}
case OP_MULTIPLY:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_prod", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for multiplication");
}
break;
}
case OP_MAX:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_max", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_max", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_max", in, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for max");
}
break;
}
case OP_MIN:
{
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_min", in, size);
ec.setScalarOutput(output, new DoubleObject(result));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_min", in, out, rlen, clen);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_min", in, out, rlen, clen);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for min");
}
break;
}
case OP_VARIANCE:
{
// Temporary GPU array for
Pointer tmp = gCtx.allocate(instName, size * Sizeof.DOUBLE);
Pointer tmp2 = gCtx.allocate(instName, size * Sizeof.DOUBLE);
switch(reductionDirection) {
case REDUCTION_ALL:
{
double result = reduceAll(gCtx, instName, "reduce_sum", in, size);
double mean = result / size;
// Subtract mean from every element in the matrix
ScalarOperator minusOp = new RightScalarOperator(Minus.getMinusFnObject(), mean);
matrixScalarOp(gCtx, instName, in, mean, rlen, clen, tmp, minusOp);
squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
double result2 = reduceAll(gCtx, instName, "reduce_sum", tmp2, size);
double variance = result2 / (size - 1);
ec.setScalarOutput(output, new DoubleObject(variance));
break;
}
case REDUCTION_COL:
{
reduceRow(gCtx, instName, "reduce_row_mean", in, out, rlen, clen);
// Subtract the row-wise mean from every element in the matrix
BinaryOperator minusOp = new BinaryOperator(Minus.getMinusFnObject());
matrixMatrixOp(gCtx, instName, in, out, rlen, clen, VectorShape.NONE.code(), VectorShape.COLUMN.code(), tmp, minusOp);
squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
Pointer tmpRow = gCtx.allocate(instName, rlen * Sizeof.DOUBLE);
reduceRow(gCtx, instName, "reduce_row_sum", tmp2, tmpRow, rlen, clen);
ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), clen - 1);
matrixScalarOp(gCtx, instName, tmpRow, clen - 1, rlen, 1, out, divideOp);
gCtx.cudaFreeHelper(instName, tmpRow);
break;
}
case REDUCTION_ROW:
{
reduceCol(gCtx, instName, "reduce_col_mean", in, out, rlen, clen);
// Subtract the columns-wise mean from every element in the matrix
BinaryOperator minusOp = new BinaryOperator(Minus.getMinusFnObject());
matrixMatrixOp(gCtx, instName, in, out, rlen, clen, VectorShape.NONE.code(), VectorShape.ROW.code(), tmp, minusOp);
squareMatrix(gCtx, instName, tmp, tmp2, rlen, clen);
Pointer tmpCol = gCtx.allocate(instName, clen * Sizeof.DOUBLE);
reduceCol(gCtx, instName, "reduce_col_sum", tmp2, tmpCol, rlen, clen);
ScalarOperator divideOp = new RightScalarOperator(Divide.getDivideFnObject(), rlen - 1);
matrixScalarOp(gCtx, instName, tmpCol, rlen - 1, 1, clen, out, divideOp);
gCtx.cudaFreeHelper(instName, tmpCol);
break;
}
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for variance");
}
gCtx.cudaFreeHelper(instName, tmp);
gCtx.cudaFreeHelper(instName, tmp2);
break;
}
case OP_MAXINDEX:
{
switch(reductionDirection) {
case REDUCTION_COL:
throw new DMLRuntimeException("Internal Error - Column maxindex of matrix not implemented yet for GPU ");
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for maxindex");
}
// break;
}
case OP_MININDEX:
{
switch(reductionDirection) {
case REDUCTION_COL:
throw new DMLRuntimeException("Internal Error - Column minindex of matrix not implemented yet for GPU ");
default:
throw new DMLRuntimeException("Internal Error - Unsupported reduction direction for minindex");
}
// break;
}
default:
throw new DMLRuntimeException("Internal Error - Invalid GPU Unary aggregate function!");
}
}
use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.
the class CompressedMatrixBlock method aggregateUnaryOperations.
@Override
public MatrixValue aggregateUnaryOperations(AggregateUnaryOperator op, MatrixValue result, int blockingFactorRow, int blockingFactorCol, MatrixIndexes indexesIn, boolean inCP) throws DMLRuntimeException {
//call uncompressed matrix mult if necessary
if (!isCompressed()) {
return super.aggregateUnaryOperations(op, result, blockingFactorRow, blockingFactorCol, indexesIn, inCP);
}
//check for supported operations
if (!(op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq || (op.aggOp.increOp.fn instanceof Builtin && (((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MIN || ((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX)))) {
throw new DMLRuntimeException("Unary aggregates other than sum/sumsq/min/max not supported yet.");
}
Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
//prepare output dimensions
CellIndex tempCellIndex = new CellIndex(-1, -1);
op.indexFn.computeDimension(rlen, clen, tempCellIndex);
if (op.aggOp.correctionExists) {
switch(op.aggOp.correctionLocation) {
case LASTROW:
tempCellIndex.row++;
break;
case LASTCOLUMN:
tempCellIndex.column++;
break;
case LASTTWOROWS:
tempCellIndex.row += 2;
break;
case LASTTWOCOLUMNS:
tempCellIndex.column += 2;
break;
default:
throw new DMLRuntimeException("unrecognized correctionLocation: " + op.aggOp.correctionLocation);
}
}
// initialize and allocate the result
if (result == null)
result = new MatrixBlock(tempCellIndex.row, tempCellIndex.column, false);
else
result.reset(tempCellIndex.row, tempCellIndex.column, false);
MatrixBlock ret = (MatrixBlock) result;
ret.allocateDenseBlock();
//special handling init value for rowmins/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
double val = Double.MAX_VALUE * ((((Builtin) op.aggOp.increOp.fn).getBuiltinCode() == BuiltinCode.MAX) ? -1 : 1);
Arrays.fill(ret.getDenseBlock(), val);
}
//core unary aggregate
if (op.getNumThreads() > 1 && getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD) {
//multi-threaded execution of all groups
ArrayList<ColGroup>[] grpParts = createStaticTaskPartitioning((op.indexFn instanceof ReduceCol) ? 1 : op.getNumThreads(), false);
ColGroupUncompressed uc = getUncompressedColGroup();
try {
//compute uncompressed column group in parallel (otherwise bottleneck)
if (uc != null)
ret = (MatrixBlock) uc.getData().aggregateUnaryOperations(op, ret, blockingFactorRow, blockingFactorCol, indexesIn, false);
//compute all compressed column groups
ExecutorService pool = Executors.newFixedThreadPool(op.getNumThreads());
ArrayList<UnaryAggregateTask> tasks = new ArrayList<UnaryAggregateTask>();
if (op.indexFn instanceof ReduceCol && grpParts.length > 0) {
int blklen = BitmapEncoder.getAlignedBlocksize((int) (Math.ceil((double) rlen / op.getNumThreads())));
for (int i = 0; i < op.getNumThreads() & i * blklen < rlen; i++) tasks.add(new UnaryAggregateTask(grpParts[0], ret, i * blklen, Math.min((i + 1) * blklen, rlen), op));
} else
for (ArrayList<ColGroup> grp : grpParts) tasks.add(new UnaryAggregateTask(grp, ret, 0, rlen, op));
List<Future<MatrixBlock>> rtasks = pool.invokeAll(tasks);
pool.shutdown();
//aggregate partial results
if (op.indexFn instanceof ReduceAll) {
if (op.aggOp.increOp.fn instanceof KahanFunction) {
KahanObject kbuff = new KahanObject(ret.quickGetValue(0, 0), 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
((KahanFunction) op.aggOp.increOp.fn).execute2(kbuff, tmp);
}
ret.quickSetValue(0, 0, kbuff._sum);
} else {
double val = ret.quickGetValue(0, 0);
for (Future<MatrixBlock> rtask : rtasks) {
double tmp = rtask.get().quickGetValue(0, 0);
val = op.aggOp.increOp.fn.execute(val, tmp);
}
ret.quickSetValue(0, 0, val);
}
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
} else {
//process UC column group
for (ColGroup grp : _colGroups) if (grp instanceof ColGroupUncompressed)
grp.unaryAggregateOperations(op, ret);
//process OLE/RLE column groups
aggregateUnaryOperations(op, _colGroups, ret, 0, rlen);
}
//special handling zeros for rowmins/rowmax
if (op.indexFn instanceof ReduceCol && op.aggOp.increOp.fn instanceof Builtin) {
int[] rnnz = new int[rlen];
for (ColGroup grp : _colGroups) grp.countNonZerosPerRow(rnnz, 0, rlen);
Builtin builtin = (Builtin) op.aggOp.increOp.fn;
for (int i = 0; i < rlen; i++) if (rnnz[i] < clen)
ret.quickSetValue(i, 0, builtin.execute2(ret.quickGetValue(i, 0), 0));
}
//drop correction if necessary
if (op.aggOp.correctionExists && inCP)
ret.dropLastRowsOrColums(op.aggOp.correctionLocation);
//post-processing
ret.recomputeNonZeros();
if (LOG.isDebugEnabled())
LOG.debug("Compressed uagg k=" + op.getNumThreads() + " in " + time.stop());
return ret;
}
use of org.apache.sysml.runtime.functionobjects.KahanPlusSq in project incubator-systemml by apache.
the class LibMatrixAgg method getAggType.
private static AggType getAggType(AggregateUnaryOperator op) {
ValueFunction vfn = op.aggOp.increOp.fn;
IndexFunction ifn = op.indexFn;
//(kahan) sum / sum squared / trace (for ReduceDiag)
if (vfn instanceof KahanFunction && (op.aggOp.correctionLocation == CorrectionLocationType.LASTCOLUMN || op.aggOp.correctionLocation == CorrectionLocationType.LASTROW) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow || ifn instanceof ReduceDiag)) {
if (vfn instanceof KahanPlus)
return AggType.KAHAN_SUM;
else if (vfn instanceof KahanPlusSq)
return AggType.KAHAN_SUM_SQ;
}
//mean
if (vfn instanceof Mean && (op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOCOLUMNS || op.aggOp.correctionLocation == CorrectionLocationType.LASTTWOROWS) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
return AggType.MEAN;
}
//variance
if (vfn instanceof CM && ((CM) vfn).getAggOpType() == AggregateOperationTypes.VARIANCE && (op.aggOp.correctionLocation == CorrectionLocationType.LASTFOURCOLUMNS || op.aggOp.correctionLocation == CorrectionLocationType.LASTFOURROWS) && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
return AggType.VAR;
}
//prod
if (vfn instanceof Multiply && ifn instanceof ReduceAll) {
return AggType.PROD;
}
//min / max
if (vfn instanceof Builtin && (ifn instanceof ReduceAll || ifn instanceof ReduceCol || ifn instanceof ReduceRow)) {
BuiltinCode bfcode = ((Builtin) vfn).bFunc;
switch(bfcode) {
case MAX:
return AggType.MAX;
case MIN:
return AggType.MIN;
case MAXINDEX:
return AggType.MAX_INDEX;
case MININDEX:
return AggType.MIN_INDEX;
//do nothing
default:
}
}
return AggType.INVALID;
}
Aggregations