Use of org.apache.sysml.runtime.functionobjects.IndexFunction in project incubator-systemml by Apache.
Class AggregateUnaryGPUInstruction, method processInstruction.
/**
 * Executes a unary aggregate on the GPU.
 *
 * <p>The opcodes {@code nrow}, {@code ncol} and {@code length} are rejected up front.
 * For row/column reductions the output metadata is set to a vector shape before the
 * aggregate is invoked, and the matrix output is released afterwards. For any other
 * index function no matrix output is released here.
 *
 * @param ec execution context used to acquire and release the GPU input/output
 * @throws DMLRuntimeException if the opcode is nrow, ncol or length
 */
@Override
public void processInstruction(ExecutionContext ec) {
    GPUStatistics.incrementNoOfExecutedGPUInst();

    // nrow, ncol & length only read or refresh metadata and must never reach the GPU path
    final String op = getOpcode();
    final boolean metadataOnly = op.equalsIgnoreCase("nrow")
            || op.equalsIgnoreCase("ncol")
            || op.equalsIgnoreCase("length");
    if (metadataOnly) {
        throw new DMLRuntimeException("nrow, ncol & length should not be compiled as GPU instructions!");
    }

    // acquire the input and read its dimensions
    final MatrixObject input = getMatrixInputForGPUInstruction(ec, _input1.getName());
    final int numRows = (int) input.getNumRows();
    final int numCols = (int) input.getNumColumns();

    final AggregateUnaryOperator aggOp = (AggregateUnaryOperator) _optr;
    final IndexFunction ixFn = aggOp.indexFn;
    // ReduceRow -> COL{SUM, MAX, ...}; ReduceCol -> ROW{SUM, MAX, ...}
    final boolean colAggregate = ixFn instanceof ReduceRow;
    final boolean rowAggregate = ixFn instanceof ReduceCol;
    final boolean vectorOutput = colAggregate || rowAggregate;

    if (vectorOutput) {
        // column aggregates yield a 1 x ncol row vector, row aggregates an nrow x 1 column vector
        final int outRows = colAggregate ? 1 : numRows;
        final int outCols = colAggregate ? numCols : 1;
        ec.setMetaData(_output.getName(), outRows, outCols);
    }

    LibMatrixCUDA.unaryAggregate(ec, ec.getGPUContext(0), getExtendedOpcode(), input, _output.getName(), aggOp);

    // release inputs/outputs
    ec.releaseMatrixInputForGPUInstruction(_input1.getName());
    // Only a row or column reduction produces a matrix output that has to be released.
    // NOTE(review): otherwise the result is presumably a scalar that unaryAggregate stores
    // in the execution context (via setScalarOutput) - confirm against LibMatrixCUDA.
    if (vectorOutput) {
        ec.releaseMatrixOutputForGPUInstruction(_output.getName());
    }
}
Use of org.apache.sysml.runtime.functionobjects.IndexFunction in project incubator-systemml by Apache.
Class LibMatrixAgg, method getAggType.
/**
 * Classifies a unary aggregate operator into one of the {@link AggType} categories,
 * based on its value function, index function and correction location.
 *
 * @param op the unary aggregate operator to classify
 * @return the matching aggregation type, or {@code AggType.INVALID} if no rule applies
 */
private static AggType getAggType(AggregateUnaryOperator op) {
    final ValueFunction valueFn = op.aggOp.increOp.fn;
    final IndexFunction indexFn = op.indexFn;
    final CorrectionLocationType corrLoc = op.aggOp.correctionLocation;

    // index functions shared by most of the rules below
    final boolean allRowOrCol = indexFn instanceof ReduceAll
            || indexFn instanceof ReduceCol
            || indexFn instanceof ReduceRow;

    // (kahan) sum / sum squared / trace (for ReduceDiag)
    final boolean singleCorrection = corrLoc == CorrectionLocationType.LASTCOLUMN
            || corrLoc == CorrectionLocationType.LASTROW;
    if (valueFn instanceof KahanFunction && singleCorrection
            && (allRowOrCol || indexFn instanceof ReduceDiag)) {
        if (valueFn instanceof KahanPlus) {
            return AggType.KAHAN_SUM;
        }
        if (valueFn instanceof KahanPlusSq) {
            return AggType.KAHAN_SUM_SQ;
        }
        // any other Kahan function falls through to the remaining rules
    }

    // mean
    final boolean doubleCorrection = corrLoc == CorrectionLocationType.LASTTWOCOLUMNS
            || corrLoc == CorrectionLocationType.LASTTWOROWS;
    if (valueFn instanceof Mean && doubleCorrection && allRowOrCol) {
        return AggType.MEAN;
    }

    // variance
    if (valueFn instanceof CM
            && ((CM) valueFn).getAggOpType() == AggregateOperationTypes.VARIANCE) {
        final boolean quadCorrection = corrLoc == CorrectionLocationType.LASTFOURCOLUMNS
                || corrLoc == CorrectionLocationType.LASTFOURROWS;
        if (quadCorrection && allRowOrCol) {
            return AggType.VAR;
        }
    }

    // prod (full aggregate only)
    if (valueFn instanceof Multiply && indexFn instanceof ReduceAll) {
        return AggType.PROD;
    }

    // min / max and their index variants
    if (valueFn instanceof Builtin && allRowOrCol) {
        final BuiltinCode code = ((Builtin) valueFn).bFunc;
        switch (code) {
            case MAX:
                return AggType.MAX;
            case MIN:
                return AggType.MIN;
            case MAXINDEX:
                return AggType.MAX_INDEX;
            case MININDEX:
                return AggType.MIN_INDEX;
            default:
                // unsupported builtin: fall through to INVALID
                break;
        }
    }

    return AggType.INVALID;
}
Use of org.apache.sysml.runtime.functionobjects.IndexFunction in project incubator-systemml by Apache.
Class LibMatrixAgg, method aggregateTernary.
/**
 * Multi-threaded ternary aggregate over {@code in1}, {@code in2} and the optional {@code in3}.
 *
 * <p>The rows of {@code in1} are split into at most {@code k} contiguous ranges, each range is
 * processed by an {@code AggTernaryTask}, and the partial results are merged into {@code ret}
 * via {@code aggregateFinalResult}. The call delegates to the single-threaded overload when
 * parallel execution is not worthwhile (see the first condition below).
 *
 * @param in1 first input block; its row count drives the partitioning
 * @param in2 second input block
 * @param in3 third input block, may be {@code null}
 * @param ret output block, also returned
 * @param op  ternary aggregate operator providing the index function and aggregate operator
 * @param k   requested degree of parallelism
 * @return {@code ret}, holding the aggregated result (returned untouched if any input is empty)
 * @throws DMLRuntimeException wrapping any exception raised while running or merging the tasks
 */
public static MatrixBlock aggregateTernary(MatrixBlock in1, MatrixBlock in2, MatrixBlock in3, MatrixBlock ret, AggregateTernaryOperator op, int k) {
    // fall back to sequential version if necessary; the intermediate-size estimate is
    // computed in long arithmetic so that a large clen * k cannot overflow int
    if (k <= 1 || in1.nonZeros + in2.nonZeros < PAR_NUMCELL_THRESHOLD || in1.rlen <= k / 2 || (!(op.indexFn instanceof ReduceCol) && 8L * ret.clen * k > PAR_INTERMEDIATE_SIZE_THRESHOLD)) {
        return aggregateTernary(in1, in2, in3, ret, op);
    }
    // early abort if any block is empty (in3 is optional)
    if (in1.isEmptyBlock(false) || in2.isEmptyBlock(false) || (in3 != null && in3.isEmptyBlock(false))) {
        return ret;
    }
    try {
        ExecutorService pool = CommonThreadPool.get(k);
        List<Future<MatrixBlock>> rtasks;
        try {
            // one task per contiguous row range of blklen rows
            ArrayList<AggTernaryTask> tasks = new ArrayList<>();
            int blklen = (int) (Math.ceil((double) in1.rlen / k));
            IndexFunction ixFn = op.indexFn;
            for (int i = 0; i < k && i * blklen < in1.rlen; i++) {
                tasks.add(new AggTernaryTask(in1, in2, in3, ret, ixFn, i * blklen, Math.min((i + 1) * blklen, in1.rlen)));
            }
            rtasks = pool.invokeAll(tasks);
        } finally {
            // always shut the pool down, even if task creation or invokeAll fails
            pool.shutdown();
        }
        // aggregate partial results and error handling:
        // the first partial result initializes ret, the remaining ones are merged into it
        ret.copy(rtasks.get(0).get());
        for (int i = 1; i < rtasks.size(); i++) {
            aggregateFinalResult(op.aggOp, ret, rtasks.get(i).get());
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // cleanup output and change representation (if necessary)
    ret.recomputeNonZeros();
    ret.examSparsity();
    return ret;
}
Use of org.apache.sysml.runtime.functionobjects.IndexFunction in project systemml by Apache.
Class InstructionUtils, method parseAggregateTernaryOperator.
/**
 * Builds the ternary aggregate operator for the given opcode.
 *
 * <p>For {@code tak+*} (compared case-insensitively) the operator reduces over all cells with
 * the correction kept in the last column; for every other opcode it reduces rows with the
 * correction kept in the last row. In both cases the binary function is multiply and the
 * aggregate is a Kahan plus with initial value 0.
 *
 * @param opcode     instruction opcode
 * @param numThreads degree of parallelism handed to the operator
 * @return the configured ternary aggregate operator
 */
public static AggregateTernaryOperator parseAggregateTernaryOperator(String opcode, int numThreads) {
    final boolean fullAggregate = opcode.equalsIgnoreCase("tak+*");

    final CorrectionLocationType corrLoc;
    if (fullAggregate) {
        corrLoc = CorrectionLocationType.LASTCOLUMN;
    } else {
        corrLoc = CorrectionLocationType.LASTROW;
    }
    final AggregateOperator kahanSum = new AggregateOperator(0, KahanPlus.getKahanPlusFnObject(), true, corrLoc);

    final IndexFunction reduceFn;
    if (fullAggregate) {
        reduceFn = ReduceAll.getReduceAllFnObject();
    } else {
        reduceFn = ReduceRow.getReduceRowFnObject();
    }

    return new AggregateTernaryOperator(Multiply.getMultiplyFnObject(), kahanSum, reduceFn, numThreads);
}
Use of org.apache.sysml.runtime.functionobjects.IndexFunction in project systemml by Apache.
Class AggregateUnaryGPUInstruction, method processInstruction.
/**
 * Runs a unary aggregate instruction on the GPU.
 *
 * <p>{@code nrow}, {@code ncol} and {@code length} are not valid GPU opcodes and cause an
 * exception. When the operator's index function is a row or column reduction, the output
 * metadata is set to the resulting vector shape before the aggregate runs and the matrix
 * output is released afterwards; for other index functions only the input is released.
 *
 * @param ec execution context used to acquire and release the GPU input/output
 * @throws DMLRuntimeException if the opcode is nrow, ncol or length
 */
@Override
public void processInstruction(ExecutionContext ec) {
    GPUStatistics.incrementNoOfExecutedGPUInst();

    // nrow, ncol & length should either read or refresh metadata, never run on the GPU
    final String code = getOpcode();
    if (code.equalsIgnoreCase("nrow")
            || code.equalsIgnoreCase("ncol")
            || code.equalsIgnoreCase("length")) {
        throw new DMLRuntimeException("nrow, ncol & length should not be compiled as GPU instructions!");
    }

    // get inputs
    final MatrixObject source = getMatrixInputForGPUInstruction(ec, _input1.getName());
    final int rows = (int) source.getNumRows();
    final int cols = (int) source.getNumColumns();

    final AggregateUnaryOperator unaryOp = (AggregateUnaryOperator) _optr;
    final IndexFunction reduction = unaryOp.indexFn;
    final boolean overRows = reduction instanceof ReduceRow; // COL{SUM, MAX, ...}
    final boolean overCols = reduction instanceof ReduceCol; // ROW{SUM, MAX, ...}
    final boolean yieldsVector = overRows || overCols;

    if (yieldsVector) {
        // 1 x cols for column aggregates, rows x 1 for row aggregates
        final int outRows = overRows ? 1 : rows;
        final int outCols = overRows ? cols : 1;
        ec.setMetaData(_output.getName(), outRows, outCols);
    }

    LibMatrixCUDA.unaryAggregate(ec, ec.getGPUContext(0), getExtendedOpcode(), source, _output.getName(), unaryOp);

    // release inputs/outputs
    ec.releaseMatrixInputForGPUInstruction(_input1.getName());
    // A matrix output exists (and must be released) only for row/column reductions.
    // NOTE(review): for other reductions the result is presumably a scalar set in the
    // execution context by unaryAggregate (via setScalarOutput) - confirm.
    if (yieldsVector) {
        ec.releaseMatrixOutputForGPUInstruction(_output.getName());
    }
}
Aggregations