Use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.
The class AggregateUnarySPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    // get input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in;
    // filter input blocks for trace
    if (getOpcode().equalsIgnoreCase("uaktrace"))
        out = out.filter(new FilterDiagBlocksFunction());
    // execute unary aggregate operation
    AggregateUnaryOperator auop = (AggregateUnaryOperator) _optr;
    AggregateOperator aggop = _aop;
    // perform aggregation if necessary and put output into symbol table
    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        JavaRDD<MatrixBlock> out2 = out.map(new RDDUAggFunction2(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
        MatrixBlock out3 = RDDAggregateUtils.aggStable(out2, aggop);
        // drop correction after aggregation
        out3.dropLastRowsOrColumns(aggop.correctionLocation);
        // put output block into symbol table (no lineage because single block)
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), out3, getExtendedOpcode());
    }
    else { // MULTI_BLOCK or NONE
        if (_aggtype == SparkAggType.NONE) {
            // in case of no block aggregation, we always drop the correction as well as
            // use a partitioning-preserving mapvalues
            out = out.mapValues(new RDDUAggValueFunction(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
        }
        else if (_aggtype == SparkAggType.MULTI_BLOCK) {
            // in case of multi-block aggregation, we always keep the correction
            out = out.mapToPair(new RDDUAggFunction(auop, mc.getRowsPerBlock(), mc.getColsPerBlock()));
            out = RDDAggregateUtils.aggByKeyStable(out, aggop, false);
            // drop correction after aggregation if required (aggByKey creates
            // partitioning, drop correction via partitioning-preserving mapvalues)
            if (auop.aggOp.correctionExists)
                out = out.mapValues(new AggregateDropCorrectionFunction(aggop));
        }
        // put output RDD handle into symbol table
        updateUnaryAggOutputMatrixCharacteristics(sec, auop.indexFn);
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    }
}
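The SINGLE_BLOCK path folds every block into one MatrixBlock and then strips the correction that a compensated aggregate carries along. A minimal sketch of that pattern, assuming a JavaRDD<MatrixBlock> of partial blocks; the helper name sumBlocksSketch is hypothetical, while the constructors and calls mirror the snippets on this page.

static MatrixBlock sumBlocksSketch(JavaRDD<MatrixBlock> blocks) {
    // Kahan-compensated sum: the operator maintains an extra correction
    // column (LASTCOLUMN) while partial blocks are combined
    AggregateOperator sum = new AggregateOperator(0, KahanPlus.getKahanPlusFnObject(), true, CorrectionLocationType.LASTCOLUMN);
    MatrixBlock agg = RDDAggregateUtils.aggStable(blocks, sum);
    // once aggregation is complete, the correction column is dropped
    agg.dropLastRowsOrColumns(sum.correctionLocation);
    return agg;
}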
Use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.
The class MapmmSPInstruction, method parseInstruction:
public static MapmmSPInstruction parseInstruction(String str) {
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = parts[0];
    if (!opcode.equalsIgnoreCase(MapMult.OPCODE))
        throw new DMLRuntimeException("MapmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    CPOperand in1 = new CPOperand(parts[1]);
    CPOperand in2 = new CPOperand(parts[2]);
    CPOperand out = new CPOperand(parts[3]);
    CacheType type = CacheType.valueOf(parts[4]);
    boolean outputEmpty = Boolean.parseBoolean(parts[5]);
    SparkAggType aggtype = SparkAggType.valueOf(parts[6]);
    AggregateOperator agg = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator aggbin = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), agg);
    return new MapmmSPInstruction(aggbin, in1, in2, out, type, outputEmpty, aggtype, opcode, str);
}
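For reference, the positional layout this parser consumes after InstructionUtils.getInstructionPartsWithValueType(str), read directly off the code above; the enum values named in the comments are illustrative.

// parts[0] opcode       -> must match MapMult.OPCODE
// parts[1] input 1      -> CPOperand
// parts[2] input 2      -> CPOperand (which input is broadcast follows from parts[4])
// parts[3] output       -> CPOperand
// parts[4] cache type   -> CacheType.valueOf(parts[4])
// parts[5] output empty -> Boolean.parseBoolean(parts[5])
// parts[6] agg type     -> SparkAggType.valueOf(parts[6]), cf. SINGLE_BLOCK/MULTI_BLOCK/NONE above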
Use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.
The class PMapmmSPInstruction, method parseInstruction:
public static PMapmmSPInstruction parseInstruction(String str) {
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = parts[0];
    if (opcode.equalsIgnoreCase(PMapMult.OPCODE)) {
        CPOperand in1 = new CPOperand(parts[1]);
        CPOperand in2 = new CPOperand(parts[2]);
        CPOperand out = new CPOperand(parts[3]);
        AggregateOperator agg = new AggregateOperator(0, Plus.getPlusFnObject());
        AggregateBinaryOperator aggbin = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), agg);
        return new PMapmmSPInstruction(aggbin, in1, in2, out, opcode, str);
    }
    else {
        throw new DMLRuntimeException("PMapmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }
}
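Both matrix-multiply parsers assemble the same operator pair: an elementwise multiply fed into a plain additive aggregate, i.e. matrix multiplication as a (multiply, add) composition. A minimal sketch using only constructors that appear above:

// c_ij = sum_k a_ik * b_kj: Plus aggregates the products produced by Multiply
AggregateOperator add = new AggregateOperator(0, Plus.getPlusFnObject());
AggregateBinaryOperator matmult = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), add);
// note the plain Plus (no correction column), in contrast to the
// KahanPlus-based AggregateOperator used for grouped aggregates below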
Use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.
The class ParameterizedBuiltinSPInstruction, method parseInstruction:
public static ParameterizedBuiltinSPInstruction parseInstruction(String str) {
    String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
    // first part is always the opcode
    String opcode = parts[0];
    if (opcode.equalsIgnoreCase("mapgroupedagg")) {
        CPOperand target = new CPOperand(parts[1]);
        CPOperand groups = new CPOperand(parts[2]);
        CPOperand out = new CPOperand(parts[3]);
        HashMap<String, String> paramsMap = new HashMap<>();
        paramsMap.put(Statement.GAGG_TARGET, target.getName());
        paramsMap.put(Statement.GAGG_GROUPS, groups.getName());
        paramsMap.put(Statement.GAGG_NUM_GROUPS, parts[4]);
        Operator op = new AggregateOperator(0, KahanPlus.getKahanPlusFnObject(), true, CorrectionLocationType.LASTCOLUMN);
        return new ParameterizedBuiltinSPInstruction(op, paramsMap, out, opcode, str, false);
    }
    else {
        // last part is always the output
        CPOperand out = new CPOperand(parts[parts.length - 1]);
        // process remaining parts and build a hash map
        HashMap<String, String> paramsMap = constructParameterMap(parts);
        // determine the appropriate value function
        ValueFunction func = null;
        if (opcode.equalsIgnoreCase("groupedagg")) {
            // check for mandatory arguments
            String fnStr = paramsMap.get("fn");
            if (fnStr == null)
                throw new DMLRuntimeException("Function parameter is missing in groupedAggregate.");
            if (fnStr.equalsIgnoreCase("centralmoment")) {
                if (paramsMap.get("order") == null)
                    throw new DMLRuntimeException("Mandatory \"order\" must be specified when fn=\"centralmoment\" in groupedAggregate.");
            }
            Operator op = GroupedAggregateInstruction.parseGroupedAggOperator(fnStr, paramsMap.get("order"));
            return new ParameterizedBuiltinSPInstruction(op, paramsMap, out, opcode, str, false);
        }
        else if (opcode.equalsIgnoreCase("rmempty")) {
            boolean bRmEmptyBC = false;
            if (parts.length > 6)
                bRmEmptyBC = Boolean.parseBoolean(parts[5]);
            func = ParameterizedBuiltin.getParameterizedBuiltinFnObject(opcode);
            return new ParameterizedBuiltinSPInstruction(new SimpleOperator(func), paramsMap, out, opcode, str, bRmEmptyBC);
        }
        else if (opcode.equalsIgnoreCase("rexpand") || opcode.equalsIgnoreCase("replace")
            || opcode.equalsIgnoreCase("transformapply") || opcode.equalsIgnoreCase("transformdecode")) {
            func = ParameterizedBuiltin.getParameterizedBuiltinFnObject(opcode);
            return new ParameterizedBuiltinSPInstruction(new SimpleOperator(func), paramsMap, out, opcode, str, false);
        }
        else {
            throw new DMLRuntimeException("Unknown opcode (" + opcode + ") for ParameterizedBuiltin Instruction.");
        }
    }
}
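The helper constructParameterMap is not shown in this snippet. A minimal sketch of what it plausibly does, under the assumption that each middle part is a name=value pair; the Sketch suffix marks the method as hypothetical.

// assumption: parts[0] is the opcode and parts[parts.length-1] the output,
// so everything in between is a "name=value" parameter binding
static HashMap<String, String> constructParameterMapSketch(String[] parts) {
    HashMap<String, String> paramsMap = new HashMap<>();
    for (int i = 1; i < parts.length - 1; i++) {
        String[] nv = parts[i].split("=", 2); // e.g. "margin=rows" (illustrative)
        paramsMap.put(nv[0], nv[1]);
    }
    return paramsMap;
}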
Use of org.apache.sysml.runtime.matrix.operators.AggregateOperator in project incubator-systemml by apache.
The class SpoofSPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // decide upon broadcast side inputs
    boolean[] bcVect = determineBroadcastInputs(sec, _in);
    boolean[] bcVect2 = getMatrixBroadcastVector(sec, _in, bcVect);
    int main = getMainInputIndex(_in, bcVect);
    // create joined input rdd w/ replication if needed
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(_in[main].getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock[]> in = createJoinedInputRDD(sec, _in, bcVect, (_class.getSuperclass() == SpoofOuterProduct.class));
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    // create lists of input broadcasts and scalars
    ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices = new ArrayList<>();
    ArrayList<ScalarObject> scalars = new ArrayList<>();
    for (int i = 0; i < _in.length; i++) {
        if (_in[i].getDataType() == DataType.MATRIX && bcVect[i]) {
            bcMatrices.add(sec.getBroadcastForVariable(_in[i].getName()));
        }
        else if (_in[i].getDataType() == DataType.SCALAR) {
            // note: even if literal, it might be compiled as scalar placeholder
            scalars.add(sec.getScalarInput(_in[i].getName(), _in[i].getValueType(), _in[i].isLiteral()));
        }
    }
    // execute generated operator
    if (_class.getSuperclass() == SpoofCellwise.class) { // CELL
        SpoofCellwise op = (SpoofCellwise) CodegenUtils.createInstance(_class);
        AggregateOperator aggop = getAggregateOperator(op.getAggOp());
        if (_out.getDataType() == DataType.MATRIX) {
            // execute codegen block operation
            out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            if ((op.getCellType() == CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock())
                || (op.getCellType() == CellType.COL_AGG && mcIn.getRows() > mcIn.getRowsPerBlock())) {
                long numBlocks = (op.getCellType() == CellType.ROW_AGG) ? mcIn.getNumRowBlocks() : mcIn.getNumColBlocks();
                out = RDDAggregateUtils.aggByKeyStable(out, aggop, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        }
        else { // SCALAR
            out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            MatrixBlock tmpMB = RDDAggregateUtils.aggStable(out, aggop);
            sec.setVariable(_out.getName(), new DoubleObject(tmpMB.getValue(0, 0)));
        }
    }
    else if (_class.getSuperclass() == SpoofMultiAggregate.class) { // MAGG
        SpoofMultiAggregate op = (SpoofMultiAggregate) CodegenUtils.createInstance(_class);
        AggOp[] aggOps = op.getAggOps();
        MatrixBlock tmpMB = in.mapToPair(new MultiAggregateFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars))
            .values().fold(new MatrixBlock(), new MultiAggAggregateFunction(aggOps));
        sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
    }
    else if (_class.getSuperclass() == SpoofOuterProduct.class) { // OUTER
        if (_out.getDataType() == DataType.MATRIX) {
            SpoofOperator op = (SpoofOperator) CodegenUtils.createInstance(_class);
            OutProdType type = ((SpoofOuterProduct) op).getOuterProdType();
            // update matrix characteristics
            updateOutputMatrixCharacteristics(sec, op);
            MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            if (type == OutProdType.LEFT_OUTER_PRODUCT || type == OutProdType.RIGHT_OUTER_PRODUCT) {
                long numBlocks = mcOut.getNumRowBlocks() * mcOut.getNumColBlocks();
                out = RDDAggregateUtils.sumByKeyStable(out, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
        }
        else {
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            MatrixBlock tmp = RDDAggregateUtils.sumStable(out);
            sec.setVariable(_out.getName(), new DoubleObject(tmp.getValue(0, 0)));
        }
    }
    else if (_class.getSuperclass() == SpoofRowwise.class) { // ROW
        if (mcIn.getCols() > mcIn.getColsPerBlock()) {
            throw new DMLRuntimeException("Invalid spark rowwise operator w/ ncol=" + mcIn.getCols() + ", ncolpb=" + mcIn.getColsPerBlock() + ".");
        }
        SpoofRowwise op = (SpoofRowwise) CodegenUtils.createInstance(_class);
        long clen2 = op.getRowType().isConstDim2(op.getConstDim2()) ? op.getConstDim2()
            : op.getRowType().isRowTypeB1() ? sec.getMatrixCharacteristics(_in[1].getName()).getCols() : -1;
        RowwiseFunction fmmc = new RowwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars, (int) mcIn.getCols(), (int) clen2);
        out = in.mapPartitionsToPair(fmmc, op.getRowType() == RowType.ROW_AGG || op.getRowType() == RowType.NO_AGG);
        if (op.getRowType().isColumnAgg() || op.getRowType() == RowType.FULL_AGG) {
            MatrixBlock tmpMB = RDDAggregateUtils.sumStable(out);
            if (op.getRowType().isColumnAgg())
                sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
            else
                sec.setScalarOutput(_out.getName(), new DoubleObject(tmpMB.quickGetValue(0, 0)));
        }
        else { // row-agg or no-agg
            if (op.getRowType() == RowType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock()) {
                out = RDDAggregateUtils.sumByKeyStable(out, (int) Math.min(out.getNumPartitions(), mcIn.getNumRowBlocks()), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        }
    }
    else {
        throw new DMLRuntimeException("Operator " + _class.getSuperclass() + " is not supported on Spark");
    }
}
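The helper getAggregateOperator used in the CELL branch is not shown on this page. A plausible sketch of the mapping from a codegen AggOp to a runtime AggregateOperator, assuming sums use Kahan-compensated addition; the Builtin min/max function objects, the seed values, and the Sketch suffix are assumptions for illustration.

static AggregateOperator getAggregateOperatorSketch(AggOp aggop) {
    switch (aggop) {
        case SUM: // compensated sum carries a correction column
            return new AggregateOperator(0, KahanPlus.getKahanPlusFnObject(), true, CorrectionLocationType.LASTCOLUMN);
        case MIN: // seed with +Inf so any value replaces it
            return new AggregateOperator(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject(Builtin.BuiltinCode.MIN));
        case MAX: // seed with -Inf so any value replaces it
            return new AggregateOperator(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject(Builtin.BuiltinCode.MAX));
        default:
            throw new DMLRuntimeException("Unsupported aggregation type: " + aggop);
    }
}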