Use of org.apache.sysml.lops.DataPartition in project incubator-systemml by apache.
The class ParameterizedBuiltinOp, method constructLopsGroupedAggregate.
private void constructLopsGroupedAggregate(HashMap<String, Lop> inputlops, ExecType et) throws HopsException, LopsException {
//reset reblock requirement (see MR aggregate / construct lops)
setRequiresReblock(false);
//determine output dimensions
long outputDim1 = -1, outputDim2 = -1;
Lop numGroups = inputlops.get(Statement.GAGG_NUM_GROUPS);
if (!dimsKnown() && numGroups != null && numGroups instanceof Data && ((Data) numGroups).isLiteral()) {
long ngroups = ((Data) numGroups).getLongValue();
Lop input = inputlops.get(GroupedAggregate.COMBINEDINPUT);
long inDim1 = input.getOutputParameters().getNumRows();
long inDim2 = input.getOutputParameters().getNumCols();
boolean rowwise = (inDim1 == 1 && inDim2 > 1);
if (rowwise) {
//vector
outputDim1 = ngroups;
outputDim2 = 1;
} else {
//vector or matrix
outputDim1 = inDim2;
outputDim2 = ngroups;
}
}
//construct lops
if (et == ExecType.MR) {
Lop grp_agg = null;
// construct necessary lops: combineBinary/combineTernary and groupedAgg
boolean isWeighted = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) != null);
if (isWeighted) {
Lop append = BinaryOp.constructAppendLopChain(getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)), getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)), getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS)), DataType.MATRIX, getValueType(), true, getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
// add the combine lop to parameter list, with a new name "combinedinput"
inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
inputlops.remove(Statement.GAGG_TARGET);
inputlops.remove(Statement.GAGG_GROUPS);
inputlops.remove(Statement.GAGG_WEIGHTS);
grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
setRequiresReblock(true);
} else {
Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
Lop append = null;
//physical operator selection
double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
//mapgroupedagg
if (groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true) && getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp && ((LiteralOp) getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
//pre partitioning
boolean needPart = (groups.dimsKnown() && groups.getDim1() * groups.getDim2() > DistributedCacheInput.PARTITION_SIZE);
if (needPart) {
//operator selection: partition in CP if the groups vector fits into the local memory budget, else in MR
ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(groups.getDim1(), groups.getDim2(), 1.0) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : ExecType.MR;
Lop dcinput = new DataPartition(groups.constructLops(), DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
dcinput.getOutputParameters().setDimensions(groups.getDim1(), groups.getDim2(), target.getRowsInBlock(), target.getColsInBlock(), groups.getNnz());
setLineNumbers(dcinput);
inputlops.put(Statement.GAGG_GROUPS, dcinput);
}
Lop grp_agg_m = new GroupedAggregateM(inputlops, getDataType(), getValueType(), needPart, ExecType.MR);
grp_agg_m.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
setLineNumbers(grp_agg_m);
//post aggregation
Group grp = new Group(grp_agg_m, Group.OperationTypes.Sort, getDataType(), getValueType());
grp.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
setLineNumbers(grp);
Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
grp_agg = agg1;
//note: no reblock required
} else //general case: groupedagg
{
//multi-column-block result matrix or unknown dimensions
if (target.getDim2() >= target.getColsInBlock() || target.getDim2() <= 0) {
long m1_dim1 = target.getDim1();
long m1_dim2 = target.getDim2();
long m2_dim1 = groups.getDim1();
long m2_dim2 = groups.getDim2();
long m3_dim1 = m1_dim1;
long m3_dim2 = ((m1_dim2 > 0 && m2_dim2 > 0) ? (m1_dim2 + m2_dim2) : -1);
long m3_nnz = (target.getNnz() > 0 && groups.getNnz() > 0) ? (target.getNnz() + groups.getNnz()) : -1;
long brlen = target.getRowsInBlock();
long bclen = target.getColsInBlock();
Lop offset = createOffsetLop(target, true);
Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
setOutputDimensions(rep);
setLineNumbers(rep);
Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
setLineNumbers(group1);
Group group2 = new Group(rep, Group.OperationTypes.Sort, DataType.MATRIX, groups.getValueType());
group2.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, groups.getNnz());
setLineNumbers(group2);
append = new AppendR(group1, group2, DataType.MATRIX, ValueType.DOUBLE, true, ExecType.MR);
append.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
setLineNumbers(append);
} else //single-column-block vector or matrix
{
append = BinaryOp.constructMRAppendLop(target, groups, DataType.MATRIX, getValueType(), true, target);
}
// add the combine lop to parameter list, with a new name "combinedinput"
inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
inputlops.remove(Statement.GAGG_TARGET);
inputlops.remove(Statement.GAGG_GROUPS);
grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
setRequiresReblock(true);
}
}
setLineNumbers(grp_agg);
setLops(grp_agg);
} else //CP/Spark
{
Lop grp_agg = null;
if (et == ExecType.CP) {
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
} else if (et == ExecType.SPARK) {
//physical operator selection
Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null && OptimizerUtils.checkSparkBroadcastMemoryBudget(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()));
//mapgroupedagg
if (broadcastGroups && getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp && ((LiteralOp) getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum") && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
//no reblock required (directly output binary block)
} else //groupedagg (w/ or w/o broadcast)
{
grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
setRequiresReblock(true);
}
}
setLineNumbers(grp_agg);
setLops(grp_agg);
}
}
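A recurring pattern in this method, and in the quaternary operators further down this page, is staging an input for map-side (distributed-cache) consumption: wrap its lop in a DataPartition whenever it may exceed the cache partition size, and run the partitioning in CP only if the data fits into the local memory budget. The following minimal sketch factors that pattern out; partitionForCache is a hypothetical helper used for illustration, not part of the SystemML codebase, though every call it makes appears in the snippets on this page.
//hypothetical helper (illustration only): stage an input for distributed-cache
//consumption, partitioning it if it may exceed the cache partition size
private static Lop partitionForCache(Hop in, double estSize) throws HopsException, LopsException {
Lop lin = in.constructLops();
boolean needPart = !in.dimsKnown() || in.getDim1() * in.getDim2() > DistributedCacheInput.PARTITION_SIZE;
if (!needPart)
return lin;
//partition in CP if the input fits into the local memory budget, else in MR
ExecType etPart = (estSize < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : ExecType.MR;
Lop dcinput = new DataPartition(lin, DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
dcinput.getOutputParameters().setDimensions(in.getDim1(), in.getDim2(), in.getRowsInBlock(), in.getColsInBlock(), in.getNnz());
return dcinput;
}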
Use of org.apache.sysml.lops.DataPartition in project incubator-systemml by apache.
The class ParameterizedBuiltinOp, method constructLopsRemoveEmpty.
private void constructLopsRemoveEmpty(HashMap<String, Lop> inputlops, ExecType et) throws HopsException, LopsException {
Hop targetHop = getInput().get(_paramIndexMap.get("target"));
Hop marginHop = getInput().get(_paramIndexMap.get("margin"));
Hop selectHop = (_paramIndexMap.get("select") != null) ? getInput().get(_paramIndexMap.get("select")) : null;
if (et == ExecType.CP || et == ExecType.CP_FILE) {
ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
setOutputDimensions(pbilop);
setLineNumbers(pbilop);
setLops(pbilop);
/*DISABLED CP PMM (see for example, MDA Bivar test, requires size propagation on recompile)
if( et == ExecType.CP && isTargetDiagInput() && marginHop instanceof LiteralOp
&& ((LiteralOp)marginHop).getStringValue().equals("rows")
&& _outputPermutationMatrix ) //SPECIAL CASE SELECTION VECTOR
{
//TODO this special case could be taken into account for memory estimates in order
// to reduce the estimates for the input diag and subsequent matrix multiply
//get input vector (without materializing diag())
Hop input = targetHop.getInput().get(0);
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
MemoTable memo = new MemoTable();
boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp)input).isPPredOperation());
//step1: compute index vectors
Hop ppred0 = input;
if( !isPPredInput ) { //ppred only if required
ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp("0",0));
HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
ppred0.refreshSizeInformation();
ppred0.computeMemEstimate(memo); //select exec type
HopRewriteUtils.copyLineNumbers(this, ppred0);
}
UnaryOp cumsum = new UnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, ppred0);
HopRewriteUtils.setOutputBlocksizes(cumsum, brlen, bclen);
cumsum.refreshSizeInformation();
cumsum.computeMemEstimate(memo); //select exec type
HopRewriteUtils.copyLineNumbers(this, cumsum);
BinaryOp sel = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, ppred0, cumsum);
HopRewriteUtils.setOutputBlocksizes(sel, brlen, bclen);
sel.refreshSizeInformation();
sel.computeMemEstimate(memo); //select exec type
HopRewriteUtils.copyLineNumbers(this, sel);
Lop loutput = sel.constructLops();
//Step 4: cleanup hops (allow for garbage collection)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops( loutput );
}
else //GENERAL CASE
{
ParameterizedBuiltin pbilop = new ParameterizedBuiltin( et, inputlops,
HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType());
pbilop.getOutputParameters().setDimensions(getDim1(),getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
setLineNumbers(pbilop);
setLops(pbilop);
}
*/
} else if (et == ExecType.MR) {
//special compile for mr removeEmpty-diag
if (isTargetDiagInput() && marginHop instanceof LiteralOp && ((LiteralOp) marginHop).getStringValue().equals("rows")) {
//get input vector (without materializing diag())
Hop input = targetHop.getInput().get(0);
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
MemoTable memo = new MemoTable();
boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp) input).isPPredOperation());
//step 1: compute index vectors
Hop ppred0 = input;
if (!isPPredInput) {
//ppred only if required
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
HopRewriteUtils.updateHopCharacteristics(ppred0, brlen, bclen, memo, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(ppred0, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, memo, this);
Lop loutput = null;
double mest = AggBinaryOp.getMapmmMemEstimate(input.getDim1(), 1, brlen, bclen, -1, brlen, bclen, brlen, bclen, -1, 1, true);
double mbudget = OptimizerUtils.getRemoteMemBudgetMap(true);
//SPECIAL CASE: SELECTION VECTOR
if (_outputPermutationMatrix && mest < mbudget) {
BinaryOp sel = HopRewriteUtils.createBinary(ppred0, cumsum, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(sel, brlen, bclen, memo, this);
loutput = sel.constructLops();
} else //GENERAL CASE: GENERAL PERMUTATION MATRIX
{
//max ensures non-zero entries and at least one output row
BinaryOp max = HopRewriteUtils.createBinary(cumsum, new LiteralOp(1), OpOp2.MAX);
HopRewriteUtils.updateHopCharacteristics(max, brlen, bclen, memo, this);
DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(input);
seq.setName("tmp4");
HopRewriteUtils.updateHopCharacteristics(seq, brlen, bclen, memo, this);
//step 2: compute removeEmpty(rows) output via table, seq guarantees right column dimension
//note: the weights are always the input (even if isPPredInput) because the input also includes 0s
TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, max, seq, input);
table.setOutputBlocksizes(brlen, bclen);
table.refreshSizeInformation();
//force MR
table.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, table);
table.setDisjointInputs(true);
table.setOutputEmptyBlocks(_outputEmptyBlocks);
loutput = table.constructLops();
HopRewriteUtils.removeChildReference(table, input);
}
//step 3: cleanup hops (allow for garbage collection)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(loutput);
} else //default mr remove empty
{
if (!(marginHop instanceof LiteralOp))
throw new HopsException("Parameter 'margin' must be a literal argument.");
Hop input = targetHop;
long rlen = input.getDim1();
long clen = input.getDim2();
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
long nnz = input.getNnz();
boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
//construct lops via a new partial hop dag and subsequent lops construction
//in order to reuse operator selection decisions
BinaryOp ppred0 = null;
Hop emptyInd = null;
if (selectHop == null) {
//Step 1: compute row/col non-empty indicators
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
//always MR
ppred0.setForcedExecType(ExecType.MR);
emptyInd = ppred0;
if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
//always MR
emptyInd.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, emptyInd);
}
} else {
emptyInd = selectHop;
emptyInd.setOutputBlocksizes(brlen, bclen);
emptyInd.refreshSizeInformation();
//always MR
emptyInd.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, emptyInd);
}
//Step 2: compute row offsets for non-empty rows
Hop cumsumInput = emptyInd;
if (!rmRows) {
cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
Hop cumsumOutput = cumsum;
if (!rmRows) {
cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
}
//alternative: right indexing
Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
//Step 3: gather non-empty rows/cols into final results
Lop linput = input.constructLops();
Lop loffset = offsets.constructLops();
Lop lmaxdim = maxDim.constructLops();
double mestPM = OptimizerUtils.estimatePartitionedSizeExactSparsity(rlen, 1, brlen, bclen, 1.0);
Lop rmEmpty = null;
//a) broadcast-based PMM (permutation matrix mult)
if (rmRows && rlen > 0 && mestPM < OptimizerUtils.getRemoteMemBudgetMap()) {
boolean needPart = !offsets.dimsKnown() || offsets.getDim1() > DistributedCacheInput.PARTITION_SIZE;
if (needPart) {
//requires partitioning
loffset = new DataPartition(loffset, DataType.MATRIX, ValueType.DOUBLE, (mestPM > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
loffset.getOutputParameters().setDimensions(rlen, 1, brlen, bclen, rlen);
setLineNumbers(loffset);
}
rmEmpty = new PMMJ(loffset, linput, lmaxdim, getDataType(), getValueType(), needPart, true, ExecType.MR);
setOutputDimensions(rmEmpty);
setLineNumbers(rmEmpty);
} else //b) general case: repartition-based rmempty
{
boolean requiresRep = ((clen > bclen || clen <= 0) && rmRows) || ((rlen > brlen || rlen <= 0) && !rmRows);
if (requiresRep) {
//ncol of left input (determines num replicates)
Lop pos = createOffsetLop(input, rmRows);
loffset = new RepMat(loffset, pos, rmRows, DataType.MATRIX, ValueType.DOUBLE);
loffset.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
setLineNumbers(loffset);
}
Group group1 = new Group(linput, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group1);
group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
Group group2 = new Group(loffset, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group2);
group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
HashMap<String, Lop> inMap = new HashMap<String, Lop>();
inMap.put("target", group1);
inMap.put("offset", group2);
inMap.put("maxdim", lmaxdim);
inMap.put("margin", inputlops.get("margin"));
rmEmpty = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
setOutputDimensions(rmEmpty);
setLineNumbers(rmEmpty);
}
Group group3 = new Group(rmEmpty, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group3);
group3.getOutputParameters().setDimensions(-1, -1, brlen, bclen, -1);
Aggregate finalagg = new Aggregate(group3, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), ExecType.MR);
setOutputDimensions(finalagg);
setLineNumbers(finalagg);
//Step 4: cleanup hops (allow for garbage collection)
if (selectHop == null)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(finalagg);
}
} else if (et == ExecType.SPARK) {
if (!(marginHop instanceof LiteralOp))
throw new HopsException("Parameter 'margin' must be a literal argument.");
Hop input = targetHop;
long rlen = input.getDim1();
long clen = input.getDim2();
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
//construct lops via a new partial hop dag and subsequent lops construction
//in order to reuse operator selection decisions
BinaryOp ppred0 = null;
Hop emptyInd = null;
if (selectHop == null) {
//Step 1: compute row/col non-empty indicators
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
//always Spark
ppred0.setForcedExecType(ExecType.SPARK);
emptyInd = ppred0;
if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
//always Spark
emptyInd.setForcedExecType(ExecType.SPARK);
}
} else {
emptyInd = selectHop;
emptyInd.setOutputBlocksizes(brlen, bclen);
emptyInd.refreshSizeInformation();
//always Spark
emptyInd.setForcedExecType(ExecType.SPARK);
HopRewriteUtils.copyLineNumbers(this, emptyInd);
}
//Step 2: compute row offsets for non-empty rows
Hop cumsumInput = emptyInd;
if (!rmRows) {
cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
Hop cumsumOutput = cumsum;
if (!rmRows) {
cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
}
//alternative: right indexing
Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
//Step 3: gather non-empty rows/cols into final results
Lop linput = input.constructLops();
Lop loffset = offsets.constructLops();
Lop lmaxdim = maxDim.constructLops();
HashMap<String, Lop> inMap = new HashMap<String, Lop>();
inMap.put("target", linput);
inMap.put("offset", loffset);
inMap.put("maxdim", lmaxdim);
inMap.put("margin", inputlops.get("margin"));
if (!FORCE_DIST_RM_EMPTY && isRemoveEmptyBcSP())
_bRmEmptyBC = true;
ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, _bRmEmptyBC);
setOutputDimensions(pbilop);
setLineNumbers(pbilop);
//Step 4: cleanup hops (allow for garbage collection)
if (selectHop == null)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(pbilop);
//NOTE: in contrast to MR, replication and aggregation are handled locally within the instruction
}
}
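Both the MR and Spark paths above encode the same offset computation as a partial hop dag: a non-empty indicator (ppred X != 0, reduced via row/col max), a cumulative sum over the indicators, and an element-wise multiply that zeroes out the offsets of empty rows or columns. As a point of reference, this plain-Java sketch (an illustration of the semantics, not SystemML runtime code) computes the same offsets for margin="rows":
//offsets[i] is the 1-based output row of input row i, or 0 if row i is empty
static long[] removeEmptyRowOffsets(double[][] X) {
long[] offsets = new long[X.length];
long cumsum = 0;
for (int i = 0; i < X.length; i++) {
//emptyInd = rowMax(X != 0)
boolean nonEmpty = false;
for (double v : X[i])
nonEmpty |= (v != 0);
//cumsum over the indicator vector
cumsum += nonEmpty ? 1 : 0;
//offsets = cumsum * emptyInd
offsets[i] = nonEmpty ? cumsum : 0;
}
//max(offsets) corresponds to maxDim, i.e., the number of non-empty output rows
return offsets;
}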
Use of org.apache.sysml.lops.DataPartition in project incubator-systemml by apache.
The class QuaternaryOp, method constructMRLopsWeightedDivMM.
private void constructMRLopsWeightedDivMM(WDivMMType wtype) throws HopsException, LopsException {
//NOTE: the common case for wdivmm is factors U/V with a rank of 10s to 100s; the current runtime only
//supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
//by applying the hop rewrite for Weighted DivMM only if this constraint holds.
Hop W = getInput().get(0);
Hop U = getInput().get(1);
Hop V = getInput().get(2);
Hop X = getInput().get(3);
//MR operator selection, part1
//size U
double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
//size V
double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
boolean isMapWdivmm = ((!wtype.hasFourInputs() || wtype.hasScalar()) && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
//broadcast
if (!FORCE_REPLICATION && isMapWdivmm) {
//partitioning of U
boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
Lop lU = U.constructLops();
if (needPartU) {
//requires partitioning
lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
setLineNumbers(lU);
}
//partitioning of V
boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
Lop lV = V.constructLops();
if (needPartV) {
//requires partitioning
lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
setLineNumbers(lV);
}
//map-side wdivmm always with broadcast
Lop wdivmm = new WeightedDivMM(W.constructLops(), lU, lV, X.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
setOutputDimensions(wdivmm);
setLineNumbers(wdivmm);
setLops(wdivmm);
} else //general case
{
//MR operator selection part 2 (both cannot happen for wdivmm, otherwise mapwdivmm)
boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
Group grpW = new Group(W.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grpW.getOutputParameters().setDimensions(W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), W.getNnz());
setLineNumbers(grpW);
Lop grpX = X.constructLops();
if (wtype.hasFourInputs() && (X.getDataType() != DataType.SCALAR))
grpX = new Group(grpX, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
setLineNumbers(grpX);
Lop lU = null;
if (cacheU) {
//partitioning of U for read through distributed cache
boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
lU = U.constructLops();
if (needPartU) {
//requires partitioning
lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
setLineNumbers(lU);
}
} else {
//replication of U for shuffle to target block
//ncol of t(V) -> nrow of V determines num replicates
Lop offset = createOffsetLop(V, false);
lU = new RepMat(U.constructLops(), offset, true, V.getDataType(), V.getValueType());
lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
setLineNumbers(lU);
Group grpU = new Group(lU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grpU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
setLineNumbers(grpU);
lU = grpU;
}
Lop lV = null;
if (cacheV) {
//partitioning of V for read through distributed cache
boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
lV = V.constructLops();
if (needPartV) {
//requires partitioning
lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
setLineNumbers(lV);
}
} else {
//replication of t(V) for shuffle to target block
Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
setLineNumbers(ltV);
//nrow of U determines num replicates
Lop offset = createOffsetLop(U, false);
lV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
lV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
setLineNumbers(lV);
Group grpV = new Group(lV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
setLineNumbers(grpV);
lV = grpV;
}
//reduce-side wdivmm w/ or without broadcast
Lop wdivmm = new WeightedDivMMR(grpW, lU, lV, grpX, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
setOutputDimensions(wdivmm);
setLineNumbers(wdivmm);
setLops(wdivmm);
}
//in contrast to wsloss/wsigmoid, wdivmm requires partial aggregation (for the final mm)
Group grp = new Group(getLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
setOutputDimensions(grp);
setLineNumbers(grp);
Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
// aggregation uses kahanSum but the inputs do not have correction values
agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
setOutputDimensions(agg1);
setLineNumbers(agg1);
setLops(agg1);
}
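The reduce-side branch above decides per factor whether to ship it through the distributed cache (cacheU/cacheV) or to replicate and shuffle it. Since the identical decision reappears in constructMRLopsWeightedCeMM below, here it is extracted in isolation; selectFactorCaching is a hypothetical name used for this sketch, not SystemML API.
//cache U if it fits the remote reduce memory budget; cache V if it fits
//alone, or together with an already-cached U; uncached factors are
//replicated (RepMat) and shuffled to the target blocks instead
static boolean[] selectFactorCaching(double m1Size, double m2Size, boolean forceReplication) {
double budget = OptimizerUtils.getRemoteMemBudgetReduce();
boolean cacheU = !forceReplication && (m1Size < budget);
boolean cacheV = !forceReplication && ((!cacheU && m2Size < budget) || (cacheU && m1Size + m2Size < budget));
return new boolean[] { cacheU, cacheV };
}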
Use of org.apache.sysml.lops.DataPartition in project incubator-systemml by apache.
The class QuaternaryOp, method constructMRLopsWeightedCeMM.
private void constructMRLopsWeightedCeMM(WCeMMType wtype) throws HopsException, LopsException {
//NOTE: the common case for wcemm is factors U/V with a rank of 10s to 100s; the current runtime only
//supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
//by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
Hop X = getInput().get(0);
Hop U = getInput().get(1);
Hop V = getInput().get(2);
Hop eps = getInput().get(3);
//MR operator selection, part1
//size U
double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
//size V
double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
boolean isMapWcemm = (m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetMap(true));
//broadcast
if (!FORCE_REPLICATION && isMapWcemm) {
//partitioning of U
boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
Lop lU = U.constructLops();
if (needPartU) {
//requires partitioning
lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
setLineNumbers(lU);
}
//partitioning of V
boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
Lop lV = V.constructLops();
if (needPartV) {
//requires partitioning
lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
setLineNumbers(lV);
}
//map-side wcemm always with broadcast
Lop wcemm = new WeightedCrossEntropy(X.constructLops(), lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
setLineNumbers(wcemm);
Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
setLineNumbers(grp);
Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
// aggregation uses kahanSum but the inputs do not have correction values
agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
setLineNumbers(agg1);
UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
setLineNumbers(unary1);
setLops(unary1);
} else //general case
{
//MR operator selection part 2
boolean cacheU = !FORCE_REPLICATION && (m1Size < OptimizerUtils.getRemoteMemBudgetReduce());
boolean cacheV = !FORCE_REPLICATION && ((!cacheU && m2Size < OptimizerUtils.getRemoteMemBudgetReduce()) || (cacheU && m1Size + m2Size < OptimizerUtils.getRemoteMemBudgetReduce()));
Group grpX = new Group(X.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grpX.getOutputParameters().setDimensions(X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
setLineNumbers(grpX);
Lop lU = null;
if (cacheU) {
//partitioning of U for read through distributed cache
boolean needPartU = !U.dimsKnown() || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
lU = U.constructLops();
if (needPartU) {
//requires partitioning
lU = new DataPartition(lU, DataType.MATRIX, ValueType.DOUBLE, (m1Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
setLineNumbers(lU);
}
} else {
//replication of U for shuffle to target block
//ncol of t(V) -> nrow of V determines num replicates
Lop offset = createOffsetLop(V, false);
lU = new RepMat(U.constructLops(), offset, true, V.getDataType(), V.getValueType());
lU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
setLineNumbers(lU);
Group grpU = new Group(lU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grpU.getOutputParameters().setDimensions(U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
setLineNumbers(grpU);
lU = grpU;
}
Lop lV = null;
if (cacheV) {
//partitioning of V for read through distributed cache
boolean needPartV = !V.dimsKnown() || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
lV = V.constructLops();
if (needPartV) {
//requires partitioning
lV = new DataPartition(lV, DataType.MATRIX, ValueType.DOUBLE, (m2Size > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
lV.getOutputParameters().setDimensions(V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
setLineNumbers(lV);
}
} else {
//replication of t(V) for shuffle to target block
Transform ltV = new Transform(V.constructLops(), HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
ltV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
setLineNumbers(ltV);
//nrow of U determines num replicates
Lop offset = createOffsetLop(U, false);
lV = new RepMat(ltV, offset, false, V.getDataType(), V.getValueType());
lV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
setLineNumbers(lV);
Group grpV = new Group(lV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grpV.getOutputParameters().setDimensions(V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
setLineNumbers(grpV);
lV = grpV;
}
//reduce-side wcemm w/ or without broadcast
Lop wcemm = new WeightedCrossEntropyR(grpX, lU, lV, eps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
setLineNumbers(wcemm);
Group grp = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
grp.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
setLineNumbers(grp);
Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
// aggregation uses kahanSum but the inputs do not have correction values
agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
agg1.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
setLineNumbers(agg1);
UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
setLineNumbers(unary1);
setLops(unary1);
}
}
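For orientation, the scalar that the fused wcemm operator produces is, assuming the epsilon-augmented variant wired above (eps as the fourth input), sum(X * log(U %*% t(V) + eps)). A dense single-node sketch of these semantics in plain Java (illustration only, not the SystemML runtime kernel):
//dense reference semantics of wcemm(X, U, V, eps)
static double wcemm(double[][] X, double[][] U, double[][] V, double eps) {
int rank = U[0].length;
double sum = 0;
for (int i = 0; i < X.length; i++)
for (int j = 0; j < X[i].length; j++) {
//(U %*% t(V))[i][j], a rank-length dot product
double uv = 0;
for (int l = 0; l < rank; l++)
uv += U[i][l] * V[j][l];
sum += X[i][j] * Math.log(uv + eps);
}
return sum;
}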
Use of org.apache.sysml.lops.DataPartition in project incubator-systemml by apache.
The class AggBinaryOp, method constructMRLopsMapMMWithLeftTransposeRewrite.
private Lop constructMRLopsMapMMWithLeftTransposeRewrite() throws HopsException, LopsException {
//guaranteed to exist
Hop X = getInput().get(0).getInput().get(0);
Hop Y = getInput().get(1);
//right vector transpose CP
Lop tY = new Transform(Y.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
tY.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
setLineNumbers(tY);
//matrix mult
// If the common dimension nrow(X) fits into a single row block, explicit aggregation is not required,
// i.e., the entire matrix multiplication can be performed in the mappers.
boolean needAgg = (X.getDim1() <= 0 || X.getDim1() > X.getRowsInBlock());
//MAPMM_R, disregarding the transpose rewrite
boolean needPart = requiresPartitioning(MMultMethod.MAPMM_R, true);
//pre partitioning
Lop dcinput = null;
if (needPart) {
//operator selection: partition in CP if t(Y) fits into the local memory budget, else in MR
ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(Y.getDim2(), Y.getDim1(), OptimizerUtils.getSparsity(Y.getDim2(), Y.getDim1(), Y.getNnz())) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : ExecType.MR;
dcinput = new DataPartition(tY, DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.COLUMN_BLOCK_WISE_N);
dcinput.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
setLineNumbers(dcinput);
} else
dcinput = tY;
MapMult mapmult = new MapMult(dcinput, X.constructLops(), getDataType(), getValueType(), false, needPart, false);
mapmult.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
setLineNumbers(mapmult);
//post aggregation
Lop mult = null;
if (needAgg) {
Group grp = new Group(mapmult, Group.OperationTypes.Sort, getDataType(), getValueType());
grp.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
setLineNumbers(grp);
Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
agg1.getOutputParameters().setDimensions(Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
setLineNumbers(agg1);
agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
mult = agg1;
} else
mult = mapmult;
//result transpose CP
Lop out = new Transform(mult, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
out.getOutputParameters().setDimensions(X.getDim2(), Y.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
return out;
}
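The rewrite this method implements is the identity t(X) %*% Y == t(t(Y) %*% X): only the small right-hand side Y is transposed (in CP), mapmult computes t(Y) %*% X with the large X consumed untransposed, and the small result is transposed back. A plain-Java sanity check of the identity (illustration only):
//check: t(mm(t(Y), X)) produces the same matrix as mm(t(X), Y)
static double[][] t(double[][] A) {
double[][] R = new double[A[0].length][A.length];
for (int i = 0; i < A.length; i++)
for (int j = 0; j < A[0].length; j++)
R[j][i] = A[i][j];
return R;
}
static double[][] mm(double[][] A, double[][] B) {
double[][] R = new double[A.length][B[0].length];
for (int i = 0; i < A.length; i++)
for (int l = 0; l < A[0].length; l++)
for (int j = 0; j < B[0].length; j++)
R[i][j] += A[i][l] * B[l][j];
return R;
}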