Use of org.apache.sysml.lops.Group in the incubator-systemml project by Apache.
In class QuaternaryOp, method constructMRLopsWeightedCeMM.
/**
 * Builds the MR lop chain for the weighted cross-entropy operator and registers it via
 * {@code setLops}. The chain is: wcemm -> sort group -> sum aggregate -> cast to scalar.
 * <p>
 * Two physical variants are generated: a map-side operator that receives U and V as
 * (optionally partitioned) broadcasts when both factors fit into the remote map memory
 * budget, and otherwise a reduce-side operator that caches U and/or V only if they fit
 * into the remote reduce memory budget.
 *
 * @param wtype weighted cross-entropy variant passed through to the wcemm lop
 */
private void constructMRLopsWeightedCeMM(WCeMMType wtype) {
    // NOTE: the common case for wcemm are factors U/V with a rank of 10s to 100s; the current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default); we enforce this
    // by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
    Hop inX = getInput().get(0);
    Hop inU = getInput().get(1);
    Hop inV = getInput().get(2);
    Hop inEps = getInput().get(3);

    // MR operator selection, part 1: estimated sizes of both factors
    double sizeU = OptimizerUtils.estimateSize(inU.getDim1(), inU.getDim2());
    double sizeV = OptimizerUtils.estimateSize(inV.getDim1(), inV.getDim2());
    boolean fitsMapBudget = (sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true));

    Lop wcemm;
    if (!FORCE_REPLICATION && fitsMapBudget) {
        // broadcast: partition U if its dimensions are unknown or exceed the partition size
        boolean partitionU = !inU.dimsKnown() || inU.getDim1() * inU.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopU = inU.constructLops();
        if (partitionU) {
            // partitioning runs in MR only if U exceeds the local memory budget
            ExecType partTypeU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, partTypeU, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopU.getOutputParameters().setDimensions(inU.getDim1(), inU.getDim2(), getRowsInBlock(), getColsInBlock(), inU.getNnz());
            setLineNumbers(lopU);
        }

        // same decision for V
        boolean partitionV = !inV.dimsKnown() || inV.getDim1() * inV.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopV = inV.constructLops();
        if (partitionV) {
            ExecType partTypeV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, partTypeV, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopV.getOutputParameters().setDimensions(inV.getDim1(), inV.getDim2(), getRowsInBlock(), getColsInBlock(), inV.getNnz());
            setLineNumbers(lopV);
        }

        // map-side wcemm always with broadcast
        wcemm = new WeightedCrossEntropy(inX.constructLops(), lopU, lopV, inEps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    } else {
        // general case; MR operator selection, part 2: V is cached only if it fits
        // next to whatever is already cached for U
        boolean cacheU = !FORCE_REPLICATION && (sizeU < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION && ((cacheU ? sizeU + sizeV : sizeV) < OptimizerUtils.getRemoteMemBudgetReduce());

        Group sortedX = new Group(inX.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        sortedX.getOutputParameters().setDimensions(inX.getDim1(), inX.getDim2(), inX.getRowsInBlock(), inX.getColsInBlock(), -1);
        setLineNumbers(sortedX);

        Lop lopU = constructLeftFactorMRLop(inU, inV, cacheU, sizeU);
        Lop lopV = constructRightFactorMRLop(inU, inV, cacheV, sizeV);

        // reduce-side wcemm w/ or without broadcast
        wcemm = new WeightedCrossEntropyR(sortedX, lopU, lopV, inEps.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }

    // both variants emit a 1x1 result that is grouped, summed and cast to a scalar
    wcemm.getOutputParameters().setDimensions(1, 1, inX.getRowsInBlock(), inX.getColsInBlock(), -1);
    setLineNumbers(wcemm);

    Group sorted = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
    sorted.getOutputParameters().setDimensions(1, 1, inX.getRowsInBlock(), inX.getColsInBlock(), -1);
    setLineNumbers(sorted);

    Aggregate sumAgg = new Aggregate(sorted, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    // aggregation uses kahanSum
    sumAgg.setupCorrectionLocation(CorrectionLocationType.NONE);
    sumAgg.getOutputParameters().setDimensions(1, 1, inX.getRowsInBlock(), inX.getColsInBlock(), -1);
    setLineNumbers(sumAgg);

    UnaryCP asScalar = new UnaryCP(sumAgg, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
    asScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(asScalar);
    setLops(asScalar);
}
Use of org.apache.sysml.lops.Group in the incubator-systemml project by Apache.
In class TernaryOp, method constructLopsTernaryDefault.
/**
 * Constructs the default {@code Ternary} lop over the three inputs of this operator and
 * registers it via {@code setLops}.
 * <p>
 * The execution type comes from {@code optFindExecType()}, overridden to CP when all
 * inputs are scalars. For CP, Spark and GPU the input lops are used directly; for any
 * other execution type (MR) every matrix input is first wrapped in a sort {@code Group}.
 */
private void constructLopsTernaryDefault() {
    ExecType et = optFindExecType();
    // always CP for pure scalar operations
    boolean scalarsOnly = getInput().stream().allMatch(h -> h.getDataType().isScalar());
    if (scalarsOnly) {
        et = ExecType.CP;
    }

    // anything that is not CP, Spark or GPU is compiled for MR
    boolean mr = !(et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU);

    Hop[] operands = { getInput().get(0), getInput().get(1), getInput().get(2) };
    Lop[] operandLops = new Lop[operands.length];
    for (int i = 0; i < operands.length; i++) {
        Lop lop = operands[i].constructLops();
        if (mr && operands[i].getDataType().isMatrix()) {
            // MR: matrix inputs go through a sort group that carries this operator's
            // data type, value type, line numbers and output dimensions
            lop = new Group(lop, Group.OperationTypes.Sort, getDataType(), getValueType());
            setLineNumbers(lop);
            setOutputDimensions(lop);
        }
        operandLops[i] = lop;
    }

    Ternary plusmult = new Ternary(HopsOpOp3Lops.get(_op), operandLops[0], operandLops[1], operandLops[2], getDataType(), getValueType(), et);
    setOutputDimensions(plusmult);
    setLineNumbers(plusmult);
    setLops(plusmult);
}
Use of org.apache.sysml.lops.Group in the incubator-systemml project by Apache.
In class UnaryOp, method constructLopsMRCumulativeUnary.
/**
 * Builds the MR lop chain for a cumulative unary aggregate over the single input.
 * <p>
 * The plan is a multipass scheme: (1) per-block preaggregation, repeated level by level
 * until the partial aggregates fit into the local memory budget, (2) an in-memory
 * cumulative operation over the remaining partial aggregates, and (3) one split/offset
 * pass per preaggregation level that applies the aggregated offsets to that level's data.
 * Only this one robust physical operator is generated; alternative realizations (e.g.,
 * when the preaggregated intermediates fit into the map task memory budget, or custom
 * job types) are possible for specific scenarios.
 *
 * @return low-level operator
 */
private Lop constructLopsMRCumulativeUnary() {
    Hop input = getInput().get(0);
    long rlen = input.getDim1();
    long clen = input.getDim2();
    long brlen = input.getRowsInBlock();
    long bclen = input.getColsInBlock();
    // with unknown dimensions or a forced MR exec type, generate at least one level
    boolean forceLevel = !dimsKnown() || _etypeForced == ExecType.MR;
    OperationTypes aggtype = getCumulativeAggType();

    Lop current = input.constructLops();
    // input of each preaggregation level, consumed again in reverse order below
    ArrayList<Lop> levelInputs = new ArrayList<>();
    int numLevels = 0;

    // recursive preaggregation until aggregates fit into CP memory budget
    while (true) {
        long curRows = current.getOutputParameters().getNumRows();
        boolean exceedsBudget = (2 * OptimizerUtils.estimateSize(curRows, clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget()
            && curRows > 1;
        if (!exceedsBudget && !forceLevel) {
            break;
        }
        levelInputs.add(current);

        // preaggregation per block: one aggregate row per row block
        long rlenAgg = (long) Math.ceil((double) curRows / brlen);
        Lop partial = new CumulativePartialAggregate(current, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        partial.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(partial);

        Group sortedPartial = new Group(partial, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        sortedPartial.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(sortedPartial);

        Aggregate levelAgg = new Aggregate(sortedPartial, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
        levelAgg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        // aggregation uses kahanSum but the inputs do not have correction values
        levelAgg.setupCorrectionLocation(CorrectionLocationType.NONE);
        setLineNumbers(levelAgg);

        current = levelAgg;
        numLevels++;
        // in case of unknowns, generate one level
        forceLevel = false;
    }

    // in-memory cum sum (of partial aggregates)
    long aggRows = current.getOutputParameters().getNumRows();
    if (aggRows != 1) {
        int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        Unary inMemory = new Unary(current, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
        inMemory.getOutputParameters().setDimensions(aggRows, clen, brlen, bclen, -1);
        setLineNumbers(inMemory);
        current = inMemory;
    }

    // split, group and mr cumsum: walk the levels from the innermost back to the input
    for (int lvl = numLevels - 1; lvl >= 0; lvl--) {
        double init = getCumulativeInitValue();
        CumulativeSplitAggregate offsets = new CumulativeSplitAggregate(current, DataType.MATRIX, ValueType.DOUBLE, init);
        offsets.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(offsets);

        Group sortedData = new Group(levelInputs.get(lvl), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        sortedData.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(sortedData);

        Group sortedOffsets = new Group(offsets, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        sortedOffsets.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(sortedOffsets);

        CumulativeOffsetBinary applied = new CumulativeOffsetBinary(sortedData, sortedOffsets, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.MR);
        applied.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(applied);
        current = applied;
    }
    return current;
}
Use of org.apache.sysml.lops.Group in the incubator-systemml project by Apache.
In class AggBinaryOp, method constructMRLopsCPMM.
/**
 * Constructs the MR lops for a CPMM matrix multiplication and registers them via
 * {@code setLops}. If the left-transpose rewrite is applicable, the rewritten plan is
 * used; otherwise the plan is MMCJ -> sort group -> aggregate with the outer operation.
 */
private void constructMRLopsCPMM() {
    if (isLeftTransposeRewriteApplicable(false, false)) {
        setLops(constructMRLopsCPMMWithLeftTransposeRewrite());
        return;
    }

    // general case
    Hop left = getInput().get(0);
    Hop right = getInput().get(1);
    MMCJType aggType = getMMCJAggregationType(left, right);

    MMCJ join = new MMCJ(left.constructLops(), right.constructLops(), getDataType(), getValueType(), aggType, ExecType.MR);
    setOutputDimensions(join);
    setLineNumbers(join);

    Group sorted = new Group(join, Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(sorted);
    setLineNumbers(sorted);

    Aggregate finalAgg = new Aggregate(sorted, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
    setOutputDimensions(finalAgg);
    setLineNumbers(finalAgg);
    // aggregation uses kahanSum but the inputs do not have correction values
    finalAgg.setupCorrectionLocation(CorrectionLocationType.NONE);
    setLops(finalAgg);
}
Use of org.apache.sysml.lops.Group in the incubator-systemml project by Apache.
In class AggBinaryOp, method constructMRLopsMapMMChain.
/**
 * Constructs the MR lops for a map-side matrix multiplication chain and registers them
 * via {@code setLops}. The plan is MapMultChain -> sort group -> aggregate with the
 * outer operation.
 * <p>
 * For {@code ChainType.XtXv} the chain lop is built from X and v only. For the other
 * chain types (XtwXv / XtXvy) an additional input w is passed, which is wrapped in a
 * row-block-wise {@code DataPartition} when its dimensions are unknown or its cell
 * count exceeds {@code DistributedCacheInput.PARTITION_SIZE}.
 *
 * @param chainType chain pattern; decides which nested inputs are used as X, v and w
 */
private void constructMRLopsMapMMChain(ChainType chainType) {
    Lop mapmult = null;
    if (chainType == ChainType.XtXv) {
        // v never needs partitioning because always single block
        Hop hX = getInput().get(0).getInput().get(0);
        Hop hv = getInput().get(1).getInput().get(1);
        // core matrix mult
        mapmult = new MapMultChain(hX.constructLops(), hv.constructLops(), getDataType(), getValueType(), ExecType.MR);
        mapmult.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(mapmult);
    } else {
        // ChainType.XtwXv / ChainType.XtXvy
        // v never needs partitioning because always single block
        // position of w and of the nested product holding v inside the second input
        int wix = (chainType == ChainType.XtwXv) ? 0 : 1;
        int vix = (chainType == ChainType.XtwXv) ? 1 : 0;
        Hop hX = getInput().get(0).getInput().get(0);
        Hop hw = getInput().get(1).getInput().get(wix);
        Hop hv = getInput().get(1).getInput().get(vix).getInput().get(1);
        double mestW = OptimizerUtils.estimateSize(hw.getDim1(), hw.getDim2());
        boolean needPart = !hw.dimsKnown() || hw.getDim1() * hw.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop X = hX.constructLops(), v = hv.constructLops(), w = null;
        if (needPart) {
            // requires partitioning; done in MR only if w exceeds the local memory budget
            w = new DataPartition(hw.constructLops(), DataType.MATRIX, ValueType.DOUBLE, (mestW > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            w.getOutputParameters().setDimensions(hw.getDim1(), hw.getDim2(), getRowsInBlock(), getColsInBlock(), hw.getNnz());
            setLineNumbers(w);
        } else {
            w = hw.constructLops();
        }
        // core matrix mult
        mapmult = new MapMultChain(X, v, w, chainType, getDataType(), getValueType(), ExecType.MR);
        mapmult.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(mapmult);
    }

    // post aggregation
    Group grp = new Group(mapmult, Group.OperationTypes.Sort, getDataType(), getValueType());
    grp.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    // attach source positions to the group lop as well, as done for every other lop created here
    setLineNumbers(grp);
    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
    agg1.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    // aggregation uses kahanSum
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
    setLineNumbers(agg1);
    setLops(agg1);
}
Aggregations