Usage of org.apache.sysml.lops.Lop in the Apache incubator-systemml project: class QuaternaryOp, method constructSparkLopsWeightedCeMM.
/**
 * Constructs the Spark lops for weighted cross entropy (wcemm),
 * selecting between the map-side (broadcast) and reduce-side operator.
 *
 * NOTE: the common case for wcemm are factors U/V with a rank of 10s to 100s; the current runtime only
 * supports single block outer products (U/V rank &lt;= blocksize, i.e., 1000 by default); we enforce this
 * by applying the hop rewrite for Weighted Cross Entropy only if this constraint holds.
 *
 * Notes: Any broadcast needs to fit twice in local memory because we partition the input in cp,
 * and needs to fit once in executor broadcast memory. The 2GB broadcast constraint is no longer
 * required because the max_int byte buffer constraint has been fixed in Spark 1.4.
 *
 * @param wtype the weighted cross entropy operation type
 */
private void constructSparkLopsWeightedCeMM(WCeMMType wtype) {
    double broadcastBudget = SparkExecutionContext.getBroadcastMemoryBudget();
    double localBudget = OptimizerUtils.getLocalMemBudget();
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop eps = getInput().get(3);
    // operator selection, part 1: estimated in-memory sizes of factors U and V
    double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    // map-side variant requires both factors to fit once into the executor
    // broadcast budget and twice (cp partitioning) into the local budget
    boolean isMapWcemm = sizeU + sizeV < broadcastBudget
        && 2 * sizeU < localBudget && 2 * sizeV < localBudget;
    Lop wcemm;
    if (!FORCE_REPLICATION && isMapWcemm) {
        // map-side wcemm, always with broadcast of U and V
        wcemm = new WeightedCrossEntropy(X.constructLops(), U.constructLops(),
            V.constructLops(), eps.constructLops(), DataType.SCALAR,
            ValueType.DOUBLE, wtype, ExecType.SPARK);
    }
    else {
        // general case: reduce-side wcemm w/ or w/o broadcast
        // operator selection, part 2: decide which factors can be cached/broadcast
        boolean cacheU = !FORCE_REPLICATION
            && sizeU < broadcastBudget && 2 * sizeU < localBudget;
        boolean cacheV = !FORCE_REPLICATION && 2 * sizeV < localBudget
            && ((!cacheU && sizeV < broadcastBudget)
                || (cacheU && sizeU + sizeV < broadcastBudget));
        wcemm = new WeightedCrossEntropyR(X.constructLops(), U.constructLops(),
            V.constructLops(), eps.constructLops(), DataType.SCALAR,
            ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.SPARK);
    }
    setOutputDimensions(wcemm);
    setLineNumbers(wcemm);
    setLops(wcemm);
}
Usage of org.apache.sysml.lops.Lop in the Apache incubator-systemml project: class QuaternaryOp, method constructSparkLopsWeightedUMM.
/**
 * Constructs the Spark lops for weighted unary matrix multiplication (wumm),
 * selecting between the map-side (broadcast) and reduce-side operator.
 *
 * NOTE: the common case for wumm are factors U/V with a rank of 10s to 100s; the current runtime only
 * supports single block outer products (U/V rank &lt;= blocksize, i.e., 1000 by default); we enforce this
 * by applying the hop rewrite for Weighted UnaryMM only if this constraint holds.
 *
 * Notes: Any broadcast needs to fit twice in local memory because we partition the input in cp,
 * and needs to fit once in executor broadcast memory. The 2GB broadcast constraint is no longer
 * required because the max_int byte buffer constraint has been fixed in Spark 1.4.
 *
 * @param wtype the weighted unary matrix multiplication operation type
 */
private void constructSparkLopsWeightedUMM(WUMMType wtype) {
    // resolve the unary operation: either an explicit unary op, or a scalar
    // power/multiply encoded as POW2/MULTIPLY2
    Unary.OperationTypes uop = _uop != null ? HopsOpOp1LopsU.get(_uop)
        : _sop == OpOp2.POW ? Unary.OperationTypes.POW2 : Unary.OperationTypes.MULTIPLY2;
    double memBudgetExec = SparkExecutionContext.getBroadcastMemoryBudget();
    double memBudgetLocal = OptimizerUtils.getLocalMemBudget();
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    // operator selection, part 1: estimated in-memory sizes of factors U and V
    double m1Size = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    double m2Size = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    // renamed from isMapWsloss (copy-paste leftover) for consistency with the
    // sibling methods (e.g., isMapWcemm in constructSparkLopsWeightedCeMM)
    boolean isMapWumm = (m1Size + m2Size < memBudgetExec
        && 2 * m1Size < memBudgetLocal && 2 * m2Size < memBudgetLocal);
    if (!FORCE_REPLICATION && isMapWumm) {
        // map-side wumm, always with broadcast of U and V
        Lop wumm = new WeightedUnaryMM(X.constructLops(), U.constructLops(),
            V.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, uop, ExecType.SPARK);
        setOutputDimensions(wumm);
        setLineNumbers(wumm);
        setLops(wumm);
    }
    else { // general case
        // operator selection, part 2: decide which factors can be cached/broadcast
        boolean cacheU = !FORCE_REPLICATION
            && (m1Size < memBudgetExec && 2 * m1Size < memBudgetLocal);
        boolean cacheV = !FORCE_REPLICATION
            && ((!cacheU && m2Size < memBudgetExec)
                || (cacheU && m1Size + m2Size < memBudgetExec))
            && 2 * m2Size < memBudgetLocal;
        // reduce-side wumm w/ or w/o broadcast
        Lop wumm = new WeightedUnaryMMR(X.constructLops(), U.constructLops(),
            V.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, uop,
            cacheU, cacheV, ExecType.SPARK);
        setOutputDimensions(wumm);
        setLineNumbers(wumm);
        setLops(wumm);
    }
}
Usage of org.apache.sysml.lops.Lop in the Apache incubator-systemml project: class TernaryOp, method constructLopsTernaryDefault.
/**
 * Constructs the default ternary (e.g., plus-mult) lop for CP/Spark/GPU,
 * or the MR variant with per-input sort groups for matrix inputs.
 */
private void constructLopsTernaryDefault() {
    ExecType et = optFindExecType();
    if (getInput().stream().allMatch(h -> h.getDataType().isScalar()))
        et = ExecType.CP; // always CP for pure scalar operations
    Ternary plusmult = null;
    if (et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU) {
        plusmult = new Ternary(HopsOpOp3Lops.get(_op),
            getInput().get(0).constructLops(), getInput().get(1).constructLops(),
            getInput().get(2).constructLops(), getDataType(), getValueType(), et);
    }
    else { // MR: matrix inputs require a sort group before the ternary operator
        plusmult = new Ternary(HopsOpOp3Lops.get(_op),
            constructGroupedInputLop(getInput().get(0)),
            constructGroupedInputLop(getInput().get(1)),
            constructGroupedInputLop(getInput().get(2)),
            getDataType(), getValueType(), et);
    }
    setOutputDimensions(plusmult);
    setLineNumbers(plusmult);
    setLops(plusmult);
}

/**
 * Constructs the lop of the given input and, for matrix inputs, wraps it
 * in a sort Group (required by the MR ternary operator); scalar inputs
 * pass through unchanged.
 *
 * @param in the input hop
 * @return the (possibly group-wrapped) input lop
 */
private Lop constructGroupedInputLop(Hop in) {
    Lop lop = in.constructLops();
    if (in.getDataType().isMatrix()) {
        lop = new Group(lop, Group.OperationTypes.Sort, getDataType(), getValueType());
        setLineNumbers(lop);
        setOutputDimensions(lop);
    }
    return lop;
}
Usage of org.apache.sysml.lops.Lop in the Apache incubator-systemml project: class UnaryOp, method constructLopsSparkCumulativeUnary.
/**
 * Constructs the Spark lop chain for a cumulative unary aggregate
 * (e.g., cumsum): recursive distributed pre-aggregation until the partial
 * aggregates fit into the CP memory budget, an in-memory cumulative
 * aggregate over those partials, and a matching chain of distributed
 * offset-apply operations back down the levels.
 *
 * @return the final lop producing the cumulative aggregate result
 */
private Lop constructLopsSparkCumulativeUnary() {
    Hop input = getInput().get(0);
    long rlen = input.getDim1();
    long clen = input.getDim2();
    long brlen = input.getRowsInBlock();
    long bclen = input.getColsInBlock();
    // force at least one distributed level if dims are unknown or spark is forced
    boolean force = !dimsKnown() || _etypeForced == ExecType.SPARK;
    OperationTypes aggtype = getCumulativeAggType();
    Lop X = input.constructLops();
    Lop TEMP = X;
    // DATA keeps the input of each pre-aggregation level so the offset
    // pass below can re-apply the aggregates against the original data
    ArrayList<Lop> DATA = new ArrayList<>();
    int level = 0;
    // recursive preaggregation until aggregates fit into CP memory budget
    while (((2 * OptimizerUtils.estimateSize(TEMP.getOutputParameters().getNumRows(), clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget() && TEMP.getOutputParameters().getNumRows() > 1) || force) {
        DATA.add(TEMP);
        // preaggregation per block (for spark, the CumulativePartialAggregate subsumes both
        // the preaggregation and subsequent block aggregation)
        long rlenAgg = (long) Math.ceil((double) TEMP.getOutputParameters().getNumRows() / brlen);
        Lop preagg = new CumulativePartialAggregate(TEMP, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.SPARK);
        preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(preagg);
        TEMP = preagg;
        level++;
        // in case of unknowns, generate one level
        force = false;
    }
    // in-memory cum sum (of partial aggregates); skipped for the trivial
    // single-row case where the partial aggregate is already the result
    if (TEMP.getOutputParameters().getNumRows() != 1) {
        int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        Unary unary1 = new Unary(TEMP, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
        unary1.getOutputParameters().setDimensions(TEMP.getOutputParameters().getNumRows(), clen, brlen, bclen, -1);
        setLineNumbers(unary1);
        TEMP = unary1;
    }
    // split, group and mr cumsum: walk the levels back down, applying the
    // aggregated offsets of each level against that level's original data
    while (level-- > 0) {
        // (for spark, the CumulativeOffsetBinary subsumes both the split aggregate and
        // the subsequent offset binary apply of split aggregates against the original data)
        double initValue = getCumulativeInitValue();
        CumulativeOffsetBinary binary = new CumulativeOffsetBinary(DATA.get(level), TEMP, DataType.MATRIX, ValueType.DOUBLE, initValue, aggtype, ExecType.SPARK);
        binary.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(binary);
        TEMP = binary;
    }
    return TEMP;
}
Usage of org.apache.sysml.lops.Lop in the Apache incubator-systemml project: class UnaryOp, method constructLops.
/**
 * Constructs (or reuses) the lops for this unary operation. Scalar outputs
 * and scalar-to-matrix/frame casts are handled in CP (with special cases
 * for IQM and MEDIAN); matrix outputs dispatch to either the cumulative
 * aggregate construction (for MR/Spark) or a plain Unary lop.
 *
 * @return the constructed (or cached) lop for this hop
 */
@Override
public Lop constructLops() {
    // reuse existing lop
    if (getLops() != null)
        return getLops();
    try {
        Hop input = getInput().get(0);
        if (// value type casts or matrix to scalar
        getDataType() == DataType.SCALAR || (_op == OpOp1.CAST_AS_MATRIX && getInput().get(0).getDataType() == DataType.SCALAR) || (_op == OpOp1.CAST_AS_FRAME && getInput().get(0).getDataType() == DataType.SCALAR)) {
            if (// special handling IQM (inter-quartile mean)
            _op == Hop.OpOp1.IQM) {
                Lop iqmLop = constructLopsIQM();
                setLops(iqmLop);
            } else if (_op == Hop.OpOp1.MEDIAN) {
                Lop medianLop = constructLopsMedian();
                setLops(medianLop);
            } else // general case SCALAR/CAST (always in CP)
            {
                UnaryCP.OperationTypes optype = HopsOpOp1LopsUS.get(_op);
                if (optype == null)
                    throw new HopsException("Unknown UnaryCP lop type for UnaryOp operation type '" + _op + "'");
                UnaryCP unary1 = new UnaryCP(input.constructLops(), optype, getDataType(), getValueType());
                setOutputDimensions(unary1);
                setLineNumbers(unary1);
                setLops(unary1);
            }
        } else // general case MATRIX
        {
            ExecType et = optFindExecType();
            // special handling cumsum/cumprod/cummin/cummax
            if (isCumulativeUnaryOperation() && !(et == ExecType.CP || et == ExecType.GPU)) {
                // TODO additional physical operation if offsets fit in memory
                Lop cumsumLop = null;
                if (et == ExecType.MR)
                    cumsumLop = constructLopsMRCumulativeUnary();
                else
                    cumsumLop = constructLopsSparkCumulativeUnary();
                setLops(cumsumLop);
            } else // default unary
            {
                // multi-threading only for cumulative ops executed in CP
                int k = isCumulativeUnaryOperation() ? OptimizerUtils.getConstrainedNumThreads(_maxNumThreads) : 1;
                Unary unary1 = new Unary(input.constructLops(), HopsOpOp1LopsU.get(_op), getDataType(), getValueType(), et, k);
                setOutputDimensions(unary1);
                setLineNumbers(unary1);
                setLops(unary1);
            }
        }
    } catch (Exception e) {
        throw new HopsException(this.printErrorLocation() + "error constructing Lops for UnaryOp Hop -- \n ", e);
    }
    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();
    return getLops();
}
Aggregations