use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetFusedDataPartitioningExecution.
// /////
// REWRITE set fused data partitioning / execution
// /
/**
 * Rewrite 'set fused data partitioning and execution': if applicable, switches the
 * parfor to the fused remote exec mode (REMOTE_SPARK_DP in Spark execution mode,
 * REMOTE_MR_DP otherwise), resets its data partitioner to NONE and registers the
 * single partitioned matrix as colocated.
 *
 * The intended preconditions of this dedicated execution mode are:
 * - Only cp instructions in the parfor body
 * - Only one partitioned input
 * - number of iterations is equal to number of partitions (nrow/ncol)
 * - partitioned matrix access via plain iteration variables (no composed expressions),
 *   which ensures that each partition is read exactly once
 * - no left indexing (since by default static task partitioning)
 *
 * Of these, this method itself checks: the exec type of the node (MR with M below
 * _rm2, or SPARK), that the node's DATA_PARTITIONER parameter names the remote
 * partitioner, that exactly one matrix is partitioned, the result of
 * rIsAccessByIterationVariable, and that the matrix dimensions are compatible with
 * the number of iterations _N for the given partition format.
 *
 * Furthermore, it should be only chosen if we already decided for remote partitioning
 * and otherwise would create a large number of partition files.
 *
 * NOTE: We already respect the reducer memory budget for plan correctness. However,
 * we miss optimization potential if the reducer budget is larger than the mapper budget
 * (if we were not able to select REMOTE_MR as execution strategy wrt mapper budget)
 * TODO modify 'set exec strategy' and related rewrites for conditional data partitioning.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param M memory estimate that must be smaller than _rm2 (remote memory of reducers)
 *          for exec type MR; not consulted for exec type SPARK
 * @param flagLIX left-indexing flag; NOTE(review): not read anywhere in this method
 * @param partitionedMatrices map of data partition formats
 * @param vars local variable map
 */
protected void rewriteSetFusedDataPartitioningExecution(OptNode pn, double M, boolean flagLIX, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) {
    // assertion (warning of a corrupt optimizer decision); processing continues regardless
    if (pn.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Fused data partitioning and execution is only applicable for a ParFor node.");
    }

    boolean fused = false;
    String currentPartitioner = pn.getParam(ParamType.DATA_PARTITIONER);
    PDataPartitioner remotePartitioner = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    PExecMode fusedExecMode = OptimizerUtils.isSparkExecutionMode() ? PExecMode.REMOTE_SPARK_DP : PExecMode.REMOTE_MR_DP;

    // MR exec requires that M fits into the remote memory of reducers; SPARK exec has no such check
    boolean remoteExec = (pn.getExecType() == ExecType.MR && M < _rm2) || pn.getExecType() == ExecType.SPARK;

    // try to merge remote data partitioning and remote exec
    if (remoteExec
            && currentPartitioner != null
            && currentPartitioner.equals(remotePartitioner.toString()) // MR/SP partitioning
            && partitionedMatrices.size() == 1) { // only one partitioned matrix
        Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID());
        ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];

        // the single partitioned matrix, its format, and its runtime object
        String moVarname = partitionedMatrices.keySet().iterator().next();
        PartitionFormat moDpf = partitionedMatrices.get(moVarname);
        MatrixObject mo = (MatrixObject) vars.get(moVarname);

        if (rIsAccessByIterationVariable(pn, moVarname, pfpb.getIterVar())) {
            // matrix dimensions must line up with the number of iterations _N
            boolean sizesMatch =
                    (moDpf == PartitionFormat.ROW_WISE && mo.getNumRows() == _N)
                    || (moDpf == PartitionFormat.COLUMN_WISE && mo.getNumColumns() == _N)
                    || (moDpf._dpf == PDataPartitionFormat.ROW_BLOCK_WISE_N && mo.getNumRows() <= _N * moDpf._N)
                    || (moDpf._dpf == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && mo.getNumColumns() <= _N * moDpf._N);

            if (sizesMatch) {
                int k = (int) Math.min(_N, _rk2);

                // modify plan
                pn.addParam(ParamType.DATA_PARTITIONER, fusedExecMode.toString() + "(fused)");
                pn.setK(k);

                // modify runtime program: fused exec type, no separate partitioner
                pfpb.setExecMode(fusedExecMode);
                pfpb.setDataPartitioner(PDataPartitioner.NONE);
                pfpb.enableColocatedPartitionedMatrix(moVarname);
                pfpb.setDegreeOfParallelism(k);

                fused = true;
            }
        }
    }

    LOG.debug(getOptMode() + " OPT: rewrite 'set fused data partitioning and execution' - result=" + fused);
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteRemoveRecursiveParFor.
// /////
// REWRITE remove recursive parfor
// /
/**
 * Rewrite 'remove recursive parfor': collects the parfor program blocks found in a
 * recursive context below the given node, unfolds the recursive function if the
 * program block mapped to the node itself is among them, and then converts the
 * collected parfors (parfor to for).
 *
 * Increments the evaluated-plan counter and logs the number of collected parfors
 * together with the count returned by removeRecursiveParFor.
 *
 * @param n root node of the plan subtree to inspect
 * @param vars local variable map, passed through to the unfolding step
 * @throws DMLRuntimeException wrapping any exception raised while looking up the
 *         mapped program block or unfolding the recursive function
 */
protected void rewriteRemoveRecursiveParFor(OptNode n, LocalVariableMap vars) {
    // find recursive parfor
    HashSet<ParForProgramBlock> recursiveParfors = new HashSet<>();
    rFindRecursiveParFor(n, recursiveParfors, false);

    // num removed parfor
    int numRemoved = 0;

    if (!recursiveParfors.isEmpty()) {
        // unfold if necessary
        try {
            Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
            ParForProgramBlock rootParfor = (ParForProgramBlock) mapped[1];
            if (recursiveParfors.contains(rootParfor)) {
                rFindAndUnfoldRecursiveFunction(n, rootParfor, recursiveParfors, vars);
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }

        // remove recursive parfor (parfor to for)
        numRemoved = removeRecursiveParFor(n, recursiveParfors);
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'remove recursive parfor' - result=" + recursiveParfors.size() + "/" + numRemoved);
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rFindRecursiveParFor.
/**
 * Recursively walks the plan tree below {@code n} and adds to {@code cand} the
 * program block of every PARFOR node that is visited in a recursive context.
 * A subtree is in a recursive context once the walk has passed through a
 * FUNCCALL node that reports itself as recursive (or if the caller already
 * passed {@code recContext == true}).
 *
 * Children are processed before the node itself.
 *
 * @param n current plan node
 * @param cand output set receiving the candidate parfor program blocks
 * @param recContext true if {@code n} is already inside a recursive context
 */
protected void rFindRecursiveParFor(OptNode n, HashSet<ParForProgramBlock> cand, boolean recContext) {
    // recursive invocation
    if (!n.isLeaf()) {
        for (OptNode child : n.getChilds()) {
            // a recursive function call opens a recursive context for its subtree;
            // otherwise the child inherits the context of its parent
            boolean childContext = (child.getNodeType() == NodeType.FUNCCALL && child.isRecursive()) || recContext;
            rFindRecursiveParFor(child, cand, childContext);
        }
    }

    // only parfor nodes inside a recursive context are candidates
    if (!recContext || n.getNodeType() != NodeType.PARFOR) {
        return;
    }

    // add candidate program block
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    cand.add((ParForProgramBlock) mapped[1]);
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method setTaskPartitioner.
/**
 * Applies the given task partitioner to both the runtime parfor program block mapped
 * to {@code n} and the plan node itself.
 *
 * For FACTORING_CMAX, the value of {@code n.getMaxC(_N)} is additionally set as task
 * size on the program block and as TASK_SIZE parameter on the node, and JVM reuse is
 * disabled on the program block.
 *
 * Increments the evaluated-plan counter and logs the outcome.
 *
 * @param n plan node whose mapped program block is a parfor
 * @param partitioner task partitioner to set
 */
protected void setTaskPartitioner(OptNode n, PTaskPartitioner partitioner) {
    // modify rtprog
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForProgramBlock parfor = (ParForProgramBlock) mapped[1];
    parfor.setTaskPartitioner(partitioner);

    // modify plan
    n.addParam(ParamType.TASK_PARTITIONER, partitioner.toString());

    // handle specific case of LIX recompile
    final boolean flagLIX = (partitioner == PTaskPartitioner.FACTORING_CMAX);
    if (flagLIX) {
        // used as constraint
        long maxTaskSize = n.getMaxC(_N);
        parfor.setTaskSize(maxTaskSize);
        parfor.disableJVMReuse();
        n.addParam(ParamType.TASK_SIZE, String.valueOf(maxTaskSize));
    }

    _numEvaluatedPlans++;

    // the task size is only reported when it has been set above
    String taskSizeSuffix;
    if (flagLIX) {
        taskSizeSuffix = "," + n.getParam(ParamType.TASK_SIZE);
    } else {
        taskSizeSuffix = "";
    }
    LOG.debug(getOptMode() + " OPT: rewrite 'set task partitioner' - result=" + partitioner + taskSizeSuffix);
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetDataPartitioner.
// /////
// REWRITE set data partitioner
// /
/**
 * Rewrite 'set data partitioner': decides whether the parfor mapped to {@code n}
 * uses a remote data partitioner (REMOTE_SPARK in Spark execution mode, REMOTE_MR
 * otherwise) or NONE, and records that decision on both the runtime program block
 * and the plan node.
 *
 * Candidates are only searched in hybrid execution mode and if _N or _Nmax reaches
 * PROB_SIZE_THRESHOLD_PARTITIONING. Candidate variables are the read-only parent
 * variables of the parfor statement block whose determined partition format is
 * neither NONE nor BLOCK_WISE_M_N. If rFindDataPartitioningCandidates accepts them,
 * they are added to {@code partitionedMatrices}.
 *
 * Increments the evaluated-plan counter and logs the outcome.
 *
 * @param n plan node whose mapped program/statement blocks are a parfor
 * @param vars local variable map, passed through to the candidate search
 * @param partitionedMatrices output map receiving the accepted candidates and their formats
 * @param thetaM threshold passed through to rFindDataPartitioningCandidates
 * @return always {@code false}; the block-wise flag is never enabled by this method
 */
protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM) {
    // warning only; the casts below still assume parfor blocks
    if (n.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Data partitioner can only be set for a ParFor node.");
    }

    // preparations
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock pfsb = (ParForStatementBlock) mapped[0];
    ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];

    // search for candidates
    boolean apply = false;
    if (OptimizerUtils.isHybridExecutionMode() // only if we are allowed to recompile
            && (_N >= PROB_SIZE_THRESHOLD_PARTITIONING || _Nmax >= PROB_SIZE_THRESHOLD_PARTITIONING)) { // only if beneficial wrt problem size
        HashMap<String, PartitionFormat> candidates = new HashMap<>();
        for (String varName : pfsb.getReadOnlyParentVars()) {
            PartitionFormat format = pfsb.determineDataPartitionFormat(varName);
            boolean partitionable = format != PartitionFormat.NONE
                    && format._dpf != PDataPartitionFormat.BLOCK_WISE_M_N;
            if (partitionable) {
                candidates.put(varName, format);
            }
        }

        apply = rFindDataPartitioningCandidates(n, candidates, vars, thetaM);
        if (apply) {
            partitionedMatrices.putAll(candidates);
        }
    }

    // NOTE: since partitioning is only applied in case of MR index access, we assume a large
    // matrix and hence always apply REMOTE_MR (the benefit for large matrices outweighs
    // potentially unnecessary MR jobs for smaller matrices)
    PDataPartitioner remotePartitioner = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    PDataPartitioner chosen;
    if (apply) {
        chosen = remotePartitioner;
    } else {
        chosen = PDataPartitioner.NONE;
    }

    // modify rtprog
    pfpb.setDataPartitioner(chosen);
    // modify plan
    n.addParam(ParamType.DATA_PARTITIONER, chosen.toString());

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set data partitioner' - result=" + chosen.toString() + " (" + ProgramConverter.serializeStringCollection(partitionedMatrices.keySet()) + ")");

    // block-wise partitioning is never selected here
    return false;
}
Aggregations