use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
the class OptimizerRuleBased method rewriteSetExportReplicationFactor.
// /////
// REWRITE set export replication factor
// /
/**
 * Sets the export replication factor on the parfor program block mapped to the given node.
 *
 * Increasing the export replication factor is beneficial for remote execution
 * because each task will read the full input data set. This only applies to
 * matrices that are created as in-memory objects before parfor execution.
 *
 * The factor is only set when the node's execution type equals the remote execution
 * type. It is then the minimum of {@code _N} and {@code _rnk}, additionally capped at
 * {@code MAX_REPLICATION_FACTOR_EXPORT}. In all cases the evaluated-plans counter is
 * incremented and the outcome is logged at debug level.
 *
 * NOTE: this rewrite requires 'set execution strategy' to be executed.
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param vars local variable map (not read by this rewrite)
 */
protected void rewriteSetExportReplicationFactor(OptNode n, LocalVariableMap vars) {
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    // the rewrite is only applicable to a remotely executed parfor
    final boolean apply = (n.getExecType() == getRemoteExecType());
    // -1 marks 'no replication factor chosen'
    int replication = -1;
    if (apply) {
        // bound by problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);
        // bound by internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_EXPORT);
        // modify the runtime plan
        pfpb.setExportReplicationFactor(replication);
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set export replication factor' - result=" + apply + ((apply) ? " (" + replication + ")" : ""));
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
the class OptimizerRuleBased method rFindRecursiveParFor.
/**
 * Recursively walks the plan tree rooted at {@code n} and collects the program blocks
 * of all parfor nodes that are visited in a recursive context.
 *
 * A subtree is in recursive context if the caller already was, or if the subtree root
 * is a function-call node that reports itself as recursive. Children are visited before
 * the node itself is considered as a candidate.
 *
 * @param n current node of the plan tree
 * @param cand output set receiving the parfor program blocks found in recursive context
 * @param recContext true if {@code n} is visited inside a recursive context
 */
protected void rFindRecursiveParFor(OptNode n, HashSet<ParForProgramBlock> cand, boolean recContext) {
    // descend first; a recursive function call switches its whole subtree into recursive context
    if (!n.isLeaf()) {
        for (OptNode child : n.getChilds()) {
            boolean entersRecursion = child.getNodeType() == NodeType.FUNCCALL && child.isRecursive();
            rFindRecursiveParFor(child, cand, entersRecursion || recContext);
        }
    }
    // only parfor nodes inside a recursive context are candidates
    if (!recContext || n.getNodeType() != NodeType.PARFOR) {
        return;
    }
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    cand.add((ParForProgramBlock) mapped[1]);
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
the class OptimizerRuleBased method rewriteInjectSparkRepartition.
// /////
// REWRITE inject spark repartition for zipmm
// /
/**
 * Determines the variables for which a spark repartition should be injected and registers
 * them on the parfor program block mapped to the given node.
 *
 * The rewrite is only considered in spark execution mode, for a parfor whose execution
 * type is CP, and if {@code _N} is greater than 1. Candidates are collected via
 * {@code rCollectZipmmPartitioningCandidates} and kept only if they are
 * (1) contained in the read-only parent variables of the parfor statement block, and
 * (2) bound to a {@code MatrixObject} whose estimated size exceeds the local memory budget.
 * The program block is only modified if at least one variable remains. In all cases the
 * evaluated-plans counter is incremented and the outcome is logged at debug level.
 *
 * @param n plan node of the root parfor
 * @param vars local variable map used to look up the candidate variables
 */
protected void rewriteInjectSparkRepartition(OptNode n, LocalVariableMap vars) {
    // get statement block and program block of root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock pfsb = (ParForStatementBlock) mapped[0];
    ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];
    ArrayList<String> ret = new ArrayList<>();
    if (OptimizerUtils.isSparkExecutionMode() // spark exec mode
        && n.getExecType() == ExecType.CP // local parfor
        && _N > 1) { // at least 2 iterations
        // collect candidates from zipmm spark instructions
        HashSet<String> cand = new HashSet<>();
        rCollectZipmmPartitioningCandidates(n, cand);
        HashSet<String> readOnly = new HashSet<>(pfsb.getReadOnlyParentVars());
        for (String name : cand) {
            // prune updated candidates
            if (!readOnly.contains(name)) {
                continue;
            }
            // prune non-matrix candidates
            Object dat = vars.get(name);
            if (!(dat instanceof MatrixObject)) {
                continue;
            }
            // prune small candidates
            MatrixObject mo = (MatrixObject) dat;
            double sp = OptimizerUtils.getSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getNnz());
            double size = OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), sp);
            if (size > OptimizerUtils.getLocalMemBudget()) {
                ret.add(name);
            }
        }
        // apply rewrite to parfor pb
        if (!ret.isEmpty()) {
            pfpb.setSparkRepartitionVariables(ret);
        }
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'inject spark input repartition' - result=" + ret.size() + " (" + ProgramConverter.serializeStringCollection(ret) + ")");
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
the class OptimizerRuleBased method rewriteRemoveRecursiveParFor.
// /////
// REWRITE remove recursive parfor
// /
/**
 * Removes parfor constructs that are located in a recursive context below the given node.
 *
 * First, all parfor program blocks in a recursive context are collected via
 * {@code rFindRecursiveParFor}. If any exist and the program block mapped to {@code n}
 * itself is among them, {@code rFindAndUnfoldRecursiveFunction} is invoked; any exception
 * raised while doing so is rethrown wrapped in a {@code DMLRuntimeException}. Afterwards
 * {@code removeRecursiveParFor} is applied and its result is reported as the removal count.
 * In all cases the evaluated-plans counter is incremented and the outcome is logged at
 * debug level.
 *
 * @param n plan node at which the search starts
 * @param vars local variable map, forwarded to the unfolding of recursive functions
 */
protected void rewriteRemoveRecursiveParFor(OptNode n, LocalVariableMap vars) {
    // find recursive parfor
    HashSet<ParForProgramBlock> recPBs = new HashSet<>();
    rFindRecursiveParFor(n, recPBs, false);
    // num removed parfor
    int count = 0;
    if (!recPBs.isEmpty()) {
        // unfold if necessary
        try {
            Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
            ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];
            if (recPBs.contains(pfpb)) {
                rFindAndUnfoldRecursiveFunction(n, pfpb, recPBs, vars);
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
        // remove recursive parfor (parfor to for)
        count = removeRecursiveParFor(n, recPBs);
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'remove recursive parfor' - result=" + recPBs.size() + "/" + count);
}
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
the class OptimizerRuleBased method rewriteSetExecutionStategy.
// /////
// REWRITE set execution strategy
// /
/**
 * Decides whether the parfor represented by the given node is executed locally (CP) or
 * remotely, and applies that decision to the node and to its mapped program block.
 *
 * Remote execution is only considered if parallel parfor is enabled and one of the
 * following holds: the body is CP-only and {@code M} or {@code M3} is at most {@code _rm},
 * or the body can be forced to CP-only and {@code M2} is at most {@code _rm}. If so,
 * remote execution is chosen when any of these optimization conditions is met (checked
 * in this order):
 * <ol>
 * <li>twice the estimated locally exploited parallelism is smaller than {@code _lk},
 * {@code _N} and {@code _rk},</li>
 * <li>the problem is large enough and remote parallelism is larger than local,</li>
 * <li>the body is not CP-only locally but can be CP-only remotely,</li>
 * <li>{@code flagLIX} is set,</li>
 * <li>the node's data partitioner is the remote one and the infrastructure is not in
 * local mode.</li>
 * </ol>
 * Otherwise the parfor is executed locally.
 *
 * @param n plan node of the parfor
 * @param M0 value forwarded to {@code isLargeProblem} (presumably a memory estimate; verify against caller)
 * @param M value checked against {@code _rm} for a CP-only body; also used to estimate local parallelism
 * @param M2 value checked against {@code _rm} when the body is forced to CP-only
 * @param M3 value checked against {@code _rm} for a CP-only body with partitioning
 * @param flagLIX true if remote execution is necessary for the LIX rewrite
 * @return true if the chosen mode is REMOTE_MR or REMOTE_SPARK and the body is not CP-only,
 *         i.e. recompilation is required
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    boolean cpOnly = n.isCPOnly();
    boolean cpOnlyPossible = cpOnly || isCPOnlyPossible(n, _rm);
    String datapartitioner = n.getParam(ParamType.DATA_PARTITIONER);
    ExecType remoteType = getRemoteExecType();
    PDataPartitioner remoteDP = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;

    // required conditions for remote parfor execution
    boolean remoteFeasible = ConfigurationManager.isParallelParFor() // allowed remote parfor execution
        && ((cpOnly && M <= _rm) // all inst already in cp and fit in remote mem
            || (cpOnly && M3 <= _rm) // all inst already in cp and fit partitioned in remote mem
            || (cpOnlyPossible && M2 <= _rm)); // all inst forced to cp fit in remote mem

    // deciding on the execution strategy (mr instructions in body or rm too small: stay local)
    boolean remote = false;
    if (remoteFeasible) {
        // at this point all required conditions for remote execution are given, now it is an opt decision
        // estimated local exploited par
        int cpk = (int) Math.min(_lk, Math.floor(_lm / M));
        remote =
            // incl conditional partitioning
            // (the factor of 2 is to account for hyper-threading and in order prevent too eager remote parfor)
            (2 * cpk < _lk && 2 * cpk < _N && 2 * cpk < _rk)
            // remote if problem is large enough and remote parallelism is larger than local
            || (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0))
            // remote if MR operations in local, but CP only in remote (less overall MR jobs)
            || (!cpOnly && cpOnlyPossible)
            // remote if necessary for LIX rewrite (LIX true iff cp only and rm valid)
            || flagLIX
            // remote if remote data partitioning, because data will be distributed on all nodes
            || (datapartitioner != null && datapartitioner.equals(remoteDP.toString()) && !InfrastructureAnalyzer.isLocalMode());
    }
    n.setExecType(remote ? remoteType : ExecType.CP);

    // actual programblock modification
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    PExecMode mode = n.getExecType().toParForExecMode();
    pfpb.setExecMode(mode);

    // decide if recompilation according to remote mem budget necessary
    boolean remoteMode = (mode == PExecMode.REMOTE_MR || mode == PExecMode.REMOTE_SPARK);
    boolean requiresRecompile = remoteMode && !cpOnly;
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + mode + " (recompile=" + requiresRecompile + ")");
    return requiresRecompile;
}
Aggregations