use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetExecutionStategy.
// /////
// REWRITE set execution strategy
// /
/**
 * Chooses between local ({@code ExecType.CP}) and remote execution for the given
 * parfor node, records the choice on the node and on its mapped
 * {@code ParForProgramBlock}, and bumps the evaluated-plan counter.
 *
 * @param n the parfor node to decide for
 * @param M0 estimate forwarded to {@code isLargeProblem}
 * @param M memory estimate checked against the remote budget {@code _rm}; also used
 *          with the local budget {@code _lm} to estimate exploitable local parallelism
 * @param M2 memory estimate checked against {@code _rm} when the body could be forced to CP
 * @param M3 memory estimate checked against {@code _rm} for the partitioned case
 * @param flagLIX if true (and remote execution is admissible), remote execution is selected
 * @return true iff the resulting mode is {@code REMOTE_MR} or {@code REMOTE_SPARK} and the
 *         node was not CP-only when this method was entered
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    final boolean cpOnly = n.isCPOnly();
    final boolean cpOnlyFeasible = cpOnly || isCPOnlyPossible(n, _rm);
    final String partitionerName = n.getParam(ParamType.DATA_PARTITIONER);
    final ExecType remoteType = getRemoteExecType();
    final PDataPartitioner remotePartitioner = OptimizerUtils.isSparkExecutionMode()
        ? PDataPartitioner.REMOTE_SPARK
        : PDataPartitioner.REMOTE_MR;

    // Hard requirements: parallel parfor enabled, and the body (as is, partitioned,
    // or forced to CP) fits into the remote memory budget.
    final boolean remoteAdmissible = ConfigurationManager.isParallelParFor()
        && ((cpOnly && (M <= _rm || M3 <= _rm)) || (cpOnlyFeasible && M2 <= _rm));

    // Local execution is the fallback whenever no remote criterion fires.
    ExecType decision = ExecType.CP;
    if (remoteAdmissible) {
        // Estimated local parallelism, doubled to account for hyper-threading and to
        // avoid switching to remote parfor too eagerly.
        final int localPar = (int) Math.min(_lk, Math.floor(_lm / M));
        final int doubledLocalPar = 2 * localPar;

        // Any one of the criteria below selects remote execution; they are evaluated
        // left to right with short-circuiting.
        final boolean preferRemote =
            // local parallelism is the bottleneck
            (doubledLocalPar < _lk && doubledLocalPar < _N && doubledLocalPar < _rk)
            // large problem and remote parallelism exceeds local parallelism
            || (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0))
            // body is not CP-only locally but can be CP-only remotely
            || (!cpOnly && cpOnlyFeasible)
            // required by the LIX rewrite
            || flagLIX
            // remote data partitioning outside of local mode
            || (partitionerName != null
                && partitionerName.equals(remotePartitioner.toString())
                && !InfrastructureAnalyzer.isLocalMode());

        if (preferRemote) {
            decision = remoteType;
        }
    }
    n.setExecType(decision);

    // Propagate the decision to the runtime program block.
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    final PExecMode mode = n.getExecType().toParForExecMode();
    pfpb.setExecMode(mode);

    // Recompilation is only needed when going remote with a body that was not CP-only.
    final boolean remoteMode = (mode == PExecMode.REMOTE_MR || mode == PExecMode.REMOTE_SPARK);
    final boolean requiresRecompile = remoteMode && !cpOnly;

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + mode + " (recompile=" + requiresRecompile + ")");
    return requiresRecompile;
}
use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project incubator-systemml by apache.
the class OptimizerRuleBased method getMaxCPOnlyBudget.
/**
 * Computes the largest CP memory estimate among the leaf operations below the given
 * node by walking its children recursively.
 *
 * <p>A leaf contributes nothing (0) when its exec type equals the remote exec type,
 * when its mapped {@link Hop} is forced to MR or SPARK, or when its estimate is at
 * least {@code OptimizerUtils.DEFAULT_SIZE} (such worst-case estimates are
 * optimistically ignored).
 *
 * @param n the parfor {@link OptNode} (or any node of its subtree during recursion)
 * @return the maximum considered memory estimate for CP execution, or 0 if none applies
 */
protected double getMaxCPOnlyBudget(OptNode n) {
    // Inner node: take the maximum over all children.
    if (!n.isLeaf()) {
        double maxOverChildren = 0;
        for (OptNode child : n.getChilds()) {
            maxOverChildren = Math.max(maxOverChildren, getMaxCPOnlyBudget(child));
        }
        return maxOverChildren;
    }

    // Leaf that already runs with the remote exec type: not counted.
    if (n.getExecType() == getRemoteExecType()) {
        return 0;
    }

    // Leaf forced to MR/SPARK (e.g., -exec=hadoop): not counted.
    Hop hop = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
    if (hop.getForcedExecType() == LopProperties.ExecType.MR
        || hop.getForcedExecType() == LopProperties.ExecType.SPARK) {
        return 0;
    }

    double cpMem = _cost.getLeafNodeEstimate(TestMeasure.MEMORY_USAGE, n, LopProperties.ExecType.CP);
    // Worst-case (default-size) estimates are optimistically ignored; the comparison
    // is kept as '>=' on purpose so the result matches for every possible estimate.
    return (cpMem >= OptimizerUtils.DEFAULT_SIZE) ? 0 : Math.max(0d, cpMem);
}
use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project systemml by apache.
the class OptimizerRuleBased method rewriteSetDegreeOfParallelism.
// /////
// REWRITE set degree of parallelism
// /
/**
 * Sets the degree of parallelism on the given parfor node and its mapped
 * {@code ParForProgramBlock}, then hands the remaining parallelism to
 * {@code rAssignRemainingParallelism}. Also bumps the evaluated-plan counter.
 *
 * <p>For {@code ExecType.CP} the degree is bounded by the configured local maximum,
 * by the local memory budget divided by {@code M}, and by the problem size
 * {@code _N}; with {@code DMLScript.USE_ACCELERATOR} it may be replaced by the GPU
 * device count. For any other exec type the degree is {@code _rnk} (nested) or the
 * smaller of {@code _N} and {@code _rk}, and the per-task parallelism is bounded by
 * the remote memory budget divided by {@code M}.
 *
 * @param n the parfor node
 * @param M memory estimate used as divisor of the local/remote memory budgets
 * @param flagNested whether the remote branch should use the nested degree {@code _rnk}
 */
protected void rewriteSetDegreeOfParallelism(OptNode n, double M, boolean flagNested) {
    final ExecType execType = n.getExecType();
    final long nodeId = n.getID();
    final ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(nodeId)[1];

    if (execType == ExecType.CP) {
        // Configured upper bound on local parallelism (1 if parallel parfor is disabled).
        int localMax = 1;
        if (ConfigurationManager.isParallelParFor()) {
            localMax = n.isCPOnly() ? _lkmaxCP : _lkmaxMR;
        }

        // Local memory constraint; for spark with non-CP-only bodies only half of the
        // budget is used in order to prevent unnecessary guarded collect.
        double localBudget = _lm;
        if (OptimizerUtils.isSparkExecutionMode() && !n.isCPOnly()) {
            localBudget = _lm / 2;
        }
        localMax = Math.max(1, Math.min(localMax, (int) Math.floor(localBudget / M)));

        // Never use more workers than there are iterations.
        int degree = (_N < localMax) ? (int) _N : localMax;

        // FIXME rework for nested parfor parallelism and body w/o gpu ops
        if (DMLScript.USE_ACCELERATOR) {
            long perGPUBudget = GPUContextPool.initialGPUMemBudget();
            double maxMemUsage = getMaxCPOnlyBudget(n);
            if (maxMemUsage < perGPUBudget) {
                degree = Math.min(GPUContextPool.getDeviceCount(), (int) _N);
                LOG.debug("Setting degree of parallelism + [" + degree + "] for GPU; per GPU budget :[" + perGPUBudget + "], parfor budget :[" + maxMemUsage + "], max parallelism per GPU : [" + degree + "]");
            }
        }

        pfpb.setDegreeOfParallelism(degree);
        n.setK(degree);

        // Distribute whatever parallelism is left to nested parfors and operations.
        rAssignRemainingParallelism(n,
            getRemainingParallelismParFor(localMax, degree),
            getRemainingParallelismOps(_lkmaxCP, degree));
    } else {
        // ExecType.MR / ExecType.SPARK
        // Nested: _rnk (guaranteed <= _N, see nested); default: bounded by problem size.
        final int remoteDegree = flagNested ? _rnk : ((_N < _rk) ? (int) _N : _rk);
        pfpb.setDegreeOfParallelism(remoteDegree);
        n.setK(remoteDegree);

        // Parallelism per node (CP only inside), limited by the remote memory budget
        // and never below 1.
        int perTaskMax = _rkmax / remoteDegree;
        perTaskMax = Math.max(1, Math.min(perTaskMax, (int) Math.floor(_rm / M)));

        // Disable nested parallelism, if required.
        if (!ALLOW_REMOTE_NESTED_PARALLELISM) {
            perTaskMax = 1;
        }

        // Distribute remaining parallelism and recompile parallel instructions.
        rAssignRemainingParallelism(n, perTaskMax, 1);
    }

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set degree of parallelism' - result=(see EXPLAIN)");
}
use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project systemml by apache.
the class OptimizerRuleBased method isCPOnlyPossible.
/**
 * Checks recursively whether the subtree rooted at the given node could run entirely
 * in CP under the given memory budget.
 *
 * <p>A node qualifies if its exec type is already {@code ExecType.CP}. A leaf with
 * the remote exec type also qualifies if its mapped {@link Hop} is not forced to
 * MR/SPARK, has valid CP dimensions and size, and its CP memory estimate does not
 * exceed {@code memBudget}. An inner node additionally requires every child to qualify.
 *
 * @param n the node to check
 * @param memBudget the memory budget each converted leaf has to fit into
 * @return true if the node and all of its descendants qualify
 */
protected boolean isCPOnlyPossible(OptNode n, double memBudget) {
    final ExecType nodeType = n.getExecType();
    boolean feasible = (nodeType == ExecType.CP);

    // Remote leaf: may still be convertible to CP.
    if (n.isLeaf() && nodeType == getRemoteExecType()) {
        Hop hop = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
        boolean convertible =
            // not forced to a distributed backend (e.g., -exec=hadoop)
            hop.getForcedExecType() != LopProperties.ExecType.MR
            && hop.getForcedExecType() != LopProperties.ExecType.SPARK
            // integer dims
            && hop.hasValidCPDimsAndSize();
        if (convertible) {
            double cpMem = _cost.getLeafNodeEstimate(TestMeasure.MEMORY_USAGE, n, LopProperties.ExecType.CP);
            feasible |= (cpMem <= memBudget);
        }
    }

    // Inner node: all children must qualify; stop at the first failure.
    if (!n.isLeaf()) {
        for (OptNode child : n.getChilds()) {
            if (!feasible || !isCPOnlyPossible(child, memBudget)) {
                feasible = false;
                break;
            }
        }
    }
    return feasible;
}
use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project systemml by apache.
the class OptimizerRuleBased method rewriteSetExecutionStategy.
// /////
// REWRITE set execution strategy
// /
/**
 * Chooses between local ({@code ExecType.CP}) and remote execution for the given
 * parfor node, records the choice on the node and on its mapped
 * {@code ParForProgramBlock}, and bumps the evaluated-plan counter.
 *
 * @param n the parfor node to decide for
 * @param M0 estimate forwarded to {@code isLargeProblem}
 * @param M memory estimate checked against the remote budget {@code _rm}; also used
 *          with the local budget {@code _lm} to estimate exploitable local parallelism
 * @param M2 memory estimate checked against {@code _rm} when the body could be forced to CP
 * @param M3 memory estimate checked against {@code _rm} for the partitioned case
 * @param flagLIX if true (and remote execution is admissible), remote execution is selected
 * @return true iff the resulting mode is {@code REMOTE_MR} or {@code REMOTE_SPARK} and the
 *         node was not CP-only when this method was entered
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    final boolean cpOnly = n.isCPOnly();
    final boolean cpOnlyFeasible = cpOnly || isCPOnlyPossible(n, _rm);
    final String partitionerName = n.getParam(ParamType.DATA_PARTITIONER);
    final ExecType remoteType = getRemoteExecType();
    final PDataPartitioner remotePartitioner = OptimizerUtils.isSparkExecutionMode()
        ? PDataPartitioner.REMOTE_SPARK
        : PDataPartitioner.REMOTE_MR;

    // Hard requirements: parallel parfor enabled, and the body (as is, partitioned,
    // or forced to CP) fits into the remote memory budget.
    final boolean remoteAdmissible = ConfigurationManager.isParallelParFor()
        && ((cpOnly && (M <= _rm || M3 <= _rm)) || (cpOnlyFeasible && M2 <= _rm));

    // Local execution is the fallback whenever no remote criterion fires.
    ExecType decision = ExecType.CP;
    if (remoteAdmissible) {
        // Estimated local parallelism, doubled to account for hyper-threading and to
        // avoid switching to remote parfor too eagerly.
        final int localPar = (int) Math.min(_lk, Math.floor(_lm / M));
        final int doubledLocalPar = 2 * localPar;

        // Any one of the criteria below selects remote execution; they are evaluated
        // left to right with short-circuiting.
        final boolean preferRemote =
            // local parallelism is the bottleneck
            (doubledLocalPar < _lk && doubledLocalPar < _N && doubledLocalPar < _rk)
            // large problem and remote parallelism exceeds local parallelism
            || (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0))
            // body is not CP-only locally but can be CP-only remotely
            || (!cpOnly && cpOnlyFeasible)
            // required by the LIX rewrite
            || flagLIX
            // remote data partitioning outside of local mode
            || (partitionerName != null
                && partitionerName.equals(remotePartitioner.toString())
                && !InfrastructureAnalyzer.isLocalMode());

        if (preferRemote) {
            decision = remoteType;
        }
    }
    n.setExecType(decision);

    // Propagate the decision to the runtime program block.
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    final PExecMode mode = n.getExecType().toParForExecMode();
    pfpb.setExecMode(mode);

    // Recompilation is only needed when going remote with a body that was not CP-only.
    final boolean remoteMode = (mode == PExecMode.REMOTE_MR || mode == PExecMode.REMOTE_SPARK);
    final boolean requiresRecompile = remoteMode && !cpOnly;

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + mode + " (recompile=" + requiresRecompile + ")");
    return requiresRecompile;
}
Aggregations