Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
The class OptimizerRuleBased, method rewriteSetDataPartitioner.
///////
// REWRITE set data partitioner
///
protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM) {
if (n.getNodeType() != NodeType.PARFOR)
LOG.warn(getOptMode() + " OPT: Data partitioner can only be set for a ParFor node.");
boolean blockwise = false;
// preparations
long id = n.getID();
Object[] o = OptTreeConverter.getAbstractPlanMapping().getMappedProg(id);
ParForStatementBlock pfsb = (ParForStatementBlock) o[0];
ParForProgramBlock pfpb = (ParForProgramBlock) o[1];
// search for candidates
boolean apply = false;
if (OptimizerUtils.isHybridExecutionMode() // only if we are allowed to recompile
  && (_N >= PROB_SIZE_THRESHOLD_PARTITIONING || _Nmax >= PROB_SIZE_THRESHOLD_PARTITIONING)) { // only if beneficial wrt problem size
HashMap<String, PartitionFormat> cand2 = new HashMap<>();
for (String c : pfsb.getReadOnlyParentVars()) {
PartitionFormat dpf = pfsb.determineDataPartitionFormat(c);
if (dpf != PartitionFormat.NONE && dpf._dpf != PDataPartitionFormat.BLOCK_WISE_M_N) {
cand2.put(c, dpf);
}
}
apply = rFindDataPartitioningCandidates(n, cand2, vars, thetaM);
if (apply)
partitionedMatrices.putAll(cand2);
}
PDataPartitioner REMOTE = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
PDataPartitioner pdp = (apply) ? REMOTE : PDataPartitioner.NONE;
// NOTE: since partitioning is only applied in case of MR index access, we assume a large
// matrix and hence always apply REMOTE_MR (the benefit for large matrices outweighs
// potentially unnecessary MR jobs for smaller matrices)
// modify rtprog
pfpb.setDataPartitioner(pdp);
// modify plan
n.addParam(ParamType.DATA_PARTITIONER, pdp.toString());
_numEvaluatedPlans++;
LOG.debug(getOptMode() + " OPT: rewrite 'set data partitioner' - result=" + pdp.toString() + " (" + ProgramConverter.serializeStringCollection(partitionedMatrices.keySet()) + ")");
return blockwise;
}
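For intuition, the rewrite above boils down to: partition remotely only if the problem size crosses the threshold and at least one read-only candidate with a supported partition format was found, choosing REMOTE_SPARK or REMOTE_MR by execution mode. A minimal, self-contained sketch of that decision logic (class and method names such as DataPartitionerSketch and decide are illustrative assumptions, not SystemML API):

import java.util.HashMap;
import java.util.Map;

/** Hypothetical, simplified sketch of the partitioner decision above (not SystemML API). */
public class DataPartitionerSketch {
    enum Partitioner { NONE, REMOTE_MR, REMOTE_SPARK }

    /** Partition only when the problem is large enough and at least one candidate exists. */
    static Partitioner decide(long numIterations, long sizeThreshold,
                              Map<String, String> candidates, boolean sparkMode) {
        boolean apply = numIterations >= sizeThreshold && !candidates.isEmpty();
        if (!apply)
            return Partitioner.NONE;
        return sparkMode ? Partitioner.REMOTE_SPARK : Partitioner.REMOTE_MR;
    }

    public static void main(String[] args) {
        Map<String, String> cand = new HashMap<>();
        cand.put("X", "ROW_WISE"); // hypothetical candidate: matrix X, row-wise format
        System.out.println(decide(10_000, 1_000, cand, true)); // REMOTE_SPARK
    }
}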
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
The class OptimizerRuleBased, method rewriteSetDegreeOfParallelism.
///////
// REWRITE set degree of parallelism
///
protected void rewriteSetDegreeOfParallelism(OptNode n, double M, boolean flagNested) {
ExecType type = n.getExecType();
long id = n.getID();
// special handling for different exec models (CP, MR, MR nested)
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
if (type == ExecType.CP) {
// determine local max parallelism constraint
int kMax = ConfigurationManager.isParallelParFor() ? (n.isCPOnly() ? _lkmaxCP : _lkmaxMR) : 1;
// ensure local memory constraint (for spark more conservative in order to
// prevent unnecessary guarded collect)
double mem = (OptimizerUtils.isSparkExecutionMode() && !n.isCPOnly()) ? _lm / 2 : _lm;
kMax = Math.min(kMax, (int) Math.floor(mem / M));
kMax = Math.max(kMax, 1);
// constrain max parfor parallelism by problem size
int parforK = (int) ((_N < kMax) ? _N : kMax);
// FIXME rework for nested parfor parallelism and body w/o gpu ops
if (DMLScript.USE_ACCELERATOR) {
long perGPUBudget = GPUContextPool.initialGPUMemBudget();
double maxMemUsage = getMaxCPOnlyBudget(n);
if (maxMemUsage < perGPUBudget) {
parforK = GPUContextPool.getDeviceCount();
parforK = Math.min(parforK, (int) _N);
LOG.debug("Setting degree of parallelism + [" + parforK + "] for GPU; per GPU budget :[" + perGPUBudget + "], parfor budget :[" + maxMemUsage + "], max parallelism per GPU : [" + parforK + "]");
}
}
// set parfor degree of parallelism
pfpb.setDegreeOfParallelism(parforK);
n.setK(parforK);
// distribute remaining parallelism
int remainParforK = getRemainingParallelismParFor(kMax, parforK);
int remainOpsK = getRemainingParallelismOps(_lkmaxCP, parforK);
rAssignRemainingParallelism(n, remainParforK, remainOpsK);
} else // ExecType.MR/ExecType.SPARK
{
int kMax = -1;
if (flagNested) {
// determine remote max parallelism constraint
// guaranteed <= _N (see nested)
pfpb.setDegreeOfParallelism(_rnk);
n.setK(_rnk);
// per node (CP only inside)
kMax = _rkmax / _rnk;
} else // not nested (default)
{
// determine remote max parallelism constraint
int tmpK = (int) ((_N < _rk) ? _N : _rk);
pfpb.setDegreeOfParallelism(tmpK);
n.setK(tmpK);
// per node (CP only inside)
kMax = _rkmax / tmpK;
}
// ensure remote memory constraint
// guaranteed >= 1 (see exec strategy)
kMax = Math.min(kMax, (int) Math.floor(_rm / M));
if (kMax < 1)
kMax = 1;
// disable nested parallelism, if required
if (!ALLOW_REMOTE_NESTED_PARALLELISM)
kMax = 1;
// distribute remaining parallelism and recompile parallel instructions
rAssignRemainingParallelism(n, kMax, 1);
}
_numEvaluatedPlans++;
LOG.debug(getOptMode() + " OPT: rewrite 'set degree of parallelism' - result=(see EXPLAIN)");
}
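The CP branch above is essentially bounded-minimum arithmetic: the configured parallelism is capped by the memory budget divided by the per-worker estimate M, and then by the number of iterations _N. A hedged, standalone sketch of that arithmetic (the names and byte-based units are assumptions, not SystemML API):

/** Hypothetical sketch of the local (CP) degree-of-parallelism arithmetic above (not SystemML API). */
public class DegreeOfParallelismSketch {

    /**
     * @param kMaxConfigured    configured max parallelism (e.g., number of local cores)
     * @param memBudgetBytes    local memory budget available to the parfor body
     * @param memPerWorkerBytes estimated memory per parallel worker (M in the rewrite)
     * @param numIterations     number of parfor iterations (_N in the rewrite)
     */
    static int localParforK(int kMaxConfigured, double memBudgetBytes,
                            double memPerWorkerBytes, long numIterations) {
        // memory constraint: at most floor(budget / per-worker estimate) workers
        int kMax = Math.min(kMaxConfigured, (int) Math.floor(memBudgetBytes / memPerWorkerBytes));
        kMax = Math.max(kMax, 1); // always run at least one worker
        // problem-size constraint: never more workers than iterations
        return (int) Math.min(numIterations, kMax);
    }

    public static void main(String[] args) {
        // 16 cores, 8 GB budget, 1 GB per worker, 100 iterations -> 8 workers
        System.out.println(localParforK(16, 8e9, 1e9, 100));
    }
}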
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
The class OptimizerRuleBased, method rAssignRemainingParallelism.
protected void rAssignRemainingParallelism(OptNode n, int parforK, int opsK) {
ArrayList<OptNode> childs = n.getChilds();
if (childs != null) {
boolean recompileSB = false;
for (OptNode c : childs) {
if (c.getNodeType() == NodeType.PARFOR) {
// constrain max parfor parallelism by problem size
int tmpN = Integer.parseInt(c.getParam(ParamType.NUM_ITERATIONS));
int tmpK = (tmpN < parforK) ? tmpN : parforK;
// set parfor degree of parallelism
long id = c.getID();
c.setK(tmpK);
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
pfpb.setDegreeOfParallelism(tmpK);
// distribute remaining parallelism
int remainParforK = getRemainingParallelismParFor(parforK, tmpK);
int remainOpsK = getRemainingParallelismOps(opsK, tmpK);
rAssignRemainingParallelism(c, remainParforK, remainOpsK);
} else if (c.getNodeType() == NodeType.HOP) {
// set degree of parallelism for multi-threaded leaf nodes
Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(c.getID());
if (ConfigurationManager.isParallelMatrixOperations()
  && h instanceof MultiThreadedHop // abop, datagenop, qop, paramop
  && !(h instanceof ParameterizedBuiltinOp && !HopRewriteUtils.isValidOp(((ParameterizedBuiltinOp) h).getOp(), ParamBuiltinOp.GROUPEDAGG, ParamBuiltinOp.REXPAND)) // only paramop-grpagg
  && !(h instanceof UnaryOp && !((UnaryOp) h).isCumulativeUnaryOperation()) // only unaryop-cumulativeagg
  && !(h instanceof ReorgOp && ((ReorgOp) h).getOp() != ReOrgOp.TRANSPOSE)) { // only reorgop-transpose
MultiThreadedHop mhop = (MultiThreadedHop) h;
// set max constraint in hop
mhop.setMaxNumThreads(opsK);
// set optnode k (for explain)
c.setK(opsK);
// need to recompile SB, if changed constraint
recompileSB = true;
} else if (h instanceof MultiThreadedHop) { // for all other multi-threaded hops set k=1 to simplify debugging
MultiThreadedHop mhop = (MultiThreadedHop) h;
// set max constraint in hop
mhop.setMaxNumThreads(1);
// set optnode k (for explain)
c.setK(1);
}
} else
rAssignRemainingParallelism(c, parforK, opsK);
}
// recompile statement block if required
if (recompileSB) {
try {
// guaranteed to be a last-level block (see hop change)
ProgramBlock pb = (ProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
Recompiler.recompileProgramBlockInstructions(pb);
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
}
}
}
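The recursion above hands each nested parfor a share of the parent's parallelism budget. A simplified, hypothetical sketch of that top-down distribution, assuming the leftover factor is plain integer division (the real getRemainingParallelismParFor/getRemainingParallelismOps helpers may compute it differently):

import java.util.ArrayList;
import java.util.List;

/** Hypothetical sketch of distributing a parallelism budget down a nested loop tree (not SystemML API). */
public class ParallelismTreeSketch {
    static class LoopNode {
        long numIterations;
        int k; // assigned degree of parallelism
        List<LoopNode> children = new ArrayList<>();
        LoopNode(long n) { numIterations = n; }
    }

    /** Assign at most 'budget' workers to this loop, then pass the leftover factor to nested loops. */
    static void assign(LoopNode node, int budget) {
        node.k = (int) Math.min(node.numIterations, budget);
        // remaining parallelism per worker (assumption: integer division, at least 1)
        int remaining = Math.max(budget / node.k, 1);
        for (LoopNode child : node.children)
            assign(child, remaining);
    }

    public static void main(String[] args) {
        LoopNode outer = new LoopNode(4);
        LoopNode inner = new LoopNode(100);
        outer.children.add(inner);
        assign(outer, 16);
        System.out.println(outer.k + " x " + inner.k); // 4 x 4
    }
}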
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
The class OptimizerRuleBased, method rewriteEnableRuntimePiggybacking.
///////
// REWRITE enable runtime piggybacking
///
protected void rewriteEnableRuntimePiggybacking(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices) {
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
HashSet<String> sharedVars = new HashSet<>();
boolean apply = false;
// enable runtime piggybacking if MR jobs on shared read-only data set
if (OptimizerUtils.ALLOW_RUNTIME_PIGGYBACKING) {
// apply runtime piggybacking if hop in mr and shared input variable
// (any input variable which is not partitioned and is read only and applies)
apply = rHasSharedMRInput(n, vars.keySet(), partitionedMatrices.keySet(), sharedVars)
  && n.getTotalK() > 1; // apply only if degree of parallelism > 1
}
if (apply)
pfpb.setRuntimePiggybacking(apply);
_numEvaluatedPlans++;
LOG.debug(getOptMode() + " OPT: rewrite 'enable runtime piggybacking' - result=" + apply + " (" + ProgramConverter.serializeStringCollection(sharedVars) + ")");
}
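The rewrite reduces to a simple conjunction: the global feature flag, at least one shared read-only MR input, and a degree of parallelism greater than one. A tiny illustrative guard capturing that condition (all names are hypothetical, not SystemML API):

/** Hypothetical sketch of the piggybacking guard above (not SystemML API). */
public class PiggybackingGuardSketch {
    static boolean enablePiggybacking(boolean featureEnabled, boolean hasSharedReadOnlyMRInput,
                                      int totalDegreeOfParallelism) {
        // sharing scans only pays off if several workers would otherwise launch separate MR jobs
        return featureEnabled && hasSharedReadOnlyMRInput && totalDegreeOfParallelism > 1;
    }

    public static void main(String[] args) {
        System.out.println(enablePiggybacking(true, true, 8)); // true
        System.out.println(enablePiggybacking(true, true, 1)); // false: single worker, nothing to share
    }
}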
Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.
The class OptimizerRuleBased, method rewriteSetResultMerge.
///////
// REWRITE set result merge
///
protected void rewriteSetResultMerge(OptNode n, LocalVariableMap vars, boolean inLocal) {
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
PResultMerge REMOTE = OptimizerUtils.isSparkExecutionMode() ? PResultMerge.REMOTE_SPARK : PResultMerge.REMOTE_MR;
PResultMerge ret = null;
// investigate details of current parfor node
boolean flagRemoteParFOR = (n.getExecType() == getRemoteExecType());
boolean flagLargeResult = hasLargeTotalResults(n, pfpb.getResultVariables(), vars, true);
boolean flagRemoteLeftIndexing = hasResultMRLeftIndexing(n, pfpb.getResultVariables(), vars, true);
boolean flagCellFormatWoCompare = determineFlagCellFormatWoCompare(pfpb.getResultVariables(), vars);
boolean flagOnlyInMemResults = hasOnlyInMemoryResults(n, pfpb.getResultVariables(), vars, true);
// MR, if remote exec, and w/compare (prevent huge transfer/merge costs)
if (flagRemoteParFOR && flagLargeResult) {
  ret = REMOTE;
}
// CP, if all results in mem
else if (flagOnlyInMemResults) {
  ret = PResultMerge.LOCAL_MEM;
}
// MR, otherwise (benefit for large matrices outweighs potentially unnecessary MR jobs for smaller matrices)
else if ((flagRemoteParFOR || flagRemoteLeftIndexing) && !(flagCellFormatWoCompare && ResultMergeLocalFile.ALLOW_COPY_CELLFILES)) {
  ret = REMOTE;
}
// CP, otherwise (decide later if in mem or file-based)
else {
  ret = PResultMerge.LOCAL_AUTOMATIC;
}
// modify rtprog
pfpb.setResultMerge(ret);
// modify plan
n.addParam(ParamType.RESULT_MERGE, ret.toString());
// recursively apply rewrite for parfor nodes
if (n.getChilds() != null)
rInvokeSetResultMerge(n.getChilds(), vars, inLocal && !flagRemoteParFOR);
_numEvaluatedPlans++;
LOG.debug(getOptMode() + " OPT: rewrite 'set result merge' - result=" + ret);
}
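The selection above is effectively a four-way decision table over the investigated flags. A hedged, standalone sketch of that ordering (the boolean parameters and names are illustrative; the real method additionally consults the cell-format/copy configuration):

/** Hypothetical sketch of the result-merge selection above (not SystemML API). */
public class ResultMergeSketch {
    enum Merge { REMOTE, LOCAL_MEM, LOCAL_AUTOMATIC }

    static Merge select(boolean remoteParfor, boolean largeResult, boolean onlyInMemResults,
                        boolean remoteLeftIndexing, boolean cellFormatCopyPossible) {
        if (remoteParfor && largeResult)
            return Merge.REMOTE;          // avoid shipping huge results back for a local merge
        if (onlyInMemResults)
            return Merge.LOCAL_MEM;       // everything already fits in memory
        if ((remoteParfor || remoteLeftIndexing) && !cellFormatCopyPossible)
            return Merge.REMOTE;          // results already live on the cluster
        return Merge.LOCAL_AUTOMATIC;     // decide in-memory vs. file-based at runtime
    }

    public static void main(String[] args) {
        System.out.println(select(true, true, false, false, false));  // REMOTE
        System.out.println(select(false, false, true, false, false)); // LOCAL_MEM
    }
}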