use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetTranposeSparseVectorOperations.
// /////
// REWRITE transpose sparse vector operations
// /
/**
 * Decides whether the 'transpose sparse vector operations' rewrite applies to
 * the given parfor node and logs the decision at debug level.
 * <p>
 * The decision flag is raised only when all of the following hold:
 * <ul>
 * <li>the mapped parfor program block runs in {@code PExecMode.REMOTE_MR_DP},</li>
 * <li>exactly one matrix is partitioned,</li>
 * <li>that variable is a non-null {@code MatrixObject} partitioned
 * {@code PartitionFormat.COLUMN_WISE},</li>
 * <li>its sparsity is at most {@code MatrixBlock.SPARSITY_TURN_POINT},</li>
 * <li>{@code rIsTransposeSafePartition(pn, moVarname)} returns true.</li>
 * </ul>
 * NOTE(review): this excerpt is truncated. The closing braces are missing and
 * no statement that consumes {@code apply} other than the log call is visible,
 * so the actual plan modification cannot be confirmed from here.
 *
 * @param pn optimizer tree node; a warning is logged if it is not a PARFOR node
 * @param partitionedMatrices partition format keyed by variable name
 * @param vars variable map used to look up the partitioned variable by name
 */
protected void rewriteSetTranposeSparseVectorOperations(OptNode pn, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) {
// assertions (warnings of corrupt optimizer decisions)
// only a warning is emitted; the statements below still run for non-PARFOR nodes
if (pn.getNodeType() != NodeType.PARFOR)
LOG.warn(getOptMode() + " OPT: Transpose sparse vector operations is only applicable for a ParFor node.");
boolean apply = false;
// element [1] of the mapping for this node id is cast to the parfor program block
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
if (pfpb.getExecMode() == PExecMode.REMOTE_MR_DP && // general applicable
partitionedMatrices.size() == 1) {
// the map holds exactly one entry here, so the first key is the only key
String moVarname = partitionedMatrices.keySet().iterator().next();
PartitionFormat moDpf = partitionedMatrices.get(moVarname);
Data dat = vars.get(moVarname);
// NOTE(review): the explicit null check is redundant with instanceof, which is false for null
if (dat != null && dat instanceof MatrixObject && moDpf == PartitionFormat.COLUMN_WISE && // check for sparse matrix
((MatrixObject) dat).getSparsity() <= MatrixBlock.SPARSITY_TURN_POINT && // transpose-safe
rIsTransposeSafePartition(pn, moVarname)) {
apply = true;
// NOTE(review): the braces closing the two conditions above are missing from this
// excerpt, so whether the log call sits inside or after them cannot be told here
LOG.debug(getOptMode() + " OPT: rewrite 'set transpose sparse vector operations' - result=" + apply);
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetRecompileMemoryBudget.
// /////
// REWRITE set recompile memory budget
// /
/**
 * Computes the local memory budget to use for recompilation of the given
 * node and logs it at debug level.
 * <p>
 * The budget starts at {@code _lm}. For a node with execution type
 * {@code ExecType.CP} it is divided by the node's total degree of
 * parallelism ({@code n.getTotalK()}).
 * <p>
 * NOTE(review): this excerpt is truncated. {@code pfpb} is looked up but the
 * statement that would hand the new budget to it is not visible, and the
 * closing braces are missing.
 *
 * @param n optimizer tree node whose mapped parfor program block is looked up
 */
protected void rewriteSetRecompileMemoryBudget(OptNode n) {
double newLocalMem = _lm;
// check exec type because recompilation only happens at the master node
if (n.getExecType() == ExecType.CP) {
// compute local recompile memory budget
int par = n.getTotalK();
// NOTE(review): par is used as a divisor with no visible zero check -- confirm
// that getTotalK() is always positive for CP nodes
newLocalMem = _lm / par;
// modify runtime plan
// element [1] of the mapping for this node id is cast to the parfor program block
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
// NOTE(review): the brace closing the CP branch is missing from this excerpt, so
// whether the log call sits inside or after it cannot be told here
LOG.debug(getOptMode() + " OPT: rewrite 'set recompile memory budget' - result=" + toMB(newLocalMem));
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rFindAndUnfoldRecursiveFunction.
/**
 * Walks the optimizer tree below {@code n} and, for a recursive function call
 * node for which {@code rContainsNode(n, parfor)} is true, unfolds the
 * function once: a deep copy of the function program block is registered
 * under a new name ({@code FUNCTION_UNFOLD_NAMEPREFIX + fname}), a new
 * FUNCCALL opt node replaces {@code n} under its parent, and the sub-tree of
 * the new node is rebuilt from the copied child blocks.
 * <p>
 * {@code recPBs} is updated in place: parfor program blocks found under the
 * old node are removed and those found under the new node are added.
 * <p>
 * NOTE(review): this excerpt is truncated. The braces closing the loop and the
 * conditions are missing, so the exact nesting of the trailing statements
 * (including the recursive descent) cannot be determined from here.
 *
 * @param n current optimizer tree node
 * @param parfor parfor program block passed to {@code rContainsNode}; also the
 *        source of the runtime program and DML program that receive the copy
 * @param recPBs set of parfor program blocks, modified in place
 * @param vars variable map passed through to the opt node construction
 */
protected void rFindAndUnfoldRecursiveFunction(OptNode n, ParForProgramBlock parfor, HashSet<ParForProgramBlock> recPBs, LocalVariableMap vars) {
// unfold if found
if (n.getNodeType() == NodeType.FUNCCALL && n.isRecursive()) {
boolean exists = rContainsNode(n, parfor);
if (exists) {
// the OPSTRING parameter is split at Program.KEY_DELIM into namespace and function name
String fnameKey = n.getParam(ParamType.OPSTRING);
String[] names = fnameKey.split(Program.KEY_DELIM);
String fnamespace = names[0];
String fname = names[1];
String fnameNew = FUNCTION_UNFOLD_NAMEPREFIX + fname;
// unfold function
FunctionOp fop = (FunctionOp) OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
Program prog = parfor.getProgram();
DMLProgram dmlprog = parfor.getStatementBlock().getDMLProg();
FunctionProgramBlock fpb = prog.getFunctionProgramBlock(fnamespace, fname);
FunctionProgramBlock copyfpb = ProgramConverter.createDeepCopyFunctionProgramBlock(fpb, new HashSet<String>(), new HashSet<String>());
// register the copy under the new name in both the runtime program and the DML program
prog.addFunctionProgramBlock(fnamespace, fnameNew, copyfpb);
dmlprog.addFunctionStatementBlock(fnamespace, fnameNew, (FunctionStatementBlock) copyfpb.getStatementBlock());
// replace function names in old subtree (link to new function)
rReplaceFunctionNames(n, fname, fnameNew);
// recreate sub opttree
String fnameNewKey = fnamespace + Program.KEY_DELIM + fnameNew;
OptNode nNew = new OptNode(NodeType.FUNCCALL);
OptTreeConverter.getAbstractPlanMapping().putHopMapping(fop, nNew);
nNew.addParam(ParamType.OPSTRING, fnameNewKey);
// swap the old node for the new one in the parent's child list
long parentID = OptTreeConverter.getAbstractPlanMapping().getMappedParentID(n.getID());
OptTreeConverter.getAbstractPlanMapping().getOptNode(parentID).exchangeChild(n, nNew);
HashSet<String> memo = new HashSet<>();
// required if functionop not shared (because not replaced yet)
// required if functionop shared (indirectly replaced)
for (int i = 0; i < copyfpb.getChildBlocks().size(); /*&& i<len*/
i++) {
// one child opt node is created per child block of the copied function
ProgramBlock lpb = copyfpb.getChildBlocks().get(i);
StatementBlock lsb = lpb.getStatementBlock();
nNew.addChild(OptTreeConverter.rCreateAbstractOptNode(lsb, lpb, vars, false, memo));
// NOTE(review): the brace closing the loop above is missing from this excerpt
// compute delta for recPB set (use for removing parfor)
recPBs.removeAll(rGetAllParForPBs(n, new HashSet<ParForProgramBlock>()));
recPBs.addAll(rGetAllParForPBs(nNew, new HashSet<ParForProgramBlock>()));
// replace function names in new subtree (recursive link to new function)
rReplaceFunctionNames(nNew, fname, fnameNew);
// NOTE(review): the braces closing the two conditions above (and anything between
// them) are missing from this excerpt
// recursive invocation (only for non-recursive functions)
if (!n.isLeaf())
for (OptNode c : n.getChilds()) rFindAndUnfoldRecursiveFunction(c, parfor, recPBs, vars);
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rAssignRemainingParallelism.
/**
 * Recursively distributes remaining parallelism over the children of
 * {@code n}.
 * <p>
 * For a PARFOR child, the parfor degree of parallelism is capped by the
 * child's NUM_ITERATIONS parameter, the remaining parfor/operation
 * parallelism is computed via {@code getRemainingParallelismParFor} and
 * {@code getRemainingParallelismOps}, and the method recurses into the child
 * with those values. For a HOP child, the mapped hop is inspected to decide
 * whether it is a multi-threaded operator eligible for a parallelism
 * constraint; if so the enclosing statement block is flagged for
 * recompilation.
 * <p>
 * NOTE(review): this excerpt is truncated. The statements that set the
 * degree of parallelism on the program block, hop and opt node are not
 * visible (only their comments remain), {@code pfpb}, {@code mhop} and
 * {@code pb} are unused in the visible text, and closing braces are missing.
 *
 * @param n optimizer tree node whose children are processed
 * @param parforK parallelism available for parfor nodes
 * @param opsK parallelism available for multi-threaded operations
 */
protected void rAssignRemainingParallelism(OptNode n, int parforK, int opsK) {
ArrayList<OptNode> childs = n.getChilds();
if (childs != null) {
boolean recompileSB = false;
for (OptNode c : childs) {
if (c.getNodeType() == NodeType.PARFOR) {
// constrain max parfor parallelism by problem size
// Integer.parseInt throws NumberFormatException if the parameter is not a parseable int
int tmpN = Integer.parseInt(c.getParam(ParamType.NUM_ITERATIONS));
int tmpK = (tmpN < parforK) ? tmpN : parforK;
// set parfor degree of parallelism
long id = c.getID();
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
// distribute remaining parallelism
int remainParforK = getRemainingParallelismParFor(parforK, tmpK);
int remainOpsK = getRemainingParallelismOps(opsK, tmpK);
rAssignRemainingParallelism(c, remainParforK, remainOpsK);
} else if (c.getNodeType() == NodeType.HOP) {
// set degree of parallelism for multi-threaded leaf nodes
Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(c.getID());
// eligible: parallel matrix operations enabled, hop is multi-threaded, and the hop is
// not one of the excluded cases (parameterized builtin other than GROUPEDAGG/REXPAND,
// non-cumulative unary op, reorg op other than TRANSPOSE)
if (ConfigurationManager.isParallelMatrixOperations() && // abop, datagenop, qop, paramop
h instanceof MultiThreadedHop && !(// only paramop-grpagg
h instanceof ParameterizedBuiltinOp && !HopRewriteUtils.isValidOp(((ParameterizedBuiltinOp) h).getOp(), ParamBuiltinOp.GROUPEDAGG, ParamBuiltinOp.REXPAND)) && !(// only unaryop-cumulativeagg
h instanceof UnaryOp && !((UnaryOp) h).isCumulativeUnaryOperation()) && !(// only reorgop-transpose
h instanceof ReorgOp && ((ReorgOp) h).getOp() != ReOrgOp.TRANSPOSE)) {
MultiThreadedHop mhop = (MultiThreadedHop) h;
// set max constraint in hop
// set optnode k (for explain)
// need to recompile SB, if changed constraint
recompileSB = true;
} else // for all other multi-threaded hops set k=1 to simplify debugging
if (h instanceof MultiThreadedHop) {
MultiThreadedHop mhop = (MultiThreadedHop) h;
// set max constraint in hop
// set optnode k (for explain)
// NOTE(review): a closing brace appears to be missing before the next line, so
// which condition this else belongs to cannot be told from the excerpt
} else
rAssignRemainingParallelism(c, parforK, opsK);
// recompile statement block if required
if (recompileSB) {
try {
// guaranteed to be a last-level block (see hop change)
ProgramBlock pb = (ProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
} catch (Exception ex) {
// any failure in the try block is rethrown wrapped in a DMLRuntimeException, cause preserved
throw new DMLRuntimeException(ex);
use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetExecutionStategy.
// /////
// REWRITE set execution strategy
// /
/**
 * Chooses between local and remote parfor execution for the given node and
 * reports whether the body must be recompiled.
 * <p>
 * Remote execution is considered only if parallel parfor is enabled and one
 * of the memory estimates fits into the remote budget {@code _rm}:
 * {@code M} or {@code M3} when the node is already CP-only, or {@code M2}
 * when CP-only execution is possible. Within that case a chain of heuristics
 * is evaluated (local parallelism {@code cpk} versus {@code _lk}, {@code _N}
 * and {@code _rk}; problem size; CP-only feasibility; {@code flagLIX};
 * remote data partitioning).
 * <p>
 * NOTE(review): this excerpt is truncated. Every decision branch is empty
 * (only the original branch comments remain), closing braces are missing,
 * and {@code REMOTE} and {@code pfpb} are unused in the visible text. The
 * statements that set the node's execution type are therefore not shown.
 *
 * @param n optimizer tree node of the parfor
 * @param M0 estimate passed to {@code isLargeProblem}
 * @param M memory estimate compared against {@code _rm} and used as the
 *        divisor of {@code _lm} for the local parallelism estimate
 * @param M2 memory estimate compared against {@code _rm} when CP-only
 *        execution is possible (per the inline comment: all instructions
 *        forced to CP)
 * @param M3 memory estimate compared against {@code _rm} when the node is
 *        CP-only (per the inline comment: partitioned)
 * @param flagLIX flag requesting remote execution for the LIX rewrite
 * @return {@code true} if the resulting parfor exec mode is
 *         {@code REMOTE_MR} or {@code REMOTE_SPARK} and the node is not
 *         CP-only; {@code false} otherwise
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
boolean isCPOnly = n.isCPOnly();
// isCPOnlyPossible(n, _rm) is evaluated only when the node is not already CP-only
boolean isCPOnlyPossible = isCPOnly || isCPOnlyPossible(n, _rm);
String datapartitioner = n.getParam(ParamType.DATA_PARTITIONER);
ExecType REMOTE = getRemoteExecType();
// remote data partitioner that matches the current execution backend
PDataPartitioner REMOTE_DP = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
// deciding on the execution strategy
if (// allowed remote parfor execution
ConfigurationManager.isParallelParFor() && (// Required: all inst already in cp and fit in remote mem
(isCPOnly && M <= _rm) || // Required: all inst already in cp and fit partitioned in remote mem
(isCPOnly && M3 <= _rm) || // Required: all inst forced to cp fit in remote mem
(isCPOnlyPossible && M2 <= _rm))) {
// at this point all required conditions for REMOTE_MR are given; now it's an opt decision
// estimated local exploited par
int cpk = (int) Math.min(_lk, Math.floor(_lm / M));
// (the factor of 2 is to account for hyper-threading and in order to prevent too eager remote parfor)
// NOTE(review): the bodies of all branches below are missing from this excerpt
if (// incl conditional partitioning
2 * cpk < _lk && 2 * cpk < _N && 2 * cpk < _rk) {
// remote parfor
} else // MR if problem is large enough and remote parallelism is larger than local
if (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0)) {
// remote parfor
} else // MR if MR operations in local, but CP only in remote (less overall MR jobs)
if (!isCPOnly && isCPOnlyPossible) {
// remote parfor
} else // MR if necessary for LIX rewrite (LIX true iff cp only and rm valid)
if (flagLIX) {
// remote parfor
} else // MR if remote data partitioning, because data will be distributed on all nodes
if (datapartitioner != null && datapartitioner.equals(REMOTE_DP.toString()) && !InfrastructureAnalyzer.isLocalMode()) {
// remote parfor
} else // otherwise CP
// local parfor
} else // mr instructions in body, or rm too small
// local parfor
// actual programblock modification
long id = n.getID();
ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
// the parfor exec mode is derived from the node's current execution type
PExecMode mode = n.getExecType().toParForExecMode();
// decide if recompilation according to remote mem budget necessary
boolean requiresRecompile = ((mode == PExecMode.REMOTE_MR || mode == PExecMode.REMOTE_SPARK) && !isCPOnly);
LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + mode + " (recompile=" + requiresRecompile + ")");
return requiresRecompile;