use of org.apache.sysml.parser.ParForStatementBlock in project incubator-systemml by apache.
the class ParForProgramBlock method executeRemoteSparkParForDP.
/**
 * Executes this parfor as a remote Spark job ({@code RemoteDPParForSpark.runJob}) that consumes
 * the co-located, data-partitioned input matrix {@code _colocatedDPMatrix}.
 *
 * <p>The input matrix is flagged as partitioned for the duration of the job and unflagged again
 * after the results have been consolidated into the symbol table of {@code ec}. When
 * {@code _monitor} is set, the elapsed time of each phase and the executed task/iteration
 * counts are reported to {@code StatisticMonitor}.
 *
 * @param ec      execution context providing the input matrix and receiving the results
 * @param itervar iteration variable (not read by this method)
 * @param from    lower bound handed to the task partitioner
 * @param to      upper bound handed to the task partitioner
 * @param incr    increment handed to the task partitioner
 * @throws IOException declared by this method; raised by one of the invoked helpers
 */
private void executeRemoteSparkParForDP(ExecutionContext ec, IntObject itervar, IntObject from, IntObject to, IntObject incr) throws IOException {
    final Timing timer = _monitor ? new Timing(true) : null;

    // Step 0) check and compile to CP (if forced remote parfor)
    final boolean forcedRecompile = checkMRAndRecompileToCP(0);

    // Step 1) prepare partitioned input matrix (needs to happen before serializing the program)
    ParForStatementBlock parforSB = (ParForStatementBlock) getStatementBlock();
    MatrixObject dpMatrix = ec.getMatrixObject(_colocatedDPMatrix);
    PartitionFormat dpFormat = parforSB.determineDataPartitionFormat(_colocatedDPMatrix);
    // flag the matrix variable as partitioned; reverted at the end of this method
    dpMatrix.setPartitioned(dpFormat._dpf, dpFormat._N);

    // Step 2) init parallel workers (serialize PBs)
    // NOTES: each mapper changes filenames with regard to its ID as we submit a single
    // job; the serialized string cannot be reused since variables are serialized as well.
    ParForBody parforBody = new ParForBody(_childBlocks, _resultVars, ec);
    HashMap<String, byte[]> classMap = new HashMap<>();
    String serializedBody = ProgramConverter.serializeParForBody(parforBody, classMap);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_PARWRK_T, timer.stop());
    }

    // Step 3) create tasks
    TaskPartitioner taskPartitioner = createTaskPartitioner(from, to, incr);
    String resultFileName = constructResultFileName();
    // tasks are not materialized here; the expected task count equals the iteration count
    long expectedIterations = taskPartitioner.getNumIterations();
    long expectedTasks = expectedIterations;
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_INIT_TASKS_T, timer.stop());
    }

    // write matrices to HDFS, except DP matrix which is the input to the RemoteDPParForSpark job
    exportMatricesToHDFS(ec, _colocatedDPMatrix);

    // Step 4) submit the remote job (wait for finished work)
    // TODO runtime support for binary cell partitioning
    OutputInfo inputFormat = OutputInfo.BinaryBlockOutputInfo;
    RemoteParForJobReturn jobResult = RemoteDPParForSpark.runJob(_ID, _iterPredVar, _colocatedDPMatrix, serializedBody, classMap, resultFileName, dpMatrix, ec, dpFormat, inputFormat, _tSparseCol, _enableCPCaching, _numThreads);
    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_EXEC_T, timer.stop());
    }

    // Step 5) collecting results from each parallel worker
    int executedTasks = jobResult.getNumExecutedTasks();
    int executedIterations = jobResult.getNumExecutedIterations();
    // consolidate results into global symbol table
    consolidateAndCheckResults(ec, expectedIterations, expectedTasks, executedIterations, executedTasks, jobResult.getVariables());

    // undo the forced recompilation of step 0, if any
    if (forcedRecompile) {
        releaseForcedRecompile(0);
    }
    dpMatrix.unsetPartitioned();

    if (_monitor) {
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_WAIT_RESULTS_T, timer.stop());
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMTASKS, executedTasks);
        StatisticMonitor.putPFStat(_ID, Stat.PARFOR_NUMITERS, executedIterations);
    }
}
use of org.apache.sysml.parser.ParForStatementBlock in project incubator-systemml by apache.
the class ParForProgramBlock method handleDataPartitioning.
/**
 * Partitions the read-only matrix inputs of this parfor and swaps the partitioned matrices
 * into the given execution context.
 *
 * <p>Nothing happens when the configured data partitioner is {@code NONE}. Otherwise, for every
 * read-only parent variable that is bound to a {@code MatrixObject} and has a partition format
 * other than {@code PartitionFormat.NONE}, the method either reuses a previously partitioned
 * matrix from {@code _variablesDPReuse} or creates a new one, replaces the variable in
 * {@code ec}, recompiles the indexing HOPs of the parfor body, and remembers the original
 * matrix in {@code _variablesDPOriginal}.
 *
 * @param ec execution context whose variables are inspected and replaced
 * @throws DMLRuntimeException if this program block has no statement block
 */
private void handleDataPartitioning(ExecutionContext ec) {
    PDataPartitioner activePartitioner = _dataPartitioner;
    if (activePartitioner == PDataPartitioner.NONE) {
        return;
    }

    ParForStatementBlock parforSB = (ParForStatementBlock) getStatementBlock();
    if (parforSB == null) {
        throw new DMLRuntimeException("ParFor statement block required for reasoning about data partitioning.");
    }

    for (String varName : parforSB.getReadOnlyParentVars()) {
        Data value = ec.getVariable(varName);
        // skip variables that are missing from the context or are not matrices
        // (instanceof is false for null, so no separate null check is needed)
        if (!(value instanceof MatrixObject)) {
            continue;
        }

        // unpartitioned input
        MatrixObject originalMatrix = (MatrixObject) value;
        PartitionFormat format = parforSB.determineDataPartitionFormat(varName);
        LOG.trace("PARFOR ID = " + _ID + ", Partitioning read-only input variable " + varName + " (format=" + format + ", mode=" + _dataPartitioner + ")");
        if (format == PartitionFormat.NONE) {
            continue;
        }

        // blockwise formats are switched to the Spark partitioner; the switch is sticky
        // for all variables handled afterwards in this call
        if (activePartitioner != PDataPartitioner.REMOTE_SPARK && format.isBlockwise()) {
            LOG.warn("PARFOR ID = " + _ID + ", Switching data partitioner from " + activePartitioner + " to " + PDataPartitioner.REMOTE_SPARK.name() + " for blockwise-n partitioning.");
            activePartitioner = PDataPartitioner.REMOTE_SPARK;
        }

        Timing localTimer = new Timing(true);

        // input data partitioning (reuse if possible)
        Data partitioned = _variablesDPReuse.get(varName);
        if (partitioned == null) {
            // no reuse opportunity
            DataPartitioner partitionerImpl = createDataPartitioner(format, activePartitioner, ec);
            // disable binary cell for sparse if consumed by MR jobs
            // TODO support for binarycell
            boolean binaryCellAllowed = OptimizerRuleBased.allowsBinaryCellPartitions(originalMatrix, format)
                && !OptimizerUtils.isSparkExecutionMode();
            if (!binaryCellAllowed) {
                partitionerImpl.disableBinaryCell();
            }
            MatrixObject created = partitionerImpl.createPartitionedMatrixObject(originalMatrix, constructDataPartitionsFileName());
            // the partitioner hands back the input itself if it did not partition
            // (e.g., too small): skip the remaining logic for this variable
            if (created == originalMatrix) {
                continue;
            }
            partitioned = created;
        }

        ec.setVariable(varName, partitioned);

        // recompile parfor body program
        ProgramRecompiler.rFindAndRecompileIndexingHOP(parforSB, this, varName, ec, true);

        // store original and partitioned matrix (for reuse if applicable)
        _variablesDPOriginal.put(varName, originalMatrix);
        if (ALLOW_REUSE_PARTITION_VARS && ProgramRecompiler.isApplicableForReuseVariable(parforSB.getDMLProg(), parforSB, varName)) {
            _variablesDPReuse.put(varName, partitioned);
        }

        LOG.trace("Partitioning and recompilation done in " + localTimer.stop() + "ms");
    }
}
use of org.apache.sysml.parser.ParForStatementBlock in project incubator-systemml by apache.
the class Explain method getHopDAG.
/**
 * Builds the graph text for the HOP DAGs of a statement block, recursing into the bodies of
 * control-flow blocks (while, if/else, for/parfor, function).
 *
 * <p>Each control-flow block is wrapped in exactly one {@code addSubGraphHeader} /
 * {@code addSubGraphFooter} pair, labelled with the block kind and its line range. A generic
 * block is only wrapped when it requires recompilation, in which case it is additionally
 * styled as a filled, light-grey region (the emitted attribute lines look like Graphviz DOT).
 *
 * @param sb           statement block to explain
 * @param nodes        passed through unchanged to the recursive and per-HOP calls
 * @param lines        passed through unchanged to the recursive and per-HOP calls
 * @param withSubgraph passed through to the sub-graph header/footer helpers
 * @return a new builder holding the graph text for {@code sb}
 */
private static StringBuilder getHopDAG(StatementBlock sb, StringBuilder nodes, ArrayList<Integer> lines, boolean withSubgraph) {
    StringBuilder builder = new StringBuilder();
    if (sb instanceof WhileStatementBlock) {
        addSubGraphHeader(builder, withSubgraph);
        WhileStatementBlock wsb = (WhileStatementBlock) sb;
        String label = "WHILE (lines " + wsb.getBeginLine() + "-" + wsb.getEndLine() + ")";
        if (!wsb.getUpdateInPlaceVars().isEmpty()) {
            label += " in-place=" + wsb.getUpdateInPlaceVars().toString();
        }
        // TODO: Don't show predicate hops for now
        WhileStatement ws = (WhileStatement) sb.getStatement(0);
        for (StatementBlock current : ws.getBody()) {
            builder.append(getHopDAG(current, nodes, lines, withSubgraph));
        }
        addSubGraphFooter(builder, withSubgraph, label);
    } else if (sb instanceof IfStatementBlock) {
        addSubGraphHeader(builder, withSubgraph);
        IfStatementBlock ifsb = (IfStatementBlock) sb;
        String label = "IF (lines " + ifsb.getBeginLine() + "-" + ifsb.getEndLine() + ")";
        // TODO: Don't show predicate hops for now
        IfStatement ifs = (IfStatement) sb.getStatement(0);
        for (StatementBlock current : ifs.getIfBody()) {
            builder.append(getHopDAG(current, nodes, lines, withSubgraph));
        }
        // Close the IF sub-graph exactly once, after all child blocks. Emitting the footer
        // per child left the header unbalanced for bodies with zero or several blocks.
        addSubGraphFooter(builder, withSubgraph, label);
        if (!ifs.getElseBody().isEmpty()) {
            addSubGraphHeader(builder, withSubgraph);
            label = "ELSE (lines " + ifsb.getBeginLine() + "-" + ifsb.getEndLine() + ")";
            for (StatementBlock current : ifs.getElseBody()) {
                builder.append(getHopDAG(current, nodes, lines, withSubgraph));
            }
            addSubGraphFooter(builder, withSubgraph, label);
        }
    } else if (sb instanceof ForStatementBlock) {
        ForStatementBlock fsb = (ForStatementBlock) sb;
        addSubGraphHeader(builder, withSubgraph);
        // parfor blocks are a special kind of for block and only differ in the label prefix
        String kind = (sb instanceof ParForStatementBlock) ? "PARFOR" : "FOR";
        String label = kind + " (lines " + fsb.getBeginLine() + "-" + fsb.getEndLine() + ")";
        if (!fsb.getUpdateInPlaceVars().isEmpty()) {
            label += " in-place=" + fsb.getUpdateInPlaceVars().toString();
        }
        // TODO: Don't show predicate hops (from/to/increment) for now
        ForStatement fs = (ForStatement) sb.getStatement(0);
        for (StatementBlock current : fs.getBody()) {
            builder.append(getHopDAG(current, nodes, lines, withSubgraph));
        }
        addSubGraphFooter(builder, withSubgraph, label);
    } else if (sb instanceof FunctionStatementBlock) {
        FunctionStatement fstmt = (FunctionStatement) sb.getStatement(0);
        addSubGraphHeader(builder, withSubgraph);
        String label = "Function (lines " + fstmt.getBeginLine() + "-" + fstmt.getEndLine() + ")";
        for (StatementBlock current : fstmt.getBody()) {
            builder.append(getHopDAG(current, nodes, lines, withSubgraph));
        }
        addSubGraphFooter(builder, withSubgraph, label);
    } else {
        // For generic StatementBlock: only blocks that require recompilation get a sub-graph
        if (sb.requiresRecompilation()) {
            addSubGraphHeader(builder, withSubgraph);
        }
        ArrayList<Hop> hopsDAG = sb.getHops();
        if (hopsDAG != null && !hopsDAG.isEmpty()) {
            // reset the visit flags before and after the traversal
            Hop.resetVisitStatus(hopsDAG);
            for (Hop hop : hopsDAG) {
                builder.append(getHopDAG(hop, nodes, lines, withSubgraph));
            }
            Hop.resetVisitStatus(hopsDAG);
        }
        if (sb.requiresRecompilation()) {
            builder.append("style=filled;\n");
            builder.append("color=lightgrey;\n");
            String label = "(lines " + sb.getBeginLine() + "-" + sb.getEndLine() + ") [recompile=" + sb.requiresRecompilation() + "]";
            addSubGraphFooter(builder, withSubgraph, label);
        }
    }
    return builder;
}
use of org.apache.sysml.parser.ParForStatementBlock in project systemml by apache.
the class OptimizerRuleBased method rewriteInjectSparkLoopCheckpointing.
// /////
// REWRITE inject spark loop checkpointing
// /
/**
 * Runs the "inject spark loop checkpointing" rewrite on the parfor mapped to the given
 * optimizer node and, if the rewrite reports injected checkpoints, regenerates the child
 * program blocks of the parfor program block from the rewritten body.
 *
 * @param n optimizer tree node whose ID is used to look up the mapped parfor program
 * @throws DMLRuntimeException wrapping any exception raised while rewriting or recompiling
 */
protected void rewriteInjectSparkLoopCheckpointing(OptNode n) {
    // look up the statement block / program block pair mapped to the root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock parforSB = (ParForStatementBlock) mapped[0];
    ParForStatement parforStmt = (ParForStatement) parforSB.getStatement(0);
    ParForProgramBlock parforPB = (ParForProgramBlock) mapped[1];

    boolean checkpointsInjected = false;
    try {
        // apply hop rewrite inject spark checkpoints (but without context awareness)
        ProgramRewriter rewriter = new ProgramRewriter(new RewriteInjectSparkLoopCheckpointing(false));
        ProgramRewriteStatus status = new ProgramRewriteStatus();
        rewriter.rRewriteStatementBlockHopDAGs(parforSB, status);
        parforStmt.setBody(rewriter.rRewriteStatementBlocks(parforStmt.getBody(), status, true));

        // recompile only if additional checkpoints were introduced
        checkpointsInjected = status.getInjectedCheckpoints();
        if (checkpointsInjected) {
            parforPB.setChildBlocks(ProgramRecompiler.generatePartitialRuntimeProgram(parforPB.getProgram(), parforStmt.getBody()));
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    LOG.debug(getOptMode() + " OPT: rewrite 'inject spark loop checkpointing' - result=" + checkpointsInjected);
}
use of org.apache.sysml.parser.ParForStatementBlock in project systemml by apache.
the class OptimizerRuleBased method rewriteSetDataPartitioner.
// /////
// REWRITE set data partitioner
// /
/**
 * Decides whether the parfor mapped to the given node should use a remote data partitioner
 * and records the decision on both the runtime program block and the plan node.
 *
 * <p>Candidates are only searched in hybrid execution mode and when {@code _N} or
 * {@code _Nmax} reaches {@code PROB_SIZE_THRESHOLD_PARTITIONING}. Read-only parent variables
 * whose partition format is neither {@code NONE} nor {@code BLOCK_WISE_M_N} are handed to
 * {@code rFindDataPartitioningCandidates}; if that call succeeds, they are added to
 * {@code partitionedMatrices}.
 *
 * @param n                   optimizer node; a warning is logged if it is not a PARFOR node
 * @param vars                passed through to the candidate search
 * @param partitionedMatrices output map that receives the accepted candidates
 * @param thetaM              passed through to the candidate search
 * @return always {@code false} as written
 */
protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM) {
    if (n.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Data partitioner can only be set for a ParFor node.");
    }

    // preparations
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock parforSB = (ParForStatementBlock) mapped[0];
    ParForProgramBlock parforPB = (ParForProgramBlock) mapped[1];

    // search for candidates
    boolean apply = false;
    // only if beneficial wrt problem size
    boolean largeProblem = _N >= PROB_SIZE_THRESHOLD_PARTITIONING || _Nmax >= PROB_SIZE_THRESHOLD_PARTITIONING;
    // only if we are allowed to recompile
    if (OptimizerUtils.isHybridExecutionMode() && largeProblem) {
        HashMap<String, PartitionFormat> candidates = new HashMap<>();
        for (String varName : parforSB.getReadOnlyParentVars()) {
            PartitionFormat format = parforSB.determineDataPartitionFormat(varName);
            if (format == PartitionFormat.NONE || format._dpf == PDataPartitionFormat.BLOCK_WISE_M_N) {
                continue;
            }
            candidates.put(varName, format);
        }
        apply = rFindDataPartitioningCandidates(n, candidates, vars, thetaM);
        if (apply) {
            partitionedMatrices.putAll(candidates);
        }
    }

    // NOTE: since partitioning is only applied in case of MR index access, we assume a large
    // matrix and hence always apply the remote partitioner (the benefit for large matrices
    // outweighs potentially unnecessary remote jobs for smaller matrices)
    PDataPartitioner remotePartitioner;
    if (OptimizerUtils.isSparkExecutionMode()) {
        remotePartitioner = PDataPartitioner.REMOTE_SPARK;
    } else {
        remotePartitioner = PDataPartitioner.REMOTE_MR;
    }
    PDataPartitioner chosen;
    if (apply) {
        chosen = remotePartitioner;
    } else {
        chosen = PDataPartitioner.NONE;
    }

    // modify rtprog
    parforPB.setDataPartitioner(chosen);
    // modify plan
    n.addParam(ParamType.DATA_PARTITIONER, chosen.toString());

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set data partitioner' - result=" + chosen.toString() + " (" + ProgramConverter.serializeStringCollection(partitionedMatrices.keySet()) + ")");

    // NOTE(review): the "blockwise" result is never set to true anywhere in this method,
    // so callers always receive false - confirm whether that is intended.
    return false;
}
Aggregations