Search in sources :

Example 91 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rewriteSetExportReplicationFactor.

// /////
// REWRITE set export replication factor
// /
/**
 * Increasing the export replication factor is beneficial for remote execution
 * because each task will read the full input data set. This only applies to
 * matrices that are created as in-memory objects before parfor execution.
 *
 * NOTE: this rewrite requires 'set execution strategy' to be executed.
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param vars local variable map
 */
protected void rewriteSetExportReplicationFactor(OptNode n, LocalVariableMap vars) {
    // resolve the runtime parfor program block mapped to this plan node
    ParForProgramBlock parfor = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    // the rewrite only applies if the node's exec type is the remote exec type
    final boolean remote = (n.getExecType() == getRemoteExecType());
    int factor = -1;
    if (remote) {
        // bound by problem and cluster constraints (_N, _rnk) ...
        factor = (int) Math.min(_N, _rnk);
        // ... and by the internal maximum (note hadoop will warn if max > 10)
        factor = (int) Math.min(factor, MAX_REPLICATION_FACTOR_EXPORT);
        // modify the runtime plan
        parfor.setExportReplicationFactor(factor);
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set export replication factor' - result=" + remote + (remote ? " (" + factor + ")" : ""));
}
Also used : ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 92 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rFindRecursiveParFor.

/**
 * Walks the plan tree below {@code n} and collects the program blocks of all
 * PARFOR nodes that are visited in a recursive context. The context becomes
 * recursive once a recursive FUNCCALL node is entered and stays so for the
 * whole subtree below it.
 *
 * @param n current plan node
 * @param cand output set receiving the parfor program blocks found
 * @param recContext true if {@code n} is visited in a recursive context
 */
protected void rFindRecursiveParFor(OptNode n, HashSet<ParForProgramBlock> cand, boolean recContext) {
    // descend into the children first
    if (!n.isLeaf()) {
        for (OptNode child : n.getChilds()) {
            boolean entersRecursion = child.getNodeType() == NodeType.FUNCCALL && child.isRecursive();
            // a recursive function call switches the context on; otherwise it is inherited
            boolean childContext = entersRecursion ? true : recContext;
            rFindRecursiveParFor(child, cand, childContext);
        }
    }
    // only parfor nodes in a recursive context are candidates
    if (!recContext || n.getNodeType() != NodeType.PARFOR) {
        return;
    }
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    cand.add((ParForProgramBlock) mapped[1]);
}
Also used : ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 93 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rewriteInjectSparkRepartition.

// /////
// REWRITE inject spark repartition for zipmm
// /
protected void rewriteInjectSparkRepartition(OptNode n, LocalVariableMap vars) {
    // statement block and program block mapped to the root parfor node
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock pfsb = (ParForStatementBlock) mapped[0];
    ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];
    ArrayList<String> ret = new ArrayList<>();
    // preconditions: spark exec mode, local (CP) parfor, and at least 2 iterations
    boolean applicable = OptimizerUtils.isSparkExecutionMode()
        && n.getExecType() == ExecType.CP
        && _N > 1;
    if (applicable) {
        // candidates collected from zipmm spark instructions
        HashSet<String> zipmmVars = new HashSet<>();
        rCollectZipmmPartitioningCandidates(n, zipmmVars);
        HashSet<String> readOnlyVars = new HashSet<>(pfsb.getReadOnlyParentVars());
        for (String name : zipmmVars) {
            // prune updated candidates: keep read-only parent variables only
            if (!readOnlyVars.contains(name)) {
                continue;
            }
            // prune non-matrix candidates
            Object data = vars.get(name);
            if (!(data instanceof MatrixObject)) {
                continue;
            }
            // prune small candidates: keep matrices whose estimated size exceeds the local memory budget
            MatrixObject mo = (MatrixObject) data;
            double sparsity = OptimizerUtils.getSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getNnz());
            double estSize = OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), sparsity);
            if (estSize > OptimizerUtils.getLocalMemBudget()) {
                ret.add(name);
            }
        }
        // apply rewrite to parfor pb
        if (!ret.isEmpty()) {
            pfpb.setSparkRepartitionVariables(ret);
        }
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'inject spark input repartition' - result=" + ret.size() + " (" + ProgramConverter.serializeStringCollection(ret) + ")");
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ArrayList(java.util.ArrayList) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Example 94 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rewriteRemoveRecursiveParFor.

// /////
// REWRITE remove recursive parfor
// /
protected void rewriteRemoveRecursiveParFor(OptNode n, LocalVariableMap vars) {
    // collect all parfor program blocks found in a recursive context below n
    HashSet<ParForProgramBlock> recursiveParFors = new HashSet<>();
    rFindRecursiveParFor(n, recursiveParFors, false);
    // number of removed parfor
    int removed = 0;
    if (!recursiveParFors.isEmpty()) {
        // unfold the recursive function if the parfor mapped to n is itself among the candidates
        try {
            Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
            ParForProgramBlock rootParFor = (ParForProgramBlock) mapped[1];
            if (recursiveParFors.contains(rootParFor)) {
                rFindAndUnfoldRecursiveFunction(n, rootParFor, recursiveParFors, vars);
            }
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
        // remove recursive parfor (parfor to for)
        removed = removeRecursiveParFor(n, recursiveParFors);
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'remove recursive parfor' - result=" + recursiveParFors.size() + "/" + removed);
}
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Example 95 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rewriteSetExecutionStategy.

// /////
// REWRITE set execution strategy
// /
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    final boolean cpOnly = n.isCPOnly();
    final boolean cpOnlyPossible = cpOnly || isCPOnlyPossible(n, _rm);
    final String partitioner = n.getParam(ParamType.DATA_PARTITIONER);
    final ExecType remoteType = getRemoteExecType();
    final PDataPartitioner remotePartitioner = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;

    // hard requirements for remote parfor: remote execution allowed, and either
    //  - all inst already in cp and fit in remote mem, or
    //  - all inst already in cp and fit partitioned in remote mem, or
    //  - all inst forced to cp fit in remote mem
    final boolean remoteFeasible = ConfigurationManager.isParallelParFor()
        && ((cpOnly && M <= _rm) || (cpOnly && M3 <= _rm) || (cpOnlyPossible && M2 <= _rm));

    // if the requirements hold, remote vs. local is an optimization decision;
    // otherwise (mr instructions in body, or rm too small) the parfor stays local
    boolean goRemote = false;
    if (remoteFeasible) {
        // estimated local exploited par
        final int cpk = (int) Math.min(_lk, Math.floor(_lm / M));
        goRemote =
            // factor of 2 accounts for hyper-threading and prevents too eager remote parfor
            (2 * cpk < _lk && 2 * cpk < _N && 2 * cpk < _rk)
            // problem is large enough and remote parallelism is larger than local
            || (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0))
            // MR operations in local, but CP only in remote (less overall MR jobs)
            || (!cpOnly && cpOnlyPossible)
            // necessary for LIX rewrite
            || flagLIX
            // remote data partitioning, because data will be distributed on all nodes
            || (partitioner != null && partitioner.equals(remotePartitioner.toString()) && !InfrastructureAnalyzer.isLocalMode());
    }
    n.setExecType(goRemote ? remoteType : ExecType.CP);

    // propagate the decision to the runtime parfor program block
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];
    PExecMode mode = n.getExecType().toParForExecMode();
    pfpb.setExecMode(mode);

    // recompilation is required for a remote mode whose body is not cp-only
    boolean remoteMode = (mode == PExecMode.REMOTE_MR || mode == PExecMode.REMOTE_SPARK);
    boolean requiresRecompile = remoteMode && !cpOnly;
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + mode + " (recompile=" + requiresRecompile + ")");
    return requiresRecompile;
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Aggregations

ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock): 105; ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock): 41; FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock): 37; ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock): 35; IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock): 33; WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock): 33; MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 31; ArrayList (java.util.ArrayList): 22; ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock): 20; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 16; ExternalFunctionProgramBlock (org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock): 16; RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 16; HashSet (java.util.HashSet): 15; StatementBlock (org.apache.sysml.parser.StatementBlock): 11; ForStatementBlock (org.apache.sysml.parser.ForStatementBlock): 10; Instruction (org.apache.sysml.runtime.instructions.Instruction): 10; DMLProgram (org.apache.sysml.parser.DMLProgram): 8; IfStatementBlock (org.apache.sysml.parser.IfStatementBlock): 8; WhileStatementBlock (org.apache.sysml.parser.WhileStatementBlock): 8; Program (org.apache.sysml.runtime.controlprogram.Program): 8