Search in sources :

Example 21 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetResultMerge.

// /////
// REWRITE set result merge
// /
/**
 * Selects the result-merge strategy for the parfor node {@code n}, stores it on the node as
 * the {@code RESULT_MERGE} parameter, and then applies the rewrite to the child nodes.
 *
 * NOTE(review): this listing seems to have lost lines (no statement follows "modify rtprog"
 * and the method's closing brace is missing) -- confirm against the original file.
 *
 * @param n plan node; its mapped program block is cast to {@link ParForProgramBlock}
 * @param vars local variable map handed to the result-variable checks
 * @param inLocal forwarded to the child rewrite, and-ed with "this node is not remote"
 */
protected void rewriteSetResultMerge(OptNode n, LocalVariableMap vars, boolean inLocal) {
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    // remote merge variant: Spark in Spark execution mode, MR otherwise
    PResultMerge REMOTE = OptimizerUtils.isSparkExecutionMode() ? PResultMerge.REMOTE_SPARK : PResultMerge.REMOTE_MR;
    PResultMerge ret = null;
    // investigate details of current parfor node
    boolean flagRemoteParFOR = (n.getExecType() == getRemoteExecType());
    boolean flagLargeResult = hasLargeTotalResults(n, pfpb.getResultVariables(), vars, true);
    boolean flagRemoteLeftIndexing = hasResultMRLeftIndexing(n, pfpb.getResultVariables(), vars, true);
    boolean flagCellFormatWoCompare = determineFlagCellFormatWoCompare(pfpb.getResultVariables(), vars);
    boolean flagOnlyInMemResults = hasOnlyInMemoryResults(n, pfpb.getResultVariables(), vars, true);
    // decision cascade: the first matching case wins
    // REMOTE, if remote exec and large total results (prevent huge transfer/merge costs)
    if (flagRemoteParFOR && flagLargeResult) {
        ret = REMOTE;
    } else // CP, if all results in mem
    if (flagOnlyInMemResults) {
        ret = PResultMerge.LOCAL_MEM;
    } else // REMOTE, if remote parfor or remote left indexing, unless cell format w/o compare and copying cell files is allowed
    // (benefit for large matrices outweighs potentially unnecessary MR jobs for smaller matrices)
    if ((flagRemoteParFOR || flagRemoteLeftIndexing) && !(flagCellFormatWoCompare && ResultMergeLocalFile.ALLOW_COPY_CELLFILES)) {
        ret = REMOTE;
    } else // CP, otherwise (decide later if in mem or file-based)
        ret = PResultMerge.LOCAL_AUTOMATIC;
    // modify rtprog
    // NOTE(review): no statement follows here in this listing; presumably the update of pfpb was lost -- confirm
    // modify plan
    n.addParam(ParamType.RESULT_MERGE, ret.toString());
    // recursively apply rewrite for parfor nodes
    // (children are treated as "in local" only if this node is local as well)
    if (n.getChilds() != null)
        rInvokeSetResultMerge(n.getChilds(), vars, inLocal && !flagRemoteParFOR);
    LOG.debug(getOptMode() + " OPT: rewrite 'set result merge' - result=" + ret);
Also used : PResultMerge(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PResultMerge) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 22 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetExportReplicationFactor.

// /////
// REWRITE set export replication factor
// /
/**
 * Increasing the export replication factor is beneficial for remote execution
 * because each task will read the full input data set. This only applies to
 * matrices that are created as in-memory objects before parfor execution.
 *
 * NOTE: this rewrite requires 'set execution strategy' to be executed.
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param vars local variable map
 */
protected void rewriteSetExportReplicationFactor(OptNode n, LocalVariableMap vars) {
    // NOTE(review): this listing appears to have lost lines (closing braces and the body of
    // the final 'if (apply)'); confirm against the original file before relying on it.
    boolean apply = false;
    // -1 until a replication factor has been chosen in the remote case below
    int replication = -1;
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    // decide on the replication factor
    // (the rewrite only applies if the node's exec type is the remote exec type)
    if (n.getExecType() == getRemoteExecType()) {
        apply = true;
        // account for problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);
        // account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_EXPORT);
    // modify the runtime plan
    // NOTE(review): the statement guarded by 'if (apply)' is missing in this listing; presumably
    // it set the replication factor on pfpb, and the log call below was unconditional -- confirm
    if (apply)
    LOG.debug(getOptMode() + " OPT: rewrite 'set export replication factor' - result=" + apply + ((apply) ? " (" + replication + ")" : ""));
Also used : ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 23 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetResultPartitioning.

// /////
// REWRITE set result partitioning
// /
/**
 * Decides whether result partitioning applies to the parfor node {@code n} and, if so,
 * recompiles every candidate node via {@code recompileLIX}.
 *
 * NOTE(review): this listing appears to have lost lines (the tail of the 'apply' expression
 * and several closing braces); confirm against the original file.
 *
 * @param n plan node; its mapped program block is cast to {@link ParForProgramBlock}
 * @param M value compared against the remote memory budget {@code _rm}
 * @param vars local variable map passed to the partitionability check and the recompile
 * @return the value of {@code apply}, i.e., whether the rewrite was applicable
 * @throws DMLRuntimeException if recompiling a candidate fails (the cause is preserved)
 */
protected boolean rewriteSetResultPartitioning(OptNode n, double M, LocalVariableMap vars) {
    // preparations
    long id = n.getID();
    Object[] o = OptTreeConverter.getAbstractPlanMapping().getMappedProg(id);
    ParForProgramBlock pfpb = (ParForProgramBlock) o[1];
    // search for candidates
    // (nodes below n that getNodeList returns for the remote exec type)
    Collection<OptNode> cand = n.getNodeList(getRemoteExecType());
    // determine if applicable
    boolean apply = // ops fit in remote memory budget
    M < _rm && // at least one MR
    !cand.isEmpty() && isResultPartitionableAll(cand, pfpb.getResultVariables(), vars, // check candidates
    // NOTE(review): the remaining argument(s) and the closing ");" of the call above are missing in this listing
    // recompile LIX
    if (apply) {
        try {
            for (OptNode lix : cand) recompileLIX(lix, vars);
        } catch (Exception ex) {
            // wrap any failure, keeping the original exception as cause
            throw new DMLRuntimeException("Unable to recompile LIX.", ex);
    LOG.debug(getOptMode() + " OPT: rewrite 'set result partitioning' - result=" + apply);
    return apply;
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 24 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetDegreeOfParallelism.

// /////
// REWRITE set degree of parallelism
// /
/**
 * Computes the degree of parallelism for the parfor node {@code n} and hands the remaining
 * parallelism to {@code rAssignRemainingParallelism}. The CP case and the remote case
 * (MR/Spark) are handled in separate branches.
 *
 * NOTE(review): this listing appears to have lost lines (closing braces, the statement under
 * "set parfor degree of parallelism", and the guard of the second 'kMax = 1'); confirm against
 * the original file.
 *
 * @param n plan node; its mapped program block is cast to {@link ParForProgramBlock}
 * @param M value by which the local ({@code _lm}) or remote ({@code _rm}) memory budget is divided
 * @param flagNested selects which formula derives {@code kMax} in the remote branch
 */
protected void rewriteSetDegreeOfParallelism(OptNode n, double M, boolean flagNested) {
    ExecType type = n.getExecType();
    long id = n.getID();
    // special handling for different exec models (CP, MR, MR nested)
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
    if (type == ExecType.CP) {
        // determine local max parallelism constraint
        // (1 if parallel parfor is disabled in the configuration)
        int kMax = ConfigurationManager.isParallelParFor() ? (n.isCPOnly() ? _lkmaxCP : _lkmaxMR) : 1;
        // ensure local memory constraint (for spark more conservative in order to
        // prevent unnecessary guarded collect)
        double mem = (OptimizerUtils.isSparkExecutionMode() && !n.isCPOnly()) ? _lm / 2 : _lm;
        kMax = Math.min(kMax, (int) Math.floor(mem / M));
        // never go below a parallelism of 1
        kMax = Math.max(kMax, 1);
        // constrain max parfor parallelism by problem size
        int parforK = (int) ((_N < kMax) ? _N : kMax);
        // FIXME rework for nested parfor parallelism and body w/o gpu ops
        if (DMLScript.USE_ACCELERATOR) {
            long perGPUBudget = GPUContextPool.initialGPUMemBudget();
            double maxMemUsage = getMaxCPOnlyBudget(n);
            // if the budget fits, override parforK with the device count, capped by _N
            if (maxMemUsage < perGPUBudget) {
                parforK = GPUContextPool.getDeviceCount();
                parforK = Math.min(parforK, (int) _N);
                LOG.debug("Setting degree of parallelism + [" + parforK + "] for GPU; per GPU budget :[" + perGPUBudget + "], parfor budget :[" + maxMemUsage + "],  max parallelism per GPU : [" + parforK + "]");
        // set parfor degree of parallelism
        // NOTE(review): no statement follows here in this listing; presumably parforK was stored on pfpb/n -- confirm
        // distribute remaining parallelism
        int remainParforK = getRemainingParallelismParFor(kMax, parforK);
        int remainOpsK = getRemainingParallelismOps(_lkmaxCP, parforK);
        rAssignRemainingParallelism(n, remainParforK, remainOpsK);
    } else // ExecType.MR/ExecType.SPARK
        int kMax = -1;
        if (flagNested) {
            // determine remote max parallelism constraint
            // guaranteed <= _N (see nested)
            // per node (CP only inside)
            kMax = _rkmax / _rnk;
        } else // not nested (default)
            // determine remote max parallelism constraint
            int tmpK = (int) ((_N < _rk) ? _N : _rk);
            // per node (CP only inside)
            kMax = _rkmax / tmpK;
        // ensure remote memory constraint
        // guaranteed >= 1 (see exec strategy)
        kMax = Math.min(kMax, (int) Math.floor(_rm / M));
        if (kMax < 1)
            kMax = 1;
        // disable nested parallelism, if required
        // NOTE(review): the condition guarding the next assignment is missing in this listing;
        // as shown it would force kMax to 1 unconditionally -- confirm against the original
            kMax = 1;
        // distribute remaining parallelism and recompile parallel instructions
        rAssignRemainingParallelism(n, kMax, 1);
    LOG.debug(getOptMode() + " OPT: rewrite 'set degree of parallelism' - result=(see EXPLAIN)");
Also used : ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 25 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class ProgramConverter method createDeepCopyParForProgramBlock.

/**
 * Builds a new {@link ParForProgramBlock} from {@code pfpb}'s iteration variable, parfor
 * parameters, and result variables, then copies its statement block and its
 * from/to/increment/exit instruction sets into the new block.
 *
 * NOTE(review): this listing appears to have lost lines (the 'else' between the two
 * constructor calls, some statements, and the closing brace); confirm against the original file.
 *
 * @param pfpb parfor program block to copy
 * @param pid passed through to the statement-block and instruction-set copies
 * @param IDPrefix -1 selects the constructor without an ID prefix; otherwise passed to the constructor
 * @param prog program passed to the new block and to the instruction-set copies
 * @param fnStack passed through to the instruction-set and child-block copies
 * @param fnCreated passed through to the instruction-set and child-block copies
 * @param plain if true, child blocks are deep-copied
 * @param forceDeepCopy if true, child blocks are deep-copied
 * @return the newly created parfor program block
 */
public static ParForProgramBlock createDeepCopyParForProgramBlock(ParForProgramBlock pfpb, long pid, int IDPrefix, Program prog, HashSet<String> fnStack, HashSet<String> fnCreated, boolean plain, boolean forceDeepCopy) {
    ParForProgramBlock tmpPB = null;
    if (// still on master node
    IDPrefix == -1)
        tmpPB = new ParForProgramBlock(prog, pfpb.getIterVar(), pfpb.getParForParams(), pfpb.getResultVariables());
        // NOTE(review): an 'else' appears to be missing before the next assignment in this listing -- confirm
        // child of remote ParWorker at any level
        tmpPB = new ParForProgramBlock(IDPrefix, prog, pfpb.getIterVar(), pfpb.getParForParams(), pfpb.getResultVariables());
    tmpPB.setStatementBlock(createForStatementBlockCopy((ForStatementBlock) pfpb.getStatementBlock(), pid, plain, forceDeepCopy));
    // NOTE(review): the two comments below have no statement attached in this listing
    // already done in top-level parfor
    // already done in top-level parfor
    // copy the four loop-control instruction sets (last argument is 'true' for each)
    tmpPB.setFromInstructions(createDeepCopyInstructionSet(pfpb.getFromInstructions(), pid, IDPrefix, prog, fnStack, fnCreated, plain, true));
    tmpPB.setToInstructions(createDeepCopyInstructionSet(pfpb.getToInstructions(), pid, IDPrefix, prog, fnStack, fnCreated, plain, true));
    tmpPB.setIncrementInstructions(createDeepCopyInstructionSet(pfpb.getIncrementInstructions(), pid, IDPrefix, prog, fnStack, fnCreated, plain, true));
    tmpPB.setExitInstructions(createDeepCopyInstructionSet(pfpb.getExitInstructions(), pid, IDPrefix, prog, fnStack, fnCreated, plain, true));
    // NOTE(review): the beginning of the following comment (its point "(1)") is missing in this listing
    // and (2) leave placeholders as they are. However, if plain, an explicit deep copy is requested.
    if (plain || forceDeepCopy)
        tmpPB.setChildBlocks(rcreateDeepCopyProgramBlocks(pfpb.getChildBlocks(), pid, IDPrefix, fnStack, fnCreated, plain, forceDeepCopy));
    // NOTE(review): as shown, child blocks are not set at all when neither flag is true; an 'else' branch may have been lost -- confirm
    return tmpPB;
Also used : ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) ForStatementBlock(org.apache.sysml.parser.ForStatementBlock) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)


ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)57 ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock)24 FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock)22 ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock)21 IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock)20 WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock)20 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)17 ArrayList (java.util.ArrayList)15 ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock)10 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)9 ExternalFunctionProgramBlock (org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock)9 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject) HashSet (java.util.HashSet)8 StatementBlock (org.apache.sysml.parser.StatementBlock)6 Instruction (org.apache.sysml.runtime.instructions.Instruction)6 ForStatementBlock (org.apache.sysml.parser.ForStatementBlock)5 Data (org.apache.sysml.runtime.instructions.cp.Data)5 Hop (org.apache.sysml.hops.Hop)4 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)4 DMLProgram (org.apache.sysml.parser.DMLProgram)4