Search in sources :

Example 6 with ExecType

use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetExecutionStategy.

// /////
// REWRITE set execution strategy
// /
/**
 * Picks local (CP) or remote execution for a parfor node, applies the choice to the node and
 * to its mapped {@link ParForProgramBlock}, and reports whether a recompile is needed.
 *
 * @param n       parfor optimizer node whose execution type is set
 * @param M0      estimate handed to {@code isLargeProblem}
 * @param M       memory estimate compared against the local ({@code _lm}) and remote ({@code _rm}) budgets
 * @param M2      memory estimate checked against {@code _rm} when all instructions could be forced to CP
 * @param M3      memory estimate checked against {@code _rm} for the partitioned case
 * @param flagLIX if true (and remote execution is admissible), remote execution is selected
 * @return true iff the resulting mode is REMOTE_MR or REMOTE_SPARK and the node is not already CP-only
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    final boolean cpOnly = n.isCPOnly();
    final boolean cpOnlyFeasible = cpOnly || isCPOnlyPossible(n, _rm);
    final String partitionerName = n.getParam(ParamType.DATA_PARTITIONER);
    final ExecType remoteType = getRemoteExecType();
    final PDataPartitioner remotePartitioner;
    if (OptimizerUtils.isSparkExecutionMode()) {
        remotePartitioner = PDataPartitioner.REMOTE_SPARK;
    } else {
        remotePartitioner = PDataPartitioner.REMOTE_MR;
    }

    // Hard requirements for remote parfor: parallel parfor is enabled and the body fits into
    // the remote memory budget (as-is, partitioned, or after forcing all instructions to CP).
    final boolean remoteAdmissible = ConfigurationManager.isParallelParFor()
        && ((cpOnly && (M <= _rm || M3 <= _rm)) || (cpOnlyFeasible && M2 <= _rm));

    // Default is local execution; it also covers "mr instructions in body, or rm too small".
    ExecType chosen = ExecType.CP;
    if (remoteAdmissible) {
        // Estimated locally exploitable parallelism; the factor of 2 accounts for
        // hyper-threading and keeps the optimizer from going remote too eagerly.
        final int localPar = (int) Math.min(_lk, Math.floor(_lm / M));
        final int twiceLocalPar = 2 * localPar;

        // Any single reason is sufficient; they are evaluated in order with short-circuiting.
        final boolean preferRemote =
            // local parallelism is clearly under-utilized (incl conditional partitioning)
            (twiceLocalPar < _lk && twiceLocalPar < _N && twiceLocalPar < _rk)
            // problem is large enough and remote parallelism exceeds local parallelism
            || (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0))
            // remote ops locally, but CP only when run remotely (fewer overall jobs)
            || (!cpOnly && cpOnlyFeasible)
            // required by the LIX rewrite
            || flagLIX
            // remote data partitioning distributes the data over all nodes anyway
            || (partitionerName != null
                && partitionerName.equals(remotePartitioner.toString())
                && !InfrastructureAnalyzer.isLocalMode());

        if (preferRemote) {
            chosen = remoteType;
        }
    }
    n.setExecType(chosen);

    // Propagate the decision to the actual program block.
    final long nodeId = n.getID();
    final ParForProgramBlock block = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(nodeId)[1];
    final PExecMode execMode = n.getExecType().toParForExecMode();
    block.setExecMode(execMode);

    // Recompilation w.r.t. the remote memory budget is only needed when going remote
    // with a body that is not CP-only yet.
    final boolean remoteMode = (execMode == PExecMode.REMOTE_MR || execMode == PExecMode.REMOTE_SPARK);
    final boolean recompile = remoteMode && !cpOnly;

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + execMode + " (recompile=" + recompile + ")");
    return recompile;
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 7 with ExecType

use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project incubator-systemml by apache.

the class OptimizerRuleBased method getMaxCPOnlyBudget.

/**
 * Computes the largest CP memory estimate of any leaf below (or at) the given node.
 * <p>
 * Inner nodes return the maximum over their children, computed recursively. A leaf contributes
 * the cost model's CP memory estimate, unless its execution type equals the remote execution
 * type, its hop is forced to MR or SPARK, or the estimate is at least
 * {@code OptimizerUtils.DEFAULT_SIZE} (worst-case estimate, optimistically ignored); in those
 * cases the leaf contributes 0.
 *
 * @param n the parfor {@link OptNode} (or one of its descendants during recursion)
 * @return the maximum memory needed for any operation inside a parfor in CP execution mode
 */
protected double getMaxCPOnlyBudget(OptNode n) {
    ExecType nodeType = n.getExecType();

    // inner node: maximum over all children
    if (!n.isLeaf()) {
        double maxOverChildren = 0;
        for (OptNode child : n.getChilds()) {
            maxOverChildren = Math.max(maxOverChildren, getMaxCPOnlyBudget(child));
        }
        return maxOverChildren;
    }

    // leaf already marked with the remote execution type: not counted
    if (nodeType == getRemoteExecType()) {
        return 0;
    }

    Hop hop = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
    // forced remote execution (e.g., -exec=hadoop): not counted
    if (hop.getForcedExecType() == LopProperties.ExecType.MR || hop.getForcedExecType() == LopProperties.ExecType.SPARK) {
        return 0;
    }

    double cpMem = _cost.getLeafNodeEstimate(TestMeasure.MEMORY_USAGE, n, LopProperties.ExecType.CP);
    // memory estimate for worst case scenario: optimistically ignoring this
    if (cpMem >= OptimizerUtils.DEFAULT_SIZE) {
        return 0;
    }
    return Math.max(0d, cpMem);
}
Also used : Hop(org.apache.sysml.hops.Hop) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType)

Example 8 with ExecType

use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project systemml by apache.

the class OptimizerRuleBased method rewriteSetDegreeOfParallelism.

// /////
// REWRITE set degree of parallelism
// /
/**
 * Sets the degree of parallelism on the parfor node and its mapped {@link ParForProgramBlock},
 * then hands the remaining parallelism to the subtree via {@code rAssignRemainingParallelism}.
 * <p>
 * For local (CP) execution the parallelism is bounded by the configured local maximum, by
 * {@code floor(localMem / M)} and by the number of iterations {@code _N}; with the accelerator
 * enabled it may be overridden by the GPU device count. For remote execution the parfor
 * parallelism is {@code _rnk} (nested) or {@code min(_N, _rk)} (default), and the per-task
 * parallelism is bounded by {@code floor(_rm / M)}.
 *
 * @param n          parfor optimizer node to configure
 * @param M          memory estimate used to divide the local/remote memory budget
 * @param flagNested if true, the remote branch uses the nested setting ({@code _rnk})
 */
protected void rewriteSetDegreeOfParallelism(OptNode n, double M, boolean flagNested) {
    ExecType type = n.getExecType();
    long id = n.getID();
    // special handling for different exec models (CP, MR, MR nested)
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
    if (type == ExecType.CP) {
        // determine local max parallelism constraint (1 if parallel parfor is disabled)
        int kMax = ConfigurationManager.isParallelParFor() ? (n.isCPOnly() ? _lkmaxCP : _lkmaxMR) : 1;
        // ensure local memory constraint (for spark more conservative in order to
        // prevent unnecessary guarded collect)
        double mem = (OptimizerUtils.isSparkExecutionMode() && !n.isCPOnly()) ? _lm / 2 : _lm;
        kMax = Math.min(kMax, (int) Math.floor(mem / M));
        // never go below a single worker, even if M exceeds the budget
        kMax = Math.max(kMax, 1);
        // constrain max parfor parallelism by problem size
        int parforK = (int) ((_N < kMax) ? _N : kMax);
        // FIXME rework for nested parfor parallelism and body w/o gpu ops
        if (DMLScript.USE_ACCELERATOR) {
            long perGPUBudget = GPUContextPool.initialGPUMemBudget();
            double maxMemUsage = getMaxCPOnlyBudget(n);
            if (maxMemUsage < perGPUBudget) {
                // body fits into a single GPU: use one worker per device, capped by _N
                parforK = GPUContextPool.getDeviceCount();
                parforK = Math.min(parforK, (int) _N);
                // fixed: message previously contained a stray literal '+' before the first bracket
                LOG.debug("Setting degree of parallelism [" + parforK + "] for GPU; per GPU budget :[" + perGPUBudget + "], parfor budget :[" + maxMemUsage + "],  max parallelism per GPU : [" + parforK + "]");
            }
        }
        // set parfor degree of parallelism
        pfpb.setDegreeOfParallelism(parforK);
        n.setK(parforK);
        // distribute remaining parallelism
        int remainParforK = getRemainingParallelismParFor(kMax, parforK);
        int remainOpsK = getRemainingParallelismOps(_lkmaxCP, parforK);
        rAssignRemainingParallelism(n, remainParforK, remainOpsK);
    } else // ExecType.MR/ExecType.SPARK
    {
        int kMax = -1;
        if (flagNested) {
            // determine remote max parallelism constraint
            // guaranteed <= _N (see nested)
            pfpb.setDegreeOfParallelism(_rnk);
            n.setK(_rnk);
            // per node (CP only inside)
            kMax = _rkmax / _rnk;
        } else // not nested (default)
        {
            // determine remote max parallelism constraint
            int tmpK = (int) ((_N < _rk) ? _N : _rk);
            pfpb.setDegreeOfParallelism(tmpK);
            n.setK(tmpK);
            // per node (CP only inside)
            kMax = _rkmax / tmpK;
        }
        // ensure remote memory constraint
        // guaranteed >= 1 (see exec strategy)
        kMax = Math.min(kMax, (int) Math.floor(_rm / M));
        if (kMax < 1)
            kMax = 1;
        // disable nested parallelism, if required
        if (!ALLOW_REMOTE_NESTED_PARALLELISM)
            kMax = 1;
        // distribute remaining parallelism and recompile parallel instructions
        rAssignRemainingParallelism(n, kMax, 1);
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set degree of parallelism' - result=(see EXPLAIN)");
}
Also used : ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 9 with ExecType

use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project systemml by apache.

the class OptimizerRuleBased method isCPOnlyPossible.

/**
 * Checks whether the subtree rooted at the given node could run entirely in CP within the
 * given memory budget.
 * <p>
 * A node qualifies directly if its execution type is CP. A leaf with the remote execution type
 * also qualifies if its hop is not forced to MR or SPARK, has valid CP dimensions and size, and
 * its CP memory estimate does not exceed the budget. An inner node additionally requires that
 * all of its children qualify.
 *
 * @param n         root of the subtree to check
 * @param memBudget memory budget the CP memory estimate of each remote leaf is compared against
 * @return true iff the whole subtree can be executed in CP under the given budget
 */
protected boolean isCPOnlyPossible(OptNode n, double memBudget) {
    ExecType nodeType = n.getExecType();
    boolean feasible = (nodeType == ExecType.CP);

    if (n.isLeaf()) {
        if (nodeType == getRemoteExecType()) {
            Hop hop = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
            // forced remote execution, e.g., -exec=hadoop
            boolean forcedRemote = hop.getForcedExecType() == LopProperties.ExecType.MR
                || hop.getForcedExecType() == LopProperties.ExecType.SPARK;
            // integer dims are required for CP execution
            if (!forcedRemote && hop.hasValidCPDimsAndSize()) {
                double cpMem = _cost.getLeafNodeEstimate(TestMeasure.MEMORY_USAGE, n, LopProperties.ExecType.CP);
                feasible = feasible || (cpMem <= memBudget);
            }
        }
        return feasible;
    }

    // inner node: every child has to qualify; stop at the first failure
    for (OptNode child : n.getChilds()) {
        if (!feasible) {
            return false;
        }
        feasible = isCPOnlyPossible(child, memBudget);
    }
    return feasible;
}
Also used : Hop(org.apache.sysml.hops.Hop) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType)

Example 10 with ExecType

use of org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType in project systemml by apache.

the class OptimizerRuleBased method rewriteSetExecutionStategy.

// /////
// REWRITE set execution strategy
// /
/**
 * Picks local (CP) or remote execution for a parfor node, applies the choice to the node and
 * to its mapped {@link ParForProgramBlock}, and reports whether a recompile is needed.
 *
 * @param n       parfor optimizer node whose execution type is set
 * @param M0      estimate handed to {@code isLargeProblem}
 * @param M       memory estimate compared against the local ({@code _lm}) and remote ({@code _rm}) budgets
 * @param M2      memory estimate checked against {@code _rm} when all instructions could be forced to CP
 * @param M3      memory estimate checked against {@code _rm} for the partitioned case
 * @param flagLIX if true (and remote execution is admissible), remote execution is selected
 * @return true iff the resulting mode is REMOTE_MR or REMOTE_SPARK and the node is not already CP-only
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    final boolean cpOnly = n.isCPOnly();
    final boolean cpOnlyFeasible = cpOnly || isCPOnlyPossible(n, _rm);
    final String partitionerName = n.getParam(ParamType.DATA_PARTITIONER);
    final ExecType remoteType = getRemoteExecType();
    final PDataPartitioner remotePartitioner;
    if (OptimizerUtils.isSparkExecutionMode()) {
        remotePartitioner = PDataPartitioner.REMOTE_SPARK;
    } else {
        remotePartitioner = PDataPartitioner.REMOTE_MR;
    }

    // Hard requirements for remote parfor: parallel parfor is enabled and the body fits into
    // the remote memory budget (as-is, partitioned, or after forcing all instructions to CP).
    final boolean remoteAdmissible = ConfigurationManager.isParallelParFor()
        && ((cpOnly && (M <= _rm || M3 <= _rm)) || (cpOnlyFeasible && M2 <= _rm));

    // Default is local execution; it also covers "mr instructions in body, or rm too small".
    ExecType chosen = ExecType.CP;
    if (remoteAdmissible) {
        // Estimated locally exploitable parallelism; the factor of 2 accounts for
        // hyper-threading and keeps the optimizer from going remote too eagerly.
        final int localPar = (int) Math.min(_lk, Math.floor(_lm / M));
        final int twiceLocalPar = 2 * localPar;

        // Any single reason is sufficient; they are evaluated in order with short-circuiting.
        final boolean preferRemote =
            // local parallelism is clearly under-utilized (incl conditional partitioning)
            (twiceLocalPar < _lk && twiceLocalPar < _N && twiceLocalPar < _rk)
            // problem is large enough and remote parallelism exceeds local parallelism
            || (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0))
            // remote ops locally, but CP only when run remotely (fewer overall jobs)
            || (!cpOnly && cpOnlyFeasible)
            // required by the LIX rewrite
            || flagLIX
            // remote data partitioning distributes the data over all nodes anyway
            || (partitionerName != null
                && partitionerName.equals(remotePartitioner.toString())
                && !InfrastructureAnalyzer.isLocalMode());

        if (preferRemote) {
            chosen = remoteType;
        }
    }
    n.setExecType(chosen);

    // Propagate the decision to the actual program block.
    final long nodeId = n.getID();
    final ParForProgramBlock block = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(nodeId)[1];
    final PExecMode execMode = n.getExecType().toParForExecMode();
    block.setExecMode(execMode);

    // Recompilation w.r.t. the remote memory budget is only needed when going remote
    // with a body that is not CP-only yet.
    final boolean remoteMode = (execMode == PExecMode.REMOTE_MR || execMode == PExecMode.REMOTE_SPARK);
    final boolean recompile = remoteMode && !cpOnly;

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + execMode + " (recompile=" + recompile + ")");
    return recompile;
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Aggregations

ExecType (org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType)10 Hop (org.apache.sysml.hops.Hop)4 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)4 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)4 PExecMode (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode)4 HashMap (java.util.HashMap)2 HashSet (java.util.HashSet)2 ResultVar (org.apache.sysml.parser.ParForStatementBlock.ResultVar)2 PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)2 PDataPartitioner (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner)2 PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat)2