Search in sources :

Example 1 with PExecMode

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode in project incubator-systemml by apache.

the class OptimizerConstrained method optimize.

/**
 * Main optimization procedure.
 *
 * Transformation-based heuristic (rule-based) optimization
 * (no use of sb, direct change of pb).
 *
 * <p>Flow as visible in this method: estimate the memory of the plan root,
 * apply the data/result partitioning rewrites, choose the execution strategy,
 * run the rewrites specific to the chosen exec type, and finish with the
 * result-merge, recompile-budget and cleanup rewrites.
 *
 * @param sb   parfor statement block; not referenced in this method body
 * @param pb   parfor program block; not referenced directly in this method body
 * @param plan optimizer tree; only its root node is used here
 * @param est  cost estimator; stored in {@code _cost} and used for every memory estimate
 * @param ec   execution context; supplies the variables handed to the rewrites
 * @return always {@code true}, including the early exit for a leaf root node
 */
@Override
public boolean optimize(ParForStatementBlock sb, ParForProgramBlock pb, OptTree plan, CostEstimator est, ExecutionContext ec) {
    LOG.debug("--- " + getOptMode() + " OPTIMIZER -------");
    OptNode pn = plan.getRoot();
    // early abort for empty parfor body
    if (pn.isLeaf())
        return true;
    // ANALYZE infrastructure properties
    super.analyzeProblemAndInfrastructure(pn);
    _cost = est;
    // debug and warnings output
    // NOTE(review): the debug message below ends with ")." but never opens a parenthesis.
    LOG.debug(getOptMode() + " OPT: Optimize with local_max_mem=" + toMB(_lm) + " and remote_max_mem=" + toMB(_rm) + ").");
    if (_rnk <= 0 || _rk <= 0)
        LOG.warn(getOptMode() + " OPT: Optimize for inactive cluster (num_nodes=" + _rnk + ", num_map_slots=" + _rk + ").");
    // ESTIMATE memory consumption
    // remember exec type and k so they can be restored after the serial what-if estimate
    ExecType oldET = pn.getExecType();
    int oldK = pn.getK();
    // for basic mem consumption: temporarily switch the root to a serial parfor
    pn.setSerialParFor();
    double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    // restore the settings changed for the estimate
    pn.setExecType(oldET);
    pn.setK(oldK);
    LOG.debug(getOptMode() + " OPT: estimated mem (serial exec) M=" + toMB(M0a));
    // OPTIMIZE PARFOR PLAN
    // rewrite 1: data partitioning (incl. log. recompile RIX)
    // partitionedMatrices is filled here and reused by several later rewrites
    HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>();
    rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget());
    // reestimate
    double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    // rewrite 2: remove unnecessary compare matrix
    rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
    // rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX)
    boolean flagLIX = super.rewriteSetResultPartitioning(pn, M0b, ec.getVariables());
    // reestimate
    double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec) M=" + toMB(M1));
    // determine memory consumption for what-if: all-cp or partitioned
    // M2: estimate with exec type CP passed to the estimator
    double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec, all CP) M=" + toMB(M2));
    // M3: estimate with the boolean flag set (logged as "cond partitioning")
    double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
    LOG.debug(getOptMode() + " OPT: estimated new mem (cond partitioning) M=" + toMB(M3));
    // rewrite 4: execution strategy
    // keep old exec mode (captured before the strategy rewrite, consumed by rewrite 12)
    PExecMode tmpmode = getPExecMode(pn);
    boolean flagRecompMR = rewriteSetExecutionStategy(pn, M0a, M1, M2, M3, flagLIX);
    // exec-type-specific rewrites
    if (pn.getExecType() == getRemoteExecType()) {
        // plan exceeds remote memory as-is, but fits with conditional partitioning
        if (M1 > _rm && M3 <= _rm) {
            // rewrite 1: data partitioning (apply conditional partitioning)
            rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, M3);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        if (flagRecompMR) {
            // rewrite 5: set operations exec type
            rewriteSetOperationsExecType(pn, flagRecompMR);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        // rewrite 6: data colocation
        super.rewriteDataColocation(pn, ec.getVariables());
        // rewrite 7: rewrite set partition replication factor
        super.rewriteSetPartitionReplicationFactor(pn, partitionedMatrices, ec.getVariables());
        // rewrite 8: set export replication factor
        super.rewriteSetExportReplicationFactor(pn, ec.getVariables());
        // (no rewrite 9 is applied in this method)
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        rewriteSetTaskPartitioner(pn, false, flagLIX);
        // rewrite 12: fused data partitioning and execution
        rewriteSetFusedDataPartitioningExecution(pn, M1, flagLIX, partitionedMatrices, ec.getVariables(), tmpmode);
        // rewrite 13: transpose sparse vector operations
        super.rewriteSetTranposeSparseVectorOperations(pn, partitionedMatrices, ec.getVariables());
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        // rewrite 15: disable CP caching (receives the in-place result variables from rewrite 14)
        super.rewriteDisableCPCaching(pn, inplaceResultVars, ec.getVariables());
    } else // if( pn.getExecType() == ExecType.CP )
    {
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        // flagLIX always false
        rewriteSetTaskPartitioner(pn, false, false);
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        if (!OptimizerUtils.isSparkExecutionMode()) {
            // rewrite 16: runtime piggybacking
            super.rewriteEnableRuntimePiggybacking(pn, ec.getVariables(), partitionedMatrices);
        } else {
            // rewrite 17: checkpoint injection for parfor loop body
            super.rewriteInjectSparkLoopCheckpointing(pn);
            // rewrite 18: repartition read-only inputs for zipmm
            super.rewriteInjectSparkRepartition(pn, ec.getVariables());
            // rewrite 19: eager caching for checkpoint rdds
            super.rewriteSetSparkEagerRDDCaching(pn, ec.getVariables());
        }
    }
    // rewrite 20: set result merge
    rewriteSetResultMerge(pn, ec.getVariables(), true);
    // rewrite 21: set local recompile memory budget
    super.rewriteSetRecompileMemoryBudget(pn);
    // /////
    // Final rewrites for cleanup / minor improvements
    // rewrite 22: parfor (in recursive functions) to for
    super.rewriteRemoveRecursiveParFor(pn, ec.getVariables());
    // rewrite 23: parfor (par=1) to for
    super.rewriteRemoveUnnecessaryParFor(pn);
    // info optimization result: the plan counter is overwritten with 1 here
    _numEvaluatedPlans = 1;
    return true;
}
Also used : HashMap(java.util.HashMap) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) HashSet(java.util.HashSet)

Example 2 with PExecMode

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode in project systemml by apache.

the class OptimizerConstrained method optimize.

/**
 * Main optimization procedure.
 *
 * Transformation-based heuristic (rule-based) optimization
 * (no use of sb, direct change of pb).
 *
 * <p>Flow as visible in this method: estimate the memory of the plan root,
 * apply the data/result partitioning rewrites, choose the execution strategy,
 * run the rewrites specific to the chosen exec type, and finish with the
 * result-merge, recompile-budget and cleanup rewrites.
 *
 * @param sb   parfor statement block; not referenced in this method body
 * @param pb   parfor program block; not referenced directly in this method body
 * @param plan optimizer tree; only its root node is used here
 * @param est  cost estimator; stored in {@code _cost} and used for every memory estimate
 * @param ec   execution context; supplies the variables handed to the rewrites
 * @return always {@code true}, including the early exit for a leaf root node
 */
@Override
public boolean optimize(ParForStatementBlock sb, ParForProgramBlock pb, OptTree plan, CostEstimator est, ExecutionContext ec) {
    LOG.debug("--- " + getOptMode() + " OPTIMIZER -------");
    OptNode pn = plan.getRoot();
    // early abort for empty parfor body
    if (pn.isLeaf())
        return true;
    // ANALYZE infrastructure properties
    super.analyzeProblemAndInfrastructure(pn);
    _cost = est;
    // debug and warnings output
    // NOTE(review): the debug message below ends with ")." but never opens a parenthesis.
    LOG.debug(getOptMode() + " OPT: Optimize with local_max_mem=" + toMB(_lm) + " and remote_max_mem=" + toMB(_rm) + ").");
    if (_rnk <= 0 || _rk <= 0)
        LOG.warn(getOptMode() + " OPT: Optimize for inactive cluster (num_nodes=" + _rnk + ", num_map_slots=" + _rk + ").");
    // ESTIMATE memory consumption
    // remember exec type and k so they can be restored after the serial what-if estimate
    ExecType oldET = pn.getExecType();
    int oldK = pn.getK();
    // for basic mem consumption: temporarily switch the root to a serial parfor
    pn.setSerialParFor();
    double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    // restore the settings changed for the estimate
    pn.setExecType(oldET);
    pn.setK(oldK);
    LOG.debug(getOptMode() + " OPT: estimated mem (serial exec) M=" + toMB(M0a));
    // OPTIMIZE PARFOR PLAN
    // rewrite 1: data partitioning (incl. log. recompile RIX)
    // partitionedMatrices is filled here and reused by several later rewrites
    HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>();
    rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget());
    // reestimate
    double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    // rewrite 2: remove unnecessary compare matrix
    rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
    // rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX)
    boolean flagLIX = super.rewriteSetResultPartitioning(pn, M0b, ec.getVariables());
    // reestimate
    double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec) M=" + toMB(M1));
    // determine memory consumption for what-if: all-cp or partitioned
    // M2: estimate with exec type CP passed to the estimator
    double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec, all CP) M=" + toMB(M2));
    // M3: estimate with the boolean flag set (logged as "cond partitioning")
    double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
    LOG.debug(getOptMode() + " OPT: estimated new mem (cond partitioning) M=" + toMB(M3));
    // rewrite 4: execution strategy
    // keep old exec mode (captured before the strategy rewrite, consumed by rewrite 12)
    PExecMode tmpmode = getPExecMode(pn);
    boolean flagRecompMR = rewriteSetExecutionStategy(pn, M0a, M1, M2, M3, flagLIX);
    // exec-type-specific rewrites
    if (pn.getExecType() == getRemoteExecType()) {
        // plan exceeds remote memory as-is, but fits with conditional partitioning
        if (M1 > _rm && M3 <= _rm) {
            // rewrite 1: data partitioning (apply conditional partitioning)
            rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, M3);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        if (flagRecompMR) {
            // rewrite 5: set operations exec type
            rewriteSetOperationsExecType(pn, flagRecompMR);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        // rewrite 6: data colocation
        super.rewriteDataColocation(pn, ec.getVariables());
        // rewrite 7: rewrite set partition replication factor
        super.rewriteSetPartitionReplicationFactor(pn, partitionedMatrices, ec.getVariables());
        // rewrite 8: set export replication factor
        super.rewriteSetExportReplicationFactor(pn, ec.getVariables());
        // (no rewrite 9 is applied in this method)
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        rewriteSetTaskPartitioner(pn, false, flagLIX);
        // rewrite 12: fused data partitioning and execution
        rewriteSetFusedDataPartitioningExecution(pn, M1, flagLIX, partitionedMatrices, ec.getVariables(), tmpmode);
        // rewrite 13: transpose sparse vector operations
        super.rewriteSetTranposeSparseVectorOperations(pn, partitionedMatrices, ec.getVariables());
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        // rewrite 15: disable CP caching (receives the in-place result variables from rewrite 14)
        super.rewriteDisableCPCaching(pn, inplaceResultVars, ec.getVariables());
    } else // if( pn.getExecType() == ExecType.CP )
    {
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        // flagLIX always false
        rewriteSetTaskPartitioner(pn, false, false);
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        if (!OptimizerUtils.isSparkExecutionMode()) {
            // rewrite 16: runtime piggybacking
            super.rewriteEnableRuntimePiggybacking(pn, ec.getVariables(), partitionedMatrices);
        } else {
            // rewrite 17: checkpoint injection for parfor loop body
            super.rewriteInjectSparkLoopCheckpointing(pn);
            // rewrite 18: repartition read-only inputs for zipmm
            super.rewriteInjectSparkRepartition(pn, ec.getVariables());
            // rewrite 19: eager caching for checkpoint rdds
            super.rewriteSetSparkEagerRDDCaching(pn, ec.getVariables());
        }
    }
    // rewrite 20: set result merge
    rewriteSetResultMerge(pn, ec.getVariables(), true);
    // rewrite 21: set local recompile memory budget
    super.rewriteSetRecompileMemoryBudget(pn);
    // /////
    // Final rewrites for cleanup / minor improvements
    // rewrite 22: parfor (in recursive functions) to for
    super.rewriteRemoveRecursiveParFor(pn, ec.getVariables());
    // rewrite 23: parfor (par=1) to for
    super.rewriteRemoveUnnecessaryParFor(pn);
    // info optimization result: the plan counter is overwritten with 1 here
    _numEvaluatedPlans = 1;
    return true;
}
Also used : HashMap(java.util.HashMap) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) HashSet(java.util.HashSet)

Example 3 with PExecMode

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode in project systemml by apache.

the class OptimizerConstrained method rewriteSetExecutionStategy.

// /////
// REWRITE set execution strategy
// /
/**
 * Constraint-aware variant of the execution strategy rewrite.
 *
 * <p>If the node already carries an exec type and parallel parfor is enabled,
 * that exec type is translated into the matching {@code PExecMode} and forced
 * onto the mapped parfor program block. Otherwise the decision is delegated
 * to the superclass.
 *
 * @return {@code false} on the forced path; otherwise the superclass result
 */
@Override
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    // nothing to enforce: let the superclass decide
    if (n.getExecType() == null || !ConfigurationManager.isParallelParFor()) {
        return super.rewriteSetExecutionStategy(n, M0, M, M2, M3, flagLIX);
    }
    // constraint awareness: fetch the program block mapped to this plan node
    ParForProgramBlock parforBlock = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping()
        .getMappedProg(n.getID())[1];
    // MR and SPARK map to their remote modes, anything else runs locally
    final PExecMode forcedMode = (n.getExecType() == ExecType.MR)
        ? PExecMode.REMOTE_MR
        : ((n.getExecType() == ExecType.SPARK) ? PExecMode.REMOTE_SPARK : PExecMode.LOCAL);
    parforBlock.setExecMode(forcedMode);
    LOG.debug(getOptMode() + " OPT: forced 'set execution strategy' - result=" + forcedMode);
    // the forced path never reports a recompile requirement
    return false;
}
Also used : PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 4 with PExecMode

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode in project incubator-systemml by apache.

the class OptimizerConstrained method rewriteSetExecutionStategy.

// /////
// REWRITE set execution strategy
// /
/**
 * Constraint-aware variant of the execution strategy rewrite.
 *
 * <p>If the node already carries an exec type and parallel parfor is enabled,
 * that exec type is translated into the matching {@code PExecMode} and forced
 * onto the mapped parfor program block. Otherwise the decision is delegated
 * to the superclass.
 *
 * @return {@code false} on the forced path; otherwise the superclass result
 */
@Override
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    // nothing to enforce: let the superclass decide
    if (n.getExecType() == null || !ConfigurationManager.isParallelParFor()) {
        return super.rewriteSetExecutionStategy(n, M0, M, M2, M3, flagLIX);
    }
    // constraint awareness: fetch the program block mapped to this plan node
    ParForProgramBlock parforBlock = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping()
        .getMappedProg(n.getID())[1];
    // MR and SPARK map to their remote modes, anything else runs locally
    final PExecMode forcedMode = (n.getExecType() == ExecType.MR)
        ? PExecMode.REMOTE_MR
        : ((n.getExecType() == ExecType.SPARK) ? PExecMode.REMOTE_SPARK : PExecMode.LOCAL);
    parforBlock.setExecMode(forcedMode);
    LOG.debug(getOptMode() + " OPT: forced 'set execution strategy' - result=" + forcedMode);
    // the forced path never reports a recompile requirement
    return false;
}
Also used : PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 5 with PExecMode

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetExecutionStategy.

// /////
// REWRITE set execution strategy
// /
/**
 * Chooses between local (CP) and remote parfor execution for node {@code n},
 * stores the choice on the node and pushes the corresponding exec mode into
 * the mapped parfor program block.
 *
 * @param n       plan node of the parfor
 * @param M0      memory estimate passed on to {@code isLargeProblem}
 * @param M       memory estimate compared against {@code _rm} and used for the local parallelism estimate
 * @param M2      memory estimate compared against {@code _rm} when forcing all instructions to CP is possible
 * @param M3      memory estimate compared against {@code _rm} for the partitioned case
 * @param flagLIX if set (and remote execution is admissible), remote execution is chosen
 * @return {@code true} if a remote exec mode was set and the node is not CP-only
 */
protected boolean rewriteSetExecutionStategy(OptNode n, double M0, double M, double M2, double M3, boolean flagLIX) {
    final boolean cpOnly = n.isCPOnly();
    final boolean cpOnlyFeasible = cpOnly || isCPOnlyPossible(n, _rm);
    final String partitioner = n.getParam(ParamType.DATA_PARTITIONER);
    final ExecType remoteType = getRemoteExecType();
    final PDataPartitioner remotePartitioner;
    if (OptimizerUtils.isSparkExecutionMode()) {
        remotePartitioner = PDataPartitioner.REMOTE_SPARK;
    } else {
        remotePartitioner = PDataPartitioner.REMOTE_MR;
    }

    // required conditions for remote parfor: remote execution allowed, and
    //  - all inst already in cp and fit (plain or partitioned) in remote mem, or
    //  - all inst forced to cp fit in remote mem
    final boolean remoteAdmissible = ConfigurationManager.isParallelParFor()
        && ((cpOnly && (M <= _rm || M3 <= _rm)) || (cpOnlyFeasible && M2 <= _rm));

    // default is local parfor (mr instructions in body, rm too small, or no reason to go remote)
    ExecType chosen = ExecType.CP;
    if (remoteAdmissible) {
        // from here on it is an optimization decision
        // estimated local exploited par
        final int localPar = (int) Math.min(_lk, Math.floor(_lm / M));
        // the factor of 2 accounts for hyper-threading and prevents too eager remote parfor
        final int doubledPar = 2 * localPar;
        final boolean preferRemote =
            // local parallelism too constrained (incl conditional partitioning)
            (doubledPar < _lk && doubledPar < _N && doubledPar < _rk)
            // problem is large enough and remote parallelism is larger than local
            || (_lk < _N && _lk < _rk && M <= _rm && isLargeProblem(n, M0))
            // MR operations in local, but CP only in remote (less overall MR jobs)
            || (!cpOnly && cpOnlyFeasible)
            // necessary for LIX rewrite (LIX true iff cp only and rm valid)
            || flagLIX
            // remote data partitioning, because data will be distributed on all nodes
            || (partitioner != null && partitioner.equals(remotePartitioner.toString()) && !InfrastructureAnalyzer.isLocalMode());
        if (preferRemote) {
            chosen = remoteType;
        }
    }
    n.setExecType(chosen);

    // actual programblock modification
    final long nodeId = n.getID();
    ParForProgramBlock parforBlock = (ParForProgramBlock) OptTreeConverter
        .getAbstractPlanMapping()
        .getMappedProg(nodeId)[1];
    final PExecMode execMode = n.getExecType().toParForExecMode();
    parforBlock.setExecMode(execMode);

    // decide if recompilation according to remote mem budget necessary
    final boolean remoteMode = execMode == PExecMode.REMOTE_MR || execMode == PExecMode.REMOTE_SPARK;
    final boolean needsRecompile = remoteMode && !cpOnly;
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set execution strategy' - result=" + execMode + " (recompile=" + needsRecompile + ")");
    return needsRecompile;
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Aggregations

PExecMode (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode): 8, ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock): 6, PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat): 4, PDataPartitioner (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner): 4, PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat): 4, ExecType (org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType): 4, HashMap (java.util.HashMap): 2, HashSet (java.util.HashSet): 2, ResultVar (org.apache.sysml.parser.ParForStatementBlock.ResultVar): 2, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 2