Search in sources :

Example 16 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetFusedDataPartitioningExecution.

// ///////
// REWRITE: set fused data partitioning / execution
// ///////
/**
 * Attempts to replace separate remote data partitioning and remote parfor
 * execution by a single fused remote job (REMOTE_SPARK_DP or REMOTE_MR_DP,
 * depending on the active backend).
 *
 * The rewrite is applied only if every check below succeeds:
 * - the node's exec type is SPARK, or it is MR and {@code M} is below the
 *   remote budget {@code _rm2}
 * - the node's DATA_PARTITIONER parameter names the remote partitioner of the
 *   active backend (i.e., remote partitioning was already selected)
 * - exactly one matrix is registered in {@code partitionedMatrices}
 * - that matrix is accessed via the plain parfor iteration variable
 *   (as decided by {@code rIsAccessByIterationVariable})
 * - the partitioned dimension matches the iteration count {@code _N}
 *   (row/column-wise: equal to {@code _N}; block-wise: at most
 *   {@code _N} times the block size)
 *
 * On success the plan node and the runtime program block are switched to the
 * fused exec mode, the stand-alone data partitioner is disabled, and the
 * degree of parallelism is set to {@code min(_N, _rk2)}.
 *
 * NOTE: The design additionally assumes only CP instructions in the parfor body
 * and no left indexing; neither is verified inside this method, and
 * {@code flagLIX} is currently not read here.
 *
 * NOTE: The reducer memory budget is respected for plan correctness. However,
 * optimization potential is missed if the reducer budget is larger than the
 * mapper budget (i.e., if REMOTE_MR could not be selected wrt the mapper budget).
 * TODO modify 'set exec strategy' and related rewrites for conditional data partitioning.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param M value compared against {@code _rm2} for MR exec type (presumably a memory estimate -- confirm with caller)
 * @param flagLIX not used by this method (presumably a left-indexing flag -- confirm with caller)
 * @param partitionedMatrices map of data partition formats, keyed by variable name
 * @param vars local variable map used to look up the partitioned matrix object
 */
protected void rewriteSetFusedDataPartitioningExecution(OptNode pn, double M, boolean flagLIX, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) {
    // sanity check: only warn, as this points to a corrupt optimizer decision
    if (pn.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Fused data partitioning and execution is only applicable for a ParFor node.");
    }

    boolean fused = false;
    final String currentPartitioner = pn.getParam(ParamType.DATA_PARTITIONER);

    // backend-specific remote partitioner and fused exec mode
    final PDataPartitioner remotePartitioner = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    final PExecMode fusedExecMode = OptimizerUtils.isSparkExecutionMode() ? PExecMode.REMOTE_SPARK_DP : PExecMode.REMOTE_MR_DP;

    // remote exec: SPARK always qualifies, MR only if M is below the remote budget
    final ExecType execType = pn.getExecType();
    final boolean remoteExec = (execType == ExecType.MR && M < _rm2) || execType == ExecType.SPARK;
    // remote partitioning must have been selected by a previous rewrite
    final boolean remotePartitioning = currentPartitioner != null
        && currentPartitioner.equals(remotePartitioner.toString());

    // try to merge remote data partitioning and remote exec (single partitioned matrix only)
    if (remoteExec && remotePartitioning && partitionedMatrices.size() == 1) {
        final Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID());
        final ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];

        // the one and only partitioned matrix
        final String moVarname = partitionedMatrices.keySet().iterator().next();
        final PartitionFormat moDpf = partitionedMatrices.get(moVarname);
        final MatrixObject mo = (MatrixObject) vars.get(moVarname);

        if (rIsAccessByIterationVariable(pn, moVarname, pfpb.getIterVar())) {
            // partitioned dimension must line up with the number of iterations;
            // evaluated lazily so that mo is only touched for a matching format
            final boolean dimsMatchIterations =
                (moDpf == PartitionFormat.ROW_WISE && mo.getNumRows() == _N)
                || (moDpf == PartitionFormat.COLUMN_WISE && mo.getNumColumns() == _N)
                || (moDpf._dpf == PDataPartitionFormat.ROW_BLOCK_WISE_N && mo.getNumRows() <= _N * moDpf._N)
                || (moDpf._dpf == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && mo.getNumColumns() <= _N * moDpf._N);

            if (dimsMatchIterations) {
                final int k = (int) Math.min(_N, _rk2);

                // modify plan
                pn.addParam(ParamType.DATA_PARTITIONER, fusedExecMode.toString() + "(fused)");
                pn.setK(k);

                // modify runtime program: fused exec type, no separate partitioner
                pfpb.setExecMode(fusedExecMode);
                pfpb.setDataPartitioner(PDataPartitioner.NONE);
                pfpb.enableColocatedPartitionedMatrix(moVarname);
                pfpb.setDegreeOfParallelism(k);

                fused = true;
            }
        }
    }

    LOG.debug(getOptMode() + " OPT: rewrite 'set fused data partitioning and execution' - result=" + fused);
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 17 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetDataPartitioner.

// ///////
// REWRITE: set data partitioner
// ///////
/**
 * Decides whether read-only inputs of the given parfor node are partitioned
 * remotely and records the decision in the runtime program block and in the
 * plan node.
 *
 * Candidates are only searched in hybrid execution mode and only if
 * {@code _N} or {@code _Nmax} reaches {@code PROB_SIZE_THRESHOLD_PARTITIONING}.
 * Read-only parent variables whose partition format is neither NONE nor
 * BLOCK_WISE_M_N are handed to {@code rFindDataPartitioningCandidates}; if that
 * search succeeds, the candidates are added to {@code partitionedMatrices} and
 * the backend-specific remote partitioner is chosen, otherwise NONE.
 *
 * @param n plan node to rewrite; a warning is logged if it is not a PARFOR node
 * @param vars local variable map, passed through to the candidate search
 * @param partitionedMatrices output map that receives the selected candidates
 * @param thetaM passed through to the candidate search (presumably a memory threshold -- confirm)
 * @return always {@code false} in this implementation
 */
protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM) {
    if (n.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Data partitioner can only be set for a ParFor node.");
    }

    // preparations: resolve statement block and program block of this node
    final Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    final ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
    final ParForProgramBlock progBlock = (ParForProgramBlock) mapped[1];

    // search for candidates
    boolean partition = false;
    if (OptimizerUtils.isHybridExecutionMode() // only if we are allowed to recompile
        && (_N >= PROB_SIZE_THRESHOLD_PARTITIONING || _Nmax >= PROB_SIZE_THRESHOLD_PARTITIONING)) { // only if beneficial wrt problem size
        final HashMap<String, PartitionFormat> candidates = new HashMap<>();
        for (String varName : stmtBlock.getReadOnlyParentVars()) {
            final PartitionFormat format = stmtBlock.determineDataPartitionFormat(varName);
            // skip variables without a usable partition format
            if (format == PartitionFormat.NONE || format._dpf == PDataPartitionFormat.BLOCK_WISE_M_N) {
                continue;
            }
            candidates.put(varName, format);
        }
        partition = rFindDataPartitioningCandidates(n, candidates, vars, thetaM);
        if (partition) {
            partitionedMatrices.putAll(candidates);
        }
    }

    // NOTE: since partitioning is only applied in case of MR index access, we assume a large
    // matrix and hence always apply the remote partitioner (the benefit for large matrices
    // outweighs potentially unnecessary remote jobs for smaller matrices)
    final PDataPartitioner remote = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    final PDataPartitioner chosen = partition ? remote : PDataPartitioner.NONE;

    // modify rtprog
    progBlock.setDataPartitioner(chosen);
    // modify plan
    n.addParam(ParamType.DATA_PARTITIONER, chosen.toString());

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set data partitioner' - result=" + chosen.toString() + " (" + ProgramConverter.serializeStringCollection(partitionedMatrices.keySet()) + ")");

    // block-wise partitioning is never selected here
    return false;
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) HashMap(java.util.HashMap) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 18 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class OptimizerRuleBased method rewriteSetFusedDataPartitioningExecution.

// ///////
// REWRITE: set fused data partitioning / execution
// ///////
/**
 * Attempts to replace separate remote data partitioning and remote parfor
 * execution by a single fused remote job (REMOTE_SPARK_DP or REMOTE_MR_DP,
 * depending on the active backend).
 *
 * The rewrite is applied only if every check below succeeds:
 * - the node's exec type is SPARK, or it is MR and {@code M} is below the
 *   remote budget {@code _rm2}
 * - the node's DATA_PARTITIONER parameter names the remote partitioner of the
 *   active backend (i.e., remote partitioning was already selected)
 * - exactly one matrix is registered in {@code partitionedMatrices}
 * - that matrix is accessed via the plain parfor iteration variable
 *   (as decided by {@code rIsAccessByIterationVariable})
 * - the partitioned dimension matches the iteration count {@code _N}
 *   (row/column-wise: equal to {@code _N}; block-wise: at most
 *   {@code _N} times the block size)
 *
 * On success the plan node and the runtime program block are switched to the
 * fused exec mode, the stand-alone data partitioner is disabled, and the
 * degree of parallelism is set to {@code min(_N, _rk2)}.
 *
 * NOTE: The design additionally assumes only CP instructions in the parfor body
 * and no left indexing; neither is verified inside this method, and
 * {@code flagLIX} is currently not read here.
 *
 * NOTE: The reducer memory budget is respected for plan correctness. However,
 * optimization potential is missed if the reducer budget is larger than the
 * mapper budget (i.e., if REMOTE_MR could not be selected wrt the mapper budget).
 * TODO modify 'set exec strategy' and related rewrites for conditional data partitioning.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param M value compared against {@code _rm2} for MR exec type (presumably a memory estimate -- confirm with caller)
 * @param flagLIX not used by this method (presumably a left-indexing flag -- confirm with caller)
 * @param partitionedMatrices map of data partition formats, keyed by variable name
 * @param vars local variable map used to look up the partitioned matrix object
 */
protected void rewriteSetFusedDataPartitioningExecution(OptNode pn, double M, boolean flagLIX, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) {
    // sanity check: only warn, as this points to a corrupt optimizer decision
    if (pn.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Fused data partitioning and execution is only applicable for a ParFor node.");
    }

    boolean fused = false;
    final String currentPartitioner = pn.getParam(ParamType.DATA_PARTITIONER);

    // backend-specific remote partitioner and fused exec mode
    final PDataPartitioner remotePartitioner = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    final PExecMode fusedExecMode = OptimizerUtils.isSparkExecutionMode() ? PExecMode.REMOTE_SPARK_DP : PExecMode.REMOTE_MR_DP;

    // remote exec: SPARK always qualifies, MR only if M is below the remote budget
    final ExecType execType = pn.getExecType();
    final boolean remoteExec = (execType == ExecType.MR && M < _rm2) || execType == ExecType.SPARK;
    // remote partitioning must have been selected by a previous rewrite
    final boolean remotePartitioning = currentPartitioner != null
        && currentPartitioner.equals(remotePartitioner.toString());

    // try to merge remote data partitioning and remote exec (single partitioned matrix only)
    if (remoteExec && remotePartitioning && partitionedMatrices.size() == 1) {
        final Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID());
        final ParForProgramBlock pfpb = (ParForProgramBlock) mapped[1];

        // the one and only partitioned matrix
        final String moVarname = partitionedMatrices.keySet().iterator().next();
        final PartitionFormat moDpf = partitionedMatrices.get(moVarname);
        final MatrixObject mo = (MatrixObject) vars.get(moVarname);

        if (rIsAccessByIterationVariable(pn, moVarname, pfpb.getIterVar())) {
            // partitioned dimension must line up with the number of iterations;
            // evaluated lazily so that mo is only touched for a matching format
            final boolean dimsMatchIterations =
                (moDpf == PartitionFormat.ROW_WISE && mo.getNumRows() == _N)
                || (moDpf == PartitionFormat.COLUMN_WISE && mo.getNumColumns() == _N)
                || (moDpf._dpf == PDataPartitionFormat.ROW_BLOCK_WISE_N && mo.getNumRows() <= _N * moDpf._N)
                || (moDpf._dpf == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && mo.getNumColumns() <= _N * moDpf._N);

            if (dimsMatchIterations) {
                final int k = (int) Math.min(_N, _rk2);

                // modify plan
                pn.addParam(ParamType.DATA_PARTITIONER, fusedExecMode.toString() + "(fused)");
                pn.setK(k);

                // modify runtime program: fused exec type, no separate partitioner
                pfpb.setExecMode(fusedExecMode);
                pfpb.setDataPartitioner(PDataPartitioner.NONE);
                pfpb.enableColocatedPartitionedMatrix(moVarname);
                pfpb.setDegreeOfParallelism(k);

                fused = true;
            }
        }
    }

    LOG.debug(getOptMode() + " OPT: rewrite 'set fused data partitioning and execution' - result=" + fused);
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 19 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class OptimizerRuleBased method rewriteSetDataPartitioner.

// ///////
// REWRITE: set data partitioner
// ///////
/**
 * Decides whether read-only inputs of the given parfor node are partitioned
 * remotely and records the decision in the runtime program block and in the
 * plan node.
 *
 * Candidates are only searched in hybrid execution mode and only if
 * {@code _N} or {@code _Nmax} reaches {@code PROB_SIZE_THRESHOLD_PARTITIONING}.
 * Read-only parent variables whose partition format is neither NONE nor
 * BLOCK_WISE_M_N are handed to {@code rFindDataPartitioningCandidates}; if that
 * search succeeds, the candidates are added to {@code partitionedMatrices} and
 * the backend-specific remote partitioner is chosen, otherwise NONE.
 *
 * @param n plan node to rewrite; a warning is logged if it is not a PARFOR node
 * @param vars local variable map, passed through to the candidate search
 * @param partitionedMatrices output map that receives the selected candidates
 * @param thetaM passed through to the candidate search (presumably a memory threshold -- confirm)
 * @return always {@code false} in this implementation
 */
protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM) {
    if (n.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Data partitioner can only be set for a ParFor node.");
    }

    // preparations: resolve statement block and program block of this node
    final Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    final ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
    final ParForProgramBlock progBlock = (ParForProgramBlock) mapped[1];

    // search for candidates
    boolean partition = false;
    if (OptimizerUtils.isHybridExecutionMode() // only if we are allowed to recompile
        && (_N >= PROB_SIZE_THRESHOLD_PARTITIONING || _Nmax >= PROB_SIZE_THRESHOLD_PARTITIONING)) { // only if beneficial wrt problem size
        final HashMap<String, PartitionFormat> candidates = new HashMap<>();
        for (String varName : stmtBlock.getReadOnlyParentVars()) {
            final PartitionFormat format = stmtBlock.determineDataPartitionFormat(varName);
            // skip variables without a usable partition format
            if (format == PartitionFormat.NONE || format._dpf == PDataPartitionFormat.BLOCK_WISE_M_N) {
                continue;
            }
            candidates.put(varName, format);
        }
        partition = rFindDataPartitioningCandidates(n, candidates, vars, thetaM);
        if (partition) {
            partitionedMatrices.putAll(candidates);
        }
    }

    // NOTE: since partitioning is only applied in case of MR index access, we assume a large
    // matrix and hence always apply the remote partitioner (the benefit for large matrices
    // outweighs potentially unnecessary remote jobs for smaller matrices)
    final PDataPartitioner remote = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    final PDataPartitioner chosen = partition ? remote : PDataPartitioner.NONE;

    // modify rtprog
    progBlock.setDataPartitioner(chosen);
    // modify plan
    n.addParam(ParamType.DATA_PARTITIONER, chosen.toString());

    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set data partitioner' - result=" + chosen.toString() + " (" + ProgramConverter.serializeStringCollection(partitionedMatrices.keySet()) + ")");

    // block-wise partitioning is never selected here
    return false;
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) HashMap(java.util.HashMap) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 20 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project systemml by apache.

the class OptimizerConstrained method rewriteSetFusedDataPartitioningExecution.

// ///////
// REWRITE: set fused data partitioning / execution
// ///////
/**
 * Variant of the fused data partitioning / execution rewrite that honors an
 * explicitly requested execution mode.
 *
 * If {@code emode} is REMOTE_MR_DP or REMOTE_SPARK_DP, the fused mode is forced:
 * the exec-type, memory and partitioner preconditions of the default rewrite are
 * skipped, and only the access pattern and the dimension match against
 * {@code _N} are checked for the first matrix returned by the key iterator of
 * {@code partitionedMatrices}. For any other {@code emode} the call is delegated
 * to the five-argument rewrite of the super class.
 *
 * The final debug message reports whether the fused mode was actually set.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param M passed through to the super-class rewrite; not read in the forced path
 * @param flagLIX passed through to the super-class rewrite; not read in the forced path
 * @param partitionedMatrices map of data partition formats, keyed by variable name
 * @param vars local variable map used to look up the partitioned matrix object
 * @param emode requested parfor execution mode
 */
protected void rewriteSetFusedDataPartitioningExecution(OptNode pn, double M, boolean flagLIX, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars, PExecMode emode) {
    // no fused mode requested: fall back to the default rewrite
    if (emode != PExecMode.REMOTE_MR_DP && emode != PExecMode.REMOTE_SPARK_DP) {
        super.rewriteSetFusedDataPartitioningExecution(pn, M, flagLIX, partitionedMatrices, vars);
        return;
    }

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];

    // without a partitioned matrix there is nothing to fuse
    if (partitionedMatrices.isEmpty()) {
        LOG.debug(getOptMode() + " OPT: unable to force 'set fused data partitioning and execution' - result=" + false);
        return;
    }

    // partitioned matrix (first entry in iteration order)
    String moVarname = partitionedMatrices.keySet().iterator().next();
    PartitionFormat moDpf = partitionedMatrices.get(moVarname);
    MatrixObject mo = (MatrixObject) vars.get(moVarname);

    // track the outcome so that the log reflects what actually happened
    boolean applied = false;
    if (rIsAccessByIterationVariable(pn, moVarname, pfpb.getIterVar())
        && ((moDpf == PartitionFormat.ROW_WISE && mo.getNumRows() == _N)
            || (moDpf == PartitionFormat.COLUMN_WISE && mo.getNumColumns() == _N)
            || (moDpf._dpf == PDataPartitionFormat.ROW_BLOCK_WISE_N && mo.getNumRows() <= _N * moDpf._N)
            || (moDpf._dpf == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && mo.getNumColumns() <= _N * moDpf._N))) {
        int k = (int) Math.min(_N, _rk2);
        if (emode == PExecMode.REMOTE_MR_DP) {
            pn.addParam(ParamType.DATA_PARTITIONER, "REMOTE_MR(fused)");
            // set fused exec type
            pfpb.setExecMode(PExecMode.REMOTE_MR_DP);
        } else {
            pn.addParam(ParamType.DATA_PARTITIONER, "REMOTE_SPARK(fused)");
            // set fused exec type
            pfpb.setExecMode(PExecMode.REMOTE_SPARK_DP);
        }
        pn.setK(k);
        pfpb.setDataPartitioner(PDataPartitioner.NONE);
        pfpb.enableColocatedPartitionedMatrix(moVarname);
        pfpb.setDegreeOfParallelism(k);
        applied = true;
    }

    // previously this always logged result=true, even if the checks above failed
    LOG.debug(getOptMode() + " OPT: force 'set fused data partitioning and execution' - result=" + applied);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Aggregations

PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat): 24, PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat): 24, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 14, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 8, ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock): 8, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 8, HashMap (java.util.HashMap): 4, DataType (org.apache.sysml.parser.Expression.DataType): 4, ValueType (org.apache.sysml.parser.Expression.ValueType): 4, PDataPartitioner (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner): 4, PExecMode (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode): 4, Data (org.apache.sysml.runtime.instructions.cp.Data): 4, ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject): 4, MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat): 4, IOException (java.io.IOException): 2, HashSet (java.util.HashSet): 2, LinkedList (java.util.LinkedList): 2, StringTokenizer (java.util.StringTokenizer): 2, FileSplit (org.apache.hadoop.mapred.FileSplit): 2, InputSplit (org.apache.hadoop.mapred.InputSplit): 2