Search in sources :

Example 6 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetFusedDataPartitioningExecution.

///////
//REWRITE set fused data partitioning / execution
///
/**
	 * This dedicated execution mode can only be applied if all of the 
	 * following conditions are true:
	 * - Only cp instructions in the parfor body
	 * - Only one partitioned input 
	 * - number of iterations is equal to number of partitions (nrow/ncol)
	 * - partitioned matrix access via plain iteration variables (no composed expressions)
	 *   (this ensures that each partition is exactly read once)
	 * - no left indexing (since by default static task partitioning)
	 * 
	 * Furthermore, it should be only chosen if we already decided for remote partitioning
	 * and otherwise would create a large number of partition files.
	 * 
	 * NOTE: We already respect the reducer memory budget for plan correctness. However,
	 * we miss optimization potential if the reducer budget is larger than the mapper budget
	 * (if we were not able to select REMOTE_MR as execution strategy wrt mapper budget)
	 * TODO modify 'set exec strategy' and related rewrites for conditional data partitioning.
	 * 
	 * @param pn internal representation of a plan alternative for program blocks and instructions
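	 * @param M memory estimate; for MR execution it must be below the remote reducer memory budget for this rewrite to apply
	 * @param flagLIX flag that is not read by this rewrite (presumably indicates left indexing in the parfor body - TODO confirm against callers)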
	 * @param partitionedMatrices map of data partition formats
	 * @param vars local variable map
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
protected void rewriteSetFusedDataPartitioningExecution(OptNode pn, double M, boolean flagLIX, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) throws DMLRuntimeException {
    //assertions (warnings of corrupt optimizer decisions)
    if (pn.getNodeType() != NodeType.PARFOR)
        LOG.warn(getOptMode() + " OPT: Fused data partitioning and execution is only applicable for a ParFor node.");
    boolean apply = false;
    String partitioner = pn.getParam(ParamType.DATA_PARTITIONER);
    //remote partitioner / fused exec mode depend on the backend (Spark vs MR)
    PDataPartitioner REMOTE_DP = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    PExecMode REMOTE_DPE = OptimizerUtils.isSparkExecutionMode() ? PExecMode.REMOTE_SPARK_DP : PExecMode.REMOTE_MR_DP;
    // try to merge remote data partitioning and remote exec
    if (//MR exec that fits into remote memory of reducers, or SPARK exec
    ((pn.getExecType() == ExecType.MR && M < _rm2) || pn.getExecType() == ExecType.SPARK)
        //remote (MR/SP) partitioning was selected
        && partitioner != null && partitioner.equals(REMOTE_DP.toString())
        //only one partitioned matrix
        && partitionedMatrices.size() == 1) {
        ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
        //partitioned matrix
        String moVarname = partitionedMatrices.keySet().iterator().next();
        PartitionFormat moDpf = partitionedMatrices.get(moVarname);
        MatrixObject mo = (MatrixObject) vars.get(moVarname);
        //check if access via iteration variable and sizes match
        String iterVarname = pfpb.getIterablePredicateVars()[0];
        //NOTE: PartitionFormat is a regular class (instances are also created via 'new'),
        //hence we compare against the constants via equals instead of reference identity
        if (rIsAccessByIterationVariable(pn, moVarname, iterVarname)
            && ((PartitionFormat.ROW_WISE.equals(moDpf) && mo.getNumRows() == _N)
                || (PartitionFormat.COLUMN_WISE.equals(moDpf) && mo.getNumColumns() == _N)
                || (moDpf._dpf == PDataPartitionFormat.ROW_BLOCK_WISE_N && mo.getNumRows() <= _N * moDpf._N)
                || (moDpf._dpf == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && mo.getNumColumns() <= _N * moDpf._N))) {
            //degree of parallelism is bounded by _N and _rk2
            int k = (int) Math.min(_N, _rk2);
            pn.addParam(ParamType.DATA_PARTITIONER, REMOTE_DPE.toString() + "(fused)");
            pn.setK(k);
            //set fused exec type (partitioning is done as part of the fused execution)
            pfpb.setExecMode(REMOTE_DPE);
            pfpb.setDataPartitioner(PDataPartitioner.NONE);
            pfpb.enableColocatedPartitionedMatrix(moVarname);
            pfpb.setDegreeOfParallelism(k);
            apply = true;
        }
    }
    LOG.debug(getOptMode() + " OPT: rewrite 'set fused data partitioning and execution' - result=" + apply);
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 7 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class OptimizerConstrained method rewriteSetFusedDataPartitioningExecution.

///////
//REWRITE set fused data partitioning / execution
///
/**
 * Variant of the fused data partitioning / execution rewrite that honors an
 * explicitly requested execution mode: for REMOTE_MR_DP or REMOTE_SPARK_DP the
 * fused mode is set whenever the applicability check on the first partitioned
 * matrix passes; for any other mode the decision is delegated to the super class.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param M passed through to the super class rewrite (not read in the forced path)
 * @param flagLIX passed through to the super class rewrite (not read in the forced path)
 * @param partitionedMatrices map of data partition formats
 * @param vars local variable map
 * @param emode requested parfor execution mode
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
protected void rewriteSetFusedDataPartitioningExecution(OptNode pn, double M, boolean flagLIX, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars, PExecMode emode) throws DMLRuntimeException {
    if (emode == PExecMode.REMOTE_MR_DP || emode == PExecMode.REMOTE_SPARK_DP) {
        ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
        //partitioned matrix
        if (partitionedMatrices.size() <= 0) {
            LOG.debug(getOptMode() + " OPT: unable to force 'set fused data partitioning and execution' - result=" + false);
            return;
        }
        String moVarname = partitionedMatrices.keySet().iterator().next();
        PartitionFormat moDpf = partitionedMatrices.get(moVarname);
        MatrixObject mo = (MatrixObject) vars.get(moVarname);
        //check if access via iteration variable and sizes match
        String iterVarname = pfpb.getIterablePredicateVars()[0];
        //track whether the rewrite was actually applied, so the log below reports the real outcome
        boolean apply = false;
        //NOTE: PartitionFormat is a regular class, hence equals instead of reference identity
        if (rIsAccessByIterationVariable(pn, moVarname, iterVarname)
            && ((PartitionFormat.ROW_WISE.equals(moDpf) && mo.getNumRows() == _N)
                || (PartitionFormat.COLUMN_WISE.equals(moDpf) && mo.getNumColumns() == _N)
                || (moDpf._dpf == PDataPartitionFormat.ROW_BLOCK_WISE_N && mo.getNumRows() <= _N * moDpf._N)
                || (moDpf._dpf == PDataPartitionFormat.COLUMN_BLOCK_WISE_N && mo.getNumColumns() <= _N * moDpf._N))) {
            //degree of parallelism is bounded by _N and _rk2
            int k = (int) Math.min(_N, _rk2);
            if (emode == PExecMode.REMOTE_MR_DP) {
                pn.addParam(ParamType.DATA_PARTITIONER, "REMOTE_MR(fused)");
                //set fused exec type
                pfpb.setExecMode(PExecMode.REMOTE_MR_DP);
            } else {
                pn.addParam(ParamType.DATA_PARTITIONER, "REMOTE_SPARK(fused)");
                //set fused exec type
                pfpb.setExecMode(PExecMode.REMOTE_SPARK_DP);
            }
            pn.setK(k);
            pfpb.setDataPartitioner(PDataPartitioner.NONE);
            pfpb.enableColocatedPartitionedMatrix(moVarname);
            pfpb.setDegreeOfParallelism(k);
            apply = true;
        }
        LOG.debug(getOptMode() + " OPT: force 'set fused data partitioning and execution' - result=" + apply);
    } else
        super.rewriteSetFusedDataPartitioningExecution(pn, M, flagLIX, partitionedMatrices, vars);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 8 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetTranposeSparseVectorOperations.

///////
//REWRITE transpose sparse vector operations
///
/**
 * Enables transposed sparse column vector handling on the parfor program block
 * if the block runs in REMOTE_MR_DP mode, exactly one matrix is partitioned
 * column-wise, its sparsity is at most the sparse/dense turn point, and the
 * partition access is transpose-safe.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param partitionedMatrices map of data partition formats
 * @param vars local variable map
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
protected void rewriteSetTranposeSparseVectorOperations(OptNode pn, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) throws DMLRuntimeException {
    //assertions (warnings of corrupt optimizer decisions)
    if (pn.getNodeType() != NodeType.PARFOR)
        LOG.warn(getOptMode() + " OPT: Transpose sparse vector operations is only applicable for a ParFor node.");
    boolean apply = false;
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
    //general applicable: fused MR execution with a single partitioned matrix
    if (pfpb.getExecMode() == PExecMode.REMOTE_MR_DP && partitionedMatrices.size() == 1) {
        String moVarname = partitionedMatrices.keySet().iterator().next();
        PartitionFormat moDpf = partitionedMatrices.get(moVarname);
        Data dat = vars.get(moVarname);
        //instanceof is false for null, so no separate null check is required;
        //PartitionFormat is a regular class, hence equals instead of reference identity
        if (dat instanceof MatrixObject
            && PartitionFormat.COLUMN_WISE.equals(moDpf)
            //check for sparse matrix
            && ((MatrixObject) dat).getSparsity() <= MatrixBlock.SPARSITY_TURN_POINT
            //transpose-safe
            && rIsTransposeSafePartition(pn, moVarname)) {
            pfpb.setTransposeSparseColumnVector(true);
            apply = true;
        }
    }
    LOG.debug(getOptMode() + " OPT: rewrite 'set transpose sparse vector operations' - result=" + apply);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) Data(org.apache.sysml.runtime.instructions.cp.Data) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 9 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class ParForStatementBlock method determineAccessPattern.

/**
 * Derives a partition format candidate from the index bounds of a single
 * indexed access. Any exception raised while analyzing the bounds is
 * rethrown wrapped in a RuntimeException.
 *
 * @param dat indexed identifier whose row/column bounds are inspected
 * @return COLUMN_WISE, ROW_WISE, a block-wise format (Spark execution mode only), or NONE
 */
private PartitionFormat determineAccessPattern(IndexedIdentifier dat) {
    //block-wise formats are only considered in spark execution mode
    boolean sparkMode = OptimizerUtils.isSparkExecutionMode();
    int blocksize = ConfigurationManager.getBlocksize();
    //1) get all bounds expressions for index access
    Expression rowLower = dat.getRowLowerBound();
    Expression rowUpper = dat.getRowUpperBound();
    Expression colLower = dat.getColLowerBound();
    Expression colUpper = dat.getColUpperBound();
    boolean fullRowRange = (rowLower == null && rowUpper == null);
    boolean fullColRange = (colLower == null && colUpper == null);
    try {
        //COLUMN_WISE if all rows and access to single column
        if (fullRowRange && colLower != null && colLower.equals(colUpper)) {
            return PartitionFormat.COLUMN_WISE;
        }
        //ROW_WISE if all cols and access to single row
        if (fullColRange && rowLower != null && rowLower.equals(rowUpper)) {
            return PartitionFormat.ROW_WISE;
        }
        //COLUMN_BLOCK_WISE (bounds are compared by reference here)
        if (sparkMode && fullRowRange && colLower != colUpper) {
            LinearFunction lowerFn = getLinearFunction(colLower, true);
            LinearFunction upperFn = getLinearFunction(colUpper, true);
            if (isAlignedBlocking(lowerFn, upperFn, blocksize)) {
                return new PartitionFormat(PDataPartitionFormat.COLUMN_BLOCK_WISE_N, (int) lowerFn._b[0]);
            }
            return PartitionFormat.NONE;
        }
        //ROW_BLOCK_WISE (bounds are compared by reference here)
        if (sparkMode && fullColRange && rowLower != rowUpper) {
            LinearFunction lowerFn = getLinearFunction(rowLower, true);
            LinearFunction upperFn = getLinearFunction(rowUpper, true);
            if (isAlignedBlocking(lowerFn, upperFn, blocksize)) {
                return new PartitionFormat(PDataPartitionFormat.ROW_BLOCK_WISE_N, (int) lowerFn._b[0]);
            }
            return PartitionFormat.NONE;
        }
        //NONE otherwise (conservative)
        return PartitionFormat.NONE;
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Also used : PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) IOException(java.io.IOException)

Example 10 with PartitionFormat

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat in project incubator-systemml by apache.

the class ParForStatementBlock method determineDataPartitionFormat.

/**
	 * Determines the PDataPartitioningFormat for read-only parent variables according
	 * to the access pattern of that variable within the parfor statement block.
	 * Row-wise or column wise partitioning is only suggested if we see pure row-wise or
	 * column-wise access patterns.
	 * 
	 * @param var name of the read-only parent variable whose access pattern is analyzed
	 * @return partition format
	 */
public PartitionFormat determineDataPartitionFormat(String var) {
    List<PartitionFormat> candidates = new LinkedList<PartitionFormat>();
    PartitionFormat agreed = null;
    try {
        //collect one candidate format per access found in the parfor body
        ParForStatement pfs = (ParForStatement) _statements.get(0);
        rDeterminePartitioningCandidates(var, pfs.getBody(), candidates);
        //fold candidates into a single format; any disagreement degrades to NONE
        for (PartitionFormat candidate : candidates) {
            if (agreed == null || agreed.equals(candidate)) {
                agreed = candidate;
            } else {
                //if no consensus
                agreed = PartitionFormat.NONE;
            }
        }
    } catch (LanguageException e) {
        LOG.trace("Unable to determine partitioning candidates.", e);
        return PartitionFormat.NONE;
    }
    //no candidates at all also means no partitioning
    return (agreed != null) ? agreed : PartitionFormat.NONE;
}
Also used : PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) LinkedList(java.util.LinkedList)

Aggregations

PDataPartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat)10 PartitionFormat (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat)10 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)7 ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)4 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)3 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)3 HashMap (java.util.HashMap)2 DataType (org.apache.sysml.parser.Expression.DataType)2 ValueType (org.apache.sysml.parser.Expression.ValueType)2 PDataPartitioner (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner)2 PExecMode (org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode)2 Data (org.apache.sysml.runtime.instructions.cp.Data)2 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)2 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)2 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 StringTokenizer (java.util.StringTokenizer)1 ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock)1 UpdateType (org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType)1