Search in sources :

Example 16 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteRemoveUnnecessaryCompareMatrix.

// /////
// REWRITE remove compare matrix (for result merge, needs to be invoked before setting result merge)
// /
/**
 * Walks the result variables of the parfor program block mapped to {@code n} and, for each
 * one that passes every check below, cleans up the currently bound matrix object and binds
 * a new {@link MatrixBlock} of the same dimensions in its place.
 *
 * <p>A result variable qualifies only if all of the following hold (checked in this order):
 * <ol>
 *   <li>its current value is a {@link MatrixObject} with a non-zero nnz
 *       (i.e. it would be subject to result merge with compare),</li>
 *   <li>{@code n.hasOnlySimpleChilds()} (guarantees no conditional indexing),</li>
 *   <li>{@code rContainsResultFullReplace(...)} (guarantees a full matrix replace),</li>
 *   <li>the variable is not read via right indexing in the loop body,</li>
 *   <li>both dimensions do not exceed {@code Integer.MAX_VALUE}.</li>
 * </ol>
 *
 * <p>Always increments {@code _numEvaluatedPlans} and emits one debug log line.
 *
 * @param n  plan node whose ID maps to the parfor program block
 * @param ec execution context holding the result variables
 */
protected void rewriteRemoveUnnecessaryCompareMatrix(OptNode n, ExecutionContext ec) {
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForProgramBlock parfor = (ParForProgramBlock) mapped[1];
    ArrayList<ResultVar> replaced = new ArrayList<>();
    ArrayList<ResultVar> candidates = parfor.getResultVariables();
    String iterVarName = parfor.getIterVar();

    for (ResultVar candidate : candidates) {
        Data current = ec.getVariable(candidate._name);

        // subject to result merge with compare: non-empty matrix object
        if (!(current instanceof MatrixObject)) {
            continue;
        }
        MatrixObject matrix = (MatrixObject) current;
        if (matrix.getNnz() == 0) {
            continue;
        }
        // guaranteed no conditional indexing
        if (!n.hasOnlySimpleChilds()) {
            continue;
        }
        // guaranteed full matrix replace
        if (!rContainsResultFullReplace(n, candidate._name, iterVarName, matrix)) {
            continue;
        }
        // never read variable in loop body
        if (rIsReadInRightIndexing(n, candidate._name)) {
            continue;
        }
        // dimensions must be representable as int for the replacement block
        if (!(matrix.getNumRows() <= Integer.MAX_VALUE)) {
            continue;
        }
        if (!(matrix.getNumColumns() <= Integer.MAX_VALUE)) {
            continue;
        }

        // replace existing matrix object with empty matrix
        ec.cleanupCacheableData(matrix);
        MatrixBlock empty = new MatrixBlock((int) matrix.getNumRows(), (int) matrix.getNumColumns(), false);
        ec.setMatrixOutput(candidate._name, empty, null);

        // keep track of cleaned result variables
        replaced.add(candidate);
    }

    _numEvaluatedPlans++;
    String message = getOptMode() + " OPT: rewrite 'remove unnecessary compare matrix' - result="
        + (!replaced.isEmpty()) + " (" + ProgramConverter.serializeResultVariables(replaced) + ")";
    LOG.debug(message);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ArrayList(java.util.ArrayList) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 17 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method removeUnnecessaryParFor.

/**
 * Recursively replaces every descendant PARFOR node of {@code n} whose {@code getK()} is 1
 * with a FOR node, swapping the mapped {@link ParForProgramBlock} for a shallow-copied
 * {@link ForProgramBlock} that is linked to the original statement block.
 *
 * @param n plan node whose children (and their subtrees) are inspected
 * @return number of parfor nodes replaced in the subtree below {@code n}
 */
protected int removeUnnecessaryParFor(OptNode n) {
    // leaves have no children to inspect
    if (n.isLeaf()) {
        return 0;
    }

    int replaced = 0;
    for (OptNode child : n.getChilds()) {
        boolean isSingleWorkerParFor = child.getNodeType() == NodeType.PARFOR && child.getK() == 1;
        if (isSingleWorkerParFor) {
            Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(child.getID());
            ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
            ParForProgramBlock parfor = (ParForProgramBlock) mapped[1];

            // create for pb as replacement
            ForProgramBlock plainFor = ProgramConverter.createShallowCopyForProgramBlock(parfor, parfor.getProgram());

            // replace parfor with for, and update objectmapping
            OptTreeConverter.replaceProgramBlock(n, child, parfor, plainFor, false);

            // update link to statement block
            plainFor.setStatementBlock(stmtBlock);

            // update node
            child.setNodeType(NodeType.FOR);
            child.setK(1);

            replaced++;
        }
        // descend regardless of whether this child was replaced
        replaced += removeUnnecessaryParFor(child);
    }
    return replaced;
}
Also used : DMLProgram(org.apache.sysml.parser.DMLProgram) Program(org.apache.sysml.runtime.controlprogram.Program) ForProgramBlock(org.apache.sysml.runtime.controlprogram.ForProgramBlock) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 18 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteEnableRuntimePiggybacking.

// /////
// REWRITE enable runtime piggybacking
// /
/**
 * Enables runtime piggybacking on the parfor program block mapped to {@code n} when
 * {@code OptimizerUtils.ALLOW_RUNTIME_PIGGYBACKING} is set, {@code rHasSharedMRInput}
 * reports a shared input (any input variable which is not partitioned and is read only),
 * and the total degree of parallelism of {@code n} is greater than 1.
 *
 * <p>Always increments {@code _numEvaluatedPlans} and emits one debug log line listing the
 * shared variables collected by {@code rHasSharedMRInput}.
 *
 * @param n                   plan node whose ID maps to the parfor program block
 * @param vars                local variable map; its key set is passed to the shared-input check
 * @param partitionedMatrices partitioned matrices; its key set is passed to the shared-input check
 */
protected void rewriteEnableRuntimePiggybacking(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices) {
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForProgramBlock parfor = (ParForProgramBlock) mapped[1];
    HashSet<String> sharedInputs = new HashSet<>();

    // short-circuit order matters: the shared-input check (which fills sharedInputs) runs
    // only when piggybacking is allowed, and the parallelism check only if that succeeded
    boolean enable = OptimizerUtils.ALLOW_RUNTIME_PIGGYBACKING
        && rHasSharedMRInput(n, vars.keySet(), partitionedMatrices.keySet(), sharedInputs)
        && n.getTotalK() > 1; // apply only if degree of parallelism > 1

    if (enable) {
        parfor.setRuntimePiggybacking(true);
    }

    _numEvaluatedPlans++;
    String message = getOptMode() + " OPT: rewrite 'enable runtime piggybacking' - result="
        + enable + " (" + ProgramConverter.serializeStringCollection(sharedInputs) + ")";
    LOG.debug(message);
}
Also used : ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Example 19 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetSparkEagerRDDCaching.

// /////
// REWRITE set spark eager rdd caching
// /
/**
 * Collects the variables read by the parfor statement block that should be eagerly cached
 * and, if any were found, registers them on the parfor program block via
 * {@code setSparkEagerCacheVariables}.
 *
 * <p>The rewrite is only considered in spark execution mode, for a parfor with exec type CP,
 * and when {@code _N > 1}. A variable is selected when it is bound to a {@link MatrixObject}
 * with a non-null RDD handle, is not one of the spark repartition variables, its RDD reports
 * checkpoint RDD children, and {@code _lm / n.getK()} is smaller than the estimated size of
 * the matrix.
 *
 * <p>Always increments {@code _numEvaluatedPlans} and emits one debug log line.
 *
 * @param n    plan node whose ID maps to the parfor statement/program blocks
 * @param vars local variable map used to look up the read variables
 */
protected void rewriteSetSparkEagerRDDCaching(OptNode n, LocalVariableMap vars) {
    // get program blocks of root parfor
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock stmtBlock = (ParForStatementBlock) mapped[0];
    ParForProgramBlock parfor = (ParForProgramBlock) mapped[1];
    ArrayList<String> eagerVars = new ArrayList<>();

    boolean applicable = OptimizerUtils.isSparkExecutionMode() // spark exec mode
        && n.getExecType() == ExecType.CP                      // local parfor
        && _N > 1;                                             // at least 2 iterations

    if (applicable) {
        Set<String> readVars = stmtBlock.variablesRead().getVariableNames();
        Collection<String> repartitionVars = parfor.getSparkRepartitionVariables();

        for (String name : readVars) {
            Data value = vars.get(name);
            // instanceof is false for null, so no separate null check is needed
            if (!(value instanceof MatrixObject)) {
                continue;
            }
            MatrixObject matrix = (MatrixObject) value;
            RDDObject rdd = matrix.getRDDHandle();
            if (rdd == null) {
                continue;
            }
            MatrixCharacteristics mc = matrix.getMatrixCharacteristics();

            // not a repartition var
            if (repartitionVars != null && repartitionVars.contains(name)) {
                continue;
            }
            // is cached rdd
            if (!rdd.rHasCheckpointRDDChilds()) {
                continue;
            }
            // is out-of-core dataset
            if (_lm / n.getK() < OptimizerUtils.estimateSizeExactSparsity(mc)) {
                eagerVars.add(name);
            }
        }

        // apply rewrite to parfor pb
        if (!eagerVars.isEmpty()) {
            parfor.setSparkEagerCacheVariables(eagerVars);
        }
    }

    _numEvaluatedPlans++;
    String message = getOptMode() + " OPT: rewrite 'set spark eager rdd caching' - result="
        + eagerVars.size() + " (" + ProgramConverter.serializeStringCollection(eagerVars) + ")";
    LOG.debug(message);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ArrayList(java.util.ArrayList) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject)

Example 20 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteDataColocation.

// /////
// REWRITE enable data colocation
// /
/**
 * Enables data colocation for the parfor program block mapped to {@code n}.
 *
 * <p>Colocation is only considered when the node's data partitioner parameter equals
 * {@code PDataPartitioner.REMOTE_MR} and its exec type is MR. Among the candidate matrices
 * found by {@code rFindDataColocationCandidates} (at least one partitioned access via the
 * iteration variable), the one with the largest nnz present in {@code vars} is selected and
 * passed to {@code enableColocatedPartitionedMatrix}.
 *
 * <p>NOTE: if MAX_REPLICATION_FACTOR_PARTITIONING is set larger than 10, co-location may
 * throw warnings per split since this exceeds "max block locations".
 *
 * <p>Always increments {@code _numEvaluatedPlans} and emits one debug log line.
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param vars local variable map
 */
protected void rewriteDataColocation(OptNode n, LocalVariableMap vars) {
    String partitioner = n.getParam(ParamType.DATA_PARTITIONER);
    Object[] mapped = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForProgramBlock parfor = (ParForProgramBlock) mapped[1];

    boolean found = false;
    String selected = null;

    // data colocation is beneficial if we have dp=REMOTE_MR, etype=REMOTE_MR
    // and there is at least one direct col-/row-wise access with the index variable
    // on the partitioned matrix
    boolean remoteMR = partitioner != null
        && PDataPartitioner.REMOTE_MR.toString().equals(partitioner)
        && n.getExecType() == ExecType.MR;

    if (remoteMR) {
        // find all candidates matrices (at least one partitioned access via iterVar)
        HashSet<String> candidates = new HashSet<>();
        rFindDataColocationCandidates(n, candidates, parfor.getIterVar());

        // select largest matrix for colocation (based on nnz to account for sparsity)
        long largestNnz = Long.MIN_VALUE;
        for (String name : candidates) {
            MatrixObject matrix = (MatrixObject) vars.get(name);
            if (matrix == null) {
                continue;
            }
            long nnz = matrix.getNnz();
            if (nnz > largestNnz) {
                largestNnz = nnz;
                selected = name;
                found = true;
            }
        }
    }

    // modify the runtime plan (only if at least one candidate was selected)
    if (found) {
        parfor.enableColocatedPartitionedMatrix(selected);
    }

    _numEvaluatedPlans++;
    String suffix;
    if (found) {
        suffix = " (" + selected + ")";
    } else {
        suffix = "";
    }
    LOG.debug(getOptMode() + " OPT: rewrite 'enable data colocation' - result=" + found + suffix);
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Aggregations

ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)57 ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock)24 FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock)22 ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock)21 IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock)20 WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock)20 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)17 ArrayList (java.util.ArrayList)15 ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock)10 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)9 ExternalFunctionProgramBlock (org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock)9 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9 HashSet (java.util.HashSet)8 StatementBlock (org.apache.sysml.parser.StatementBlock)6 Instruction (org.apache.sysml.runtime.instructions.Instruction)6 ForStatementBlock (org.apache.sysml.parser.ForStatementBlock)5 Data (org.apache.sysml.runtime.instructions.cp.Data)5 Hop (org.apache.sysml.hops.Hop)4 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)4 DMLProgram (org.apache.sysml.parser.DMLProgram)4