Example 11 with ParForProgramBlock

Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

The class OptimizerRuleBased, method rContainsNode.

protected boolean rContainsNode(OptNode n, ParForProgramBlock parfor) {
    boolean ret = false;
    if (n.getNodeType() == NodeType.PARFOR) {
        ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
        ret = (parfor == pfpb);
    }
    if (!ret && !n.isLeaf())
        for (OptNode c : n.getChilds()) {
            ret |= rContainsNode(c, parfor);
            // early abort
            if (ret)
                break;
        }
    return ret;
}
Also used : ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)
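
The check above resolves the program block mapped to the plan node (index [1] of the mapping entry; index [0] is the statement block, as Example 15 shows) and compares it by reference; otherwise it descends depth-first with an early abort. Below is a minimal self-contained sketch of the same traversal pattern, using a hypothetical TreeNode class rather than SystemML's OptNode.

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for a plan-tree node; not the SystemML OptNode API.
class TreeNode {
    Object mappedBlock;                      // e.g., the runtime program block mapped to this node
    final List<TreeNode> children = new ArrayList<>();

    // Depth-first containment check with early abort, mirroring rContainsNode above.
    static boolean containsBlock(TreeNode n, Object target) {
        if (n.mappedBlock == target)         // reference comparison, as in (parfor == pfpb)
            return true;
        for (TreeNode c : n.children)
            if (containsBlock(c, target))
                return true;                 // early abort on first match
        return false;
    }
}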

Example 12 with ParForProgramBlock

Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

The class OptimizerRuleBased, method rewriteSetOperationsExecType.

///////
// REWRITE set operations exec type
///
protected void rewriteSetOperationsExecType(OptNode pn, boolean recompile) {
    // set exec type in internal opt tree
    int count = setOperationExecType(pn, ExecType.CP);
    // recompile program (actual programblock modification)
    if (recompile && count <= 0)
        LOG.warn("OPT: Forced set operations exec type 'CP', but no operation requires recompile.");
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
    HashSet<String> fnStack = new HashSet<>();
    Recompiler.recompileProgramBlockHierarchy2Forced(pfpb.getChildBlocks(), 0, fnStack, LopProperties.ExecType.CP);
    // debug output
    LOG.debug(getOptMode() + " OPT: rewrite 'set operation exec type CP' - result=" + count);
}
Also used : ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)
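
The count returned by setOperationExecType only drives the warning; the parfor's child program blocks are then force-recompiled to CP so the runtime instructions match the plan. setOperationExecType itself is not shown on this page; the sketch below is an assumption about the typical shape of such a set-and-count pass over a plan tree, not the actual SystemML implementation.

import java.util.ArrayList;
import java.util.List;

// Hypothetical sketch of a "set exec type and count changes" pass; the PlanNode
// type and its fields are assumptions for illustration, not SystemML classes.
class ExecTypeSketch {
    static class PlanNode {
        String execType;                         // e.g., "CP", "MR", "SPARK"; null for non-operations
        final List<PlanNode> children = new ArrayList<>();
    }

    static int setExecType(PlanNode n, String target) {
        int count = 0;
        if (n.execType != null && !n.execType.equals(target)) {
            n.execType = target;                 // force the target exec type
            count++;                             // count only nodes that actually changed
        }
        for (PlanNode c : n.children)
            count += setExecType(c, target);
        return count;
    }
}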

Example 13 with ParForProgramBlock

Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

The class OptimizerRuleBased, method rewriteSetInPlaceResultIndexing.

///////
// REWRITE set in-place result indexing
///
protected void rewriteSetInPlaceResultIndexing(OptNode pn, double M, LocalVariableMap vars, HashSet<ResultVar> inPlaceResultVars, ExecutionContext ec) {
    // assertions (warnings of corrupt optimizer decisions)
    if (pn.getNodeType() != NodeType.PARFOR)
        LOG.warn(getOptMode() + " OPT: Set in-place result update is only applicable for a ParFor node.");
    boolean apply = false;
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
    // note currently we decide for all result vars jointly, i.e.,
    // only if all fit pinned in remaining budget, we apply this rewrite.
    ArrayList<ResultVar> retVars = pfpb.getResultVariables();
    // compute total sum of pinned result variable memory
    double sum = computeTotalSizeResultVariables(retVars, vars, pfpb.getDegreeOfParallelism());
    // NOTE: currently this rule is too conservative (the result variable is assumed to be dense and
    // most importantly counted twice if this is part of the maximum operation)
    double totalMem = Math.max((M + sum), rComputeSumMemoryIntermediates(pn, new HashSet<ResultVar>()));
    // optimization decision
    // basic correctness constraint
    if (rHasOnlyInPlaceSafeLeftIndexing(pn, retVars)) {
        // result update in-place for MR/Spark (w/ remote memory constraint)
        if ((pfpb.getExecMode() == PExecMode.REMOTE_MR_DP || pfpb.getExecMode() == PExecMode.REMOTE_MR || pfpb.getExecMode() == PExecMode.REMOTE_SPARK_DP || pfpb.getExecMode() == PExecMode.REMOTE_SPARK) && totalMem < _rm) {
            apply = true;
        } else if (pfpb.getExecMode() == PExecMode.LOCAL // result update in-place for CP (w/ local memory constraint)
                && totalMem * pfpb.getDegreeOfParallelism() < _lm
                && pn.isCPOnly()) { // no forced mr/spark execution
            apply = true;
        }
    }
    // modify result variable meta data, if rewrite applied
    if (apply) {
        // will be serialized and transferred via symbol table
        for (ResultVar var : retVars) {
            Data dat = vars.get(var._name);
            if (dat instanceof MatrixObject)
                ((MatrixObject) dat).setUpdateType(UpdateType.INPLACE_PINNED);
        }
        inPlaceResultVars.addAll(retVars);
    }
    LOG.debug(getOptMode() + " OPT: rewrite 'set in-place result indexing' - result=" + apply + " (" + Arrays.toString(inPlaceResultVars.toArray(new ResultVar[0])) + ", M=" + toMB(totalMem) + ")");
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)
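
The decision reduces to a memory-budget check: the pinned result variables plus the operating memory estimate must fit the remote budget _rm for the REMOTE_MR/REMOTE_SPARK modes, or totalMem times the degree of parallelism must fit the local budget _lm for LOCAL execution with a CP-only body. A self-contained arithmetic sketch follows; all budgets and estimates below are assumed numbers for illustration only.

// Hypothetical numbers illustrating the in-place decision; all values are assumptions.
public class InPlaceBudgetSketch {
    public static void main(String[] args) {
        double M = 512d * 1024 * 1024;             // operating memory estimate of the parfor body
        double sumResults = 256d * 1024 * 1024;    // total pinned size of the result variables
        double intermediates = 700d * 1024 * 1024; // sum of memory intermediates in the subtree
        double rm = 2d * 1024 * 1024 * 1024;       // remote memory budget (assumed)
        double lm = 4d * 1024 * 1024 * 1024;       // local memory budget (assumed)
        int k = 4;                                 // degree of parallelism

        // as in the rewrite: conservative max over both estimates
        double totalMem = Math.max(M + sumResults, intermediates);

        boolean applyRemote = totalMem < rm;       // REMOTE_MR/REMOTE_SPARK constraint
        boolean applyLocal = totalMem * k < lm;    // LOCAL constraint (body must also be CP-only)
        System.out.println("remote=" + applyRemote + ", local=" + applyLocal);
    }
}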

Example 14 with ParForProgramBlock

Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

The class OptimizerRuleBased, method rewriteSetPartitionReplicationFactor.

///////
// REWRITE set partition replication factor
///
/**
 * Increasing the partition replication factor is beneficial if partitions are
 * read multiple times (e.g., in nested loops) because partitioning (done once)
 * gets slightly slower but there is a higher probability for local access
 *
 * NOTE: this rewrite requires 'set data partitioner' to be executed in order to
 * leverage the partitioning information in the plan tree.
 *
 * @param n internal representation of a plan alternative for program blocks and instructions
 * @param partitionedMatrices map of data partition formats
 * @param vars local variable map
 */
protected void rewriteSetPartitionReplicationFactor(OptNode n, HashMap<String, PartitionFormat> partitionedMatrices, LocalVariableMap vars) {
    boolean apply = false;
    double sizeReplicated = 0;
    int replication = ParForProgramBlock.WRITE_REPLICATION_FACTOR;
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    if (((n.getExecType() == ExecType.MR && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_MR.name())) || (n.getExecType() == ExecType.SPARK && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_SPARK.name()))) && n.hasNestedParallelism(false) && n.hasNestedPartitionReads(false)) {
        apply = true;
        // account for problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);
        // account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_PARTITIONING);
        // account for remaining hdfs capacity
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(ConfigurationManager.getCachedJobConf());
            long hdfsCapacityRemain = fs.getStatus().getRemaining();
            // sum of all input sizes (w/o replication)
            long sizeInputs = 0;
            for (String var : partitionedMatrices.keySet()) {
                MatrixObject mo = (MatrixObject) vars.get(var);
                Path fname = new Path(mo.getFileName());
                // non-existing (e.g., CP) -> small file
                if (fs.exists(fname))
                    sizeInputs += fs.getContentSummary(fname).getLength();
            }
            replication = (int) Math.min(replication, Math.floor(0.9 * hdfsCapacityRemain / sizeInputs));
            // ensure at least replication 1
            replication = Math.max(replication, ParForProgramBlock.WRITE_REPLICATION_FACTOR);
            sizeReplicated = replication * sizeInputs;
        } catch (Exception ex) {
            throw new DMLRuntimeException("Failed to analyze remaining hdfs capacity.", ex);
        }
    }
    // modify the runtime plan
    if (apply)
        pfpb.setPartitionReplicationFactor(replication);
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set partition replication factor' - result=" + apply + ((apply) ? " (" + replication + ", " + toMB(sizeReplicated) + ")" : ""));
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) FileSystem(org.apache.hadoop.fs.FileSystem) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)
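
The resulting replication factor is the intersection of three constraints, floored at the default write replication factor: min(#iterations, #nodes), the internal maximum MAX_REPLICATION_FACTOR_PARTITIONING, and 90% of the remaining HDFS capacity divided by the partitioned input size. The standalone sketch below reproduces that arithmetic with assumed constants; the concrete values are not SystemML's.

// Hypothetical constants illustrating the replication-factor arithmetic above.
public class ReplicationFactorSketch {
    public static void main(String[] args) {
        long N = 100;                                   // number of parfor iterations (problem size)
        int rnk = 8;                                    // number of cluster nodes
        int maxRep = 10;                                // internal maximum (hadoop warns above 10)
        long hdfsRemain = 500L * 1024 * 1024 * 1024;    // remaining HDFS capacity (assumed)
        long sizeInputs = 20L * 1024 * 1024 * 1024;     // sum of partitioned input sizes (assumed)
        int defaultRep = 1;                             // assumed default write replication factor

        int replication = (int) Math.min(N, rnk);       // problem and cluster constraint
        replication = Math.min(replication, maxRep);    // internal max constraint
        replication = (int) Math.min(replication,
            Math.floor(0.9 * hdfsRemain / sizeInputs)); // remaining capacity constraint
        replication = Math.max(replication, defaultRep); // ensure at least the default
        long sizeReplicated = replication * sizeInputs;
        System.out.println("replication=" + replication + ", replicated bytes=" + sizeReplicated);
    }
}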

Example 15 with ParForProgramBlock

Use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project incubator-systemml by apache.

The class OptimizerRuleBased, method removeRecursiveParFor.

protected int removeRecursiveParFor(OptNode n, HashSet<ParForProgramBlock> recPBs) {
    int count = 0;
    if (!n.isLeaf()) {
        for (OptNode sub : n.getChilds()) {
            if (sub.getNodeType() == NodeType.PARFOR) {
                long id = sub.getID();
                Object[] progobj = OptTreeConverter.getAbstractPlanMapping().getMappedProg(id);
                ParForStatementBlock pfsb = (ParForStatementBlock) progobj[0];
                ParForProgramBlock pfpb = (ParForProgramBlock) progobj[1];
                if (recPBs.contains(pfpb)) {
                    // create for pb as replacement
                    Program prog = pfpb.getProgram();
                    ForProgramBlock fpb = ProgramConverter.createShallowCopyForProgramBlock(pfpb, prog);
                    // replace parfor with for, and update objectmapping
                    OptTreeConverter.replaceProgramBlock(n, sub, pfpb, fpb, false);
                    // update link to statement block
                    fpb.setStatementBlock(pfsb);
                    // update node
                    sub.setNodeType(NodeType.FOR);
                    sub.setK(1);
                    count++;
                }
            }
            count += removeRecursiveParFor(sub, recPBs);
        }
    }
    return count;
}
Also used : DMLProgram(org.apache.sysml.parser.DMLProgram) Program(org.apache.sysml.runtime.controlprogram.Program) ForProgramBlock(org.apache.sysml.runtime.controlprogram.ForProgramBlock) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject)
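
Demoting a recursive parfor is a coordinated substitution on both the runtime program (the parfor block is replaced by a shallow-copied for block) and the plan tree (node type FOR, degree of parallelism 1). The sketch below reproduces only the plan-tree side of that traversal on a hypothetical node type; it is not SystemML code.

import java.util.ArrayList;
import java.util.List;
import java.util.Set;

// Hypothetical sketch of the demote-and-count traversal; Node is illustrative only.
class DemoteSketch {
    static class Node {
        String type = "GENERIC";                 // e.g., "PARFOR", "FOR", "GENERIC"
        int k = 1;                               // degree of parallelism
        Object mappedBlock;                      // runtime block mapped to this node
        final List<Node> children = new ArrayList<>();
    }

    // Demote every PARFOR node whose mapped block is flagged as recursive to a
    // sequential FOR node, and return how many nodes were rewritten.
    static int demoteRecursiveParFor(Node n, Set<Object> recBlocks) {
        int count = 0;
        for (Node sub : n.children) {
            if ("PARFOR".equals(sub.type) && recBlocks.contains(sub.mappedBlock)) {
                sub.type = "FOR";                // replace parfor with for
                sub.k = 1;                       // force sequential execution
                count++;
            }
            count += demoteRecursiveParFor(sub, recBlocks);
        }
        return count;
    }
}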

Aggregations

ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock) 57
ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock) 24
FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock) 22
ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock) 21
IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock) 20
WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock) 20
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject) 17
ArrayList (java.util.ArrayList) 15
ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock) 10
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 9
ExternalFunctionProgramBlock (org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock) 9
RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject) 9
HashSet (java.util.HashSet) 8
StatementBlock (org.apache.sysml.parser.StatementBlock) 6
Instruction (org.apache.sysml.runtime.instructions.Instruction) 6
ForStatementBlock (org.apache.sysml.parser.ForStatementBlock) 5
Data (org.apache.sysml.runtime.instructions.cp.Data) 5
Hop (org.apache.sysml.hops.Hop) 4
MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop) 4
DMLProgram (org.apache.sysml.parser.DMLProgram) 4