Search in sources :

Example 86 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

From the class OptimizerRuleBased, method rewriteSetDataPartitioner.

// /////
// REWRITE set data partitioner
// /
protected boolean rewriteSetDataPartitioner(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices, double thetaM) {
    // data partitioning is a parfor-level rewrite; warn (but proceed) otherwise
    if (n.getNodeType() != NodeType.PARFOR)
        LOG.warn(getOptMode() + " OPT: Data partitioner can only be set for a ParFor node.");
    final boolean blockwise = false;
    // resolve the statement/program block pair mapped to this plan node
    Object[] prog = OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID());
    ParForStatementBlock pfsb = (ParForStatementBlock) prog[0];
    ParForProgramBlock pfpb = (ParForProgramBlock) prog[1];
    // candidate search: requires hybrid exec mode (so recompilation is allowed)
    // and a problem size large enough that partitioning can pay off
    boolean apply = false;
    boolean sizeBeneficial = (_N >= PROB_SIZE_THRESHOLD_PARTITIONING || _Nmax >= PROB_SIZE_THRESHOLD_PARTITIONING);
    if (OptimizerUtils.isHybridExecutionMode() && sizeBeneficial) {
        // collect read-only parent variables with a supported partition format
        HashMap<String, PartitionFormat> candidates = new HashMap<>();
        for (String varname : pfsb.getReadOnlyParentVars()) {
            PartitionFormat dpf = pfsb.determineDataPartitionFormat(varname);
            if (dpf != PartitionFormat.NONE && dpf._dpf != PDataPartitionFormat.BLOCK_WISE_M_N)
                candidates.put(varname, dpf);
        }
        apply = rFindDataPartitioningCandidates(n, candidates, vars, thetaM);
        if (apply)
            partitionedMatrices.putAll(candidates);
    }
    // choose the partitioner: remote flavor depends on the current backend
    PDataPartitioner REMOTE = OptimizerUtils.isSparkExecutionMode() ? PDataPartitioner.REMOTE_SPARK : PDataPartitioner.REMOTE_MR;
    PDataPartitioner pdp = apply ? REMOTE : PDataPartitioner.NONE;
    // NOTE: since partitioning is only applied in case of MR index access, we assume a large
    // matrix and hence always apply the remote partitioner (the benefit for large matrices
    // outweighs potentially unnecessary MR jobs for smaller matrices)
    // modify runtime program
    pfpb.setDataPartitioner(pdp);
    // modify plan
    n.addParam(ParamType.DATA_PARTITIONER, pdp.toString());
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set data partitioner' - result=" + pdp.toString() + " (" + ProgramConverter.serializeStringCollection(partitionedMatrices.keySet()) + ")");
    return blockwise;
}
Also used : PDataPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitioner) HashMap(java.util.HashMap) ParForStatementBlock(org.apache.sysml.parser.ParForStatementBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 87 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

From the class OptimizerRuleBased, method rewriteSetDegreeOfParallelism.

// /////
// REWRITE set degree of parallelism
// /
/**
 * Rewrite 'set degree of parallelism': decides the parfor degree of parallelism k
 * for node n, writes it to both the runtime program block and the plan node, and
 * recursively distributes the remaining parallelism to child nodes.
 *
 * @param n          parfor plan node to configure
 * @param M          estimated memory consumption of the parfor body, used to bound
 *                   k by the local/remote memory budget
 *                   (NOTE(review): assumes M &gt; 0 — the divisions below would
 *                   produce a nonsensical kMax otherwise; TODO confirm at callers)
 * @param flagNested true if this parfor runs as a nested remote parfor
 */
protected void rewriteSetDegreeOfParallelism(OptNode n, double M, boolean flagNested) {
    ExecType type = n.getExecType();
    long id = n.getID();
    // special handling for different exec models (CP, MR, MR nested)
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
    if (type == ExecType.CP) {
        // determine local max parallelism constraint
        // (CP-only bodies may use the higher _lkmaxCP limit; bodies with MR ops use _lkmaxMR)
        int kMax = ConfigurationManager.isParallelParFor() ? (n.isCPOnly() ? _lkmaxCP : _lkmaxMR) : 1;
        // ensure local memory constraint (for spark more conservative in order to
        // prevent unnecessary guarded collect)
        double mem = (OptimizerUtils.isSparkExecutionMode() && !n.isCPOnly()) ? _lm / 2 : _lm;
        // clamp k by memory budget, then guarantee at least one worker
        kMax = Math.min(kMax, (int) Math.floor(mem / M));
        kMax = Math.max(kMax, 1);
        // constrain max parfor parallelism by problem size
        int parforK = (int) ((_N < kMax) ? _N : kMax);
        // FIXME rework for nested parfor parallelism and body w/o gpu ops
        if (DMLScript.USE_ACCELERATOR) {
            long perGPUBudget = GPUContextPool.initialGPUMemBudget();
            double maxMemUsage = getMaxCPOnlyBudget(n);
            // only override parallelism if the body fits within a single GPU's budget;
            // then use one worker per available device (still capped by problem size)
            if (maxMemUsage < perGPUBudget) {
                parforK = GPUContextPool.getDeviceCount();
                parforK = Math.min(parforK, (int) _N);
                LOG.debug("Setting degree of parallelism + [" + parforK + "] for GPU; per GPU budget :[" + perGPUBudget + "], parfor budget :[" + maxMemUsage + "],  max parallelism per GPU : [" + parforK + "]");
            }
        }
        // set parfor degree of parallelism
        pfpb.setDegreeOfParallelism(parforK);
        n.setK(parforK);
        // distribute remaining parallelism
        int remainParforK = getRemainingParallelismParFor(kMax, parforK);
        int remainOpsK = getRemainingParallelismOps(_lkmaxCP, parforK);
        rAssignRemainingParallelism(n, remainParforK, remainOpsK);
    } else // ExecType.MR/ExecType.SPARK
    {
        int kMax = -1;
        if (flagNested) {
            // determine remote max parallelism constraint
            // guaranteed <= _N (see nested)
            pfpb.setDegreeOfParallelism(_rnk);
            n.setK(_rnk);
            // per node (CP only inside)
            kMax = _rkmax / _rnk;
        } else // not nested (default)
        {
            // determine remote max parallelism constraint
            // NOTE(review): assumes _N >= 1 so tmpK > 0 for the division below — TODO confirm
            int tmpK = (int) ((_N < _rk) ? _N : _rk);
            pfpb.setDegreeOfParallelism(tmpK);
            n.setK(tmpK);
            // per node (CP only inside)
            kMax = _rkmax / tmpK;
        }
        // ensure remote memory constraint
        // guaranteed >= 1 (see exec strategy)
        kMax = Math.min(kMax, (int) Math.floor(_rm / M));
        if (kMax < 1)
            kMax = 1;
        // disable nested parallelism, if required
        if (!ALLOW_REMOTE_NESTED_PARALLELISM)
            kMax = 1;
        // distribute remaining parallelism and recompile parallel instructions
        rAssignRemainingParallelism(n, kMax, 1);
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set degree of parallelism' - result=(see EXPLAIN)");
}
Also used : ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 88 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rAssignRemainingParallelism.

/**
 * Recursively distributes the remaining parallelism budget over the subtree of n:
 * nested parfor nodes consume parfor parallelism (parforK), while multi-threaded
 * leaf hops receive the per-operation parallelism (opsK). If any hop constraint
 * changed, the enclosing statement block is recompiled.
 *
 * @param n       subtree root whose children are processed
 * @param parforK remaining parfor degree of parallelism to distribute
 * @param opsK    remaining per-operation (multi-threaded hop) parallelism
 */
protected void rAssignRemainingParallelism(OptNode n, int parforK, int opsK) {
    ArrayList<OptNode> childs = n.getChilds();
    if (childs != null) {
        boolean recompileSB = false;
        for (OptNode c : childs) {
            if (c.getNodeType() == NodeType.PARFOR) {
                // constrain max parfor parallelism by problem size
                int tmpN = Integer.parseInt(c.getParam(ParamType.NUM_ITERATIONS));
                int tmpK = (tmpN < parforK) ? tmpN : parforK;
                // set parfor degree of parallelism
                long id = c.getID();
                c.setK(tmpK);
                ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(id)[1];
                pfpb.setDegreeOfParallelism(tmpK);
                // distribute remaining parallelism
                int remainParforK = getRemainingParallelismParFor(parforK, tmpK);
                int remainOpsK = getRemainingParallelismOps(opsK, tmpK);
                rAssignRemainingParallelism(c, remainParforK, remainOpsK);
            } else if (c.getNodeType() == NodeType.HOP) {
                // set degree of parallelism for multi-threaded leaf nodes
                Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(c.getID());
                // whitelist of multi-threaded hops that may receive opsK:
                // paramops only for GROUPEDAGG/REXPAND, unaryops only for cumulative
                // aggregates, reorgops only for transpose; everything else falls
                // through to the k=1 branch below
                if (ConfigurationManager.isParallelMatrixOperations() && // abop, datagenop, qop, paramop
                h instanceof MultiThreadedHop && !(// only paramop-grpagg
                h instanceof ParameterizedBuiltinOp && !HopRewriteUtils.isValidOp(((ParameterizedBuiltinOp) h).getOp(), ParamBuiltinOp.GROUPEDAGG, ParamBuiltinOp.REXPAND)) && !(// only unaryop-cumulativeagg
                h instanceof UnaryOp && !((UnaryOp) h).isCumulativeUnaryOperation()) && !(// only reorgop-transpose
                h instanceof ReorgOp && ((ReorgOp) h).getOp() != ReOrgOp.TRANSPOSE)) {
                    MultiThreadedHop mhop = (MultiThreadedHop) h;
                    // set max constraint in hop
                    mhop.setMaxNumThreads(opsK);
                    // set optnode k (for explain)
                    c.setK(opsK);
                    // need to recompile SB, if changed constraint
                    recompileSB = true;
                } else // for all other multi-threaded hops set k=1 to simplify debugging
                if (h instanceof MultiThreadedHop) {
                    MultiThreadedHop mhop = (MultiThreadedHop) h;
                    // set max constraint in hop
                    mhop.setMaxNumThreads(1);
                    // set optnode k (for explain)
                    c.setK(1);
                }
            } else
                // non-parfor, non-hop nodes: pass the budget through unchanged
                rAssignRemainingParallelism(c, parforK, opsK);
        }
        // recompile statement block if required
        if (recompileSB) {
            try {
                // guaranteed to be a last-level block (see hop change)
                ProgramBlock pb = (ProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
                Recompiler.recompileProgramBlockInstructions(pb);
            } catch (Exception ex) {
                throw new DMLRuntimeException(ex);
            }
        }
    }
}
Also used : UnaryOp(org.apache.sysml.hops.UnaryOp) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Hop(org.apache.sysml.hops.Hop) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ParameterizedBuiltinOp(org.apache.sysml.hops.ParameterizedBuiltinOp) ReorgOp(org.apache.sysml.hops.ReorgOp) FunctionProgramBlock(org.apache.sysml.runtime.controlprogram.FunctionProgramBlock) ForProgramBlock(org.apache.sysml.runtime.controlprogram.ForProgramBlock) ProgramBlock(org.apache.sysml.runtime.controlprogram.ProgramBlock) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Example 89 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rewriteEnableRuntimePiggybacking.

// /////
// REWRITE enable runtime piggybacking
// /
protected void rewriteEnableRuntimePiggybacking(OptNode n, LocalVariableMap vars, HashMap<String, PartitionFormat> partitionedMatrices) {
    // runtime program block mapped to this parfor plan node
    ParForProgramBlock parforProg = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    // filled by the candidate search with the shared read-only input variables
    HashSet<String> sharedVars = new HashSet<>();
    boolean apply = false;
    // enable runtime piggybacking only if MR jobs operate on a shared read-only
    // data set (unpartitioned, read-only inputs) and there is real parallelism
    if (OptimizerUtils.ALLOW_RUNTIME_PIGGYBACKING) {
        boolean hasSharedInput = rHasSharedMRInput(n, vars.keySet(), partitionedMatrices.keySet(), sharedVars);
        apply = hasSharedInput && (n.getTotalK() > 1);
    }
    if (apply)
        parforProg.setRuntimePiggybacking(apply);
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'enable runtime piggybacking' - result=" + apply + " (" + ProgramConverter.serializeStringCollection(sharedVars) + ")");
}
Also used : ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Example 90 with ParForProgramBlock

use of org.apache.sysml.runtime.controlprogram.ParForProgramBlock in project systemml by apache.

the class OptimizerRuleBased method rewriteSetResultMerge.

// /////
// REWRITE set result merge
// /
protected void rewriteSetResultMerge(OptNode n, LocalVariableMap vars, boolean inLocal) {
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    // remote merge flavor depends on the current backend
    PResultMerge remoteMerge = OptimizerUtils.isSparkExecutionMode() ? PResultMerge.REMOTE_SPARK : PResultMerge.REMOTE_MR;
    // characteristics of this parfor node that drive the merge decision
    boolean isRemoteParfor = (n.getExecType() == getRemoteExecType());
    boolean hasLargeResults = hasLargeTotalResults(n, pfpb.getResultVariables(), vars, true);
    boolean hasMRLeftIndexing = hasResultMRLeftIndexing(n, pfpb.getResultVariables(), vars, true);
    boolean cellFormatNoCompare = determineFlagCellFormatWoCompare(pfpb.getResultVariables(), vars);
    boolean allResultsInMemory = hasOnlyInMemoryResults(n, pfpb.getResultVariables(), vars, true);
    PResultMerge mergeType;
    if (isRemoteParfor && hasLargeResults) {
        // remote exec with large results: merge remotely to avoid huge transfer/merge costs
        mergeType = remoteMerge;
    } else if (allResultsInMemory) {
        // all results fit in memory: cheap local in-memory merge
        mergeType = PResultMerge.LOCAL_MEM;
    } else if ((isRemoteParfor || hasMRLeftIndexing)
        && !(cellFormatNoCompare && ResultMergeLocalFile.ALLOW_COPY_CELLFILES)) {
        // remote merge: benefit for large matrices outweighs potentially
        // unnecessary MR jobs for smaller matrices
        mergeType = remoteMerge;
    } else {
        // default: decide later between in-memory and file-based local merge
        mergeType = PResultMerge.LOCAL_AUTOMATIC;
    }
    // modify runtime program
    pfpb.setResultMerge(mergeType);
    // modify plan
    n.addParam(ParamType.RESULT_MERGE, mergeType.toString());
    // recursively apply rewrite for nested parfor nodes
    if (n.getChilds() != null)
        rInvokeSetResultMerge(n.getChilds(), vars, inLocal && !isRemoteParfor);
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set result merge' - result=" + mergeType);
}
Also used : PResultMerge(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PResultMerge) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)

Aggregations

ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock)105 ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock)41 FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock)37 ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock)35 IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock)33 WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock)33 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)31 ArrayList (java.util.ArrayList)22 ParForStatementBlock (org.apache.sysml.parser.ParForStatementBlock)20 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)16 ExternalFunctionProgramBlock (org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock)16 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)16 HashSet (java.util.HashSet)15 StatementBlock (org.apache.sysml.parser.StatementBlock)11 ForStatementBlock (org.apache.sysml.parser.ForStatementBlock)10 Instruction (org.apache.sysml.runtime.instructions.Instruction)10 DMLProgram (org.apache.sysml.parser.DMLProgram)8 IfStatementBlock (org.apache.sysml.parser.IfStatementBlock)8 WhileStatementBlock (org.apache.sysml.parser.WhileStatementBlock)8 Program (org.apache.sysml.runtime.controlprogram.Program)8