Search in sources :

Example 1 with ResultVar

use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.

the class ProgramConverter method serializeResultVariables.

/**
 * Serializes a list of result variables into a single string.
 *
 * Entries are written in list order and separated by {@code ELEMENT_DELIM};
 * each entry is the variable name, followed by a trailing "+" if the
 * variable is flagged as an accumulator.
 *
 * @param vars result variables to serialize
 * @return the serialized representation (empty string for an empty list)
 */
public static String serializeResultVariables(ArrayList<ResultVar> vars) {
    StringBuilder out = new StringBuilder();
    for (int i = 0; i < vars.size(); i++) {
        // delimiter goes between entries only, never before the first one
        if (i != 0) {
            out.append(ELEMENT_DELIM);
        }
        ResultVar rv = vars.get(i);
        out.append(rv._name);
        // accumulator variables are marked by a "+" suffix
        if (rv._isAccum) {
            out.append("+");
        }
    }
    return out.toString();
}
Also used : ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar)

Example 2 with ResultVar

use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.

the class OptimizerConstrained method optimize.

/**
 * Main optimization procedure.
 *
 * Transformation-based heuristic (rule-based) optimization: a fixed sequence
 * of rewrites is applied to the root node of the given plan, with memory
 * estimates recomputed between rewrites where later decisions depend on them.
 * Neither {@code sb} nor {@code pb} is referenced directly in this method body;
 * all rewrites are invoked on the plan's root node.
 *
 * @param sb parfor statement block (not referenced in this method body)
 * @param pb parfor program block (not referenced in this method body)
 * @param plan optimizer tree whose root node is rewritten
 * @param est cost estimator, stored in {@code _cost} and used for all memory estimates
 * @param ec execution context; its variables are handed to most rewrites
 * @return always true (both the early-abort path and the regular path return true)
 */
@Override
public boolean optimize(ParForStatementBlock sb, ParForProgramBlock pb, OptTree plan, CostEstimator est, ExecutionContext ec) {
    LOG.debug("--- " + getOptMode() + " OPTIMIZER -------");
    OptNode pn = plan.getRoot();
    // early abort for empty parfor body
    if (pn.isLeaf())
        return true;
    // ANALYZE infrastructure properties
    super.analyzeProblemAndInfrastructure(pn);
    _cost = est;
    // debug and warnings output
    // NOTE(review): the message below ends with ")." but never opens a parenthesis.
    LOG.debug(getOptMode() + " OPT: Optimize with local_max_mem=" + toMB(_lm) + " and remote_max_mem=" + toMB(_rm) + ").");
    if (_rnk <= 0 || _rk <= 0)
        LOG.warn(getOptMode() + " OPT: Optimize for inactive cluster (num_nodes=" + _rnk + ", num_map_slots=" + _rk + ").");
    // ESTIMATE memory consumption
    // remember exec type and degree of parallelism so they can be restored after the serial estimate
    ExecType oldET = pn.getExecType();
    int oldK = pn.getK();
    // for basic mem consumption: temporarily switch to serial parfor, estimate, then restore
    pn.setSerialParFor();
    double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    pn.setExecType(oldET);
    pn.setK(oldK);
    LOG.debug(getOptMode() + " OPT: estimated mem (serial exec) M=" + toMB(M0a));
    // OPTIMIZE PARFOR PLAN
    // rewrite 1: data partitioning (incl. log. recompile RIX)
    // partitionedMatrices is passed to this rewrite and reused by later rewrites (1 again, 7, 12, 13, 16)
    HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>();
    rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget());
    // reestimate
    double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    // rewrite 2: remove unnecessary compare matrix
    rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
    // rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX)
    boolean flagLIX = super.rewriteSetResultPartitioning(pn, M0b, ec.getVariables());
    // reestimate
    double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec) M=" + toMB(M1));
    // determine memory consumption for what-if: all-cp or partitioned
    double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
    LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec, all CP) M=" + toMB(M2));
    double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
    LOG.debug(getOptMode() + " OPT: estimated new mem (cond partitioning) M=" + toMB(M3));
    // rewrite 4: execution strategy
    // keep the exec mode as it was before rewrite 4; it is passed on to rewrite 12
    PExecMode tmpmode = getPExecMode(pn);
    boolean flagRecompMR = rewriteSetExecutionStategy(pn, M0a, M1, M2, M3, flagLIX);
    // exec-type-specific rewrites
    if (pn.getExecType() == getRemoteExecType()) {
        // only if the current estimate exceeds the remote budget but the
        // conditionally partitioned estimate fits into it
        if (M1 > _rm && M3 <= _rm) {
            // rewrite 1: data partitioning (apply conditional partitioning)
            rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, M3);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        if (flagRecompMR) {
            // rewrite 5: set operations exec type
            rewriteSetOperationsExecType(pn, flagRecompMR);
            // reestimate
            M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
        }
        // rewrite 6: data colocation
        super.rewriteDataColocation(pn, ec.getVariables());
        // rewrite 7: rewrite set partition replication factor
        super.rewriteSetPartitionReplicationFactor(pn, partitionedMatrices, ec.getVariables());
        // rewrite 8: rewrite set export replication factor
        super.rewriteSetExportReplicationFactor(pn, ec.getVariables());
        // (the numbering skips 9; no rewrite 9 is invoked in this method)
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        rewriteSetTaskPartitioner(pn, false, flagLIX);
        // rewrite 12: fused data partitioning and execution
        rewriteSetFusedDataPartitioningExecution(pn, M1, flagLIX, partitionedMatrices, ec.getVariables(), tmpmode);
        // rewrite 13: transpose sparse vector operations
        super.rewriteSetTranposeSparseVectorOperations(pn, partitionedMatrices, ec.getVariables());
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        // rewrite 15: disable CP caching (receives the in-place result vars collected by rewrite 14)
        super.rewriteDisableCPCaching(pn, inplaceResultVars, ec.getVariables());
    } else // if( pn.getExecType() == ExecType.CP )
    {
        // rewrite 10: determine parallelism
        rewriteSetDegreeOfParallelism(pn, M1, false);
        // rewrite 11: task partitioning
        // flagLIX always false
        rewriteSetTaskPartitioner(pn, false, false);
        // rewrite 14: set in-place result indexing
        HashSet<ResultVar> inplaceResultVars = new HashSet<>();
        super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
        if (!OptimizerUtils.isSparkExecutionMode()) {
            // rewrite 16: runtime piggybacking
            super.rewriteEnableRuntimePiggybacking(pn, ec.getVariables(), partitionedMatrices);
        } else {
            // rewrite 17: checkpoint injection for parfor loop body
            super.rewriteInjectSparkLoopCheckpointing(pn);
            // rewrite 18: repartition read-only inputs for zipmm
            super.rewriteInjectSparkRepartition(pn, ec.getVariables());
            // rewrite 19: eager caching for checkpoint rdds
            super.rewriteSetSparkEagerRDDCaching(pn, ec.getVariables());
        }
    }
    // rewrite 20: set result merge
    rewriteSetResultMerge(pn, ec.getVariables(), true);
    // rewrite 21: set local recompile memory budget
    super.rewriteSetRecompileMemoryBudget(pn);
    // /////
    // Final rewrites for cleanup / minor improvements
    // rewrite 22: parfor (in recursive functions) to for
    super.rewriteRemoveRecursiveParFor(pn, ec.getVariables());
    // rewrite 23: parfor (par=1) to for
    super.rewriteRemoveUnnecessaryParFor(pn);
    // info optimization result: this procedure reports exactly one evaluated plan
    _numEvaluatedPlans = 1;
    return true;
}
Also used : HashMap(java.util.HashMap) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PExecMode(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PExecMode) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) ExecType(org.apache.sysml.runtime.controlprogram.parfor.opt.OptNode.ExecType) HashSet(java.util.HashSet)

Example 3 with ResultVar

use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.

the class OptimizerRuleBased method computeTotalSizeResultVariables.

/**
 * Adds up memory size estimates for all result variables that are bound to
 * matrix objects in the given variable map; other entries are ignored.
 *
 * Note that the running total starts at 1, not 0.
 *
 * @param retVars result variables to account for
 * @param vars variable map used to look up each result variable by name
 * @param k degree of parallelism used to scale the per-variable estimate
 * @return accumulated size estimate
 */
private static double computeTotalSizeResultVariables(ArrayList<ResultVar> retVars, LocalVariableMap vars, int k) {
    double total = 1;
    for (ResultVar resultVar : retVars) {
        Data value = vars.get(resultVar._name);
        if (value instanceof MatrixObject) {
            MatrixObject matrix = (MatrixObject) value;
            if (matrix.getNnz() != 0) {
                // Every worker will consume memory for (MatrixSize/k + nnz) data.
                // This is applicable only when there is non-zero nnz.
                total += (k + 1) * (OptimizerUtils.estimateSizeExactSparsity(matrix.getNumRows(), matrix.getNumColumns(), Math.min((1.0 / k) + matrix.getSparsity(), 1.0)));
            } else {
                // nnz of zero: account for the matrix once, estimated at sparsity 1.0
                total += OptimizerUtils.estimateSizeExactSparsity(matrix.getNumRows(), matrix.getNumColumns(), 1.0);
            }
        }
    }
    return total;
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) Data(org.apache.sysml.runtime.instructions.cp.Data)

Example 4 with ResultVar

use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.

the class OptimizerRuleBased method hasLargeTotalResults.

/**
 * Heuristically computes the total size of all matrix result variables and
 * reports whether it reaches the local memory budget.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param resultVars list of result variables
 * @param vars local variable map
 * @param checkSize not read by this method
 * @return true if the accumulated result size is at least the local memory budget
 */
protected boolean hasLargeTotalResults(OptNode pn, ArrayList<ResultVar> resultVars, LocalVariableMap vars, boolean checkSize) {
    // number of tasks as implied by the configured task partitioner
    PTaskPartitioner partitioner = PTaskPartitioner.valueOf(pn.getParam(ParamType.TASK_PARTITIONER));
    int par = pn.getK();
    long numTasks = estimateNumTasks(partitioner, _N, par);
    double accumulated = 0;
    for (ResultVar resultVar : resultVars) {
        // Potential unknowns: for local result var of child parfor (but we're only interested in top level)
        // Potential scalars: for disabled dependency analysis and unbounded scoping
        Data value = vars.get(resultVar._name);
        if (!(value instanceof MatrixObject)) {
            continue;
        }
        MatrixObject matrix = (MatrixObject) value;
        long numRows = matrix.getNumRows();
        long numCols = matrix.getNumColumns();
        long nonZeros = matrix.getNnz();
        if (nonZeros <= 0) {
            // in total at most as dimensions (due to disjoint results)
            accumulated += OptimizerUtils.estimateSizeExactSparsity(numRows, numCols, 1.0);
        } else {
            // w/ compare: one dense-sized result per task
            accumulated += numTasks * OptimizerUtils.estimateSizeExactSparsity(numRows, numCols, 1.0);
        }
    }
    // heuristic:  large if >= local mem budget
    return (accumulated >= _lm);
}
Also used : PTaskPartitioner(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PTaskPartitioner) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) Data(org.apache.sysml.runtime.instructions.cp.Data)

Example 5 with ResultVar

use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteSetInPlaceResultIndexing.

/**
 * REWRITE set in-place result indexing.
 *
 * Decides jointly for all result variables of the parfor mapped to the given
 * node whether they are updated in place. If so, every result variable bound
 * to a matrix object is marked {@code UpdateType.INPLACE_PINNED} and all
 * result variables are added to {@code inPlaceResultVars}.
 *
 * @param pn plan node; a warning is logged if it is not a PARFOR node
 * @param M memory estimate that the pinned result size is added to
 * @param vars variable map used to look up result variables by name
 * @param inPlaceResultVars output set that receives the result variables if the rewrite applies
 * @param ec execution context (not read by this method)
 */
protected void rewriteSetInPlaceResultIndexing(OptNode pn, double M, LocalVariableMap vars, HashSet<ResultVar> inPlaceResultVars, ExecutionContext ec) {
    // assertions (warnings of corrupt optimizer decisions)
    if (pn.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Set in-place result update is only applicable for a ParFor node.");
    }
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];
    // note currently we decide for all result vars jointly, i.e.,
    // only if all fit pinned in remaining budget, we apply this rewrite.
    ArrayList<ResultVar> retVars = pfpb.getResultVariables();
    // total memory of the pinned result variables
    double pinnedSize = computeTotalSizeResultVariables(retVars, vars, pfpb.getDegreeOfParallelism());
    // NOTE: currently this rule is too conservative (the result variable is assumed to be dense and
    // most importantly counted twice if this is part of the maximum operation)
    double totalMem = Math.max((M + pinnedSize), rComputeSumMemoryIntermediates(pn, new HashSet<ResultVar>()));
    // optimization decision
    boolean apply = false;
    // basic correctness constraint
    if (rHasOnlyInPlaceSafeLeftIndexing(pn, retVars)) {
        boolean remoteMode = pfpb.getExecMode() == PExecMode.REMOTE_MR_DP
            || pfpb.getExecMode() == PExecMode.REMOTE_MR
            || pfpb.getExecMode() == PExecMode.REMOTE_SPARK_DP
            || pfpb.getExecMode() == PExecMode.REMOTE_SPARK;
        if (remoteMode && totalMem < _rm) {
            // result update in-place for MR/Spark (w/ remote memory constraint)
            apply = true;
        } else if (pfpb.getExecMode() == PExecMode.LOCAL
            && totalMem * pfpb.getDegreeOfParallelism() < _lm
            && pn.isCPOnly()) {
            // result update in-place for CP (w/ local memory constraint),
            // no forced mr/spark execution
            apply = true;
        }
    }
    // modify result variable meta data, if rewrite applied
    if (apply) {
        // will be serialized and transfered via symbol table
        for (ResultVar resultVar : retVars) {
            Data value = vars.get(resultVar._name);
            if (!(value instanceof MatrixObject)) {
                continue;
            }
            ((MatrixObject) value).setUpdateType(UpdateType.INPLACE_PINNED);
        }
        inPlaceResultVars.addAll(retVars);
    }
    LOG.debug(getOptMode() + " OPT: rewrite 'set in-place result indexing' - result=" + apply + " (" + Arrays.toString(inPlaceResultVars.toArray(new ResultVar[0])) + ", M=" + toMB(totalMem) + ")");
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ResultVar(org.apache.sysml.parser.ParForStatementBlock.ResultVar) Data(org.apache.sysml.runtime.instructions.cp.Data) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock) HashSet(java.util.HashSet)

Aggregations

ResultVar (org.apache.sysml.parser.ParForStatementBlock.ResultVar): 14; MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 8; Data (org.apache.sysml.runtime.instructions.cp.Data): 8; ParForProgramBlock (org.apache.sysml.runtime.controlprogram.ParForProgramBlock): 5; ArrayList (java.util.ArrayList): 3; ExternalFunctionProgramBlock (org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock): 3; ForProgramBlock (org.apache.sysml.runtime.controlprogram.ForProgramBlock): 3; FunctionProgramBlock (org.apache.sysml.runtime.controlprogram.FunctionProgramBlock): 3; IfProgramBlock (org.apache.sysml.runtime.controlprogram.IfProgramBlock): 3; ProgramBlock (org.apache.sysml.runtime.controlprogram.ProgramBlock): 3; WhileProgramBlock (org.apache.sysml.runtime.controlprogram.WhileProgramBlock): 3; HashSet (java.util.HashSet): 2; DMLProgram (org.apache.sysml.parser.DMLProgram): 2; Program (org.apache.sysml.runtime.controlprogram.Program): 2; CacheableData (org.apache.sysml.runtime.controlprogram.caching.CacheableData): 2; ExecutionContext (org.apache.sysml.runtime.controlprogram.context.ExecutionContext): 2; IOException (java.io.IOException): 1; HashMap (java.util.HashMap): 1; LongWritable (org.apache.hadoop.io.LongWritable): 1; Text (org.apache.hadoop.io.Text): 1