use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.
the class ProgramConverter method serializeResultVariables.
/**
 * Serializes a list of result variables into one delimited string.
 *
 * Each variable contributes its name; a variable flagged as accumulator
 * ({@code _isAccum}) gets a trailing {@code "+"}. Entries are separated by
 * {@code ELEMENT_DELIM}; no delimiter is written before the first or after
 * the last entry.
 *
 * @param vars result variables to serialize
 * @return the delimited string, empty if {@code vars} has no elements
 */
public static String serializeResultVariables(ArrayList<ResultVar> vars) {
    StringBuilder out = new StringBuilder();
    boolean first = true;
    for (ResultVar rv : vars) {
        // separator goes in front of every entry except the first one
        if (!first) {
            out.append(ELEMENT_DELIM);
        }
        first = false;
        out.append(rv._name);
        if (rv._isAccum) {
            out.append("+");
        }
    }
    return out.toString();
}
use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.
the class OptimizerConstrained method optimize.
/**
* Main optimization procedure.
*
* Transformation-based heuristic (rule-based) optimization: a fixed sequence
* of rewrites is applied to the root node of the given plan, with memory
* estimates recomputed between the steps that depend on them.
* (no use of sb, direct change of pb).
*
* @param sb parfor statement block (not referenced in this method body)
* @param pb parfor program block (not referenced directly in this method body)
* @param plan optimizer tree whose root node is rewritten
* @param est cost estimator, stored in {@code _cost} and used for all memory estimates
* @param ec execution context; its variables are handed to most rewrites
* @return always {@code true}, including the early exit for a leaf root node
*/
@Override
public boolean optimize(ParForStatementBlock sb, ParForProgramBlock pb, OptTree plan, CostEstimator est, ExecutionContext ec) {
LOG.debug("--- " + getOptMode() + " OPTIMIZER -------");
OptNode pn = plan.getRoot();
// early abort for empty parfor body
if (pn.isLeaf())
return true;
// ANALYZE infrastructure properties
super.analyzeProblemAndInfrastructure(pn);
_cost = est;
// debug and warnings output
// NOTE(review): the message below ends with ")." although no parenthesis is opened
LOG.debug(getOptMode() + " OPT: Optimize with local_max_mem=" + toMB(_lm) + " and remote_max_mem=" + toMB(_rm) + ").");
if (_rnk <= 0 || _rk <= 0)
LOG.warn(getOptMode() + " OPT: Optimize for inactive cluster (num_nodes=" + _rnk + ", num_map_slots=" + _rk + ").");
// ESTIMATE memory consumption
// remember exec type and degree of parallelism so they can be restored below
ExecType oldET = pn.getExecType();
int oldK = pn.getK();
// for basic mem consumption: estimate with the node temporarily set to serial parfor
pn.setSerialParFor();
double M0a = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
// restore the original settings after the serial estimate
pn.setExecType(oldET);
pn.setK(oldK);
LOG.debug(getOptMode() + " OPT: estimated mem (serial exec) M=" + toMB(M0a));
// OPTIMIZE PARFOR PLAN
// rewrite 1: data partitioning (incl. log. recompile RIX)
// partitionedMatrices is filled here and reused by later rewrites
HashMap<String, PartitionFormat> partitionedMatrices = new HashMap<>();
rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, OptimizerUtils.getLocalMemBudget());
// reestimate
double M0b = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
// rewrite 2: remove unnecessary compare matrix
rewriteRemoveUnnecessaryCompareMatrix(pn, ec);
// rewrite 3: rewrite result partitioning (incl. log/phy recompile LIX)
boolean flagLIX = super.rewriteSetResultPartitioning(pn, M0b, ec.getVariables());
// reestimate
double M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec) M=" + toMB(M1));
// determine memory consumption for what-if: all-cp or partitioned
double M2 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, LopProperties.ExecType.CP);
LOG.debug(getOptMode() + " OPT: estimated new mem (serial exec, all CP) M=" + toMB(M2));
double M3 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn, true);
LOG.debug(getOptMode() + " OPT: estimated new mem (cond partitioning) M=" + toMB(M3));
// rewrite 4: execution strategy
// keep old exec mode; it is passed to the fused data partitioning rewrite below
PExecMode tmpmode = getPExecMode(pn);
boolean flagRecompMR = rewriteSetExecutionStategy(pn, M0a, M1, M2, M3, flagLIX);
// exec-type-specific rewrites
if (pn.getExecType() == getRemoteExecType()) {
// only if the plan exceeds the remote budget but the conditionally partitioned one fits
if (M1 > _rm && M3 <= _rm) {
// rewrite 1: data partitioning (apply conditional partitioning)
rewriteSetDataPartitioner(pn, ec.getVariables(), partitionedMatrices, M3);
// reestimate
M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
}
if (flagRecompMR) {
// rewrite 5: set operations exec type
rewriteSetOperationsExecType(pn, flagRecompMR);
// reestimate
M1 = _cost.getEstimate(TestMeasure.MEMORY_USAGE, pn);
}
// rewrite 6: data colocation
super.rewriteDataColocation(pn, ec.getVariables());
// rewrite 7: rewrite set partition replication factor
super.rewriteSetPartitionReplicationFactor(pn, partitionedMatrices, ec.getVariables());
// rewrite 8: rewrite set export replication factor
super.rewriteSetExportReplicationFactor(pn, ec.getVariables());
// rewrite 10: determine parallelism
rewriteSetDegreeOfParallelism(pn, M1, false);
// rewrite 11: task partitioning
rewriteSetTaskPartitioner(pn, false, flagLIX);
// rewrite 12: fused data partitioning and execution
rewriteSetFusedDataPartitioningExecution(pn, M1, flagLIX, partitionedMatrices, ec.getVariables(), tmpmode);
// rewrite 13: transpose sparse vector operations
super.rewriteSetTranposeSparseVectorOperations(pn, partitionedMatrices, ec.getVariables());
// rewrite 14: set in-place result indexing
HashSet<ResultVar> inplaceResultVars = new HashSet<>();
super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
// rewrite 15: disable CP caching (receives the in-place result variables from rewrite 14)
super.rewriteDisableCPCaching(pn, inplaceResultVars, ec.getVariables());
} else // if( pn.getExecType() == ExecType.CP )
{
// rewrite 10: determine parallelism
rewriteSetDegreeOfParallelism(pn, M1, false);
// rewrite 11: task partitioning
// flagLIX always false
rewriteSetTaskPartitioner(pn, false, false);
// rewrite 14: set in-place result indexing
HashSet<ResultVar> inplaceResultVars = new HashSet<>();
super.rewriteSetInPlaceResultIndexing(pn, M1, ec.getVariables(), inplaceResultVars, ec);
if (!OptimizerUtils.isSparkExecutionMode()) {
// rewrite 16: runtime piggybacking
super.rewriteEnableRuntimePiggybacking(pn, ec.getVariables(), partitionedMatrices);
} else {
// rewrite 17: checkpoint injection for parfor loop body
super.rewriteInjectSparkLoopCheckpointing(pn);
// rewrite 18: repartition read-only inputs for zipmm
super.rewriteInjectSparkRepartition(pn, ec.getVariables());
// rewrite 19: eager caching for checkpoint rdds
super.rewriteSetSparkEagerRDDCaching(pn, ec.getVariables());
}
}
// rewrite 20: set result merge
rewriteSetResultMerge(pn, ec.getVariables(), true);
// rewrite 21: set local recompile memory budget
super.rewriteSetRecompileMemoryBudget(pn);
// /////
// Final rewrites for cleanup / minor improvements
// rewrite 22: parfor (in recursive functions) to for
super.rewriteRemoveRecursiveParFor(pn, ec.getVariables());
// rewrite 23: parfor (par=1) to for
super.rewriteRemoveUnnecessaryParFor(pn);
// info optimization result: this procedure evaluates exactly one plan
_numEvaluatedPlans = 1;
return true;
}
use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.
the class OptimizerRuleBased method computeTotalSizeResultVariables.
/**
 * Sums up a memory estimate over all result variables that are bound to a
 * {@code MatrixObject} in the given variable map; other entries are skipped.
 *
 * The running total starts at 1. A matrix with zero non-zeros contributes
 * its size estimate at sparsity 1.0. Any other matrix contributes
 * {@code (k + 1)} times its size estimate at sparsity
 * {@code min(1/k + sparsity, 1.0)}.
 *
 * @param retVars result variables to account for
 * @param vars variable map used to look up each result variable by name
 * @param k multiplier/divisor used in the non-empty case (callers pass the degree of parallelism)
 * @return accumulated size estimate
 */
private static double computeTotalSizeResultVariables(ArrayList<ResultVar> retVars, LocalVariableMap vars, int k) {
    double total = 1;
    for (ResultVar rv : retVars) {
        Data entry = vars.get(rv._name);
        if (entry instanceof MatrixObject) {
            MatrixObject matrix = (MatrixObject) entry;
            if (matrix.getNnz() != 0) {
                // Every worker will consume memory for (MatrixSize/k + nnz) data.
                // This is applicable only when there is non-zero nnz.
                long rows = matrix.getNumRows();
                long cols = matrix.getNumColumns();
                double cappedSparsity = Math.min((1.0 / k) + matrix.getSparsity(), 1.0);
                total += (k + 1) * (OptimizerUtils.estimateSizeExactSparsity(rows, cols, cappedSparsity));
            } else {
                long rows = matrix.getNumRows();
                long cols = matrix.getNumColumns();
                total += OptimizerUtils.estimateSizeExactSparsity(rows, cols, 1.0);
            }
        }
    }
    return total;
}
use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.
the class OptimizerRuleBased method hasLargeTotalResults.
/**
 * Heuristic check whether the combined size of all matrix result variables
 * reaches the local memory budget ({@code _lm}).
 *
 * Each result variable bound to a {@code MatrixObject} is sized as a dense
 * matrix (sparsity 1.0). If the matrix currently has non-zeros, that size is
 * multiplied by the estimated number of tasks; otherwise it is counted once.
 *
 * @param pn internal representation of a plan alternative for program blocks and instructions
 * @param resultVars list of result variables
 * @param vars local variable map
 * @param checkSize not referenced by this implementation
 * @return true if the accumulated size is greater than or equal to the local memory budget
 */
protected boolean hasLargeTotalResults(OptNode pn, ArrayList<ResultVar> resultVars, LocalVariableMap vars, boolean checkSize) {
    // number of tasks according to the configured task partitioner
    PTaskPartitioner partitioner = PTaskPartitioner.valueOf(pn.getParam(ParamType.TASK_PARTITIONER));
    int parallelism = pn.getK();
    long numTasks = estimateNumTasks(partitioner, _N, parallelism);

    double accumulated = 0;
    for (ResultVar rv : resultVars) {
        // Potential unknowns: for local result var of child parfor (but we're only interested in top level)
        // Potential scalars: for disabled dependency analysis and unbounded scoping
        Data entry = vars.get(rv._name);
        if (!(entry instanceof MatrixObject)) {
            continue;
        }
        MatrixObject matrix = (MatrixObject) entry;
        long rows = matrix.getNumRows();
        long cols = matrix.getNumColumns();
        long nnz = matrix.getNnz();
        // w/ compare (nnz > 0): one dense copy per task;
        // otherwise in total at most as dimensions (due to disjoint results)
        accumulated += (nnz > 0)
            ? numTasks * OptimizerUtils.estimateSizeExactSparsity(rows, cols, 1.0)
            : OptimizerUtils.estimateSizeExactSparsity(rows, cols, 1.0);
    }

    // heuristic: large if >= local mem budget
    return accumulated >= _lm;
}
use of org.apache.sysml.parser.ParForStatementBlock.ResultVar in project incubator-systemml by apache.
the class OptimizerRuleBased method rewriteSetInPlaceResultIndexing.
// /////
// REWRITE set in-place result indexing
// /
/**
 * Decides jointly for all result variables of the parfor mapped to {@code pn}
 * whether they are updated in place, and if so marks every result variable
 * that is a {@code MatrixObject} as {@code UpdateType.INPLACE_PINNED}.
 *
 * The rewrite is applied only if {@code rHasOnlyInPlaceSafeLeftIndexing}
 * holds and the memory estimate fits: below {@code _rm} for the remote
 * MR/Spark exec modes, or (multiplied by the degree of parallelism) below
 * {@code _lm} for local exec mode with a CP-only node.
 *
 * @param pn plan node; a warning is logged if it is not a PARFOR node
 * @param M memory estimate that is added to the pinned result variable size
 * @param vars variable map used to look up the result variables
 * @param inPlaceResultVars output set; receives all result variables if the rewrite is applied
 * @param ec execution context (not referenced in this method body)
 */
protected void rewriteSetInPlaceResultIndexing(OptNode pn, double M, LocalVariableMap vars, HashSet<ResultVar> inPlaceResultVars, ExecutionContext ec) {
    // assertions (warnings of corrupt optimizer decisions)
    if (pn.getNodeType() != NodeType.PARFOR) {
        LOG.warn(getOptMode() + " OPT: Set in-place result update is only applicable for a ParFor node.");
    }

    ParForProgramBlock parfor = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(pn.getID())[1];

    // note currently we decide for all result vars jointly, i.e.,
    // only if all fit pinned in remaining budget, we apply this rewrite.
    ArrayList<ResultVar> resultVars = parfor.getResultVariables();

    // total sum of pinned result variable memory
    double pinnedSize = computeTotalSizeResultVariables(resultVars, vars, parfor.getDegreeOfParallelism());

    // NOTE: currently this rule is too conservative (the result variable is assumed to be dense and
    // most importantly counted twice if this is part of the maximum operation)
    double memWithPinned = M + pinnedSize;
    double totalMem = Math.max(memWithPinned, rComputeSumMemoryIntermediates(pn, new HashSet<ResultVar>()));

    // optimization decision, guarded by the basic correctness constraint
    boolean apply = false;
    if (rHasOnlyInPlaceSafeLeftIndexing(pn, resultVars)) {
        PExecMode mode = parfor.getExecMode();
        boolean remoteMode = mode == PExecMode.REMOTE_MR_DP
            || mode == PExecMode.REMOTE_MR
            || mode == PExecMode.REMOTE_SPARK_DP
            || mode == PExecMode.REMOTE_SPARK;
        // result update in-place for MR/Spark (w/ remote memory constraint), or
        // for CP (w/ local memory constraint and no forced mr/spark execution)
        apply = (remoteMode && totalMem < _rm)
            || (mode == PExecMode.LOCAL
                && totalMem * parfor.getDegreeOfParallelism() < _lm
                && pn.isCPOnly());
    }

    // modify result variable meta data, if rewrite applied
    if (apply) {
        // will be serialized and transfered via symbol table
        for (ResultVar rv : resultVars) {
            Data entry = vars.get(rv._name);
            if (entry instanceof MatrixObject) {
                MatrixObject matrix = (MatrixObject) entry;
                matrix.setUpdateType(UpdateType.INPLACE_PINNED);
            }
        }
        inPlaceResultVars.addAll(resultVars);
    }

    LOG.debug(getOptMode() + " OPT: rewrite 'set in-place result indexing' - result=" + apply + " (" + Arrays.toString(inPlaceResultVars.toArray(new ResultVar[0])) + ", M=" + toMB(totalMem) + ")");
}
Aggregations