Use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.
Class ProgramRecompiler, method rFindAndRecompileIndexingHOP.
/**
 * Recursively walks a statement block and its program block in lockstep and
 * sets or releases the forced execution type of indexing hops on the given
 * variable, recompiling every last-level block whose hops were changed.
 *
 * NOTE: with force, the respective indexing hops are set and recompiled;
 * without force, the forced exec type is released and the block is recompiled
 * again. Hence, any change can be exactly reverted with the same access
 * behavior.
 *
 * @param sb statement block
 * @param pb program block
 * @param var variable
 * @param ec execution context
 * @param force if true, set and recompile the respective indexing hops
 */
public static void rFindAndRecompileIndexingHOP(StatementBlock sb, ProgramBlock pb, String var, ExecutionContext ec, boolean force) {
    // if: predicate first, then if-body, then the optional else-body
    if (pb instanceof IfProgramBlock && sb instanceof IfStatementBlock) {
        IfProgramBlock ifProg = (IfProgramBlock) pb;
        IfStatementBlock ifBlock = (IfStatementBlock) sb;
        IfStatement ifStmt = (IfStatement) sb.getStatement(0);
        if (ifBlock.getPredicateHops() != null) {
            ifProg.setPredicate(rFindAndRecompileIndexingHOP(ifBlock.getPredicateHops(), ifProg.getPredicate(), var, ec, force));
        }
        // only pairs present on both sides are visited
        int numIf = ifStmt.getIfBody().size();
        for (int k = 0; k < ifProg.getChildBlocksIfBody().size() && k < numIf; k++) {
            ProgramBlock childPb = ifProg.getChildBlocksIfBody().get(k);
            StatementBlock childSb = ifStmt.getIfBody().get(k);
            rFindAndRecompileIndexingHOP(childSb, childPb, var, ec, force);
        }
        if (ifProg.getChildBlocksElseBody() != null) {
            int numElse = ifStmt.getElseBody().size();
            for (int k = 0; k < ifProg.getChildBlocksElseBody().size() && k < numElse; k++) {
                ProgramBlock childPb = ifProg.getChildBlocksElseBody().get(k);
                StatementBlock childSb = ifStmt.getElseBody().get(k);
                rFindAndRecompileIndexingHOP(childSb, childPb, var, ec, force);
            }
        }
        return;
    }

    // while: predicate, then body
    if (pb instanceof WhileProgramBlock && sb instanceof WhileStatementBlock) {
        WhileProgramBlock whileProg = (WhileProgramBlock) pb;
        WhileStatementBlock whileBlock = (WhileStatementBlock) sb;
        WhileStatement whileStmt = (WhileStatement) sb.getStatement(0);
        if (whileBlock.getPredicateHops() != null) {
            whileProg.setPredicate(rFindAndRecompileIndexingHOP(whileBlock.getPredicateHops(), whileProg.getPredicate(), var, ec, force));
        }
        // robustness for potentially added program blocks: bound by both lists
        int numBody = whileStmt.getBody().size();
        for (int k = 0; k < whileProg.getChildBlocks().size() && k < numBody; k++) {
            ProgramBlock childPb = whileProg.getChildBlocks().get(k);
            StatementBlock childSb = whileStmt.getBody().get(k);
            rFindAndRecompileIndexingHOP(childSb, childPb, var, ec, force);
        }
        return;
    }

    // for or parfor: from/to/increment expressions, then body
    if (pb instanceof ForProgramBlock && sb instanceof ForStatementBlock) {
        ForProgramBlock forProg = (ForProgramBlock) pb;
        ForStatementBlock forBlock = (ForStatementBlock) sb;
        ForStatement forStmt = (ForStatement) forBlock.getStatement(0);
        if (forBlock.getFromHops() != null) {
            forProg.setFromInstructions(rFindAndRecompileIndexingHOP(forBlock.getFromHops(), forProg.getFromInstructions(), var, ec, force));
        }
        if (forBlock.getToHops() != null) {
            forProg.setToInstructions(rFindAndRecompileIndexingHOP(forBlock.getToHops(), forProg.getToInstructions(), var, ec, force));
        }
        if (forBlock.getIncrementHops() != null) {
            forProg.setIncrementInstructions(rFindAndRecompileIndexingHOP(forBlock.getIncrementHops(), forProg.getIncrementInstructions(), var, ec, force));
        }
        // robustness for potentially added program blocks: bound by both lists
        int numBody = forStmt.getBody().size();
        for (int k = 0; k < forProg.getChildBlocks().size() && k < numBody; k++) {
            ProgramBlock childPb = forProg.getChildBlocks().get(k);
            StatementBlock childSb = forStmt.getBody().get(k);
            rFindAndRecompileIndexingHOP(childSb, childPb, var, ec, force);
        }
        return;
    }

    // last level program block: work on the actual hops
    try {
        Hop.resetVisitStatus(sb.getHops());
        boolean changed = false;
        for (Hop root : sb.getHops()) {
            // force: set forced execution type; otherwise release it.
            // Every root is visited even after a change was detected.
            changed |= force ? rFindAndSetCPIndexingHOP(root, var) : rFindAndReleaseIndexingHOP(root, var);
        }
        // recompilation on-demand: construct new instructions only if something changed
        if (changed) {
            pb.setInstructions(Recompiler.recompileHopsDag(sb, sb.getHops(), ec.getVariables(), null, true, false, 0));
        }
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}
Use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.
Class ProgramConverter, method createIfStatementBlockCopy.
/**
 * Returns either the given if statement block itself or a copy of it whose
 * predicate hops DAG has been deep copied for concurrent recompilation.
 *
 * A copy is created only if parallel dynamic recompilation is enabled, the
 * block is non-null, and either its predicate requires recompilation or a
 * deep copy is forced. Any exception raised along the way is wrapped into a
 * DMLRuntimeException.
 *
 * @param sb if statement block to copy (may be null, in which case null is returned)
 * @param pid not used by this method
 * @param plain not used by this method
 * @param forceDeepCopy if true, copy even if the predicate does not require recompilation
 * @return the copied block, or {@code sb} itself if no copy is needed
 */
public static IfStatementBlock createIfStatementBlockCopy(IfStatementBlock sb, long pid, boolean plain, boolean forceDeepCopy) {
    try {
        // nothing to copy: hand back the original reference
        if (!ConfigurationManager.getCompilerConfigFlag(ConfigType.ALLOW_PARALLEL_DYN_RECOMPILATION)
                || sb == null
                || !(Recompiler.requiresRecompilation(sb.getPredicateHops()) || forceDeepCopy)) {
            return sb;
        }

        // new statement block (shallow copy of livein/liveout for recompile, line numbers for explain)
        IfStatementBlock copy = new IfStatementBlock();
        copy.setDMLProg(sb.getDMLProg());
        copy.setParseInfo(sb);
        copy.setLiveIn(sb.liveIn());
        copy.setLiveOut(sb.liveOut());
        copy.setUpdatedVariables(sb.variablesUpdated());
        copy.setReadVariables(sb.variablesRead());

        // child statements are shared, not copied
        copy.setStatements(sb.getStatements());

        // the predicate hops dag is deep copied for concurrent recompile
        copy.setPredicateHops(Recompiler.deepCopyHopsDag(sb.getPredicateHops()));
        copy.updatePredicateRecompilationFlag();
        return copy;
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}
Use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.
Class PlanAnalyzer, method rCollectPartitionInputNodes.
/**
 * Recursively collects the ids of all hops that feed into the given partition
 * from outside of it.
 *
 * @param current hop whose inputs are inspected
 * @param visited ids of hops already processed (memoization)
 * @param partition ids of the hops that belong to the partition
 * @param I output set receiving the ids of inputs that are not in the partition
 */
private static void rCollectPartitionInputNodes(Hop current, HashSet<Long> visited, HashSet<Long> partition, HashSet<Long> I) {
    // memoization (not via hops because in middle of dag)
    if (visited.contains(current.getHopID())) {
        return;
    }

    for (Hop child : current.getInput()) {
        // a child outside the partition is an input node; do not descend into it
        if (!partition.contains(child.getHopID())) {
            I.add(child.getHopID());
            continue;
        }
        rCollectPartitionInputNodes(child, visited, partition, I);
    }

    // mark as processed only after all children were handled
    visited.add(current.getHopID());
}
Use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.
Class PlanSelectionFuseCostBased, method isValidMultiAggregate.
/**
 * Checks whether a memo table entry is a valid multi-aggregate: all referenced
 * inputs must agree in size with the first one, and no referenced input may be
 * rejected by rCheckMultiAggregate when probed against the other two inputs.
 *
 * @param memo memo table providing the hop references
 * @param me memo table entry to validate
 * @return true if both checks pass for every plan reference, false otherwise
 */
private static boolean isValidMultiAggregate(CPlanMemoTable memo, MemoTableEntry me) {
    // ensure consistent input sizes (otherwise potential for incorrect results);
    // the first input of input1's hop serves as the reference
    Hop refSize = memo.getHopRefs().get(me.input1).getInput().get(0);
    for (int pos = 1; pos < 3; pos++) {
        if (me.isPlanRef(pos)
                && !HopRewriteUtils.isEqualSize(refSize, memo.getHopRefs().get(me.input(pos)).getInput().get(0))) {
            return false;
        }
    }

    // ensure the aggregates do not have potentially transitive parent-child references
    for (int pos = 0; pos < 3; pos++) {
        if (!me.isPlanRef(pos)) {
            continue;
        }
        // probe set: the ids of the other two inputs
        HashSet<Long> others = new HashSet<>();
        for (int other = 0; other < 3; other++) {
            if (other != pos) {
                others.add(me.input(other));
            }
        }
        if (!rCheckMultiAggregate(memo.getHopRefs().get(me.input(pos)), others)) {
            return false;
        }
    }
    return true;
}
Use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.
Class PlanSelectionFuseCostBased, method rGetPlanCosts.
/**
 * Recursively accumulates the estimated costs of the hop DAG rooted at
 * {@code current}, choosing per hop the best applicable memo table entry.
 *
 * A hop is costed at most once per (hop id, cost vector id) pair; repeated
 * visits with the same pair contribute 0. When no template is active
 * ({@code currentType == null}) and the memo table has entries for the hop, a
 * new fused operator is "opened" with its own cost vector; otherwise the cost
 * vector of the caller is reused.
 *
 * @param memo memo table of candidate plans per hop id
 * @param current hop to be costed
 * @param visited already processed (hop id, cost vector id) tags; updated by this call
 * @param partition hop ids for which compute costs and fused operator costs are added
 * @param M forwarded unchanged to hasNoRefToMaterialization (presumably the materialization points - confirm)
 * @param plan forwarded unchanged to hasNoRefToMaterialization (presumably the assignment over M - confirm)
 * @param computeCosts compute costs per hop id
 * @param costsCurrent cost vector of the enclosing fused operator, or null if none
 * @param currentType template type of the enclosing fused operator, or null if none
 * @return accumulated costs of this hop and the inputs reached from it
 * @throws RuntimeException if the resulting costs are negative, NaN, or infinite
 */
private static double rGetPlanCosts(CPlanMemoTable memo, Hop current, HashSet<Pair<Long, Long>> visited, HashSet<Long> partition, ArrayList<Long> M, boolean[] plan, HashMap<Long, Double> computeCosts, CostVector costsCurrent, TemplateType currentType) {
// memoization per hop id and cost vector to account for redundant
// computation without double counting materialized results or compute
// costs of complex operation DAGs within a single fused operator
// (a null cost vector is represented by id 0 in the tag)
Pair<Long, Long> tag = Pair.of(current.getHopID(), (costsCurrent == null) ? 0 : costsCurrent.ID);
if (visited.contains(tag))
return 0;
visited.add(tag);
// open template if necessary, including memoization
// under awareness of current plan choice
MemoTableEntry best = null;
boolean opened = false;
if (memo.contains(current.getHopID())) {
if (currentType == null) {
// no enclosing template: pick the minimum valid entry per BasicPlanComparator
// and open a new fused operator (opened is set even if no entry qualifies)
best = memo.get(current.getHopID()).stream().filter(p -> p.isValid()).filter(p -> hasNoRefToMaterialization(p, M, plan)).min(new BasicPlanComparator()).orElse(null);
opened = true;
} else {
// inside a template: only entries of the same type or CELL are candidates;
// the sort key favors a matching type (-4) and more plan references
best = memo.get(current.getHopID()).stream().filter(p -> p.type == currentType || p.type == TemplateType.CELL).filter(p -> hasNoRefToMaterialization(p, M, plan)).min(Comparator.comparing(p -> 7 - ((p.type == currentType) ? 4 : 0) - p.countPlanRefs())).orElse(null);
}
}
// create new cost vector if opened, initialized with write costs
// (output cells, with each dimension clamped to at least 1)
CostVector costVect = !opened ? costsCurrent : new CostVector(Math.max(current.getDim1(), 1) * Math.max(current.getDim2(), 1));
// add compute costs of current operator to costs vector
// NOTE(review): costVect is null here if nothing was opened and costsCurrent
// is null; this presumably relies on every partition hop having memo
// entries - confirm
if (partition.contains(current.getHopID()))
costVect.computeCosts += computeCosts.get(current.getHopID());
// process children recursively
double costs = 0;
for (int i = 0; i < current.getInput().size(); i++) {
Hop c = current.getInput().get(i);
// input fused via plan reference: continue within the same cost vector and template type
if (best != null && best.isPlanRef(i))
costs += rGetPlanCosts(memo, c, visited, partition, M, plan, computeCosts, costVect, best.type);
// implicitly fused input: no recursion, only register an input size
// (keyed by the id of c's first input, sized by c's dimensions)
else if (best != null && isImplicitlyFused(current, i, best.type))
costVect.addInputSize(c.getInput().get(0).getHopID(), Math.max(c.getDim1(), 1) * Math.max(c.getDim2(), 1));
else {
// include children and I/O costs
// (child is costed outside any fused operator; matrix inputs are read by the current cost vector)
costs += rGetPlanCosts(memo, c, visited, partition, M, plan, computeCosts, null, null);
if (costVect != null && c.getDataType().isMatrix())
costVect.addInputSize(c.getHopID(), Math.max(c.getDim1(), 1) * Math.max(c.getDim2(), 1));
}
}
// add costs for opened fused operator
if (partition.contains(current.getHopID())) {
if (opened) {
if (LOG.isTraceEnabled())
LOG.trace("Cost vector for fused operator (hop " + current.getHopID() + "): " + costVect);
// time for output write
// (factor 8 is presumably bytes per value - confirm)
costs += costVect.outSize * 8 / WRITE_BANDWIDTH;
// the larger of the compute term and the input read term
costs += Math.max(costVect.computeCosts * costVect.getMaxInputSize() / COMPUTE_BANDWIDTH, costVect.getSumInputSizes() * 8 / READ_BANDWIDTH);
} else // add costs for non-partition read in the middle of fused operator
if (hasNonPartitionConsumer(current, partition)) {
// cost current again without cost vector and template type (tag id 0)
costs += rGetPlanCosts(memo, current, visited, partition, M, plan, computeCosts, null, null);
}
}
// sanity check non-negative costs
if (costs < 0 || Double.isNaN(costs) || Double.isInfinite(costs))
throw new RuntimeException("Wrong cost estimate: " + costs);
return costs;
}
Aggregations