use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project incubator-systemml by apache.
the class SpoofCompiler method cleanupCPlans.
/**
 * Post-processes constructed cplans: trims each template's inputs down to
 * the leaf nodes that are actually referenced from its output(s) and drops
 * plans that are invalid or not worth keeping. Incremental construction can
 * leave behind unused inputs, which would otherwise cause redundant
 * computation.
 *
 * @param cplans set of cplans (input hops and template per key)
 * @return new map holding only the retained cplans
 */
private static HashMap<Long, Pair<Hop[], CNodeTpl>> cleanupCPlans(HashMap<Long, Pair<Hop[], CNodeTpl>> cplans) {
    HashMap<Long, Pair<Hop[], CNodeTpl>> retained = new HashMap<Long, Pair<Hop[], CNodeTpl>>();
    for (Entry<Long, Pair<Hop[], CNodeTpl>> entry : cplans.entrySet()) {
        final Long key = entry.getKey();
        final Pair<Hop[], CNodeTpl> plan = entry.getValue();
        final CNodeTpl tpl = plan.getValue();
        final Hop[] inputs = plan.getKey();
        final boolean multiAgg = tpl instanceof CNodeMultiAgg;

        // gather the IDs of all leaf nodes below the template output(s)
        HashSet<Long> leafIDs = new HashSet<Long>();
        if (!multiAgg) {
            rCollectLeafIDs(tpl.getOutput(), leafIDs);
        } else {
            for (CNode out : ((CNodeMultiAgg) tpl).getOutputs()) {
                rCollectLeafIDs(out, leafIDs);
            }
        }

        // register the plan; trim its inputs if the counts do not match
        if (inputs.length != leafIDs.size()) {
            tpl.cleanupInputs(leafIDs);
            ArrayList<Hop> used = new ArrayList<Hop>();
            for (int i = 0; i < inputs.length; i++) {
                Hop in = inputs[i];
                if (in == null || !leafIDs.contains(in.getHopID())) {
                    continue;
                }
                used.add(in);
            }
            retained.put(key, new Pair<Hop[], CNodeTpl>(used.toArray(new Hop[0]), tpl));
        } else {
            retained.put(key, plan);
        }

        // drop invalid plans with column indexing on the main input
        if (tpl instanceof CNodeCell) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            boolean invalid = rHasLookupRC1(tpl.getOutput(), mainInput);
            if (!invalid) {
                invalid = isLookupRC1(tpl.getOutput(), mainInput);
            }
            if (invalid) {
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                }
            }
        } else if (multiAgg) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            // every output is checked, so the trace message may repeat per output
            for (CNode out : ((CNodeMultiAgg) tpl).getOutputs()) {
                if (!rHasLookupRC1(out, mainInput) && !isLookupRC1(out, mainInput)) {
                    continue;
                }
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                }
            }
        }

        // strip spurious lookups on the main input (cell, outer-product, multi-agg)
        if (tpl instanceof CNodeCell || tpl instanceof CNodeOuterProduct) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            rFindAndRemoveLookup(tpl.getOutput(), mainInput);
        } else if (multiAgg) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            rFindAndRemoveLookupMultiAgg((CNodeMultiAgg) tpl, mainInput);
        }

        // drop plans w/ a single op and w/o agg (evaluated lazily, step by step)
        boolean trivial = false;
        if (tpl instanceof CNodeCell) {
            boolean singleNoAgg = ((CNodeCell) tpl).getCellType() == CellType.NO_AGG
                && TemplateUtils.hasSingleOperation(tpl);
            trivial = singleNoAgg || TemplateUtils.hasNoOperation(tpl);
        }
        if (!trivial && tpl instanceof CNodeRow) {
            trivial = TemplateUtils.hasSingleOperation(tpl);
        }
        if (trivial) {
            retained.remove(key);
        }

        // drop plans whose output is a plain data node (empty plan)
        if (tpl.getOutput() instanceof CNodeData) {
            retained.remove(key);
        }
    }
    return retained;
}
use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project incubator-systemml by apache.
the class SpoofCompiler method constructCPlans.
////////////////////
// Codegen plan construction
/**
 * Builds cplan representations for a hop DAG in three phases: candidate
 * exploration into a fresh memo table, pruning of suboptimal candidates,
 * and construction of the actual cplans.
 *
 * @param roots dag root nodes
 * @param compileLiterals flag passed through to exploration and construction
 * @return constructed cplans, held in a LinkedHashMap
 * @throws DMLException propagated from the exploration, pruning, or construction calls
 */
private static HashMap<Long, Pair<Hop[], CNodeTpl>> constructCPlans(ArrayList<Hop> roots, boolean compileLiterals) throws DMLException {
    // phase 1: explore cplan candidates from every root
    final CPlanMemoTable memoTable = new CPlanMemoTable();
    for (Hop root : roots) {
        rExploreCPlans(root, memoTable, compileLiterals);
    }

    // phase 2: select optimal cplan candidates
    memoTable.pruneSuboptimal(roots);

    // phase 3: construct actual cplan representations
    // note: a private visited set is used instead of the hop visit status, because
    // jumps over fused operators would corrupt subsequent resets and leave partial
    // hops dags in visited status
    final HashSet<Long> seen = new HashSet<Long>();
    final LinkedHashMap<Long, Pair<Hop[], CNodeTpl>> constructed = new LinkedHashMap<Long, Pair<Hop[], CNodeTpl>>();
    for (Hop root : roots) {
        rConstructCPlans(root, memoTable, constructed, compileLiterals, seen);
    }
    return constructed;
}
use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project incubator-systemml by apache.
the class SpoofCompiler method cleanupCPlans.
/**
 * Post-processes constructed cplans: applies simplification and
 * common-subexpression rewrites, narrows the input hops to those the
 * rewritten template still references, and drops plans that are invalid or
 * not worth keeping. Incremental construction can leave behind unused
 * inputs, which would otherwise cause redundant computation.
 *
 * Plans with a null input hop are skipped entirely. All other plans have
 * their inputs renamed at the end of the iteration, whether or not they
 * were retained.
 *
 * @param memo memoization table, consulted for the hop reference of row
 *        templates in Spark execution mode
 * @param cplans set of cplans
 * @return new map holding only the retained cplans
 */
private static HashMap<Long, Pair<Hop[], CNodeTpl>> cleanupCPlans(CPlanMemoTable memo, HashMap<Long, Pair<Hop[], CNodeTpl>> cplans) {
    final HashMap<Long, Pair<Hop[], CNodeTpl>> retained = new HashMap<>();
    final CPlanOpRewriter opRewriter = new CPlanOpRewriter();
    final CPlanCSERewriter cseRewriter = new CPlanCSERewriter();

    for (Entry<Long, Pair<Hop[], CNodeTpl>> entry : cplans.entrySet()) {
        final Long key = entry.getKey();
        CNodeTpl tpl = entry.getValue().getValue();
        Hop[] inputs = entry.getValue().getKey();

        // skip invalid plans with null inputs
        boolean hasNullInput = false;
        for (Hop candidate : inputs) {
            if (candidate == null) {
                hasNullInput = true;
                break;
            }
        }
        if (hasNullInput) {
            continue;
        }

        // simplification rewrites first, then common subexpression elimination
        tpl = opRewriter.simplifyCPlan(tpl);
        tpl = cseRewriter.eliminateCommonSubexpressions(tpl);
        final boolean isRow = tpl instanceof CNodeRow;
        final boolean isMultiAgg = tpl instanceof CNodeMultiAgg;

        // keep only the input hops still referenced by the template (order-preserving)
        final HashSet<Long> usedIDs = tpl.getInputHopIDs(false);
        inputs = Arrays.stream(inputs)
            .filter(h -> h != null && usedIDs.contains(h.getHopID()))
            .toArray(Hop[]::new);
        retained.put(key, new Pair<>(inputs, tpl));

        // drop invalid plans with column indexing on the main input
        if (tpl instanceof CNodeCell || isRow) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            boolean inclRC1 = !isRow;
            boolean invalid = rHasLookupRC1(tpl.getOutput(), mainInput, inclRC1);
            if (!invalid) {
                invalid = isLookupRC1(tpl.getOutput(), mainInput, inclRC1);
            }
            if (invalid) {
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                }
            }
        } else if (isMultiAgg) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            // every output is checked, so the trace message may repeat per output
            for (CNode out : ((CNodeMultiAgg) tpl).getOutputs()) {
                if (!rHasLookupRC1(out, mainInput, true) && !isLookupRC1(out, mainInput, true)) {
                    continue;
                }
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                }
            }
        }

        // remove invalid lookups on the main input (all templates)
        CNodeData in1 = (CNodeData) tpl.getInput().get(0);
        if (!isMultiAgg) {
            rFindAndRemoveLookup(tpl.getOutput(), in1, !isRow);
        } else {
            rFindAndRemoveLookupMultiAgg((CNodeMultiAgg) tpl, in1);
        }

        // drop invalid row templates (e.g., unsatisfied blocksize constraint)
        if (isRow) {
            if (((CNodeRow) tpl).getRowType() == RowType.NO_AGG && tpl.getOutput().getDataType().isScalar()) {
                // invalid row cplan over column vector
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed invalid row cplan w/o agg on column vector.");
                }
            } else if (OptimizerUtils.isSparkExecutionMode()) {
                Hop hop = memo.getHopRefs().get(key);
                boolean isSpark = DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK
                    || OptimizerUtils.getTotalMemEstimate(inputs, hop, true) > OptimizerUtils.getLocalMemBudget();

                // the output is checked along dim1 for a transpose, otherwise along dim2
                boolean invalidNcol = false;
                if (hop.getDataType().isMatrix()) {
                    if (HopRewriteUtils.isTransposeOperation(hop)) {
                        invalidNcol = hop.getDim1() > hop.getRowsInBlock();
                    } else {
                        invalidNcol = hop.getDim2() > hop.getColsInBlock();
                    }
                }
                // every input is inspected, even once the flag is already set
                for (Hop in : inputs) {
                    boolean exceeds = in.getDataType().isMatrix() && in.getDim2() > in.getColsInBlock();
                    invalidNcol = invalidNcol | exceeds;
                }
                if (isSpark && invalidNcol) {
                    retained.remove(key);
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Removed invalid row cplan w/ ncol>ncolpb.");
                    }
                }
            }
        }

        // drop plans w/ a single op and w/o agg (evaluated lazily, step by step)
        boolean trivial = false;
        if (tpl instanceof CNodeCell) {
            trivial = ((CNodeCell) tpl).getCellType() == CellType.NO_AGG
                && TemplateUtils.hasSingleOperation(tpl);
        }
        if (!trivial && isRow) {
            RowType rowType = ((CNodeRow) tpl).getRowType();
            boolean candidateType = rowType == RowType.NO_AGG
                || rowType == RowType.NO_AGG_B1
                || rowType == RowType.ROW_AGG;
            trivial = candidateType && TemplateUtils.hasSingleOperation(tpl);
        }
        if (!trivial) {
            trivial = TemplateUtils.hasNoOperation(tpl);
        }
        if (trivial) {
            retained.remove(key);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Removed cplan with single operation.");
            }
        }

        // drop plans whose output is a plain data node (empty plan)
        if (tpl.getOutput() instanceof CNodeData) {
            retained.remove(key);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Removed empty cplan.");
            }
        }

        // rename inputs (for codegen and plan caching)
        tpl.renameInputs();
    }
    return retained;
}
use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project systemml by apache.
the class SpoofCompiler method cleanupCPlans.
/**
 * Post-processes constructed cplans: applies simplification and
 * common-subexpression rewrites, narrows the input hops to those the
 * rewritten template still references, and drops plans that are invalid or
 * not worth keeping. Incremental construction can leave behind unused
 * inputs, which would otherwise cause redundant computation.
 *
 * Plans with a null input hop are skipped entirely. All other plans have
 * their inputs renamed at the end of the iteration, whether or not they
 * were retained.
 *
 * @param memo memoization table, consulted for the hop reference of row
 *        templates in Spark execution mode
 * @param cplans set of cplans
 * @return new map holding only the retained cplans
 */
private static HashMap<Long, Pair<Hop[], CNodeTpl>> cleanupCPlans(CPlanMemoTable memo, HashMap<Long, Pair<Hop[], CNodeTpl>> cplans) {
    final HashMap<Long, Pair<Hop[], CNodeTpl>> retained = new HashMap<>();
    final CPlanOpRewriter opRewriter = new CPlanOpRewriter();
    final CPlanCSERewriter cseRewriter = new CPlanCSERewriter();

    for (Entry<Long, Pair<Hop[], CNodeTpl>> entry : cplans.entrySet()) {
        final Long key = entry.getKey();
        CNodeTpl tpl = entry.getValue().getValue();
        Hop[] inputs = entry.getValue().getKey();

        // skip invalid plans with null inputs
        boolean hasNullInput = false;
        for (Hop candidate : inputs) {
            if (candidate == null) {
                hasNullInput = true;
                break;
            }
        }
        if (hasNullInput) {
            continue;
        }

        // simplification rewrites first, then common subexpression elimination
        tpl = opRewriter.simplifyCPlan(tpl);
        tpl = cseRewriter.eliminateCommonSubexpressions(tpl);
        final boolean isRow = tpl instanceof CNodeRow;
        final boolean isMultiAgg = tpl instanceof CNodeMultiAgg;

        // keep only the input hops still referenced by the template (order-preserving)
        final HashSet<Long> usedIDs = tpl.getInputHopIDs(false);
        inputs = Arrays.stream(inputs)
            .filter(h -> h != null && usedIDs.contains(h.getHopID()))
            .toArray(Hop[]::new);
        retained.put(key, new Pair<>(inputs, tpl));

        // drop invalid plans with column indexing on the main input
        if (tpl instanceof CNodeCell || isRow) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            boolean inclRC1 = !isRow;
            boolean invalid = rHasLookupRC1(tpl.getOutput(), mainInput, inclRC1);
            if (!invalid) {
                invalid = isLookupRC1(tpl.getOutput(), mainInput, inclRC1);
            }
            if (invalid) {
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                }
            }
        } else if (isMultiAgg) {
            CNodeData mainInput = (CNodeData) tpl.getInput().get(0);
            // every output is checked, so the trace message may repeat per output
            for (CNode out : ((CNodeMultiAgg) tpl).getOutputs()) {
                if (!rHasLookupRC1(out, mainInput, true) && !isLookupRC1(out, mainInput, true)) {
                    continue;
                }
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                }
            }
        }

        // remove invalid lookups on the main input (all templates)
        CNodeData in1 = (CNodeData) tpl.getInput().get(0);
        if (!isMultiAgg) {
            rFindAndRemoveLookup(tpl.getOutput(), in1, !isRow);
        } else {
            rFindAndRemoveLookupMultiAgg((CNodeMultiAgg) tpl, in1);
        }

        // drop invalid row templates (e.g., unsatisfied blocksize constraint)
        if (isRow) {
            if (((CNodeRow) tpl).getRowType() == RowType.NO_AGG && tpl.getOutput().getDataType().isScalar()) {
                // invalid row cplan over column vector
                retained.remove(key);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed invalid row cplan w/o agg on column vector.");
                }
            } else if (OptimizerUtils.isSparkExecutionMode()) {
                Hop hop = memo.getHopRefs().get(key);
                boolean isSpark = DMLScript.rtplatform == RUNTIME_PLATFORM.SPARK
                    || OptimizerUtils.getTotalMemEstimate(inputs, hop, true) > OptimizerUtils.getLocalMemBudget();

                // the output is checked along dim1 for a transpose, otherwise along dim2
                boolean invalidNcol = false;
                if (hop.getDataType().isMatrix()) {
                    if (HopRewriteUtils.isTransposeOperation(hop)) {
                        invalidNcol = hop.getDim1() > hop.getRowsInBlock();
                    } else {
                        invalidNcol = hop.getDim2() > hop.getColsInBlock();
                    }
                }
                // every input is inspected, even once the flag is already set
                for (Hop in : inputs) {
                    boolean exceeds = in.getDataType().isMatrix() && in.getDim2() > in.getColsInBlock();
                    invalidNcol = invalidNcol | exceeds;
                }
                if (isSpark && invalidNcol) {
                    retained.remove(key);
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Removed invalid row cplan w/ ncol>ncolpb.");
                    }
                }
            }
        }

        // drop plans w/ a single op and w/o agg (evaluated lazily, step by step)
        boolean trivial = false;
        if (tpl instanceof CNodeCell) {
            trivial = ((CNodeCell) tpl).getCellType() == CellType.NO_AGG
                && TemplateUtils.hasSingleOperation(tpl);
        }
        if (!trivial && isRow) {
            RowType rowType = ((CNodeRow) tpl).getRowType();
            boolean candidateType = rowType == RowType.NO_AGG
                || rowType == RowType.NO_AGG_B1
                || rowType == RowType.ROW_AGG;
            trivial = candidateType && TemplateUtils.hasSingleOperation(tpl);
        }
        if (!trivial) {
            trivial = TemplateUtils.hasNoOperation(tpl);
        }
        if (trivial) {
            retained.remove(key);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Removed cplan with single operation.");
            }
        }

        // drop plans whose output is a plain data node (empty plan)
        if (tpl.getOutput() instanceof CNodeData) {
            retained.remove(key);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Removed empty cplan.");
            }
        }

        // rename inputs (for codegen and plan caching)
        tpl.renameInputs();
    }
    return retained;
}
use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project incubator-systemml by apache.
the class SpoofCompiler method optimize.
/**
 * Main interface of sum-product optimizer, statement block dag.
 *
 * Explores and prunes fusion plan candidates, constructs and cleans up the
 * cplans, obtains a class per cplan (from the plan cache or by generating
 * and compiling source code), and, if any cplans remain, builds the
 * modified hop dag followed by a CSE rewrite pass. A null or empty root
 * list is returned unchanged. Any exception raised on the way is logged
 * together with the hop dag and rethrown wrapped in a DMLRuntimeException.
 *
 * @param roots dag root nodes
 * @param recompile true if invoked during dynamic recompilation
 * @return dag root nodes of modified dag
 */
public static ArrayList<Hop> optimize(ArrayList<Hop> roots, boolean recompile) {
    // nothing to do for a missing or empty dag
    if (roots == null || roots.isEmpty()) {
        return roots;
    }

    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;
    ArrayList<Hop> result = roots;
    try {
        // context-sensitive literal replacement (only integers during recompile)
        final boolean compileLiterals = !recompile || (PLAN_CACHE_POLICY == PlanCachePolicy.CONSTANT);

        // candidate exploration of valid partial fusion plans
        final CPlanMemoTable memoTable = new CPlanMemoTable();
        for (Hop root : roots) {
            rExploreCPlans(root, memoTable, compileLiterals);
        }

        // candidate selection of optimal fusion plan
        memoTable.pruneSuboptimal(roots);

        // construct actual cplan representations
        // note: a private visited set is used instead of the hop visit status, because
        // jumps over fused operators would corrupt subsequent resets and leave partial
        // hops dags in visited status
        HashMap<Long, Pair<Hop[], CNodeTpl>> cplans = new LinkedHashMap<>();
        final HashSet<Long> seen = new HashSet<>();
        for (Hop root : roots) {
            rConstructCPlans(root, memoTable, cplans, compileLiterals, seen);
        }

        // cleanup codegen plans (remove unnecessary inputs, fix hop-cnodedata mapping,
        // remove empty templates with single cnodedata input, remove spurious lookups,
        // perform common subexpression elimination)
        cplans = cleanupCPlans(memoTable, cplans);

        // explain before modification (only if cplans exist)
        if (LOG.isTraceEnabled() && !cplans.isEmpty()) {
            LOG.trace("Codegen EXPLAIN (before optimize): \n" + Explain.explainHops(roots));
        }

        // obtain a class for every cplan: cached, or freshly generated and compiled
        final HashMap<Long, Pair<Hop[], Class<?>>> classes = new HashMap<>();
        for (Entry<Long, Pair<Hop[], CNodeTpl>> cplan : cplans.entrySet()) {
            final Long hopID = cplan.getKey();
            final Pair<Hop[], CNodeTpl> plan = cplan.getValue();
            final CNodeTpl tpl = plan.getValue();

            Class<?> cla = planCache.getPlan(tpl);
            if (cla != null) {
                // plan cache hit
                if (DMLScript.STATISTICS) {
                    Statistics.incrementCodegenOpCacheHits();
                }
            } else {
                // generate java source code
                String src = tpl.codegen(false);

                // explain debug output cplans or generated source code
                if (LOG.isTraceEnabled() || DMLScript.EXPLAIN.isHopsType(recompile)) {
                    LOG.info("Codegen EXPLAIN (generated cplan for HopID: " + hopID + ", line " + tpl.getBeginLine() + ", hash=" + tpl.hashCode() + "):");
                    LOG.info(tpl.getClassname() + Explain.explainCPlan(tpl));
                }
                if (LOG.isTraceEnabled() || DMLScript.EXPLAIN.isRuntimeType(recompile)) {
                    LOG.info("Codegen EXPLAIN (generated code for HopID: " + hopID + ", line " + tpl.getBeginLine() + ", hash=" + tpl.hashCode() + "):");
                    LOG.info(src);
                }

                // compile generated java source code
                cla = CodegenUtils.compileClass("codegen." + tpl.getClassname(), src);

                // maintain plan cache
                if (PLAN_CACHE_POLICY != PlanCachePolicy.NONE) {
                    planCache.putPlan(tpl, cla);
                }
            }

            // make class available and maintain the total counter
            if (cla != null) {
                classes.put(hopID, new Pair<Hop[], Class<?>>(plan.getKey(), cla));
            }
            if (DMLScript.STATISTICS) {
                Statistics.incrementCodegenOpCacheTotal();
            }
        }

        // create modified hop dag (operator replacement and CSE)
        if (!cplans.isEmpty()) {
            // generate final hop dag
            result = constructModifiedHopDag(roots, cplans, classes);

            // run common subexpression elimination and other rewrites
            result = rewriteCSE.rewriteHopDAG(result, new ProgramRewriteStatus());

            // explain after modification
            if (LOG.isTraceEnabled()) {
                LOG.trace("Codegen EXPLAIN (after optimize): \n" + Explain.explainHops(roots));
            }
        }
    } catch (Exception ex) {
        LOG.error("Codegen failed to optimize the following HOP DAG: \n" + Explain.explainHops(roots));
        throw new DMLRuntimeException(ex);
    }

    // compile statistics are only maintained on successful optimization
    if (DMLScript.STATISTICS) {
        Statistics.incrementCodegenDAGCompile();
        Statistics.incrementCodegenCompileTime(System.nanoTime() - startNanos);
    }

    Hop.resetVisitStatus(roots);
    return result;
}
Aggregations