Use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in the Apache project incubator-systemml.
Class SpoofCompiler, method rConstructModifiedHopDag.
/**
 * Recursively rewrites the hop dag rooted at {@code hop}. A hop whose ID has an
 * entry in {@code clas} is replaced by a {@link SpoofFusedOp} built from the
 * compiled class and the recorded input hops; afterwards the inputs of the
 * resulting hop are visited recursively.
 *
 * @param hop    hop to process
 * @param cplans per hop ID: input hops and the cplan template
 * @param clas   per hop ID: input hops and the compiled operator class
 * @param memo   IDs of hops that were already processed
 */
private static void rConstructModifiedHopDag(Hop hop, HashMap<Long, Pair<Hop[], CNodeTpl>> cplans, HashMap<Long, Pair<Hop[], Class<?>>> clas, HashSet<Long> memo) {
    // nothing to do for hops that were already processed
    if (memo.contains(hop.getHopID())) {
        return;
    }

    Hop current = hop;
    if (clas.containsKey(hop.getHopID())) {
        // look up the compiled class and the template of this hop
        Pair<Hop[], Class<?>> compiled = clas.get(hop.getHopID());
        CNodeTpl tpl = cplans.get(hop.getHopID()).getValue();
        Hop[] fusedInputs = compiled.getKey();
        boolean multiAgg = tpl instanceof CNodeMultiAgg;

        // replace sub-dag with generated operator
        SpoofFusedOp fused = new SpoofFusedOp(hop.getName(), hop.getDataType(), hop.getValueType(),
                compiled.getValue(), false, tpl.getOutputDimType());
        current = fused;

        // add inputs; the input matching the third template input of an outer product
        // is wrapped into a transpose unless the helper reports a transpose parent
        for (Hop in : fusedInputs) {
            boolean transposeInput = tpl instanceof CNodeOuterProduct
                    && in.getHopID() == ((CNodeData) tpl.getInput().get(2)).getHopID()
                    && !TemplateUtils.hasTransposeParentUnderOuterProduct(in);
            Hop effectiveInput = in;
            if (transposeInput) {
                effectiveInput = HopRewriteUtils.createTranspose(in);
            }
            current.addInput(effectiveInput);
        }

        // modify output parameters
        HopRewriteUtils.setOutputParameters(current, hop.getDim1(), hop.getDim2(),
                hop.getRowsInBlock(), hop.getColsInBlock(), hop.getNnz());

        // template-specific post-processing
        if (tpl instanceof CNodeOuterProduct && ((CNodeOuterProduct) tpl).isTransposeOutput()) {
            current = HopRewriteUtils.createTranspose(current);
        } else if (multiAgg) {
            ArrayList<Hop> aggRoots = ((CNodeMultiAgg) tpl).getRootNodes();
            current.setDataType(DataType.MATRIX);
            HopRewriteUtils.setOutputParameters(current, 1, aggRoots.size(),
                    fusedInputs[0].getRowsInBlock(), fusedInputs[0].getColsInBlock(), -1);
            // inject artificial right indexing operations for all parents of all nodes
            for (int pos = 0; pos < aggRoots.size(); pos++) {
                Hop aggRoot = aggRoots.get(pos);
                Hop indexed;
                if (aggRoot instanceof AggUnaryOp) {
                    indexed = HopRewriteUtils.createScalarIndexing(current, 1, pos + 1);
                } else {
                    indexed = HopRewriteUtils.createIndexingOp(current, 1, pos + 1);
                }
                HopRewriteUtils.rewireAllParentChildReferences(aggRoots.get(pos), indexed);
            }
        } else if (tpl instanceof CNodeCell && ((CNodeCell) tpl).requiredCastDtm()) {
            HopRewriteUtils.setOutputParametersForScalar(current);
            current = HopRewriteUtils.createUnary(current, OpOp1.CAST_AS_MATRIX);
        } else if (tpl instanceof CNodeRow) {
            CNodeRow rowTpl = (CNodeRow) tpl;
            if (rowTpl.getRowType() == RowType.NO_AGG_CONST || rowTpl.getRowType() == RowType.COL_AGG_CONST) {
                fused.setConstDim2(rowTpl.getConstDim2());
            }
        }

        // multi-aggregates were already rewired per root node above
        if (!multiAgg) {
            HopRewriteUtils.rewireAllParentChildReferences(hop, current);
        }
        memo.add(current.getHopID());
    }

    // process hops recursively (parent-child links modified, hence the index loop)
    for (int pos = 0; pos < current.getInput().size(); pos++) {
        Hop child = current.getInput().get(pos);
        rConstructModifiedHopDag(child, cplans, clas, memo);
    }
    memo.add(current.getHopID());
}
Use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in the Apache project systemml.
Class SpoofCompiler, method optimize.
/**
 * Main interface of the sum-product optimizer for the hop dag of a statement block.
 * Explores and selects fusion plans, builds and cleans up the cplans, generates and
 * compiles (or fetches from the plan cache) one class per cplan, and finally
 * constructs the modified hop dag. Any exception raised along the way is logged and
 * rethrown wrapped in a {@code DMLRuntimeException}.
 *
 * @param roots dag root nodes
 * @param recompile true if invoked during dynamic recompilation
 * @return dag root nodes of modified dag ({@code roots} itself if it is null or empty)
 */
public static ArrayList<Hop> optimize(ArrayList<Hop> roots, boolean recompile) {
    if (roots == null || roots.isEmpty()) {
        return roots;
    }

    long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;
    ArrayList<Hop> result = roots;

    try {
        // context-sensitive literal replacement (only integers during recompile)
        boolean compileLiterals = (PLAN_CACHE_POLICY == PlanCachePolicy.CONSTANT) || !recompile;

        // candidate exploration of valid partial fusion plans
        CPlanMemoTable memo = new CPlanMemoTable();
        for (Hop root : roots) {
            rExploreCPlans(root, memo, compileLiterals);
        }

        // candidate selection of optimal fusion plan
        memo.pruneSuboptimal(roots);

        // construct actual cplan representations
        // note: we do not use the hop visit status due to jumps over fused operators which would
        // corrupt subsequent resets, leaving partial hops dags in visited status
        HashMap<Long, Pair<Hop[], CNodeTpl>> cplans = new LinkedHashMap<>();
        HashSet<Long> visited = new HashSet<>();
        for (Hop root : roots) {
            rConstructCPlans(root, memo, cplans, compileLiterals, visited);
        }

        // cleanup codegen plans (remove unnecessary inputs, fix hop-cnodedata mapping,
        // remove empty templates with single cnodedata input, remove spurious lookups,
        // perform common subexpression elimination)
        cplans = cleanupCPlans(memo, cplans);

        // explain before modification (only if there are cplans)
        if (LOG.isTraceEnabled() && !cplans.isEmpty()) {
            LOG.trace("Codegen EXPLAIN (before optimize): \n" + Explain.explainHops(roots));
        }

        // source code generation for all cplans
        HashMap<Long, Pair<Hop[], Class<?>>> clas = new HashMap<>();
        for (Entry<Long, Pair<Hop[], CNodeTpl>> cplan : cplans.entrySet()) {
            Pair<Hop[], CNodeTpl> tmp = cplan.getValue();
            Class<?> cla = planCache.getPlan(tmp.getValue());

            if (cla != null) {
                // plan cache hit
                if (DMLScript.STATISTICS) {
                    Statistics.incrementCodegenOpCacheHits();
                }
            } else {
                // generate java source code
                String src = tmp.getValue().codegen(false);

                // explain debug output cplans or generated source code
                if (LOG.isTraceEnabled() || DMLScript.EXPLAIN.isHopsType(recompile)) {
                    LOG.info("Codegen EXPLAIN (generated cplan for HopID: " + cplan.getKey() + ", line " + tmp.getValue().getBeginLine() + ", hash=" + tmp.getValue().hashCode() + "):");
                    LOG.info(tmp.getValue().getClassname() + Explain.explainCPlan(cplan.getValue().getValue()));
                }
                if (LOG.isTraceEnabled() || DMLScript.EXPLAIN.isRuntimeType(recompile)) {
                    LOG.info("Codegen EXPLAIN (generated code for HopID: " + cplan.getKey() + ", line " + tmp.getValue().getBeginLine() + ", hash=" + tmp.getValue().hashCode() + "):");
                    LOG.info(src);
                }

                // compile generated java source code
                cla = CodegenUtils.compileClass("codegen." + tmp.getValue().getClassname(), src);

                // maintain plan cache
                if (PLAN_CACHE_POLICY != PlanCachePolicy.NONE) {
                    planCache.putPlan(tmp.getValue(), cla);
                }
            }

            // make class available and maintain hits
            if (cla != null) {
                clas.put(cplan.getKey(), new Pair<Hop[], Class<?>>(tmp.getKey(), cla));
            }
            if (DMLScript.STATISTICS) {
                Statistics.incrementCodegenOpCacheTotal();
            }
        }

        // create modified hop dag (operator replacement and CSE)
        if (!cplans.isEmpty()) {
            // generate final hop dag
            result = constructModifiedHopDag(roots, cplans, clas);

            // run common subexpression elimination and other rewrites
            result = rewriteCSE.rewriteHopDAG(result, new ProgramRewriteStatus());

            // explain after modification
            if (LOG.isTraceEnabled()) {
                LOG.trace("Codegen EXPLAIN (after optimize): \n" + Explain.explainHops(roots));
            }
        }
    } catch (Exception ex) {
        LOG.error("Codegen failed to optimize the following HOP DAG: \n" + Explain.explainHops(roots));
        throw new DMLRuntimeException(ex);
    }

    // maintain compile statistics
    if (DMLScript.STATISTICS) {
        Statistics.incrementCodegenDAGCompile();
        Statistics.incrementCodegenCompileTime(System.nanoTime() - startNanos);
    }

    Hop.resetVisitStatus(roots);
    return result;
}
Use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in the Apache project systemml.
Class SpoofCompiler, method rConstructModifiedHopDag.
/**
 * Recursively rewrites the hop dag rooted at {@code hop}. A hop whose ID has an
 * entry in {@code clas} is replaced by a {@link SpoofFusedOp} built from the
 * compiled class and the recorded input hops; afterwards the inputs of the
 * resulting hop are visited recursively.
 *
 * @param hop    hop to process
 * @param cplans per hop ID: input hops and the cplan template
 * @param clas   per hop ID: input hops and the compiled operator class
 * @param memo   IDs of hops that were already processed
 */
private static void rConstructModifiedHopDag(Hop hop, HashMap<Long, Pair<Hop[], CNodeTpl>> cplans, HashMap<Long, Pair<Hop[], Class<?>>> clas, HashSet<Long> memo) {
    // nothing to do for hops that were already processed
    if (memo.contains(hop.getHopID())) {
        return;
    }

    Hop current = hop;
    if (clas.containsKey(hop.getHopID())) {
        // look up the compiled class and the template of this hop
        Pair<Hop[], Class<?>> compiled = clas.get(hop.getHopID());
        CNodeTpl tpl = cplans.get(hop.getHopID()).getValue();
        Hop[] fusedInputs = compiled.getKey();
        boolean multiAgg = tpl instanceof CNodeMultiAgg;

        // replace sub-dag with generated operator
        SpoofFusedOp fused = new SpoofFusedOp(hop.getName(), hop.getDataType(), hop.getValueType(),
                compiled.getValue(), false, tpl.getOutputDimType());
        current = fused;

        // add inputs; the input matching the third template input of an outer product
        // is wrapped into a transpose unless the helper reports a transpose parent
        for (Hop in : fusedInputs) {
            boolean transposeInput = tpl instanceof CNodeOuterProduct
                    && in.getHopID() == ((CNodeData) tpl.getInput().get(2)).getHopID()
                    && !TemplateUtils.hasTransposeParentUnderOuterProduct(in);
            Hop effectiveInput = in;
            if (transposeInput) {
                effectiveInput = HopRewriteUtils.createTranspose(in);
            }
            current.addInput(effectiveInput);
        }

        // modify output parameters
        HopRewriteUtils.setOutputParameters(current, hop.getDim1(), hop.getDim2(),
                hop.getRowsInBlock(), hop.getColsInBlock(), hop.getNnz());

        // template-specific post-processing
        if (tpl instanceof CNodeOuterProduct && ((CNodeOuterProduct) tpl).isTransposeOutput()) {
            current = HopRewriteUtils.createTranspose(current);
        } else if (multiAgg) {
            ArrayList<Hop> aggRoots = ((CNodeMultiAgg) tpl).getRootNodes();
            current.setDataType(DataType.MATRIX);
            HopRewriteUtils.setOutputParameters(current, 1, aggRoots.size(),
                    fusedInputs[0].getRowsInBlock(), fusedInputs[0].getColsInBlock(), -1);
            // inject artificial right indexing operations for all parents of all nodes
            for (int pos = 0; pos < aggRoots.size(); pos++) {
                Hop aggRoot = aggRoots.get(pos);
                Hop indexed;
                if (aggRoot instanceof AggUnaryOp) {
                    indexed = HopRewriteUtils.createScalarIndexing(current, 1, pos + 1);
                } else {
                    indexed = HopRewriteUtils.createIndexingOp(current, 1, pos + 1);
                }
                HopRewriteUtils.rewireAllParentChildReferences(aggRoots.get(pos), indexed);
            }
        } else if (tpl instanceof CNodeCell && ((CNodeCell) tpl).requiredCastDtm()) {
            HopRewriteUtils.setOutputParametersForScalar(current);
            current = HopRewriteUtils.createUnary(current, OpOp1.CAST_AS_MATRIX);
        } else if (tpl instanceof CNodeRow) {
            CNodeRow rowTpl = (CNodeRow) tpl;
            if (rowTpl.getRowType() == RowType.NO_AGG_CONST || rowTpl.getRowType() == RowType.COL_AGG_CONST) {
                fused.setConstDim2(rowTpl.getConstDim2());
            }
        }

        // multi-aggregates were already rewired per root node above
        if (!multiAgg) {
            HopRewriteUtils.rewireAllParentChildReferences(hop, current);
        }
        memo.add(current.getHopID());
    }

    // process hops recursively (parent-child links modified, hence the index loop)
    for (int pos = 0; pos < current.getInput().size(); pos++) {
        Hop child = current.getInput().get(pos);
        rConstructModifiedHopDag(child, cplans, clas, memo);
    }
    memo.add(current.getHopID());
}
Aggregations