Search in sources :

Example 6 with CNodeTpl

use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project incubator-systemml by apache.

the class SpoofCompiler method rConstructModifiedHopDag.

/**
 * Recursively walks the hop DAG below {@code hop} and swaps every hop that has a
 * compiled class registered in {@code clas} for a {@code SpoofFusedOp}, rewiring
 * the parents of the replaced hop to the new operator.
 *
 * @param hop    hop currently being visited
 * @param cplans map from hop id to the (input hops, cplan template) pair
 * @param clas   map from hop id to the (input hops, compiled class) pair
 * @param memo   ids of hops that were already handled; updated by this call
 */
private static void rConstructModifiedHopDag(Hop hop, HashMap<Long, Pair<Hop[], CNodeTpl>> cplans, HashMap<Long, Pair<Hop[], Class<?>>> clas, HashSet<Long> memo) {
    final long hopId = hop.getHopID();
    if (memo.contains(hopId)) {
        // nothing to do, this hop was handled before
        return;
    }
    Hop current = hop;
    if (clas.containsKey(hopId)) {
        // swap the fused sub-dag for the generated operator
        final Pair<Hop[], Class<?>> compiled = clas.get(hopId);
        final CNodeTpl tpl = cplans.get(hopId).getValue();
        final Hop[] fusedInputs = compiled.getKey();
        current = new SpoofFusedOp(hop.getName(), hop.getDataType(), hop.getValueType(), compiled.getValue(), false, tpl.getOutputDimType());
        final boolean outerProduct = tpl instanceof CNodeOuterProduct;
        for (Hop input : fusedInputs) {
            // for outer-product templates, the hop behind the template's input at index 2
            // is attached transposed unless TemplateUtils reports an existing transpose parent
            final boolean attachTransposed = outerProduct
                && input.getHopID() == ((CNodeData) tpl.getInput().get(2)).getHopID()
                && !TemplateUtils.hasTransposeParentUnderOuterProduct(input);
            if (attachTransposed) {
                current.addInput(HopRewriteUtils.createTranspose(input));
            } else {
                current.addInput(input);
            }
        }
        // carry the output characteristics of the replaced hop over to the fused operator
        HopRewriteUtils.setOutputParameters(current, hop.getDim1(), hop.getDim2(), hop.getRowsInBlock(), hop.getColsInBlock(), hop.getNnz());
        final boolean multiAgg = tpl instanceof CNodeMultiAgg;
        if (outerProduct && ((CNodeOuterProduct) tpl).isTransposeOutput()) {
            current = HopRewriteUtils.createTranspose(current);
        } else if (multiAgg) {
            final ArrayList<Hop> aggRoots = ((CNodeMultiAgg) tpl).getRootNodes();
            current.setDataType(DataType.MATRIX);
            HopRewriteUtils.setOutputParameters(current, 1, aggRoots.size(), fusedInputs[0].getRowsInBlock(), fusedInputs[0].getColsInBlock(), -1);
            // every aggregate root is replaced by an artificial lookup into column pos+1
            // of the 1 x roots.size() fused output
            for (int pos = 0; pos < aggRoots.size(); pos++) {
                final Hop aggRoot = aggRoots.get(pos);
                final Hop lookup;
                if (aggRoot instanceof AggUnaryOp) {
                    lookup = HopRewriteUtils.createScalarIndexing(current, 1, pos + 1);
                } else {
                    lookup = HopRewriteUtils.createIndexingOp(current, 1, pos + 1);
                }
                HopRewriteUtils.rewireAllParentChildReferences(aggRoot, lookup);
            }
        } else if (tpl instanceof CNodeCell && ((CNodeCell) tpl).requiredCastDtm()) {
            HopRewriteUtils.setOutputParametersForScalar(current);
            current = HopRewriteUtils.createUnary(current, OpOp1.CAST_AS_MATRIX);
        } else if (tpl instanceof CNodeRow) {
            final RowType rowType = ((CNodeRow) tpl).getRowType();
            if (rowType == RowType.NO_AGG_CONST || rowType == RowType.COL_AGG_CONST) {
                ((SpoofFusedOp) current).setConstDim2(((CNodeRow) tpl).getConstDim2());
            }
        }
        // multi-aggregates were already rewired per root node above
        if (!multiAgg) {
            HopRewriteUtils.rewireAllParentChildReferences(hop, current);
        }
        memo.add(current.getHopID());
    }
    // descend into the inputs; index-based on purpose because parent-child links
    // are modified while recursing
    for (int pos = 0; pos < current.getInput().size(); pos++) {
        rConstructModifiedHopDag(current.getInput().get(pos), cplans, clas, memo);
    }
    memo.add(current.getHopID());
}
Also used : CNodeTpl(org.apache.sysml.hops.codegen.cplan.CNodeTpl) Hop(org.apache.sysml.hops.Hop) ArrayList(java.util.ArrayList) CNodeCell(org.apache.sysml.hops.codegen.cplan.CNodeCell) CNodeOuterProduct(org.apache.sysml.hops.codegen.cplan.CNodeOuterProduct) AggUnaryOp(org.apache.sysml.hops.AggUnaryOp) CNodeRow(org.apache.sysml.hops.codegen.cplan.CNodeRow) CNodeMultiAgg(org.apache.sysml.hops.codegen.cplan.CNodeMultiAgg)

Example 7 with CNodeTpl

use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project systemml by apache.

the class SpoofCompiler method optimize.

/**
 * Entry point of the sum-product optimizer for the hop DAG of a statement block.
 * Explores and selects fusion plans, generates and compiles (or fetches from the
 * plan cache) one class per plan, and then replaces the fused sub-dags in the hop DAG.
 *
 * @param roots dag root nodes; returned unchanged if {@code null} or empty
 * @param recompile true if invoked during dynamic recompilation
 * @return dag root nodes of modified dag
 */
public static ArrayList<Hop> optimize(ArrayList<Hop> roots, boolean recompile) {
    if (roots == null || roots.isEmpty()) {
        return roots;
    }
    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;
    ArrayList<Hop> result = roots;
    try {
        // context-sensitive literal replacement (only integers during recompile)
        final boolean compileLiterals = !recompile || PLAN_CACHE_POLICY == PlanCachePolicy.CONSTANT;

        // explore candidate partial fusion plans, then keep only the selected ones
        final CPlanMemoTable memoTable = new CPlanMemoTable();
        for (Hop root : roots) {
            rExploreCPlans(root, memoTable, compileLiterals);
        }
        memoTable.pruneSuboptimal(roots);

        // build the cplan representations; an explicit visited set is used instead of the
        // hop visit status because jumps over fused operators would corrupt subsequent
        // resets and leave partial hop dags in visited status
        HashMap<Long, Pair<Hop[], CNodeTpl>> cplans = new LinkedHashMap<>();
        final HashSet<Long> seen = new HashSet<>();
        for (Hop root : roots) {
            rConstructCPlans(root, memoTable, cplans, compileLiterals, seen);
        }

        // cleanup codegen plans (remove unnecessary inputs, fix hop-cnodedata mapping,
        // remove empty templates with single cnodedata input, remove spurious lookups,
        // perform common subexpression elimination)
        cplans = cleanupCPlans(memoTable, cplans);

        // explain the dag as it looks before any operator replacement
        if (LOG.isTraceEnabled() && !cplans.isEmpty()) {
            LOG.trace("Codegen EXPLAIN (before optimize): \n" + Explain.explainHops(roots));
        }

        // obtain one compiled class per cplan, either from the plan cache or via codegen
        final HashMap<Long, Pair<Hop[], Class<?>>> clas = new HashMap<>();
        for (Entry<Long, Pair<Hop[], CNodeTpl>> entry : cplans.entrySet()) {
            final Long rootId = entry.getKey();
            final Pair<Hop[], CNodeTpl> plan = entry.getValue();
            final CNodeTpl tpl = plan.getValue();
            Class<?> cla = planCache.getPlan(tpl);
            if (cla != null) {
                if (DMLScript.STATISTICS) {
                    Statistics.incrementCodegenOpCacheHits();
                }
            } else {
                // cache miss: generate java source code
                final String src = tpl.codegen(false);
                // explain debug output of the cplan and/or the generated source code
                if (LOG.isTraceEnabled() || DMLScript.EXPLAIN.isHopsType(recompile)) {
                    LOG.info("Codegen EXPLAIN (generated cplan for HopID: " + rootId + ", line " + tpl.getBeginLine() + ", hash=" + tpl.hashCode() + "):");
                    LOG.info(tpl.getClassname() + Explain.explainCPlan(tpl));
                }
                if (LOG.isTraceEnabled() || DMLScript.EXPLAIN.isRuntimeType(recompile)) {
                    LOG.info("Codegen EXPLAIN (generated code for HopID: " + rootId + ", line " + tpl.getBeginLine() + ", hash=" + tpl.hashCode() + "):");
                    LOG.info(src);
                }
                // compile the generated source and remember it unless caching is disabled
                cla = CodegenUtils.compileClass("codegen." + tpl.getClassname(), src);
                if (PLAN_CACHE_POLICY != PlanCachePolicy.NONE) {
                    planCache.putPlan(tpl, cla);
                }
            }
            // make class available and maintain the total lookup count
            if (cla != null) {
                clas.put(rootId, new Pair<Hop[], Class<?>>(plan.getKey(), cla));
            }
            if (DMLScript.STATISTICS) {
                Statistics.incrementCodegenOpCacheTotal();
            }
        }

        // create modified hop dag (operator replacement and CSE)
        if (!cplans.isEmpty()) {
            final ArrayList<Hop> fused = constructModifiedHopDag(roots, cplans, clas);
            result = rewriteCSE.rewriteHopDAG(fused, new ProgramRewriteStatus());
            // NOTE(review): this explains `roots`, not the list returned above -
            // confirm both refer to the same dag roots
            if (LOG.isTraceEnabled()) {
                LOG.trace("Codegen EXPLAIN (after optimize): \n" + Explain.explainHops(roots));
            }
        }
    } catch (Exception ex) {
        LOG.error("Codegen failed to optimize the following HOP DAG: \n" + Explain.explainHops(roots));
        throw new DMLRuntimeException(ex);
    }
    if (DMLScript.STATISTICS) {
        Statistics.incrementCodegenDAGCompile();
        Statistics.incrementCodegenCompileTime(System.nanoTime() - startNanos);
    }
    Hop.resetVisitStatus(roots);
    return result;
}
Also used : CNodeTpl(org.apache.sysml.hops.codegen.cplan.CNodeTpl) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Hop(org.apache.sysml.hops.Hop) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CPlanMemoTable(org.apache.sysml.hops.codegen.template.CPlanMemoTable) ProgramRewriteStatus(org.apache.sysml.hops.rewrite.ProgramRewriteStatus) Pair(org.apache.sysml.runtime.matrix.data.Pair) HashSet(java.util.HashSet)

Example 8 with CNodeTpl

use of org.apache.sysml.hops.codegen.cplan.CNodeTpl in project systemml by apache.

the class SpoofCompiler method rConstructModifiedHopDag.

/**
 * Recursively walks the hop DAG below {@code hop} and swaps every hop that has a
 * compiled class registered in {@code clas} for a {@code SpoofFusedOp}, rewiring
 * the parents of the replaced hop to the new operator.
 *
 * @param hop    hop currently being visited
 * @param cplans map from hop id to the (input hops, cplan template) pair
 * @param clas   map from hop id to the (input hops, compiled class) pair
 * @param memo   ids of hops that were already handled; updated by this call
 */
private static void rConstructModifiedHopDag(Hop hop, HashMap<Long, Pair<Hop[], CNodeTpl>> cplans, HashMap<Long, Pair<Hop[], Class<?>>> clas, HashSet<Long> memo) {
    final long hopId = hop.getHopID();
    if (memo.contains(hopId)) {
        // nothing to do, this hop was handled before
        return;
    }
    Hop current = hop;
    if (clas.containsKey(hopId)) {
        // swap the fused sub-dag for the generated operator
        final Pair<Hop[], Class<?>> compiled = clas.get(hopId);
        final CNodeTpl tpl = cplans.get(hopId).getValue();
        final Hop[] fusedInputs = compiled.getKey();
        current = new SpoofFusedOp(hop.getName(), hop.getDataType(), hop.getValueType(), compiled.getValue(), false, tpl.getOutputDimType());
        final boolean outerProduct = tpl instanceof CNodeOuterProduct;
        for (Hop input : fusedInputs) {
            // for outer-product templates, the hop behind the template's input at index 2
            // is attached transposed unless TemplateUtils reports an existing transpose parent
            final boolean attachTransposed = outerProduct
                && input.getHopID() == ((CNodeData) tpl.getInput().get(2)).getHopID()
                && !TemplateUtils.hasTransposeParentUnderOuterProduct(input);
            if (attachTransposed) {
                current.addInput(HopRewriteUtils.createTranspose(input));
            } else {
                current.addInput(input);
            }
        }
        // carry the output characteristics of the replaced hop over to the fused operator
        HopRewriteUtils.setOutputParameters(current, hop.getDim1(), hop.getDim2(), hop.getRowsInBlock(), hop.getColsInBlock(), hop.getNnz());
        final boolean multiAgg = tpl instanceof CNodeMultiAgg;
        if (outerProduct && ((CNodeOuterProduct) tpl).isTransposeOutput()) {
            current = HopRewriteUtils.createTranspose(current);
        } else if (multiAgg) {
            final ArrayList<Hop> aggRoots = ((CNodeMultiAgg) tpl).getRootNodes();
            current.setDataType(DataType.MATRIX);
            HopRewriteUtils.setOutputParameters(current, 1, aggRoots.size(), fusedInputs[0].getRowsInBlock(), fusedInputs[0].getColsInBlock(), -1);
            // every aggregate root is replaced by an artificial lookup into column pos+1
            // of the 1 x roots.size() fused output
            for (int pos = 0; pos < aggRoots.size(); pos++) {
                final Hop aggRoot = aggRoots.get(pos);
                final Hop lookup;
                if (aggRoot instanceof AggUnaryOp) {
                    lookup = HopRewriteUtils.createScalarIndexing(current, 1, pos + 1);
                } else {
                    lookup = HopRewriteUtils.createIndexingOp(current, 1, pos + 1);
                }
                HopRewriteUtils.rewireAllParentChildReferences(aggRoot, lookup);
            }
        } else if (tpl instanceof CNodeCell && ((CNodeCell) tpl).requiredCastDtm()) {
            HopRewriteUtils.setOutputParametersForScalar(current);
            current = HopRewriteUtils.createUnary(current, OpOp1.CAST_AS_MATRIX);
        } else if (tpl instanceof CNodeRow) {
            final RowType rowType = ((CNodeRow) tpl).getRowType();
            if (rowType == RowType.NO_AGG_CONST || rowType == RowType.COL_AGG_CONST) {
                ((SpoofFusedOp) current).setConstDim2(((CNodeRow) tpl).getConstDim2());
            }
        }
        // multi-aggregates were already rewired per root node above
        if (!multiAgg) {
            HopRewriteUtils.rewireAllParentChildReferences(hop, current);
        }
        memo.add(current.getHopID());
    }
    // descend into the inputs; index-based on purpose because parent-child links
    // are modified while recursing
    for (int pos = 0; pos < current.getInput().size(); pos++) {
        rConstructModifiedHopDag(current.getInput().get(pos), cplans, clas, memo);
    }
    memo.add(current.getHopID());
}
Also used : CNodeTpl(org.apache.sysml.hops.codegen.cplan.CNodeTpl) Hop(org.apache.sysml.hops.Hop) ArrayList(java.util.ArrayList) CNodeCell(org.apache.sysml.hops.codegen.cplan.CNodeCell) CNodeOuterProduct(org.apache.sysml.hops.codegen.cplan.CNodeOuterProduct) AggUnaryOp(org.apache.sysml.hops.AggUnaryOp) CNodeRow(org.apache.sysml.hops.codegen.cplan.CNodeRow) CNodeMultiAgg(org.apache.sysml.hops.codegen.cplan.CNodeMultiAgg)

Aggregations

Hop (org.apache.sysml.hops.Hop)8 CNodeTpl (org.apache.sysml.hops.codegen.cplan.CNodeTpl)8 LinkedHashMap (java.util.LinkedHashMap)6 Pair (org.apache.sysml.runtime.matrix.data.Pair)6 HashMap (java.util.HashMap)5 CNodeCell (org.apache.sysml.hops.codegen.cplan.CNodeCell)5 CNodeMultiAgg (org.apache.sysml.hops.codegen.cplan.CNodeMultiAgg)5 CNodeRow (org.apache.sysml.hops.codegen.cplan.CNodeRow)5 HashSet (java.util.HashSet)4 ArrayList (java.util.ArrayList)3 CNode (org.apache.sysml.hops.codegen.cplan.CNode)3 CNodeData (org.apache.sysml.hops.codegen.cplan.CNodeData)3 CNodeOuterProduct (org.apache.sysml.hops.codegen.cplan.CNodeOuterProduct)3 CPlanMemoTable (org.apache.sysml.hops.codegen.template.CPlanMemoTable)3 AggUnaryOp (org.apache.sysml.hops.AggUnaryOp)2 CPlanCSERewriter (org.apache.sysml.hops.codegen.template.CPlanCSERewriter)2 CPlanOpRewriter (org.apache.sysml.hops.codegen.template.CPlanOpRewriter)2 ProgramRewriteStatus (org.apache.sysml.hops.rewrite.ProgramRewriteStatus)2 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)2