Search in sources :

Example 11 with Hop

use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.

the class PlanSelectionFuseCostBased method createAndAddMultiAggPlans.

//across-partition multi-agg templates with shared reads
/**
 * Creates multi-aggregate template plans that span partitions and registers
 * them in the memo table and as best plans.
 *
 * The method (1) collects candidate aggregates reachable from the roots,
 * (2) discards candidates that already have a MultiAggTpl entry,
 * (3) extracts an AggregateInfo per remaining candidate, (4) filters and
 * sorts these infos, (5) greedily merges mergable infos until a pass ends
 * without a successful merge, and (6) adds one MemoTableEntry for every
 * merged group that holds more than one aggregate.
 *
 * @param memo memo table that is queried for existing plans and hop
 *             references, and extended with the new multi-agg entries
 * @param roots root hops of the DAG to scan for aggregates
 */
private void createAndAddMultiAggPlans(CPlanMemoTable memo, ArrayList<Hop> roots) {
    //gather full aggregations as the initial candidate set
    HashSet<Long> candidates = new HashSet<Long>();
    Hop.resetVisitStatus(roots);
    for (Hop root : roots) {
        rCollectFullAggregates(root, candidates);
    }
    Hop.resetVisitStatus(roots);

    //drop operators that already have a multi-agg plan assigned
    candidates.removeIf(id -> memo.contains(id, TemplateType.MultiAggTpl));

    //fewer than two candidates leave nothing to combine
    if (candidates.size() < 2) {
        return;
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("Found across-partition ua(RC) aggregations: " + Arrays.toString(candidates.toArray(new Long[0])));
    }

    //gather per-candidate information
    //(subsumed aggregations, and inputs to fused operators)
    List<AggregateInfo> aggInfos = new ArrayList<AggregateInfo>();
    for (Long candidateID : candidates) {
        Hop aggHop = memo._hopRefs.get(candidateID);
        AggregateInfo extracted = new AggregateInfo(aggHop);
        for (Hop input : aggHop.getInput()) {
            rExtractAggregateInfo(memo, input, extracted, TemplateType.CellTpl);
        }
        //fall back to the first input if no fused input was recorded
        if (extracted._fusedInputs.isEmpty()) {
            extracted.addFusedInput(aggHop.getInput().get(0).getHopID());
        }
        aggInfos.add(extracted);
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("Extracted across-partition ua(RC) aggregation info: ");
        for (AggregateInfo extracted : aggInfos) {
            LOG.trace(extracted);
        }
    }

    //filter aggregations w/ matmults to ensure consistent dims, and
    //sort by number of dependencies to simplify merging clusters of
    //aggregations with parallel dependencies
    aggInfos = aggInfos.stream()
        .filter(a -> !a.containsMatMult)
        .sorted(Comparator.comparing(a -> a._inputAggs.size()))
        .collect(Collectors.toList());

    //greedy grouping of multi-agg candidates; repeat full passes
    //until the last merge attempt of a pass yields no result
    boolean converged;
    do {
        AggregateInfo lastMerged = null;
        for (int i = 0; i < aggInfos.size(); i++) {
            AggregateInfo base = aggInfos.get(i);
            int j = i + 1;
            while (j < aggInfos.size()) {
                AggregateInfo other = aggInfos.get(j);
                if (base.isMergable(other)) {
                    lastMerged = base.merge(other);
                    //the successor moves into slot j, so do not advance
                    aggInfos.remove(j);
                } else {
                    j++;
                }
            }
        }
        converged = (lastMerged == null);
    } while (!converged);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Merged across-partition ua(RC) aggregation info: ");
        for (AggregateInfo grouped : aggInfos) {
            LOG.trace(grouped);
        }
    }

    //construct and add multiagg template plans (w/ max 3 aggregations)
    for (AggregateInfo grouped : aggInfos) {
        if (grouped._aggregates.size() > 1) {
            Long[] aggs = grouped._aggregates.keySet().toArray(new Long[0]);
            //third reference is -1 when the group only has two aggregates
            long third = (aggs.length > 2) ? aggs[2] : -1;
            MemoTableEntry me = new MemoTableEntry(TemplateType.MultiAggTpl, aggs[0], aggs[1], third);
            for (Long aggID : aggs) {
                memo.add(memo._hopRefs.get(aggID), me);
                addBestPlan(aggID, me);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Added multiagg* plan: " + aggID + " " + me);
                }
            }
        }
    }
}
Also used : Arrays(java.util.Arrays) IndexingOp(org.apache.sysml.hops.IndexingOp) HashMap(java.util.HashMap) AggUnaryOp(org.apache.sysml.hops.AggUnaryOp) ParameterizedBuiltinOp(org.apache.sysml.hops.ParameterizedBuiltinOp) AggOp(org.apache.sysml.hops.Hop.AggOp) ArrayList(java.util.ArrayList) LiteralOp(org.apache.sysml.hops.LiteralOp) HashSet(java.util.HashSet) MemoTableEntry(org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry) Pair(org.apache.commons.lang3.tuple.Pair) ReorgOp(org.apache.sysml.hops.ReorgOp) IDSequence(org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence) CollectionUtils(org.apache.commons.collections.CollectionUtils) InfrastructureAnalyzer(org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer) AggBinaryOp(org.apache.sysml.hops.AggBinaryOp) TernaryOp(org.apache.sysml.hops.TernaryOp) Iterator(java.util.Iterator) Collection(java.util.Collection) TemplateType(org.apache.sysml.hops.codegen.template.TemplateBase.TemplateType) BinaryOp(org.apache.sysml.hops.BinaryOp) Collectors(java.util.stream.Collectors) Direction(org.apache.sysml.hops.Hop.Direction) Hop(org.apache.sysml.hops.Hop) List(java.util.List) Entry(java.util.Map.Entry) Log(org.apache.commons.logging.Log) LogFactory(org.apache.commons.logging.LogFactory) Comparator(java.util.Comparator) HopRewriteUtils(org.apache.sysml.hops.rewrite.HopRewriteUtils) UnaryOp(org.apache.sysml.hops.UnaryOp) Hop(org.apache.sysml.hops.Hop) ArrayList(java.util.ArrayList) MemoTableEntry(org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry) HashSet(java.util.HashSet)

Example 12 with Hop

use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.

the class PlanSelectionFuseCostBased method rGetPlanCosts.

/**
 * Recursively estimates the costs of the hop DAG rooted at the given hop
 * for one plan assignment.
 *
 * Each (hop id, cost vector id) pair is processed at most once; a repeated
 * visit contributes zero costs.
 *
 * @param memo memo table with the candidate plans per hop
 * @param current hop whose sub-DAG is costed
 * @param visited already processed (hop id, cost vector id) pairs; updated in place
 * @param partition hop ids of the partition under consideration
 * @param M forwarded to hasNoRefToMaterialization (presumably the
 *          materialization points; confirm against caller)
 * @param plan forwarded to hasNoRefToMaterialization (presumably the
 *          assignment per entry of M; confirm against caller)
 * @param computeCosts compute costs per hop id
 * @param costsCurrent cost vector of the enclosing fused operator, or null if none
 * @param currentType template type of the enclosing fused operator, or null if none
 * @return the accumulated cost estimate
 * @throws RuntimeException if the estimate is negative, NaN, or infinite
 */
private static double rGetPlanCosts(CPlanMemoTable memo, Hop current, HashSet<Pair<Long, Long>> visited, HashSet<Long> partition, ArrayList<Long> M, boolean[] plan, HashMap<Long, Double> computeCosts, CostVector costsCurrent, TemplateType currentType) {
    final long hopID = current.getHopID();

    //memoization per hop id and cost vector to account for redundant
    //computation without double counting materialized results or compute
    //costs of complex operation DAGs within a single fused operator
    Pair<Long, Long> memoKey = Pair.of(hopID, (costsCurrent == null) ? 0 : costsCurrent.ID);
    if (!visited.add(memoKey)) {
        return 0;
    }

    //open template if necessary, including memoization
    //under awareness of current plan choice
    MemoTableEntry best = null;
    boolean opened = false;
    if (memo.contains(hopID)) {
        if (currentType != null) {
            //inside a fused operator: stay with the current type or a cell template
            best = memo.get(hopID).stream()
                .filter(p -> p.type == currentType || p.type == TemplateType.CellTpl)
                .filter(p -> hasNoRefToMaterialization(p, M, plan))
                .min(Comparator.comparing(p -> 7 - ((p.type == currentType) ? 4 : 0) - p.countPlanRefs()))
                .orElse(null);
        } else {
            //no enclosing fused operator: a new one is opened at this hop
            best = memo.get(hopID).stream()
                .filter(p -> isValid(p, current))
                .filter(p -> hasNoRefToMaterialization(p, M, plan))
                .min(new BasicPlanComparator())
                .orElse(null);
            opened = true;
        }
    }

    //create new cost vector if opened, initialized with write costs
    final CostVector costVect;
    if (opened) {
        costVect = new CostVector(Math.max(current.getDim1(), 1) * Math.max(current.getDim2(), 1));
    } else {
        costVect = costsCurrent;
    }

    //add compute costs of current operator to costs vector
    final boolean inPartition = partition.contains(hopID);
    if (inPartition) {
        costVect.computeCosts += computeCosts.get(hopID);
    }

    //process children recursively
    double total = 0;
    for (int pos = 0; pos < current.getInput().size(); pos++) {
        Hop child = current.getInput().get(pos);
        if (best != null && best.isPlanRef(pos)) {
            //child is fused into the same operator
            total += rGetPlanCosts(memo, child, visited, partition, M, plan, computeCosts, costVect, best.type);
        } else if (best != null && isImplicitlyFused(current, pos, best.type)) {
            //implicitly fused child: account for its first input as operator input
            costVect.addInputSize(child.getInput().get(0).getHopID(), Math.max(child.getDim1(), 1) * Math.max(child.getDim2(), 1));
        } else {
            //include children and I/O costs
            total += rGetPlanCosts(memo, child, visited, partition, M, plan, computeCosts, null, null);
            if (costVect != null && child.getDataType().isMatrix()) {
                costVect.addInputSize(child.getHopID(), Math.max(child.getDim1(), 1) * Math.max(child.getDim2(), 1));
            }
        }
    }

    //add costs for opened fused operator
    if (inPartition) {
        if (opened) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("Cost vector for fused operator (hop " + current.getHopID() + "): " + costVect);
            }
            //time for output write
            total += costVect.outSize * 8 / WRITE_BANDWIDTH;
            //larger of the compute-based and the input-read-based term
            total += Math.max(costVect.computeCosts * costVect.getMaxInputSize() / COMPUTE_BANDWIDTH, costVect.getSumInputSizes() * 8 / READ_BANDWIDTH);
        } else if (hasNonPartitionConsumer(current, partition)) {
            //add costs for non-partition read in the middle of fused operator
            total += rGetPlanCosts(memo, current, visited, partition, M, plan, computeCosts, null, null);
        }
    }

    //sanity check non-negative costs
    if (total >= 0 && !Double.isNaN(total) && !Double.isInfinite(total)) {
        return total;
    }
    throw new RuntimeException("Wrong cost estimate: " + total);
}
Also used : Arrays(java.util.Arrays) IndexingOp(org.apache.sysml.hops.IndexingOp) HashMap(java.util.HashMap) AggUnaryOp(org.apache.sysml.hops.AggUnaryOp) ParameterizedBuiltinOp(org.apache.sysml.hops.ParameterizedBuiltinOp) AggOp(org.apache.sysml.hops.Hop.AggOp) ArrayList(java.util.ArrayList) LiteralOp(org.apache.sysml.hops.LiteralOp) HashSet(java.util.HashSet) MemoTableEntry(org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry) Pair(org.apache.commons.lang3.tuple.Pair) ReorgOp(org.apache.sysml.hops.ReorgOp) IDSequence(org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence) CollectionUtils(org.apache.commons.collections.CollectionUtils) InfrastructureAnalyzer(org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer) AggBinaryOp(org.apache.sysml.hops.AggBinaryOp) TernaryOp(org.apache.sysml.hops.TernaryOp) Iterator(java.util.Iterator) Collection(java.util.Collection) TemplateType(org.apache.sysml.hops.codegen.template.TemplateBase.TemplateType) BinaryOp(org.apache.sysml.hops.BinaryOp) Collectors(java.util.stream.Collectors) Direction(org.apache.sysml.hops.Hop.Direction) Hop(org.apache.sysml.hops.Hop) List(java.util.List) Entry(java.util.Map.Entry) Log(org.apache.commons.logging.Log) LogFactory(org.apache.commons.logging.LogFactory) Comparator(java.util.Comparator) HopRewriteUtils(org.apache.sysml.hops.rewrite.HopRewriteUtils) UnaryOp(org.apache.sysml.hops.UnaryOp) MemoTableEntry(org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry) Hop(org.apache.sysml.hops.Hop)

Example 13 with Hop

use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.

the class PlanSelectionFuseCostBased method rSelectPlansFuseAll.

/**
 * Recursively selects one plan per hop of the given partition.
 *
 * Hops that were already visited for the given template type, or that are
 * not part of the partition, are skipped. Otherwise subsumed memo entries
 * are pruned, the best remaining entry is recorded via addBestPlan, and the
 * inputs are processed with the selected template type passed down along
 * plan references.
 *
 * @param memo memo table with the candidate plans; pruned in place
 * @param current hop to process
 * @param currentType template type of the enclosing fused operator, or null if none
 * @param partition hop ids of the partition under consideration
 */
private void rSelectPlansFuseAll(CPlanMemoTable memo, Hop current, TemplateType currentType, HashSet<Long> partition) {
    final long hopID = current.getHopID();
    if (isVisited(hopID, currentType) || !partition.contains(hopID)) {
        return;
    }

    //step 1: prune subsumed plans of same type
    if (memo.contains(hopID)) {
        List<MemoTableEntry> candidates = memo.get(hopID);
        HashSet<MemoTableEntry> subsumed = new HashSet<MemoTableEntry>();
        for (MemoTableEntry outer : candidates) {
            for (MemoTableEntry inner : candidates) {
                if (outer != inner && outer.subsumes(inner)) {
                    subsumed.add(inner);
                }
            }
        }
        memo.remove(current, subsumed);
    }

    //step 2: select plan for current path
    //(checked again because the pruning above modified the memo table)
    MemoTableEntry best = null;
    if (memo.contains(hopID)) {
        if (currentType != null) {
            best = memo.get(hopID).stream()
                .filter(p -> p.type == currentType || p.type == TemplateType.CellTpl)
                .min(Comparator.comparing(p -> 7 - ((p.type == currentType) ? 4 : 0) - p.countPlanRefs()))
                .orElse(null);
        } else {
            best = memo.get(hopID).stream()
                .filter(p -> isValid(p, current))
                .min(new BasicPlanComparator())
                .orElse(null);
        }
        addBestPlan(hopID, best);
    }

    //step 3: recursively process children
    for (int pos = 0; pos < current.getInput().size(); pos++) {
        //pass the selected type down only along plan references
        TemplateType childType = null;
        if (best != null && best.isPlanRef(pos)) {
            childType = best.type;
        }
        rSelectPlansFuseAll(memo, current.getInput().get(pos), childType, partition);
    }

    setVisited(hopID, currentType);
}
Also used : Arrays(java.util.Arrays) IndexingOp(org.apache.sysml.hops.IndexingOp) HashMap(java.util.HashMap) AggUnaryOp(org.apache.sysml.hops.AggUnaryOp) ParameterizedBuiltinOp(org.apache.sysml.hops.ParameterizedBuiltinOp) AggOp(org.apache.sysml.hops.Hop.AggOp) ArrayList(java.util.ArrayList) LiteralOp(org.apache.sysml.hops.LiteralOp) HashSet(java.util.HashSet) MemoTableEntry(org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry) Pair(org.apache.commons.lang3.tuple.Pair) ReorgOp(org.apache.sysml.hops.ReorgOp) IDSequence(org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence) CollectionUtils(org.apache.commons.collections.CollectionUtils) InfrastructureAnalyzer(org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer) AggBinaryOp(org.apache.sysml.hops.AggBinaryOp) TernaryOp(org.apache.sysml.hops.TernaryOp) Iterator(java.util.Iterator) Collection(java.util.Collection) TemplateType(org.apache.sysml.hops.codegen.template.TemplateBase.TemplateType) BinaryOp(org.apache.sysml.hops.BinaryOp) Collectors(java.util.stream.Collectors) Direction(org.apache.sysml.hops.Hop.Direction) Hop(org.apache.sysml.hops.Hop) List(java.util.List) Entry(java.util.Map.Entry) Log(org.apache.commons.logging.Log) LogFactory(org.apache.commons.logging.LogFactory) Comparator(java.util.Comparator) HopRewriteUtils(org.apache.sysml.hops.rewrite.HopRewriteUtils) UnaryOp(org.apache.sysml.hops.UnaryOp) MemoTableEntry(org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry) TemplateType(org.apache.sysml.hops.codegen.template.TemplateBase.TemplateType) HashSet(java.util.HashSet)

Example 14 with Hop

use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.

the class SpoofCompiler method cleanupCPlans.

/**
 * Cleans up the generated cplans by removing inputs that are not referenced
 * by any leaf node (such inputs are created during incremental construction
 * and would cause redundant computation), and drops cplans that are
 * invalid, trivial, or empty.
 *
 * @param cplans set of cplans, keyed by id, each holding its input hops and template
 * @return new map with the retained cplans
 */
private static HashMap<Long, Pair<Hop[], CNodeTpl>> cleanupCPlans(HashMap<Long, Pair<Hop[], CNodeTpl>> cplans) {
    HashMap<Long, Pair<Hop[], CNodeTpl>> cleaned = new HashMap<Long, Pair<Hop[], CNodeTpl>>();
    for (Entry<Long, Pair<Hop[], CNodeTpl>> entry : cplans.entrySet()) {
        Long planID = entry.getKey();
        Pair<Hop[], CNodeTpl> plan = entry.getValue();
        CNodeTpl tpl = plan.getValue();
        Hop[] inHops = plan.getKey();

        //collect cplan leaf node names
        HashSet<Long> leafs = new HashSet<Long>();
        if (tpl instanceof CNodeMultiAgg) {
            for (CNode out : ((CNodeMultiAgg) tpl).getOutputs()) {
                rCollectLeafIDs(out, leafs);
            }
        } else {
            rCollectLeafIDs(tpl.getOutput(), leafs);
        }

        //create clean cplan w/ minimal inputs
        if (inHops.length != leafs.size()) {
            tpl.cleanupInputs(leafs);
            ArrayList<Hop> usedHops = new ArrayList<Hop>();
            for (Hop hop : inHops) {
                if (hop != null && leafs.contains(hop.getHopID())) {
                    usedHops.add(hop);
                }
            }
            cleaned.put(planID, new Pair<Hop[], CNodeTpl>(usedHops.toArray(new Hop[0]), tpl));
        } else {
            //input count already matches the leaf count, keep as is
            cleaned.put(planID, plan);
        }

        //remove invalid plans with column indexing on main input
        if (tpl instanceof CNodeCell) {
            CNodeData mainIn = (CNodeData) tpl.getInput().get(0);
            if (rHasLookupRC1(tpl.getOutput(), mainIn) || isLookupRC1(tpl.getOutput(), mainIn)) {
                cleaned.remove(planID);
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                }
            }
        } else if (tpl instanceof CNodeMultiAgg) {
            CNodeData mainIn = (CNodeData) tpl.getInput().get(0);
            for (CNode output : ((CNodeMultiAgg) tpl).getOutputs()) {
                if (rHasLookupRC1(output, mainIn) || isLookupRC1(output, mainIn)) {
                    cleaned.remove(planID);
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Removed cplan due to invalid rc1 indexing on main input.");
                    }
                }
            }
        }

        //remove spurious lookups on main input of cell template
        if (tpl instanceof CNodeCell || tpl instanceof CNodeOuterProduct) {
            CNodeData mainIn = (CNodeData) tpl.getInput().get(0);
            rFindAndRemoveLookup(tpl.getOutput(), mainIn);
        } else if (tpl instanceof CNodeMultiAgg) {
            CNodeData mainIn = (CNodeData) tpl.getInput().get(0);
            rFindAndRemoveLookupMultiAgg((CNodeMultiAgg) tpl, mainIn);
        }

        //remove cplan w/ single op and w/o agg
        boolean trivialCell = tpl instanceof CNodeCell
            && ((((CNodeCell) tpl).getCellType() == CellType.NO_AGG && TemplateUtils.hasSingleOperation(tpl))
                || TemplateUtils.hasNoOperation(tpl));
        if (trivialCell || (tpl instanceof CNodeRow && TemplateUtils.hasSingleOperation(tpl))) {
            cleaned.remove(planID);
        }

        //remove cplan if empty
        if (tpl.getOutput() instanceof CNodeData) {
            cleaned.remove(planID);
        }
    }
    return cleaned;
}
Also used : CNodeData(org.apache.sysml.hops.codegen.cplan.CNodeData) CNodeTpl(org.apache.sysml.hops.codegen.cplan.CNodeTpl) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Hop(org.apache.sysml.hops.Hop) ArrayList(java.util.ArrayList) CNodeCell(org.apache.sysml.hops.codegen.cplan.CNodeCell) CNode(org.apache.sysml.hops.codegen.cplan.CNode) CNodeOuterProduct(org.apache.sysml.hops.codegen.cplan.CNodeOuterProduct) CNodeRow(org.apache.sysml.hops.codegen.cplan.CNodeRow) CNodeMultiAgg(org.apache.sysml.hops.codegen.cplan.CNodeMultiAgg) Pair(org.apache.sysml.runtime.matrix.data.Pair) HashSet(java.util.HashSet)

Example 15 with Hop

use of org.apache.sysml.hops.Hop in project incubator-systemml by apache.

the class SpoofCompiler method constructCPlans.

////////////////////
// Codegen plan construction
/**
 * Explores, prunes, and constructs the cplans for the given hop DAG roots.
 *
 * @param roots root hops of the DAG
 * @param compileLiterals flag forwarded to the exploration and construction steps
 * @return map of constructed cplans (input hops and template per id); the
 *         returned instance is a LinkedHashMap
 * @throws DMLException declared by the signature; thrown by callees if at all
 */
private static HashMap<Long, Pair<Hop[], CNodeTpl>> constructCPlans(ArrayList<Hop> roots, boolean compileLiterals) throws DMLException {
    //explore cplan candidates
    CPlanMemoTable memo = new CPlanMemoTable();
    for (Hop root : roots) {
        rExploreCPlans(root, memo, compileLiterals);
    }

    //select optimal cplan candidates
    memo.pruneSuboptimal(roots);

    //construct actual cplan representations
    //note: we do not use the hop visit status due to jumps over fused operators which would
    //corrupt subsequent resets, leaving partial hops dags in visited status
    HashSet<Long> visited = new HashSet<Long>();
    LinkedHashMap<Long, Pair<Hop[], CNodeTpl>> constructed = new LinkedHashMap<Long, Pair<Hop[], CNodeTpl>>();
    for (Hop root : roots) {
        rConstructCPlans(root, memo, constructed, compileLiterals, visited);
    }
    return constructed;
}
Also used : CNodeTpl(org.apache.sysml.hops.codegen.cplan.CNodeTpl) CPlanMemoTable(org.apache.sysml.hops.codegen.template.CPlanMemoTable) Hop(org.apache.sysml.hops.Hop) LinkedHashMap(java.util.LinkedHashMap) Pair(org.apache.sysml.runtime.matrix.data.Pair) HashSet(java.util.HashSet)

Aggregations

Hop (org.apache.sysml.hops.Hop)307 LiteralOp (org.apache.sysml.hops.LiteralOp)94 AggBinaryOp (org.apache.sysml.hops.AggBinaryOp)65 BinaryOp (org.apache.sysml.hops.BinaryOp)63 ArrayList (java.util.ArrayList)61 AggUnaryOp (org.apache.sysml.hops.AggUnaryOp)61 HashMap (java.util.HashMap)44 DataOp (org.apache.sysml.hops.DataOp)41 UnaryOp (org.apache.sysml.hops.UnaryOp)41 HashSet (java.util.HashSet)39 ReorgOp (org.apache.sysml.hops.ReorgOp)32 MemoTableEntry (org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry)28 StatementBlock (org.apache.sysml.parser.StatementBlock)28 IndexingOp (org.apache.sysml.hops.IndexingOp)24 ForStatementBlock (org.apache.sysml.parser.ForStatementBlock)23 WhileStatementBlock (org.apache.sysml.parser.WhileStatementBlock)23 IfStatementBlock (org.apache.sysml.parser.IfStatementBlock)22 DataGenOp (org.apache.sysml.hops.DataGenOp)21 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)21 HopsException (org.apache.sysml.hops.HopsException)18