Search in sources:

Example 1 with GDFNode

Use of org.apache.sysml.hops.globalopt.gdfgraph.GDFNode in project incubator-systemml by apache.

From the class GDFEnumOptimizer, method optimize:

/**
 * Entry point of enumeration-based global data flow optimization:
 * (1) costs the unmodified runtime program as a branch-and-bound baseline,
 * (2) enumerates candidate plans per graph root via dynamic programming
 * (enumOpt) and picks the min-cost root plan, (3) applies the chosen
 * configurations to the runtime plan and recompiles, and (4) records
 * optimization statistics in the given summary.
 *
 * @param gdfgraph the global data flow graph over the runtime program
 * @param summary optimization summary statistics, populated as a side effect
 * @return the (same) gdfgraph instance, whose underlying runtime program has
 *         been recompiled with the selected configurations
 * @throws DMLRuntimeException if costing or plan application fails
 * @throws HopsException if hop-level recompilation fails
 * @throws LopsException if lop-level recompilation fails
 */
@Override
public GDFGraph optimize(GDFGraph gdfgraph, Summary summary) throws DMLRuntimeException, HopsException, LopsException {
    Timing time = new Timing(true);
    Program prog = gdfgraph.getRuntimeProgram();
    ExecutionContext ec = ExecutionContextFactory.createContext(prog);
    ArrayList<GDFNode> roots = gdfgraph.getGraphRootNodes();
    //Step 1: baseline costing for branch and bound costs
    //(initCosts stays MAX_VALUE when pruning is disabled, i.e., no bound)
    double initCosts = Double.MAX_VALUE;
    if (BRANCH_AND_BOUND_PRUNING) {
        initCosts = CostEstimationWrapper.getTimeEstimate(prog, ec);
        //relax the bound by the relative threshold to avoid over-pruning
        initCosts = initCosts * (1 + BRANCH_AND_BOUND_REL_THRES);
    }
    //Step 2: dynamic programming plan generation
    //(finally, pick optimal root plans over all interesting property sets)
    ArrayList<Plan> rootPlans = new ArrayList<Plan>();
    for (GDFNode node : roots) {
        PlanSet ps = enumOpt(node, _memo, initCosts);
        Plan optPlan = ps.getPlanWithMinCosts();
        rootPlans.add(optPlan);
    }
    long enumPlanMismatch = getPlanMismatches();
    //check for final containment of independent roots and pick optimal
    //NOTE(review): rSetRuntimePlanConfig presumably writes the chosen rewrite
    //configs back into the runtime plan; the fresh memo tracks visited plans
    HashMap<Long, Plan> memo = new HashMap<Long, Plan>();
    resetPlanMismatches();
    for (Plan p : rootPlans) rSetRuntimePlanConfig(p, memo);
    long finalPlanMismatch = getPlanMismatches();
    //generate final runtime plan (w/ optimal config)
    //recompile must happen before re-costing so optCosts reflects the new plan
    Recompiler.recompileProgramBlockHierarchy(prog.getProgramBlocks(), new LocalVariableMap(), 0, false);
    ec = ExecutionContextFactory.createContext(prog);
    double optCosts = CostEstimationWrapper.getTimeEstimate(prog, ec);
    //maintain optimization summary statistics
    summary.setCostsInitial(initCosts);
    summary.setCostsOptimal(optCosts);
    summary.setNumEnumPlans(_enumeratedPlans);
    summary.setNumPrunedInvalidPlans(_prunedInvalidPlans);
    summary.setNumPrunedSuboptPlans(_prunedSuboptimalPlans);
    summary.setNumCompiledPlans(_compiledPlans);
    summary.setNumCostedPlans(_costedPlans);
    summary.setNumEnumPlanMismatch(enumPlanMismatch);
    summary.setNumFinalPlanMismatch(finalPlanMismatch);
    summary.setTimeOptim(time.stop());
    return gdfgraph;
}
Also used : Program(org.apache.sysml.runtime.controlprogram.Program) HashMap(java.util.HashMap) GDFNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFNode) ArrayList(java.util.ArrayList) ExecutionContext(org.apache.sysml.runtime.controlprogram.context.ExecutionContext) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)

Example 2 with GDFNode

use of org.apache.sysml.hops.globalopt.gdfgraph.GDFNode in project incubator-systemml by apache.

From the class GDFEnumOptimizer, method enumNodePlans:

/**
 * Enumerates the local plan alternatives for a single GDF node, i.e., the
 * candidate (interesting properties, rewrite config) combinations before
 * they are combined with child plans in enumOpt.
 *
 * @param node the GDF node to enumerate plans for
 * @param memo memo structure of already enumerated subgraphs
 * @param maxCosts branch-and-bound cost bound, forwarded to recursive calls
 * @return the set of local plan candidates for this node
 * @throws DMLRuntimeException if recursive enumeration fails
 */
private static PlanSet enumNodePlans(GDFNode node, MemoStructure memo, double maxCosts) throws DMLRuntimeException {
    ArrayList<Plan> plans = new ArrayList<Plan>();
    //distributed exec type depends on the active backend (Spark vs MR)
    ExecType CLUSTER = OptimizerUtils.isSparkExecutionMode() ? ExecType.SPARK : ExecType.MR;
    // CASE 1: core hop enumeration (other than persistent/transient read/write) 
    if (node.getNodeType() == NodeType.HOP_NODE && !(node.getHop() instanceof DataOp)) {
        //core rewrite enumeration for cp and mr
        enumHopNodePlans(node, plans);
    } else //CASE 2: dataop hop enumeration (read/write ops need special handling)
    if (node.getHop() instanceof DataOp) {
        DataOp dhop = (DataOp) node.getHop();
        if (dhop.getDataOpType() == DataOpTypes.PERSISTENTREAD) {
            //for persistent read the interesting properties are fixed by the input
            //but we can decide on output properties
            ExecType et = (dhop.getMemEstimate() > OptimizerUtils.getLocalMemBudget() || HopRewriteUtils.alwaysRequiresReblock(dhop)) ? CLUSTER : ExecType.CP;
            //CP plans only consider the default blocksize; cluster plans all of them
            int[] blocksizes = (et == CLUSTER) ? BLOCK_SIZES : new int[] { BLOCK_SIZES[0] };
            for (Integer bs : blocksizes) {
                RewriteConfig rcmr = new RewriteConfig(et, bs, FileFormatTypes.BINARY);
                InterestingProperties ipsmr = rcmr.deriveInterestingProperties();
                Plan mrplan = new Plan(node, ipsmr, rcmr, null);
                plans.add(mrplan);
            }
        } else if (dhop.getDataOpType() == DataOpTypes.PERSISTENTWRITE) {
            //for persistent write the interesting properties are fixed by the given
            //write specification (blocksize and format taken from the hop itself)
            ExecType et = (dhop.getMemEstimate() > OptimizerUtils.getLocalMemBudget()) ? CLUSTER : ExecType.CP;
            RewriteConfig rcmr = new RewriteConfig(et, (int) dhop.getRowsInBlock(), dhop.getInputFormatType());
            InterestingProperties ipsmr = rcmr.deriveInterestingProperties();
            Plan mrplan = new Plan(node, ipsmr, rcmr, null);
            plans.add(mrplan);
        } else if (dhop.getDataOpType() == DataOpTypes.TRANSIENTREAD || dhop.getDataOpType() == DataOpTypes.TRANSIENTWRITE) {
            //note: full enumeration for transient read and write; otherwise the properties
            //of these hops are never set because pass-through plans refer to different hops
            enumHopNodePlans(node, plans);
        }
    } else //CASE 3: ENUMERATE LOOP PLANS
    if (node.getNodeType() == NodeType.LOOP_NODE) {
        //TODO consistency checks inputs and outputs (updated vars)
        GDFLoopNode lnode = (GDFLoopNode) node;
        //step 0: recursively optimize loop inputs;
        //no additional pruning (validity, optimality) required
        for (GDFNode in : lnode.getLoopInputs().values()) enumOpt(in, memo, maxCosts);
        //step 1: enumerate loop plan, incl partitioning/checkpoints/reblock for inputs
        RewriteConfig rc = new RewriteConfig(ExecType.CP, -1, null);
        InterestingProperties ips = rc.deriveInterestingProperties();
        Plan lplan = new Plan(node, ips, rc, null);
        plans.add(lplan);
        //step 2: optimize the loop predicate
        //(predicate might be null if single variable)
        if (lnode.getLoopPredicate() != null)
            enumOpt(lnode.getLoopPredicate(), memo, maxCosts);
        //step 3: recursive call optimize on outputs
        //(return union of all output plans, later selected by output var)
        PlanSet Pout = new PlanSet();
        for (GDFNode out : lnode.getLoopOutputs().values()) Pout = Pout.union(enumOpt(out, memo, maxCosts));
        plans.addAll(Pout.getPlans());
    //note: global pruning later done when returning to enumOpt
    //for the entire loop node			
    } else //CASE 4: CREATE DUMMY CROSSBLOCK PLAN
    if (node.getNodeType() == NodeType.CROSS_BLOCK_NODE) {
    //do nothing (leads to pass-through on crossProductChild)
    }
    return new PlanSet(plans);
}
Also used : GDFLoopNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFLoopNode) ArrayList(java.util.ArrayList) GDFNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFNode) ExecType(org.apache.sysml.lops.LopProperties.ExecType) DataOp(org.apache.sysml.hops.DataOp)

Example 3 with GDFNode

use of org.apache.sysml.hops.globalopt.gdfgraph.GDFNode in project incubator-systemml by apache.

From the class GDFEnumOptimizer, method enumOpt:

/**
	 * Core dynamic programming enumeration algorithm
	 * for global data flow optimization.
	 * 
	 * @param node the GDF node
	 * @param memo the memo structure
	 * @param maxCosts max costs
	 * @return the plan set
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
public static PlanSet enumOpt(GDFNode node, MemoStructure memo, double maxCosts) throws DMLRuntimeException {
    //reuse already enumerated subgraphs (GDF nodes carry no visit status,
    //so the memo structure is the only duplicate guard)
    if (memo.constainsEntry(node))
        return memo.getEntry(node);
    //local plan alternatives for this node
    PlanSet result = enumNodePlans(node, memo, maxCosts);
    //fold the optimal child plans into the local plans, pruning as we go
    for (GDFNode child : node.getInputs()) {
        //recursively optimize the child subgraph
        PlanSet childPlans = enumOpt(child, memo, maxCosts);
        //loop nodes return the union over outputs; select the relevant one
        if (child instanceof GDFLoopNode)
            childPlans = childPlans.selectChild(node);
        //cross product of parent and child plan candidates
        result = result.crossProductChild(childPlans);
        _enumeratedPlans += result.size();
        //drop plans with incompatible interesting properties
        pruneInvalidPlans(result);
    }
    //drop plans exceeding the branch-and-bound cost bound
    pruneSuboptimalPlans(result, maxCosts);
    //memoize the pruned plan set for this subgraph
    memo.putEntry(node, result);
    return result;
}
Also used : GDFLoopNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFLoopNode) GDFNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFNode)

Example 4 with GDFNode

use of org.apache.sysml.hops.globalopt.gdfgraph.GDFNode in project incubator-systemml by apache.

From the class Explain, method explainGDFNode:

//////////////
// internal explain GDFNODE
/**
	 * Do a post-order traverse through the GDFNode DAG and explain each GDFNode.
	 * Note: nodes referring to literalops are suppressed.
	 * 
	 * @param gnode GDF node
	 * @param level offset
	 * @param memo memoization table
	 * @return string explanation
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
private static String explainGDFNode(GDFNode gnode, int level, HashSet<Long> memo) throws DMLRuntimeException {
    //basic memoization via memo table since gnode has no visit status;
    //also suppress nodes that merely wrap literal hops
    if (memo.contains(gnode.getID()) || gnode.getNodeType() == NodeType.HOP_NODE && gnode.getHop() instanceof LiteralOp) {
        return "";
    }
    StringBuilder sb = new StringBuilder();
    String offset = createOffset(level);
    //post-order traversal: explain all inputs first
    //(memo table prevents duplicate output for shared inputs)
    for (GDFNode input : gnode.getInputs()) sb.append(explainGDFNode(input, level, memo));
    //indentation
    sb.append(offset);
    //node id and data dependencies, rendered as "(id) ... (in1,in2,...)";
    //deps stays null when the node has no inputs
    String deps = null;
    if (SHOW_DATA_DEPENDENCIES) {
        sb.append("(" + gnode.getID() + ") ");
        StringBuilder childs = new StringBuilder();
        childs.append(" (");
        boolean childAdded = false;
        for (GDFNode input : gnode.getInputs()) {
            childs.append(childAdded ? "," : "");
            childs.append(input.getID());
            childAdded = true;
        }
        childs.append(")");
        if (childAdded)
            deps = childs.toString();
    }
    //operation string
    if (//LOOP NODES: header, then predicate and body at increased indentation
    gnode instanceof GDFLoopNode) {
        GDFLoopNode lgnode = (GDFLoopNode) gnode;
        String offset2 = createOffset(level + 1);
        //loop header
        sb.append(lgnode.explain(deps) + "\n");
        sb.append(offset2 + "PRED:\n");
        sb.append(explainGDFNode(lgnode.getLoopPredicate(), level + 2, memo));
        sb.append(offset2 + "BODY:\n");
        //note: memo table and already done child explain prevents redundancy
        for (Entry<String, GDFNode> root : lgnode.getLoopOutputs().entrySet()) {
            sb.append(explainGDFNode(root.getValue(), level + 2, memo));
        }
    } else //GENERAL CASE (BASIC/CROSSBLOCK NODES)
    {
        sb.append(gnode.explain(deps));
        sb.append('\n');
    }
    //memoization
    memo.add(gnode.getID());
    return sb.toString();
}
Also used : GDFLoopNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFLoopNode) GDFNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFNode) LiteralOp(org.apache.sysml.hops.LiteralOp)

Example 5 with GDFNode

use of org.apache.sysml.hops.globalopt.gdfgraph.GDFNode in project incubator-systemml by apache.

From the class Explain, method explainGDFNodes:

/**
 * Explains a list of GDF root nodes into a single string, sharing one memo
 * table across roots so nodes reachable from multiple roots appear once.
 *
 * @param gdfnodes the GDF root nodes to explain
 * @param level indentation offset for the generated explanation
 * @return concatenated string explanation of all roots
 * @throws DMLRuntimeException if explaining a node fails
 */
public static String explainGDFNodes(ArrayList<GDFNode> gdfnodes, int level) throws DMLRuntimeException {
    HashSet<Long> visited = new HashSet<Long>();
    StringBuilder buf = new StringBuilder();
    for (int i = 0; i < gdfnodes.size(); i++) {
        buf.append(explainGDFNode(gdfnodes.get(i), level, visited));
    }
    return buf.toString();
}
Also used : GDFNode(org.apache.sysml.hops.globalopt.gdfgraph.GDFNode) HashSet(java.util.HashSet)

Aggregations

GDFNode (org.apache.sysml.hops.globalopt.gdfgraph.GDFNode)5 GDFLoopNode (org.apache.sysml.hops.globalopt.gdfgraph.GDFLoopNode)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 DataOp (org.apache.sysml.hops.DataOp)1 LiteralOp (org.apache.sysml.hops.LiteralOp)1 ExecType (org.apache.sysml.lops.LopProperties.ExecType)1 LocalVariableMap (org.apache.sysml.runtime.controlprogram.LocalVariableMap)1 Program (org.apache.sysml.runtime.controlprogram.Program)1 ExecutionContext (org.apache.sysml.runtime.controlprogram.context.ExecutionContext)1 Timing (org.apache.sysml.runtime.controlprogram.parfor.stat.Timing)1