Search in sources :

Example 46 with DAG

Use of org.apache.tez.dag.app.dag.DAG in the Apache Hive project.

The class LlapTaskSchedulerService, method getTransitiveVertexOutputs.

/**
 * Computes, for every vertex of a DAG, the numeric ids of all vertices reachable from it by
 * following output edges (direct outputs plus the outputs of those outputs, and so on).
 *
 * <p>Vertices are identified by {@code getVertexId().getId()} only, since they all belong to
 * the same DAG. The graph is assumed to be acyclic and connected.
 *
 * @param info the DAG descriptor; must actually be a {@link DAG} for anything to be derived
 * @return a map from vertex number to the set of its transitive output vertex numbers, or
 *         {@code null} (after logging a warning) when {@code info} is not a {@link DAG}
 */
private static Map<Integer, Set<Integer>> getTransitiveVertexOutputs(DagInfo info) {
    if (!(info instanceof DAG)) {
        LOG.warn("DAG info is not a DAG - cannot derive dependencies");
        return null;
    }
    DAG dag = (DAG) info;
    int vertexCount = dag.getVertices().size();
    // Plain numbers are enough as keys: every vertex lives in this one DAG.
    Map<Integer, Set<Integer>> outputsByVertex = Maps.newHashMapWithExpectedSize(vertexCount);
    LinkedList<TezVertexID> pending = new LinkedList<>();

    // Phase 1: record direct outputs for each vertex and seed the work list with the sinks.
    for (Vertex vertex : dag.getVertices().values()) {
        Map<Vertex, Edge> directOutputs = vertex.getOutputVertices();
        Set<Integer> directIds;
        if (directOutputs != null) {
            directIds = Sets.newHashSetWithExpectedSize(vertexCount);
            for (Vertex target : directOutputs.keySet()) {
                directIds.add(target.getVertexId().getId());
            }
        } else {
            directIds = Sets.newHashSet();
        }
        outputsByVertex.put(vertex.getVertexId().getId(), directIds);
        if (vertex.getOutputVerticesCount() == 0) {
            pending.add(vertex.getVertexId());
        }
    }

    // Phase 2: walk from the sinks towards the sources, pushing each finished vertex's
    // output set into all of its inputs.
    Set<Integer> finished = Sets.newHashSetWithExpectedSize(vertexCount);
    while (!pending.isEmpty()) {
        TezVertexID currentId = pending.poll();
        if (finished.contains(currentId.getId())) {
            // A vertex can be enqueued more than once (by several outputs, or by backtracking).
            continue;
        }
        Vertex current = dag.getVertex(currentId);
        Map<Vertex, Edge> currentOutputs = current.getOutputVertices();
        if (currentOutputs != null) {
            // A vertex may only be finalized once all of its outputs are. If some are not,
            // put the vertex back at the head of the queue with the missing outputs in front
            // of it and retry later. Example: 1 -> {2, 3} and 2 -> 4. The sinks 3 and 4 seed
            // the queue; 3 enqueues 1 and 4 enqueues 2, so 1 can be polled before 2 is done.
            boolean requeued = false;
            for (Vertex target : currentOutputs.keySet()) {
                TezVertexID targetId = target.getVertexId();
                if (finished.contains(targetId.getId())) {
                    continue;
                }
                if (!requeued) {
                    pending.addFirst(currentId);
                    requeued = true;
                }
                pending.addFirst(targetId);
            }
            if (requeued) {
                continue;
            }
        }

        int currentNum = currentId.getId();
        finished.add(currentNum);
        Set<Integer> reachable = outputsByVertex.get(currentNum);
        Map<Vertex, Edge> currentInputs = current.getInputVertices();
        if (currentInputs == null) {
            continue;
        }
        for (Vertex source : currentInputs.keySet()) {
            pending.add(source.getVertexId());
            // Everything reachable from this vertex is also reachable from each of its inputs.
            outputsByVertex.get(source.getVertexId().getId()).addAll(reachable);
        }
    }
    return outputsByVertex;
}
Also used : Vertex(org.apache.tez.dag.app.dag.Vertex) EnumSet(java.util.EnumSet) LlapServiceInstanceSet(org.apache.hadoop.hive.llap.registry.LlapServiceInstanceSet) Set(java.util.Set) TreeSet(java.util.TreeSet) DAG(org.apache.tez.dag.app.dag.DAG) LinkedList(java.util.LinkedList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Edge(org.apache.tez.dag.app.dag.impl.Edge) TezVertexID(org.apache.tez.dag.records.TezVertexID)

Aggregations

DAG (org.apache.tez.dag.app.dag.DAG)46 Vertex (org.apache.tez.dag.app.dag.Vertex)22 Test (org.junit.Test)16 HashMap (java.util.HashMap)11 TezVertexID (org.apache.tez.dag.records.TezVertexID)11 EventHandler (org.apache.hadoop.yarn.event.EventHandler)10 Set (java.util.Set)9 TreeMap (java.util.TreeMap)8 Event (org.apache.hadoop.yarn.event.Event)8 AppContext (org.apache.tez.dag.app.AppContext)8 TezDAGID (org.apache.tez.dag.records.TezDAGID)7 Configuration (org.apache.hadoop.conf.Configuration)6 Matchers.anyString (org.mockito.Matchers.anyString)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 Map (java.util.Map)5 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)5 TaskAttempt (org.apache.tez.dag.app.dag.TaskAttempt)5 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 LinkedList (java.util.LinkedList)4