Search in sources :

Example 1 with Edge

Use of org.apache.tez.dag.app.dag.impl.Edge in the Apache Hive project.

The method getTransitiveVertexOutputs of the class LlapTaskSchedulerService.

/**
 * Computes, for every vertex of the given DAG, the numbers of all vertices that can be reached
 * from it by following output edges (its transitive outputs).
 *
 * <p>Vertices are identified by {@code getVertexId().getId()}; since all of them belong to the
 * same DAG, the bare number is used as the key. The graph is assumed to be acyclic and
 * connected.
 *
 * @param info the DAG description; dependencies can only be derived when it is a {@code DAG}
 * @return a map from vertex number to the set of its transitive output vertex numbers, or
 *         {@code null} (after logging a warning) when {@code info} is not a {@code DAG}
 */
private static Map<Integer, Set<Integer>> getTransitiveVertexOutputs(DagInfo info) {
    if (!(info instanceof DAG)) {
        LOG.warn("DAG info is not a DAG - cannot derive dependencies");
        return null;
    }
    DAG dag = (DAG) info;
    int vertexCount = dag.getVertices().size();
    Map<Integer, Set<Integer>> outputsByVertex = Maps.newHashMapWithExpectedSize(vertexCount);
    LinkedList<TezVertexID> pending = new LinkedList<>();

    // Pass 1: record the direct outputs of every vertex and seed the work queue with the
    // vertices that have no outputs at all.
    for (Vertex vertex : dag.getVertices().values()) {
        Map<Vertex, Edge> directOutputs = vertex.getOutputVertices();
        Set<Integer> directNums;
        if (directOutputs != null) {
            directNums = Sets.newHashSetWithExpectedSize(vertexCount);
            for (Vertex target : directOutputs.keySet()) {
                directNums.add(target.getVertexId().getId());
            }
        } else {
            directNums = Sets.newHashSet();
        }
        outputsByVertex.put(vertex.getVertexId().getId(), directNums);
        if (vertex.getOutputVerticesCount() == 0) {
            pending.add(vertex.getVertexId());
        }
    }

    // Pass 2: walk from the output-less vertices towards the inputs, pushing each finished
    // vertex's accumulated outputs into the sets of its inputs.
    Set<Integer> finished = Sets.newHashSetWithExpectedSize(vertexCount);
    while (!pending.isEmpty()) {
        TezVertexID currentId = pending.poll();
        int currentNum = currentId.getId();
        if (finished.contains(currentNum)) {
            // Queued more than once (several outputs, or re-queued by backtracking below).
            continue;
        }
        Vertex current = dag.getVertex(currentId);
        Map<Vertex, Edge> successors = current.getOutputVertices();

        // A vertex may only be finalized once all of its outputs are. Otherwise put it back at
        // the head of the queue with the unfinished outputs in front of it, and retry later.
        // Example: 1 feeds 2 and 3, and 2 feeds 4. Finishing 3 enqueues 1, then finishing 4
        // enqueues 2, so 1 is polled while its output 2 is still unfinished.
        boolean requeued = false;
        if (successors != null) {
            for (Vertex successor : successors.keySet()) {
                TezVertexID successorId = successor.getVertexId();
                if (finished.contains(successorId.getId())) {
                    continue;
                }
                if (!requeued) {
                    pending.addFirst(currentId);
                    requeued = true;
                }
                pending.addFirst(successorId);
            }
        }
        if (requeued) {
            continue;
        }

        finished.add(currentNum);
        Set<Integer> transitive = outputsByVertex.get(currentNum);
        Map<Vertex, Edge> predecessors = current.getInputVertices();
        if (predecessors == null) {
            continue;
        }
        for (Vertex predecessor : predecessors.keySet()) {
            TezVertexID predecessorId = predecessor.getVertexId();
            pending.add(predecessorId);
            // Everything reachable from this vertex is also reachable from each of its inputs.
            outputsByVertex.get(predecessorId.getId()).addAll(transitive);
        }
    }
    return outputsByVertex;
}
Also used : Vertex(org.apache.tez.dag.app.dag.Vertex) EnumSet(java.util.EnumSet) LlapServiceInstanceSet(org.apache.hadoop.hive.llap.registry.LlapServiceInstanceSet) Set(java.util.Set) TreeSet(java.util.TreeSet) DAG(org.apache.tez.dag.app.dag.DAG) LinkedList(java.util.LinkedList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Edge(org.apache.tez.dag.app.dag.impl.Edge) TezVertexID(org.apache.tez.dag.records.TezVertexID)

Aggregations

EnumSet (java.util.EnumSet)1 LinkedList (java.util.LinkedList)1 Set (java.util.Set)1 TreeSet (java.util.TreeSet)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 LlapServiceInstanceSet (org.apache.hadoop.hive.llap.registry.LlapServiceInstanceSet)1 DAG (org.apache.tez.dag.app.dag.DAG)1 Vertex (org.apache.tez.dag.app.dag.Vertex)1 Edge (org.apache.tez.dag.app.dag.impl.Edge)1 TezVertexID (org.apache.tez.dag.records.TezVertexID)1