Search in sources :

Example 1 with VertexGroup

use of org.apache.tez.dag.api.VertexGroup in project hive by apache.

the class TezTask method build.

DAG build(JobConf conf, TezWork work, Path scratchDir, LocalResource appJarLr, List<LocalResource> additionalLr, Context ctx) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
    // getAllWork returns a topologically sorted list, which we use to make
    // sure that vertices are created before they are used in edges.
    List<BaseWork> ws = work.getAllWork();
    Collections.reverse(ws);
    FileSystem fs = scratchDir.getFileSystem(conf);
    // the name of the dag is what is displayed in the AM/Job UI
    String dagName = utils.createDagName(conf, queryPlan);
    LOG.info("Dag name: " + dagName);
    DAG dag = DAG.create(dagName);
    // set some info for the query
    JSONObject json = new JSONObject(new LinkedHashMap()).put("context", "Hive").put("description", ctx.getCmd());
    String dagInfo = json.toString();
    if (LOG.isDebugEnabled()) {
        LOG.debug("DagInfo: " + dagInfo);
    }
    dag.setDAGInfo(dagInfo);
    dag.setCredentials(conf.getCredentials());
    setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf);
    for (BaseWork w : ws) {
        boolean isFinal = work.getLeaves().contains(w);
        // translate work to vertex
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
        if (w instanceof UnionWork) {
            // Special case for unions. These items translate to VertexGroups
            List<BaseWork> unionWorkItems = new LinkedList<BaseWork>();
            List<BaseWork> children = new LinkedList<BaseWork>();
            // proper children of the union
            for (BaseWork v : work.getChildren(w)) {
                EdgeType type = work.getEdgeProperty(w, v).getEdgeType();
                if (type == EdgeType.CONTAINS) {
                    unionWorkItems.add(v);
                } else {
                    children.add(v);
                }
            }
            // create VertexGroup
            Vertex[] vertexArray = new Vertex[unionWorkItems.size()];
            int i = 0;
            for (BaseWork v : unionWorkItems) {
                vertexArray[i++] = workToVertex.get(v);
            }
            VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray);
            // For a vertex group, all Outputs use the same Key-class, Val-class and partitioner.
            // Pick any one source vertex to figure out the Edge configuration.
            JobConf parentConf = workToConf.get(unionWorkItems.get(0));
            // now hook up the children
            for (BaseWork v : children) {
                // finally we can create the grouped edge
                GroupInputEdge e = utils.createEdge(group, parentConf, workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v));
                dag.addEdge(e);
            }
        } else {
            // Regular vertices
            JobConf wxConf = utils.initializeVertexConf(conf, ctx, w);
            Vertex wx = utils.createVertex(wxConf, w, scratchDir, appJarLr, additionalLr, fs, ctx, !isFinal, work, work.getVertexType(w));
            if (w.getReservedMemoryMB() > 0) {
                // If reversedMemoryMB is set, make memory allocation fraction adjustment as needed
                double frac = DagUtils.adjustMemoryReserveFraction(w.getReservedMemoryMB(), super.conf);
                LOG.info("Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac);
                wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac));
            }
            // Otherwise just leave it up to Tez to decide how much memory to allocate
            dag.addVertex(wx);
            utils.addCredentials(w, dag);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
            workToVertex.put(w, wx);
            workToConf.put(w, wxConf);
            // add all dependencies (i.e.: edges) to the graph
            for (BaseWork v : work.getChildren(w)) {
                assert workToVertex.containsKey(v);
                Edge e = null;
                TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);
                e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v));
                dag.addEdge(e);
            }
        }
    }
    // Clear the work map after build. TODO: remove caching instead?
    Utilities.clearWorkMap(conf);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
    return dag;
}
Also used : Vertex(org.apache.tez.dag.api.Vertex) TezEdgeProperty(org.apache.hadoop.hive.ql.plan.TezEdgeProperty) UnionWork(org.apache.hadoop.hive.ql.plan.UnionWork) DAG(org.apache.tez.dag.api.DAG) EdgeType(org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) VertexGroup(org.apache.tez.dag.api.VertexGroup) JSONObject(org.json.JSONObject) FileSystem(org.apache.hadoop.fs.FileSystem) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) JobConf(org.apache.hadoop.mapred.JobConf) Edge(org.apache.tez.dag.api.Edge) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge)

Aggregations

LinkedHashMap (java.util.LinkedHashMap)1 LinkedList (java.util.LinkedList)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)1 TezEdgeProperty (org.apache.hadoop.hive.ql.plan.TezEdgeProperty)1 EdgeType (org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType)1 UnionWork (org.apache.hadoop.hive.ql.plan.UnionWork)1 JobConf (org.apache.hadoop.mapred.JobConf)1 DAG (org.apache.tez.dag.api.DAG)1 Edge (org.apache.tez.dag.api.Edge)1 GroupInputEdge (org.apache.tez.dag.api.GroupInputEdge)1 Vertex (org.apache.tez.dag.api.Vertex)1 VertexGroup (org.apache.tez.dag.api.VertexGroup)1 JSONObject (org.json.JSONObject)1