
Example 6 with DAG

Use of org.apache.tez.dag.api.DAG in project hive by apache.

The class TezJobMonitor, method monitorExecution.

public int monitorExecution() {
    boolean done = false;
    boolean success = false;
    int failedCounter = 0;
    final StopWatch failureTimer = new StopWatch();
    int rc = 0;
    DAGStatus status = null;
    Map<String, Progress> vertexProgressMap = null;
    long monitorStartTime = System.currentTimeMillis();
    synchronized (shutdownList) {
        shutdownList.add(dagClient);
    }
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
    DAGStatus.State lastState = null;
    boolean running = false;
    long checkInterval = HiveConf.getTimeVar(hiveConf, HiveConf.ConfVars.TEZ_DAG_STATUS_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
    WmContext wmContext = null;
    while (true) {
        try {
            if (context != null) {
                context.checkHeartbeaterLockException();
            }
            status = dagClient.getDAGStatus(EnumSet.of(StatusGetOpts.GET_COUNTERS), checkInterval);
            TezCounters dagCounters = status.getDAGCounters();
            vertexProgressMap = status.getVertexProgress();
            wmContext = context.getWmContext();
            List<String> vertexNames = vertexProgressMap.keySet().stream().map(k -> k.replaceAll(" ", "_")).collect(Collectors.toList());
            if (dagCounters != null && wmContext != null) {
                Set<String> desiredCounters = wmContext.getSubscribedCounters();
                if (desiredCounters != null && !desiredCounters.isEmpty()) {
                    Map<String, Long> currentCounters = getCounterValues(dagCounters, vertexNames, vertexProgressMap, desiredCounters, done);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Requested DAG status. checkInterval: {}. currentCounters: {}", checkInterval, currentCounters);
                    }
                    wmContext.setCurrentCounters(currentCounters);
                }
            }
            DAGStatus.State state = status.getState();
            // AM is responsive again (recovery?)
            failedCounter = 0;
            failureTimer.reset();
            if (state != lastState || state == RUNNING) {
                lastState = state;
                switch(state) {
                    case SUBMITTED:
                        console.printInfo("Status: Submitted");
                        break;
                    case INITING:
                        console.printInfo("Status: Initializing");
                        this.executionStartTime = System.currentTimeMillis();
                        break;
                    case RUNNING:
                        if (!running) {
                            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
                            console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
                            this.executionStartTime = System.currentTimeMillis();
                            running = true;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        break;
                    case SUCCEEDED:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        success = true;
                        running = false;
                        done = true;
                        break;
                    case KILLED:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printInfo("Status: Killed");
                        running = false;
                        done = true;
                        rc = 1;
                        break;
                    case FAILED:
                    case ERROR:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printError("Status: Failed");
                        running = false;
                        done = true;
                        rc = 2;
                        break;
                }
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } catch (Exception e) {
            console.printInfo("Exception: " + e.getMessage());
            boolean isInterrupted = hasInterruptedException(e);
            if (failedCounter == 0) {
                failureTimer.reset();
                failureTimer.start();
            }
            if (isInterrupted || (++failedCounter >= MAX_RETRY_FAILURES && failureTimer.now(TimeUnit.MILLISECONDS) > MAX_RETRY_INTERVAL)) {
                try {
                    if (isInterrupted) {
                        console.printInfo("Killing DAG...");
                    } else {
                        console.printInfo(String.format("Killing DAG... after %d seconds", failureTimer.now(TimeUnit.SECONDS)));
                    }
                    dagClient.tryKillDAG();
                } catch (IOException | TezException tezException) {
                // best effort
                }
                console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e));
                rc = 1;
                done = true;
            } else {
                console.printInfo("Retrying...");
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } finally {
            if (done) {
                if (wmContext != null) {
                    wmContext.setQueryCompleted(true);
                }
                if (rc != 0 && status != null) {
                    for (String diag : status.getDiagnostics()) {
                        console.printError(diag);
                        diagnostics.append(diag);
                    }
                }
                synchronized (shutdownList) {
                    shutdownList.remove(dagClient);
                }
                break;
            }
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    printSummary(success, vertexProgressMap);
    return rc;
}
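
Stripped of the Hive-specific bookkeeping (perf logging, workload-management counters, console output), the core of monitorExecution is a blocking poll against DAGClient. A minimal sketch of that pattern using only the public Tez client API (waitForTerminalState is a hypothetical helper name, not a Hive or Tez method; imports assumed: java.util.EnumSet plus the Tez client classes listed under "Also used" below):

public static DAGStatus.State waitForTerminalState(DAGClient dagClient, long checkIntervalMs) throws IOException, TezException {
    while (true) {
        // The timeout argument lets getDAGStatus block for up to checkIntervalMs
        // before returning the current status, so the loop does not busy-spin.
        DAGStatus status = dagClient.getDAGStatus(EnumSet.of(StatusGetOpts.GET_COUNTERS), checkIntervalMs);
        switch (status.getState()) {
            case SUCCEEDED:
            case KILLED:
            case FAILED:
            case ERROR:
                // Terminal state: stop polling and let the caller map it to a
                // return code, as monitorExecution does above.
                return status.getState();
            default:
                // SUBMITTED / INITING / RUNNING: keep polling.
                break;
        }
    }
}
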
Also used: DAGClient(org.apache.tez.dag.api.client.DAGClient) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TezCounter(org.apache.tez.common.counters.TezCounter) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) InterruptedIOException(java.io.InterruptedIOException) TimeCounterLimit(org.apache.hadoop.hive.ql.wm.TimeCounterLimit) VertexCounterLimit(org.apache.hadoop.hive.ql.wm.VertexCounterLimit) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) ProgressMonitor(org.apache.hadoop.hive.common.log.ProgressMonitor) Map(java.util.Map) InPlaceUpdate(org.apache.hadoop.hive.common.log.InPlaceUpdate) Context(org.apache.hadoop.hive.ql.Context) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CounterGroup(org.apache.tez.common.counters.CounterGroup) LinkedList(java.util.LinkedList) EnumSet(java.util.EnumSet) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) Progress(org.apache.tez.dag.api.client.Progress) Logger(org.slf4j.Logger) StringWriter(java.io.StringWriter) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) StopWatch(org.apache.tez.util.StopWatch) IOException(java.io.IOException) DAG(org.apache.tez.dag.api.DAG) Collectors(java.util.stream.Collectors) SessionState(org.apache.hadoop.hive.ql.session.SessionState) RUNNING(org.apache.tez.dag.api.client.DAGStatus.State.RUNNING) ShutdownHookManager(org.apache.hive.common.util.ShutdownHookManager) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) TezException(org.apache.tez.dag.api.TezException) TimeUnit(java.util.concurrent.TimeUnit) TezCounters(org.apache.tez.common.counters.TezCounters) List(java.util.List) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) Preconditions(com.google.common.base.Preconditions) TezSessionPoolManager(org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager) ExceptionUtils(org.apache.commons.lang3.exception.ExceptionUtils)

Example 7 with DAG

Use of org.apache.tez.dag.api.DAG in project hive by apache.

The class TestTezTask, method testExtraResourcesAddedToDag.

@Test
public void testExtraResourcesAddedToDag() throws Exception {
    final String[] inputOutputJars = new String[] { "file:///tmp/foo.jar" };
    LocalResource res = mock(LocalResource.class);
    final List<LocalResource> resources = Collections.singletonList(res);
    final Map<String, LocalResource> resMap = new HashMap<String, LocalResource>();
    resMap.put("foo.jar", res);
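    // Mock the DAG itself so the test can verify interactions on it without building a real Tez plan.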
    DAG dag = mock(DAG.class);
    when(utils.localizeTempFiles(path.toString(), conf, inputOutputJars)).thenReturn(resources);
    when(utils.getBaseName(res)).thenReturn("foo.jar");
    when(sessionState.isOpen()).thenReturn(true);
    when(sessionState.isOpening()).thenReturn(false);
    when(sessionState.hasResources(inputOutputJars)).thenReturn(false);
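    // Exercise the method under test, then verify that the extra resources were attached to the DAG as task-local files.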
    task.addExtraResourcesToDag(sessionState, dag, inputOutputJars, resMap);
    verify(dag).addTaskLocalFiles(resMap);
}
Also used: HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DAG(org.apache.tez.dag.api.DAG) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) Test(org.junit.Test)

Example 8 with DAG

Use of org.apache.tez.dag.api.DAG in project hive by apache.

The class TezTask, method build.

DAG build(JobConf conf, TezWork work, Path scratchDir, LocalResource appJarLr, List<LocalResource> additionalLr, Context ctx) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
    // getAllWork returns a topologically sorted list, which we use to make
    // sure that vertices are created before they are used in edges.
    List<BaseWork> ws = work.getAllWork();
    Collections.reverse(ws);
    FileSystem fs = scratchDir.getFileSystem(conf);
    // the name of the dag is what is displayed in the AM/Job UI
    String dagName = utils.createDagName(conf, queryPlan);
    LOG.info("Dag name: " + dagName);
    DAG dag = DAG.create(dagName);
    // set some info for the query
    JSONObject json = new JSONObject(new LinkedHashMap()).put("context", "Hive").put("description", ctx.getCmd());
    String dagInfo = json.toString();
    if (LOG.isDebugEnabled()) {
        LOG.debug("DagInfo: " + dagInfo);
    }
    dag.setDAGInfo(dagInfo);
    dag.setCredentials(conf.getCredentials());
    setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf);
    for (BaseWork w : ws) {
        boolean isFinal = work.getLeaves().contains(w);
        // translate work to vertex
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
        if (w instanceof UnionWork) {
            // Special case for unions. These items translate to VertexGroups
            List<BaseWork> unionWorkItems = new LinkedList<BaseWork>();
            List<BaseWork> children = new LinkedList<BaseWork>();
            // proper children of the union
            for (BaseWork v : work.getChildren(w)) {
                EdgeType type = work.getEdgeProperty(w, v).getEdgeType();
                if (type == EdgeType.CONTAINS) {
                    unionWorkItems.add(v);
                } else {
                    children.add(v);
                }
            }
            // create VertexGroup
            Vertex[] vertexArray = new Vertex[unionWorkItems.size()];
            int i = 0;
            for (BaseWork v : unionWorkItems) {
                vertexArray[i++] = workToVertex.get(v);
            }
            VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray);
            // For a vertex group, all Outputs use the same Key-class, Val-class and partitioner.
            // Pick any one source vertex to figure out the Edge configuration.
            JobConf parentConf = workToConf.get(unionWorkItems.get(0));
            // now hook up the children
            for (BaseWork v : children) {
                // finally we can create the grouped edge
                GroupInputEdge e = utils.createEdge(group, parentConf, workToVertex.get(v), work.getEdgeProperty(w, v), work.getVertexType(v));
                dag.addEdge(e);
            }
        } else {
            // Regular vertices
            JobConf wxConf = utils.initializeVertexConf(conf, ctx, w);
            Vertex wx = utils.createVertex(wxConf, w, scratchDir, appJarLr, additionalLr, fs, ctx, !isFinal, work, work.getVertexType(w));
            if (w.getReservedMemoryMB() > 0) {
                // If reservedMemoryMB is set, make memory allocation fraction adjustment as needed
                double frac = DagUtils.adjustMemoryReserveFraction(w.getReservedMemoryMB(), super.conf);
                LOG.info("Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac);
                wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac));
            }
            // Otherwise just leave it up to Tez to decide how much memory to allocate
            dag.addVertex(wx);
            utils.addCredentials(w, dag);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
            workToVertex.put(w, wx);
            workToConf.put(w, wxConf);
            // add all dependencies (i.e.: edges) to the graph
            for (BaseWork v : work.getChildren(w)) {
                assert workToVertex.containsKey(v);
                Edge e = null;
                TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);
                e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, work.getVertexType(v));
                dag.addEdge(e);
            }
        }
    }
    // Clear the work map after build. TODO: remove caching instead?
    Utilities.clearWorkMap(conf);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
    return dag;
}
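
Hive-specific plumbing aside, the construction pattern above is the standard Tez one: create a named DAG, add vertices, then connect them with edges. A minimal standalone sketch against the raw Tez API (buildExampleDag is a hypothetical method; the processor class names and parallelism are illustrative placeholders, not Hive classes):

static DAG buildExampleDag() {
    DAG dag = DAG.create("example-dag");
    Vertex mapper = Vertex.create("mapper", ProcessorDescriptor.create("com.example.MapProcessor"), 4);
    Vertex reducer = Vertex.create("reducer", ProcessorDescriptor.create("com.example.ReduceProcessor"), 1);
    // An Edge carries an EdgeProperty describing how data moves between the two
    // vertices; Hive derives these from TezEdgeProperty via DagUtils.createEdge(...).
    EdgeProperty shuffle = EdgeProperty.create(
        EdgeProperty.DataMovementType.SCATTER_GATHER,
        EdgeProperty.DataSourceType.PERSISTED,
        EdgeProperty.SchedulingType.SEQUENTIAL,
        OutputDescriptor.create("org.apache.tez.runtime.library.output.OrderedPartitionedKVOutput"),
        InputDescriptor.create("org.apache.tez.runtime.library.input.OrderedGroupedKVInput"));
    dag.addVertex(mapper).addVertex(reducer);
    dag.addEdge(Edge.create(mapper, reducer, shuffle));
    return dag;
}

In a real job the runtime-library input/output descriptors also need a serialized configuration payload (Tez's OrderedPartitionedKVEdgeConfig builds the EdgeProperty for you); the sketch shows only the structural wiring.
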
Also used: Vertex(org.apache.tez.dag.api.Vertex) TezEdgeProperty(org.apache.hadoop.hive.ql.plan.TezEdgeProperty) UnionWork(org.apache.hadoop.hive.ql.plan.UnionWork) DAG(org.apache.tez.dag.api.DAG) EdgeType(org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType) LinkedList(java.util.LinkedList) LinkedHashMap(java.util.LinkedHashMap) VertexGroup(org.apache.tez.dag.api.VertexGroup) JSONObject(org.json.JSONObject) FileSystem(org.apache.hadoop.fs.FileSystem) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) JobConf(org.apache.hadoop.mapred.JobConf) Edge(org.apache.tez.dag.api.Edge)

Example 9 with DAG

Use of org.apache.tez.dag.api.DAG in project hive by apache.

The class TezTask, method build. This overload (compare Example 8) takes the localized resources explicitly as vertexResources and validates each vertex configuration with checkOutputSpec before creating vertices and edges.

DAG build(JobConf conf, TezWork work, Path scratchDir, Context ctx, Map<String, LocalResource> vertexResources) throws Exception {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
    // getAllWork returns a topologically sorted list, which we use to make
    // sure that vertices are created before they are used in edges.
    List<BaseWork> ws = work.getAllWork();
    Collections.reverse(ws);
    FileSystem fs = scratchDir.getFileSystem(conf);
    // the name of the dag is what is displayed in the AM/Job UI
    String dagName = utils.createDagName(conf, queryPlan);
    LOG.info("Dag name: " + dagName);
    DAG dag = DAG.create(dagName);
    // set some info for the query
    JSONObject json = new JSONObject(new LinkedHashMap<>()).put("context", "Hive").put("description", ctx.getCmd());
    String dagInfo = json.toString();
    if (LOG.isDebugEnabled()) {
        LOG.debug("DagInfo: " + dagInfo);
    }
    dag.setDAGInfo(dagInfo);
    dag.setCredentials(conf.getCredentials());
    setAccessControlsForCurrentUser(dag, queryPlan.getQueryId(), conf);
    for (BaseWork w : ws) {
        boolean isFinal = work.getLeaves().contains(w);
        // translate work to vertex
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
        if (w instanceof UnionWork) {
            // Special case for unions. These items translate to VertexGroups
            List<BaseWork> unionWorkItems = new LinkedList<BaseWork>();
            List<BaseWork> children = new LinkedList<BaseWork>();
            // proper children of the union
            for (BaseWork v : work.getChildren(w)) {
                EdgeType type = work.getEdgeProperty(w, v).getEdgeType();
                if (type == EdgeType.CONTAINS) {
                    unionWorkItems.add(v);
                } else {
                    children.add(v);
                }
            }
            JobConf parentConf = workToConf.get(unionWorkItems.get(0));
            checkOutputSpec(w, parentConf);
            // create VertexGroup
            Vertex[] vertexArray = new Vertex[unionWorkItems.size()];
            int i = 0;
            for (BaseWork v : unionWorkItems) {
                vertexArray[i++] = workToVertex.get(v);
            }
            VertexGroup group = dag.createVertexGroup(w.getName(), vertexArray);
            // now hook up the children
            for (BaseWork v : children) {
                // finally we can create the grouped edge
                GroupInputEdge e = utils.createEdge(group, parentConf, workToVertex.get(v), work.getEdgeProperty(w, v), v, work);
                dag.addEdge(e);
            }
        } else {
            // Regular vertices
            JobConf wxConf = utils.initializeVertexConf(conf, ctx, w);
            checkOutputSpec(w, wxConf);
            Vertex wx = utils.createVertex(wxConf, w, scratchDir, fs, ctx, !isFinal, work, work.getVertexType(w), vertexResources);
            if (w.getReservedMemoryMB() > 0) {
                // If reservedMemoryMB is set, make memory allocation fraction adjustment as needed
                double frac = DagUtils.adjustMemoryReserveFraction(w.getReservedMemoryMB(), super.conf);
                LOG.info("Setting " + TEZ_MEMORY_RESERVE_FRACTION + " to " + frac);
                wx.setConf(TEZ_MEMORY_RESERVE_FRACTION, Double.toString(frac));
            }
            // Otherwise just leave it up to Tez to decide how much memory to allocate
            dag.addVertex(wx);
            utils.addCredentials(w, dag);
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_CREATE_VERTEX + w.getName());
            workToVertex.put(w, wx);
            workToConf.put(w, wxConf);
            // add all dependencies (i.e.: edges) to the graph
            for (BaseWork v : work.getChildren(w)) {
                assert workToVertex.containsKey(v);
                Edge e = null;
                TezEdgeProperty edgeProp = work.getEdgeProperty(w, v);
                e = utils.createEdge(wxConf, wx, workToVertex.get(v), edgeProp, v, work);
                dag.addEdge(e);
            }
        }
    }
    // Clear the work map after build. TODO: remove caching instead?
    Utilities.clearWorkMap(conf);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_BUILD_DAG);
    return dag;
}
Also used: Vertex(org.apache.tez.dag.api.Vertex) TezEdgeProperty(org.apache.hadoop.hive.ql.plan.TezEdgeProperty) UnionWork(org.apache.hadoop.hive.ql.plan.UnionWork) DAG(org.apache.tez.dag.api.DAG) EdgeType(org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType) LinkedList(java.util.LinkedList) VertexGroup(org.apache.tez.dag.api.VertexGroup) JSONObject(org.json.JSONObject) FileSystem(org.apache.hadoop.fs.FileSystem) GroupInputEdge(org.apache.tez.dag.api.GroupInputEdge) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) JobConf(org.apache.hadoop.mapred.JobConf) Edge(org.apache.tez.dag.api.Edge)

Example 10 with DAG

Use of org.apache.tez.dag.api.DAG in project hive by apache.

The class TezTask, method execute.

@Override
public int execute(DriverContext driverContext) {
    int rc = 1;
    boolean cleanContext = false;
    Context ctx = null;
    Ref<TezSessionState> sessionRef = Ref.from(null);
    try {
        // Get or create Context object. If we create it we have to clean it later as well.
        ctx = driverContext.getCtx();
        if (ctx == null) {
            ctx = new Context(conf);
            cleanContext = true;
            // some DDL task that directly executes a TezTask does not setup Context and hence TriggerContext.
            // Setting queryId is messed up. Some DDL tasks have executionId instead of proper queryId.
            String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
            WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId);
            ctx.setWmContext(wmContext);
        }
        // Need to remove this static hack. But this is the way currently to get a session.
        SessionState ss = SessionState.get();
        // Note: given that we return pool sessions to the pool in the finally block below, and that
        // we need to set the global to null to do that, this "reuse" may be pointless.
        TezSessionState session = sessionRef.value = ss.getTezSession();
        if (session != null && !session.isOpen()) {
            LOG.warn("The session: " + session + " has not been opened");
        }
        // We only need a username for UGI to use for groups; getGroups will fetch the groups
        // based on Hadoop configuration, as documented at
        // https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html
        String userName = ss.getUserName();
        List<String> groups = null;
        if (userName == null) {
            userName = "anonymous";
        } else {
            groups = UserGroupInformation.createRemoteUser(ss.getUserName()).getGroups();
        }
        MappingInput mi = new MappingInput(userName, groups, ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
        WmContext wmContext = ctx.getWmContext();
        // jobConf will hold all the configuration for hadoop, tez, and hive
        JobConf jobConf = utils.createConfiguration(conf);
        // Get all user jars from work (e.g. input format stuff).
        String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
        // DAG scratch dir. We get a session from the pool so it may be different from Tez one.
        // TODO: we could perhaps reuse the same directory for HiveResources?
        Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), conf);
        CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        session = sessionRef.value = WorkloadManagerFederation.getSession(sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        try {
            ss.setTezSession(session);
            LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId());
            // Ensure the session is open and has the necessary local resources.
            // This would refresh any conf resources and also local resources.
            ensureSessionHasResources(session, allNonConfFiles);
            // This is a combination of the jar stuff from conf, and not from conf.
            List<LocalResource> allNonAppResources = session.getLocalizedResources();
            logResources(allNonAppResources);
            Map<String, LocalResource> allResources = DagUtils.createTezLrMap(session.getAppJarLr(), allNonAppResources);
            // next we translate the TezWork to a Tez DAG
            DAG dag = build(jobConf, work, scratchDir, ctx, allResources);
            dag.setCallerContext(callerContext);
            // Check isShutdown opportunistically; it's never unset.
            if (this.isShutdown) {
                throw new HiveException("Operation cancelled");
            }
            DAGClient dagClient = submit(jobConf, dag, sessionRef);
            session = sessionRef.value;
            boolean wasShutdown = false;
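            // Publish the DAGClient under the lock: a concurrent shutdown either sees
            // this.dagClient and can kill it, or we observe isShutdown and cancel below.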
            synchronized (dagClientLock) {
                assert this.dagClient == null;
                wasShutdown = this.isShutdown;
                if (!wasShutdown) {
                    this.dagClient = dagClient;
                }
            }
            if (wasShutdown) {
                closeDagClientOnCancellation(dagClient);
                throw new HiveException("Operation cancelled");
            }
            // finally monitor will print progress until the job is done
            TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx);
            rc = monitor.monitorExecution();
            if (rc != 0) {
                this.setException(new HiveException(monitor.getDiagnostics()));
            }
            // fetch the counters
            try {
                Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
                counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
            } catch (Exception err) {
                // Don't fail execution due to counters - just don't print summary info
                LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. " + err, err);
                counters = null;
            }
        } finally {
            // Note: due to TEZ-3846, the session may actually be invalid in case of some errors.
            // Currently, reopen on an attempted reuse will take care of that; we cannot tell
            // if the session is usable until we try.
            // We return this to the pool even if it's unusable; reopen is supposed to handle this.
            wmContext = ctx.getWmContext();
            try {
                if (sessionRef.value != null) {
                    sessionRef.value.returnToSessionManager();
                }
            } catch (Exception e) {
                LOG.error("Failed to return session: {} to pool", session, e);
                throw e;
            }
            if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && wmContext != null) {
                if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) {
                    wmContext.printJson(console);
                } else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) {
                    wmContext.print(console);
                }
            }
        }
        if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
            for (CounterGroup group : counters) {
                LOG.info(group.getDisplayName() + ":");
                for (TezCounter counter : group) {
                    LOG.info("   " + counter.getDisplayName() + ": " + counter.getValue());
                }
            }
        }
    } catch (Exception e) {
        LOG.error("Failed to execute tez graph.", e);
    // rc will be 1 at this point indicating failure.
    } finally {
        Utilities.clearWork(conf);
        // Clear gWorkMap
        for (BaseWork w : work.getAllWork()) {
            JobConf workCfg = workToConf.get(w);
            if (workCfg != null) {
                Utilities.clearWorkMapForConf(workCfg);
            }
        }
        if (cleanContext) {
            try {
                ctx.clear();
            } catch (Exception e) {
                /*best effort*/
                LOG.warn("Failed to clean up after tez job", e);
            }
        }
        // need to either move tmp files or remove them
        DAGClient dagClient = null;
        synchronized (dagClientLock) {
            dagClient = this.dagClient;
            this.dagClient = null;
        }
        // DagClient as such should have no bearing on jobClose.
        if (dagClient != null) {
            // rc will only be overwritten if close errors out
            rc = close(work, rc, dagClient);
        }
    }
    return rc;
}
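
For contrast with Hive's session pooling and workload management, the same submit-and-monitor flow can be driven directly through the plain Tez client API. A minimal sketch (runDag is a hypothetical wrapper; it assumes a populated TezConfiguration and a DAG built as in Examples 8 and 9, and elides exception handling):

static void runDag(DAG dag, TezConfiguration tezConf) throws Exception {
    TezClient tezClient = TezClient.create("example-session", tezConf);
    tezClient.start();
    try {
        // submitDAG hands the DAG to the application master and returns the
        // same DAGClient type that TezJobMonitor polls in Example 6.
        DAGClient dagClient = tezClient.submitDAG(dag);
        // waitForCompletion blocks until the DAG reaches a terminal state.
        DAGStatus status = dagClient.waitForCompletion();
        System.out.println("Final state: " + status.getState());
    } finally {
        tezClient.stop();
    }
}
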
Also used: SessionState(org.apache.hadoop.hive.ql.session.SessionState) CallerContext(org.apache.tez.client.CallerContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) TezCounter(org.apache.tez.common.counters.TezCounter) MappingInput(org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput) TezJobMonitor(org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor) JobConf(org.apache.hadoop.mapred.JobConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Path(org.apache.hadoop.fs.Path) CounterGroup(org.apache.tez.common.counters.CounterGroup) DAG(org.apache.tez.dag.api.DAG) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGClient(org.apache.tez.dag.api.client.DAGClient)

Aggregations

DAG (org.apache.tez.dag.api.DAG): 11
Context (org.apache.hadoop.hive.ql.Context): 6
BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork): 6
JobConf (org.apache.hadoop.mapred.JobConf): 5
Vertex (org.apache.tez.dag.api.Vertex): 5
Test (org.junit.Test): 5
IOException (java.io.IOException): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 4
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 4
Path (org.apache.hadoop.fs.Path): 3
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3
FileNotFoundException (java.io.FileNotFoundException): 2
URISyntaxException (java.net.URISyntaxException): 2
HashMap (java.util.HashMap): 2
LinkedHashMap (java.util.LinkedHashMap): 2
LinkedList (java.util.LinkedList): 2
LoginException (javax.security.auth.login.LoginException): 2
LlapInputSplit (org.apache.hadoop.hive.llap.LlapInputSplit): 2
SubmitWorkInfo (org.apache.hadoop.hive.llap.SubmitWorkInfo): 2
LlapCoordinator (org.apache.hadoop.hive.llap.coordinator.LlapCoordinator): 2