Search in sources :

Example 16 with CounterGroup

Use of org.apache.tez.common.counters.CounterGroup in the Apache Hive project.

The execute method of the class TezTask.

/**
 * Builds a Tez DAG from this task's work, submits it on a Tez session, monitors it to
 * completion and collects the resulting counters.
 *
 * <p>Outline of what the body does, in order:
 * <ol>
 *   <li>Reuse the existing {@code context} or create a new {@link Context} (remembering that
 *       it must be cleared here).</li>
 *   <li>Resolve user name and groups and build the {@code MappingInput} used to obtain a
 *       session via {@code WorkloadManagerFederation.getSession}.</li>
 *   <li>Prepare the job configuration, scratch dir and local resources, build the DAG and
 *       submit it.</li>
 *   <li>Publish the {@code DAGClient} under {@code dagClientLock} unless shutdown was already
 *       requested, then run {@code TezJobMonitor}.</li>
 *   <li>Fetch and merge counters, return the session to its manager, optionally print the
 *       session events summary and counters.</li>
 *   <li>In the outer {@code finally}: clear work state, clear the context if it was created
 *       here, and call {@code close} if a {@code DAGClient} was published.</li>
 * </ol>
 *
 * <p>Any {@link Exception} raised inside the outer {@code try} is logged and recorded with
 * {@code setException}; it is not rethrown to the caller.
 *
 * @return the code produced by {@code monitor.monitorExecution()} (this method treats 0 as
 *     success), possibly replaced by the value returned from {@code close(work, rc, dagClient)};
 *     the initial value 1 is returned if no other value was assigned
 */
@Override
public int execute() {
    // Start from "failure"; only monitorExecution() and close() assign rc afterwards.
    int rc = 1;
    boolean cleanContext = false;
    Context ctx = null;
    // Holder for the current session: it is passed to submit(...) and re-read afterwards,
    // so the local 'session' variable is refreshed from it after each such call.
    Ref<TezSessionState> sessionRef = Ref.from(null);
    final String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
    try {
        // Get or create Context object. If we create it we have to clean it later as well.
        ctx = context;
        if (ctx == null) {
            ctx = new Context(conf);
            cleanContext = true;
            // some DDL task that directly executes a TezTask does not setup Context and hence TriggerContext.
            // Setting queryId is messed up. Some DDL tasks have executionId instead of proper queryId.
            WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId);
            ctx.setWmContext(wmContext);
        }
        // Need to remove this static hack. But this is the way currently to get a session.
        SessionState ss = SessionState.get();
        // Note: given that we return pool sessions to the pool in the finally block below, and that
        // we need to set the global to null to do that, this "reuse" may be pointless.
        TezSessionState session = sessionRef.value = ss.getTezSession();
        // Only a warning: execution continues with the unopened session.
        if (session != null && !session.isOpen()) {
            LOG.warn("The session: " + session + " has not been opened");
        }
        // We only need a username for UGI to use for groups; getGroups will fetch the groups
        // based on Hadoop configuration, as documented at
        // https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html
        String userName = getUserNameForGroups(ss);
        List<String> groups = null;
        if (userName == null) {
            // No user available: fall back to a fixed name and leave groups as null.
            userName = "anonymous";
        } else {
            try {
                groups = UserGroupInformation.createRemoteUser(userName).getGroups();
            } catch (Exception ex) {
                // Group lookup is best effort; groups stays null on failure.
                LOG.warn("Cannot obtain groups for " + userName, ex);
            }
        }
        MappingInput mi = new MappingInput(userName, groups, ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
        // NOTE(review): when ctx came from 'context' nothing here guarantees a non-null
        // WmContext, yet it is dereferenced below without a check — confirm with callers.
        WmContext wmContext = ctx.getWmContext();
        // jobConf will hold all the configuration for hadoop, tez, and hive, which are not set in AM defaults
        JobConf jobConf = utils.createConfiguration(conf, false);
        // Setup the job specific keystore path if exists and put the password into the environment variables of tez am/tasks.
        HiveConfUtil.updateJobCredentialProviders(jobConf);
        // Get all user jars from work (e.g. input format stuff).
        String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
        // DAG scratch dir. We get a session from the pool so it may be different from Tez one.
        // TODO: we could perhaps reuse the same directory for HiveResources?
        Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), conf);
        CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
        // Session acquisition is timed with the perf logger.
        perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        session = sessionRef.value = WorkloadManagerFederation.getSession(sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext);
        perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        // From here on a session has been obtained, so the inner finally must hand it back.
        try {
            ss.setTezSession(session);
            LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId());
            // Ensure the session is open and has the necessary local resources.
            // This would refresh any conf resources and also local resources.
            ensureSessionHasResources(session, allNonConfFiles);
            // This is a combination of the jar stuff from conf, and not from conf.
            List<LocalResource> allNonAppResources = session.getLocalizedResources();
            logResources(allNonAppResources);
            Map<String, LocalResource> allResources = DagUtils.createTezLrMap(session.getAppJarLr(), allNonAppResources);
            // next we translate the TezWork to a Tez DAG
            DAG dag = build(jobConf, work, scratchDir, ctx, allResources);
            dag.setCallerContext(callerContext);
            // Check isShutdown opportunistically; it's never unset.
            if (this.isShutdown) {
                throw new HiveException("Operation cancelled");
            }
            DAGClient dagClient = submit(dag, sessionRef);
            // Re-read the holder: submit(...) was given sessionRef, not the session itself.
            session = sessionRef.value;
            boolean wasShutdown = false;
            // Publish the client to the field only if shutdown has not been requested; the
            // flag is sampled and the field written under the same lock.
            synchronized (dagClientLock) {
                assert this.dagClient == null;
                wasShutdown = this.isShutdown;
                if (!wasShutdown) {
                    this.dagClient = dagClient;
                }
            }
            if (wasShutdown) {
                // The client was never published, so it is closed here before bailing out.
                closeDagClientOnCancellation(dagClient);
                throw new HiveException("Operation cancelled");
            }
            // Log all the info required to find the various logs for this query
            LOG.info("HS2 Host: [{}], Query ID: [{}], Dag ID: [{}], DAG Session ID: [{}]", ServerUtils.hostname(), queryId, this.dagClient.getDagIdentifierString(), this.dagClient.getSessionIdentifierString());
            // finally monitor will print progress until the job is done
            TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx, counters);
            rc = monitor.monitorExecution();
            if (rc != 0) {
                // Non-zero code: record the monitor's diagnostics as this task's exception.
                this.setException(new HiveException(monitor.getDiagnostics()));
            }
            try {
                // fetch the counters
                Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
                TezCounters dagCounters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
                // if initial counters exists, merge it with dag counters to get aggregated view
                TezCounters mergedCounters = counters == null ? dagCounters : Utils.mergeTezCounters(dagCounters, counters);
                counters = mergedCounters;
            } catch (Exception err) {
                // Don't fail execution due to counters - just don't print summary info
                LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete.", err);
                // Null counters make the counter-logging block further down a no-op.
                counters = null;
            }
            // save useful commit information into query state, e.g. for custom commit hooks, like Iceberg
            if (rc == 0) {
                collectCommitInformation(work);
            }
        } finally {
            // Note: due to TEZ-3846, the session may actually be invalid in case of some errors.
            // Currently, reopen on an attempted reuse will take care of that; we cannot tell
            // if the session is usable until we try.
            // We return this to the pool even if it's unusable; reopen is supposed to handle this.
            wmContext = ctx.getWmContext();
            try {
                if (sessionRef.value != null) {
                    sessionRef.value.returnToSessionManager();
                }
            } catch (Exception e) {
                // Logs the local 'session', which equals sessionRef.value only if it was
                // refreshed after the last change to the holder.
                LOG.error("Failed to return session: {} to pool", session, e);
                // Thrown from a finally block: if the try body was already propagating an
                // exception, this one supersedes it. The summary printing below is skipped.
                throw e;
            }
            // Print the session events summary in the configured format; "none" or a
            // missing WmContext prints nothing, as does any value other than json/text.
            if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && wmContext != null) {
                if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) {
                    wmContext.printJson(console);
                } else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) {
                    wmContext.print(console);
                }
            }
        }
        // Dump every counter, grouped, when counters are available and either the exec
        // summary is enabled or logging is at perf level or above.
        if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
            for (CounterGroup group : counters) {
                LOG.info(group.getDisplayName() + ":");
                for (TezCounter counter : group) {
                    LOG.info("   " + counter.getDisplayName() + ": " + counter.getValue());
                }
            }
        }
        updateNumRows();
    } catch (Exception e) {
        LOG.error("Failed to execute tez graph.", e);
        setException(e);
    // rc will be 1 at this point indicating failure.
    // NOTE(review): that holds only if the exception was thrown before monitorExecution()
    // assigned rc; an exception raised after a 0 result leaves rc at 0 — confirm intent.
    } finally {
        Utilities.clearWork(conf);
        // Clear gWorkMap
        for (BaseWork w : work.getAllWork()) {
            JobConf workCfg = workToConf.get(w);
            if (workCfg != null) {
                Utilities.clearWorkMapForConf(workCfg);
            }
        }
        // Only clear a Context that this method created itself.
        if (cleanContext) {
            try {
                ctx.clear();
            } catch (Exception e) {
                /*best effort*/
                LOG.warn("Failed to clean up after tez job", e);
            }
        }
        // need to either move tmp files or remove them
        DAGClient dagClient = null;
        // Take the published client and reset the field under the lock.
        synchronized (dagClientLock) {
            dagClient = this.dagClient;
            this.dagClient = null;
        }
        // DagClient as such should have no bearing on jobClose.
        if (dagClient != null) {
            // rc will only be overwritten if close errors out
            rc = close(work, rc, dagClient);
        }
    }
    return rc;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) CallerContext(org.apache.tez.client.CallerContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) TezCounter(org.apache.tez.common.counters.TezCounter) MappingInput(org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput) TezJobMonitor(org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor) JobConf(org.apache.hadoop.mapred.JobConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CallerContext(org.apache.tez.client.CallerContext) Context(org.apache.hadoop.hive.ql.Context) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) Path(org.apache.hadoop.fs.Path) CounterGroup(org.apache.tez.common.counters.CounterGroup) DAG(org.apache.tez.dag.api.DAG) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) TezCounters(org.apache.tez.common.counters.TezCounters) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGClient(org.apache.tez.dag.api.client.DAGClient)

Aggregations

CounterGroup (org.apache.tez.common.counters.CounterGroup)16 TezCounter (org.apache.tez.common.counters.TezCounter)13 TezCounters (org.apache.tez.common.counters.TezCounters)8 SessionState (org.apache.hadoop.hive.ql.session.SessionState)4 IOException (java.io.IOException)3 Path (org.apache.hadoop.fs.Path)3 DAG (org.apache.tez.dag.api.DAG)3 DAGClient (org.apache.tez.dag.api.client.DAGClient)3 StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 HiveConf (org.apache.hadoop.hive.conf.HiveConf)2 Context (org.apache.hadoop.hive.ql.Context)2 QueryPlan (org.apache.hadoop.hive.ql.QueryPlan)2 TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask)2 MappingInput (org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput)2 TezJobMonitor (org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)2 WmContext (org.apache.hadoop.hive.ql.wm.WmContext)2