Search in sources :

Example 6 with CounterGroup

Use of org.apache.tez.common.counters.CounterGroup in the Apache Tez project.

The method convertTezCountersToProto of the class DagTypeConverters.

/**
 * Converts a {@link TezCounters} instance into a {@code TezCountersProto} message.
 *
 * <p>Every counter group is copied with its name and display name, and every counter
 * inside a group is copied with its name, display name and value. Groups and counters
 * are appended in the order in which they are iterated.
 *
 * @param counters the counters to convert; it is iterated directly, so it must not be null
 * @return the protobuf message built from {@code counters}
 */
public static TezCountersProto convertTezCountersToProto(TezCounters counters) {
    TezCountersProto.Builder countersProto = TezCountersProto.newBuilder();
    for (CounterGroup group : counters) {
        TezCounterGroupProto.Builder groupProto = TezCounterGroupProto.newBuilder();
        groupProto.setName(group.getName());
        groupProto.setDisplayName(group.getDisplayName());
        for (TezCounter counter : group) {
            TezCounterProto.Builder counterProto = TezCounterProto.newBuilder();
            counterProto.setName(counter.getName());
            counterProto.setDisplayName(counter.getDisplayName());
            counterProto.setValue(counter.getValue());
            // Appending is equivalent to inserting at the running index starting from 0.
            groupProto.addCounters(counterProto.build());
        }
        countersProto.addCounterGroups(groupProto.build());
    }
    return countersProto.build();
}
Also used : TezCounterGroupProto(org.apache.tez.dag.api.records.DAGProtos.TezCounterGroupProto) TezCounterProto(org.apache.tez.dag.api.records.DAGProtos.TezCounterProto) TezCountersProto(org.apache.tez.dag.api.records.DAGProtos.TezCountersProto) CounterGroup(org.apache.tez.common.counters.CounterGroup) TezCounter(org.apache.tez.common.counters.TezCounter) PlanTaskLocationHint(org.apache.tez.dag.api.records.DAGProtos.PlanTaskLocationHint)

Example 7 with CounterGroup

Use of org.apache.tez.common.counters.CounterGroup in the Apache Hive project.

The method getCounterValues of the class TezJobMonitor.

/**
 * Collects the current values of the counters named in {@code desiredCounters}.
 *
 * <p>Values come from three places: the DAG counters, the per-vertex progress map
 * (total task counts), and the wall clock (execution time, only while the DAG is
 * still running).
 *
 * @param dagCounters counters reported for the DAG; iterated directly
 * @param vertexNames vertex names, forwarded to the aggregation helpers
 * @param vertexProgressMap progress per vertex; may be null, in which case the task-count
 *     counters are skipped
 * @param desiredCounters names of the counters the caller wants
 * @param done whether the DAG has already finished; time based counters are not updated then
 * @return a new map from counter name to value
 */
private Map<String, Long> getCounterValues(final TezCounters dagCounters, final List<String> vertexNames, final Map<String, Progress> vertexProgressMap, final Set<String> desiredCounters, final boolean done) {
    Map<String, Long> result = new HashMap<>();

    // Counters reported by the DAG itself.
    for (CounterGroup group : dagCounters) {
        for (TezCounter counter : group) {
            String name = counter.getName();
            for (String wanted : desiredCounters) {
                if (name.equals(wanted)) {
                    // Exact match: take the value as is.
                    result.put(name, counter.getValue());
                } else if (isDagLevelCounter(wanted)) {
                    // DAG level: sum the counter over every vertex, e.g. DAG-wide SHUFFLE_BYTES is
                    // the SHUFFLE_BYTES of all vertices added together.
                    // Use case: act when SHUFFLE_BYTES across the entire DAG exceeds a limit.
                    aggregateCountersSum(result, vertexNames, getCounterFromDagCounter(wanted), wanted, counter);
                } else if (isVertexLevelCounter(wanted)) {
                    // Vertex level: keep the maximum over all vertices, because trigger validation
                    // only cares whether *any* vertex goes above the limit.
                    // Use case: act when SHUFFLE_BYTES of any single vertex exceeds a limit.
                    aggregateCountersMax(result, vertexNames, getCounterFromVertexCounter(wanted), wanted, counter);
                } else if (name.startsWith(wanted)) {
                    // Counters that carry the vertex name as a suffix are summed:
                    //   wanted   = INPUT_FILES
                    //   counters = {INPUT_FILES_Map_1 : 5, INPUT_FILES_Map_4 : 10}
                    //   outcome  = INPUT_FILES : 15
                    aggregateCountersSum(result, vertexNames, wanted, wanted, counter);
                }
            }
        }
    }

    // Per-vertex counters that are only available through the vertex Progress.
    final String vertexTotalTasks = VertexCounterLimit.VertexCounter.VERTEX_TOTAL_TASKS.name();
    if (desiredCounters.contains(vertexTotalTasks) && vertexProgressMap != null) {
        for (Progress progress : vertexProgressMap.values()) {
            // Largest task count of any vertex; a missing entry counts as 0.
            long maxSoFar = result.getOrDefault(vertexTotalTasks, 0L);
            result.put(vertexTotalTasks, Math.max(maxSoFar, progress.getTotalTaskCount()));
        }
    }

    final String dagTotalTasks = VertexCounterLimit.VertexCounter.DAG_TOTAL_TASKS.name();
    if (desiredCounters.contains(dagTotalTasks) && vertexProgressMap != null) {
        for (Progress progress : vertexProgressMap.values()) {
            // Task counts of all vertices added up; a missing entry counts as 0.
            long sumSoFar = result.getOrDefault(dagTotalTasks, 0L);
            result.put(dagTotalTasks, sumSoFar + progress.getTotalTaskCount());
        }
    }

    // Time based counters are left untouched once the DAG is done.
    if (done) {
        return result;
    }
    final String executionTime = TimeCounterLimit.TimeCounter.EXECUTION_TIME.name();
    if (desiredCounters.contains(executionTime) && executionStartTime > 0) {
        result.put(executionTime, System.currentTimeMillis() - executionStartTime);
    }
    return result;
}
Also used : Progress(org.apache.tez.dag.api.client.Progress) HashMap(java.util.HashMap) CounterGroup(org.apache.tez.common.counters.CounterGroup) TezCounter(org.apache.tez.common.counters.TezCounter) HashMap(java.util.HashMap) Map(java.util.Map)

Example 8 with CounterGroup

Use of org.apache.tez.common.counters.CounterGroup in the Apache Hive project.

The method run of the class PostExecTezSummaryPrinter.

/**
 * Prints a summary of selected Tez counters for every Tez root task of the query plan
 * to the session console.
 *
 * <p>Nothing is printed when the execution engine is not "tez" or when the hook context
 * has no query plan. For each task with counters, the following groups are printed:
 * the configured Hive counter group (without counters whose name contains "TIME"),
 * {@code HiveInputCounters}, {@code FileSystemCounter} (only counters whose name contains
 * "HDFS"), {@code LlapIOCounters} (only the names returned by
 * {@code LlapIOCounters.testSafeCounterNames()}) and {@code CompileTimeCounters}.
 *
 * @param hookContext supplies the hook type, the configuration and the query plan
 * @throws Exception declared by the hook signature
 */
@Override
public void run(HookContext hookContext) throws Exception {
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    final HiveConf hiveConf = hookContext.getConf();
    final String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    if (!"tez".equals(engine)) {
        return;
    }
    LOG.info("Executing post execution hook to print tez summary..");
    final SessionState.LogHelper out = SessionState.get().getConsole();
    final QueryPlan queryPlan = hookContext.getQueryPlan();
    if (queryPlan == null) {
        return;
    }
    for (TezTask task : Utilities.getTezTasks(queryPlan.getRootTasks())) {
        LOG.info("Printing summary for tez task: " + task.getName());
        final TezCounters taskCounters = task.getTezCounters();
        if (taskCounters == null) {
            continue;
        }
        final String hiveGroupName = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVECOUNTERGROUP);
        for (CounterGroup counterGroup : taskCounters) {
            if (hiveGroupName.equals(counterGroup.getDisplayName())) {
                out.printInfo(task.getId() + " HIVE COUNTERS:", false);
                for (TezCounter c : counterGroup) {
                    // Time counters (like HASHTABLE_LOAD_TIME_MS) may differ across runs, so skip them.
                    if (c.getName().contains("TIME")) {
                        continue;
                    }
                    out.printInfo("   " + c.getDisplayName() + ": " + c.getValue(), false);
                }
            } else if (counterGroup.getName().equals(HiveInputCounters.class.getName())) {
                out.printInfo(task.getId() + " INPUT COUNTERS:", false);
                for (TezCounter c : counterGroup) {
                    out.printInfo("   " + c.getDisplayName() + ": " + c.getValue(), false);
                }
            } else if (counterGroup.getName().equals(FileSystemCounter.class.getName())) {
                out.printInfo(task.getId() + " FILE SYSTEM COUNTERS:", false);
                for (TezCounter c : counterGroup) {
                    // Only counters whose name mentions HDFS are printed; all others are skipped.
                    if (!c.getName().contains("HDFS")) {
                        continue;
                    }
                    out.printInfo("   " + c.getDisplayName() + ": " + c.getValue(), false);
                }
            } else if (counterGroup.getName().equals(LlapIOCounters.class.getName())) {
                out.printInfo(task.getId() + " LLAP IO COUNTERS:", false);
                final List<String> printable = LlapIOCounters.testSafeCounterNames();
                for (TezCounter c : counterGroup) {
                    if (!printable.contains(c.getDisplayName())) {
                        continue;
                    }
                    out.printInfo("   " + c.getDisplayName() + ": " + c.getValue(), false);
                }
            } else if (counterGroup.getName().equals(CompileTimeCounters.class.getName())) {
                out.printInfo(task.getId() + " COMPILE TIME COUNTERS:", false);
                for (TezCounter c : counterGroup) {
                    out.printInfo("   " + c.getDisplayName() + ": " + c.getValue(), false);
                }
            }
        }
    }
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) CounterGroup(org.apache.tez.common.counters.CounterGroup) TezCounter(org.apache.tez.common.counters.TezCounter) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) FileSystemCounter(org.apache.tez.common.counters.FileSystemCounter) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) TezCounters(org.apache.tez.common.counters.TezCounters) HiveConf(org.apache.hadoop.hive.conf.HiveConf) List(java.util.List) CompileTimeCounters(org.apache.hadoop.hive.ql.exec.tez.CompileTimeCounters)

Example 9 with CounterGroup

Use of org.apache.tez.common.counters.CounterGroup in the Apache Hive project.

The method execute of the class TezTask.

/**
 * Builds a Tez DAG from this task's work, submits it through a Tez session and monitors
 * it until the monitor returns.
 *
 * <p>Steps, in order: obtain the {@code Context} (creating one if the driver context has
 * none), obtain a session through {@code WorkloadManagerFederation.getSession}, make sure
 * the session has the required resources, build and submit the DAG, run
 * {@code TezJobMonitor}, fetch the DAG counters (failures here are only logged), return the
 * session to its session manager, optionally print the session events summary, and log the
 * counters. Exceptions are caught and logged rather than propagated.
 *
 * @param driverContext supplies the {@code Context} via {@code getCtx()}; when that is null
 *     a new {@code Context} is created here and cleared again before returning
 * @return the value produced by {@code monitorExecution()} when monitoring ran, or the
 *     initial value 1 otherwise; in either case it is replaced by the result of
 *     {@code close(work, rc, dagClient)} when a DAG client was registered
 */
@Override
public int execute(DriverContext driverContext) {
    // Starts as 1; only monitorExecution() and close(...) assign it afterwards.
    int rc = 1;
    boolean cleanContext = false;
    Context ctx = null;
    // Holder for the current session: it is passed to getSession(...) and submit(...), and the
    // session is re-read from it after submit(...) returns.
    Ref<TezSessionState> sessionRef = Ref.from(null);
    try {
        // Get or create Context object. If we create it we have to clean it later as well.
        ctx = driverContext.getCtx();
        if (ctx == null) {
            ctx = new Context(conf);
            cleanContext = true;
            // some DDL task that directly executes a TezTask does not setup Context and hence TriggerContext.
            // Setting queryId is messed up. Some DDL tasks have executionId instead of proper queryId.
            String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
            WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId);
            ctx.setWmContext(wmContext);
        }
        // Need to remove this static hack. But this is the way currently to get a session.
        SessionState ss = SessionState.get();
        // Note: given that we return pool sessions to the pool in the finally block below, and that
        // we need to set the global to null to do that, this "reuse" may be pointless.
        TezSessionState session = sessionRef.value = ss.getTezSession();
        if (session != null && !session.isOpen()) {
            LOG.warn("The session: " + session + " has not been opened");
        }
        // We only need a username for UGI to use for groups; getGroups will fetch the groups
        // based on Hadoop configuration, as documented at
        // https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html
        String userName = ss.getUserName();
        List<String> groups = null;
        if (userName == null) {
            // No user on the session: use a placeholder name and leave groups as null.
            userName = "anonymous";
        } else {
            groups = UserGroupInformation.createRemoteUser(ss.getUserName()).getGroups();
        }
        // Mapping input is built from the user, its groups and the "wmpool"/"wmapp" Hive variables.
        MappingInput mi = new MappingInput(userName, groups, ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
        WmContext wmContext = ctx.getWmContext();
        // jobConf will hold all the configuration for hadoop, tez, and hive
        JobConf jobConf = utils.createConfiguration(conf);
        // Get all user jars from work (e.g. input format stuff).
        String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
        // DAG scratch dir. We get a session from the pool so it may be different from Tez one.
        // TODO: we could perhaps reuse the same directory for HiveResources?
        Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), conf);
        CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        session = sessionRef.value = WorkloadManagerFederation.getSession(sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext);
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
        try {
            ss.setTezSession(session);
            LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId());
            // Ensure the session is open and has the necessary local resources.
            // This would refresh any conf resources and also local resources.
            ensureSessionHasResources(session, allNonConfFiles);
            // This is a combination of the jar stuff from conf, and not from conf.
            List<LocalResource> allNonAppResources = session.getLocalizedResources();
            logResources(allNonAppResources);
            Map<String, LocalResource> allResources = DagUtils.createTezLrMap(session.getAppJarLr(), allNonAppResources);
            // next we translate the TezWork to a Tez DAG
            DAG dag = build(jobConf, work, scratchDir, ctx, allResources);
            dag.setCallerContext(callerContext);
            // Check isShutdown opportunistically; it's never unset.
            if (this.isShutdown) {
                throw new HiveException("Operation cancelled");
            }
            DAGClient dagClient = submit(jobConf, dag, sessionRef);
            // Re-read the session from the holder that was handed to submit(...).
            session = sessionRef.value;
            boolean wasShutdown = false;
            // Publish the client in the dagClient field under the lock, unless the task was shut
            // down in the meantime; in that case the client is closed below and execution aborts.
            synchronized (dagClientLock) {
                assert this.dagClient == null;
                wasShutdown = this.isShutdown;
                if (!wasShutdown) {
                    this.dagClient = dagClient;
                }
            }
            if (wasShutdown) {
                closeDagClientOnCancellation(dagClient);
                throw new HiveException("Operation cancelled");
            }
            // finally monitor will print progress until the job is done
            TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx);
            rc = monitor.monitorExecution();
            if (rc != 0) {
                this.setException(new HiveException(monitor.getDiagnostics()));
            }
            // fetch the counters
            // (`counters` is not declared in this method; it is assigned here and read further below)
            try {
                Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
                counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
            } catch (Exception err) {
                // Don't fail execution due to counters - just don't print summary info
                LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. " + err, err);
                counters = null;
            }
        } finally {
            // Note: due to TEZ-3846, the session may actually be invalid in case of some errors.
            // Currently, reopen on an attempted reuse will take care of that; we cannot tell
            // if the session is usable until we try.
            // We return this to the pool even if it's unusable; reopen is supposed to handle this.
            wmContext = ctx.getWmContext();
            try {
                if (sessionRef.value != null) {
                    sessionRef.value.returnToSessionManager();
                }
            } catch (Exception e) {
                LOG.error("Failed to return session: {} to pool", session, e);
                // Thrown from inside the finally block: this replaces any exception that was
                // propagating out of the try body above, and skips the summary printing below.
                throw e;
            }
            // Print the session events summary in the configured format ("json" or "text");
            // "none", any other value, or a null wmContext prints nothing.
            if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && wmContext != null) {
                if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) {
                    wmContext.printJson(console);
                } else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) {
                    wmContext.print(console);
                }
            }
        }
        // Log every counter when INFO logging is on, counters were fetched, and either the
        // exec summary setting or perf-or-above logging is enabled.
        if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
            for (CounterGroup group : counters) {
                LOG.info(group.getDisplayName() + ":");
                for (TezCounter counter : group) {
                    LOG.info("   " + counter.getDisplayName() + ": " + counter.getValue());
                }
            }
        }
    } catch (Exception e) {
        LOG.error("Failed to execute tez graph.", e);
        // rc is still its initial value 1 here unless monitorExecution() had already assigned it.
        // NOTE(review): if the exception was thrown after a successful monitorExecution() (for
        // example by returnToSessionManager()), rc may be 0 at this point - confirm this is intended.
    } finally {
        Utilities.clearWork(conf);
        // Clear gWorkMap
        for (BaseWork w : work.getAllWork()) {
            JobConf workCfg = workToConf.get(w);
            if (workCfg != null) {
                Utilities.clearWorkMapForConf(workCfg);
            }
        }
        // Only clear the Context when it was created by this method.
        if (cleanContext) {
            try {
                ctx.clear();
            } catch (Exception e) {
                /*best effort*/
                LOG.warn("Failed to clean up after tez job", e);
            }
        }
        // need to either move tmp files or remove them
        // Take the client out of the dagClient field and reset the field, both under the lock.
        DAGClient dagClient = null;
        synchronized (dagClientLock) {
            dagClient = this.dagClient;
            this.dagClient = null;
        }
        // DagClient as such should have no bearing on jobClose.
        if (dagClient != null) {
            // rc will only be overwritten if close errors out
            rc = close(work, rc, dagClient);
        }
    }
    return rc;
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) CallerContext(org.apache.tez.client.CallerContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) TezCounter(org.apache.tez.common.counters.TezCounter) MappingInput(org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput) TezJobMonitor(org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor) JobConf(org.apache.hadoop.mapred.JobConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CallerContext(org.apache.tez.client.CallerContext) Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) Path(org.apache.hadoop.fs.Path) CounterGroup(org.apache.tez.common.counters.CounterGroup) DAG(org.apache.tez.dag.api.DAG) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) LocalResource(org.apache.hadoop.yarn.api.records.LocalResource) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) DAGClient(org.apache.tez.dag.api.client.DAGClient)

Example 10 with CounterGroup

Use of org.apache.tez.common.counters.CounterGroup in the Apache Tez project.

The method convertCountersToATSMap of the class DAGUtils.

/**
 * Converts Tez counters into a nested map/list structure keyed by {@code ATSConstants}.
 *
 * <p>Counters whose value is 0 are left out, and so are groups that end up with no
 * counters. A display name is only written when it differs from the corresponding name.
 * The resulting list of groups is handed to {@code putInto} under
 * {@code ATSConstants.COUNTER_GROUPS}.
 *
 * @param counters the counters to convert; may be null
 * @return a new map; it is returned empty, without calling {@code putInto}, when
 *     {@code counters} is null
 */
public static Map<String, Object> convertCountersToATSMap(TezCounters counters) {
    Map<String, Object> atsMap = new LinkedHashMap<String, Object>();
    if (counters == null) {
        return atsMap;
    }
    ArrayList<Object> groupEntries = new ArrayList<Object>();
    for (CounterGroup counterGroup : counters) {
        ArrayList<Object> counterEntries = new ArrayList<Object>();
        for (TezCounter tezCounter : counterGroup) {
            // Zero-valued counters are not written.
            if (tezCounter.getValue() == 0) {
                continue;
            }
            Map<String, Object> counterEntry = new LinkedHashMap<String, Object>();
            counterEntry.put(ATSConstants.COUNTER_NAME, tezCounter.getName());
            if (!tezCounter.getDisplayName().equals(tezCounter.getName())) {
                counterEntry.put(ATSConstants.COUNTER_DISPLAY_NAME, tezCounter.getDisplayName());
            }
            counterEntry.put(ATSConstants.COUNTER_VALUE, tezCounter.getValue());
            counterEntries.add(counterEntry);
        }
        // A group without any written counter is dropped entirely.
        if (counterEntries.isEmpty()) {
            continue;
        }
        Map<String, Object> groupEntry = new LinkedHashMap<String, Object>();
        groupEntry.put(ATSConstants.COUNTER_GROUP_NAME, counterGroup.getName());
        if (!counterGroup.getDisplayName().equals(counterGroup.getName())) {
            groupEntry.put(ATSConstants.COUNTER_GROUP_DISPLAY_NAME, counterGroup.getDisplayName());
        }
        groupEntry.put(ATSConstants.COUNTERS, counterEntries);
        groupEntries.add(groupEntry);
    }
    putInto(atsMap, ATSConstants.COUNTER_GROUPS, groupEntries);
    return atsMap;
}
Also used : CounterGroup(org.apache.tez.common.counters.CounterGroup) ArrayList(java.util.ArrayList) JSONObject(org.codehaus.jettison.json.JSONObject) TezCounter(org.apache.tez.common.counters.TezCounter) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

CounterGroup (org.apache.tez.common.counters.CounterGroup)16 TezCounter (org.apache.tez.common.counters.TezCounter)13 TezCounters (org.apache.tez.common.counters.TezCounters)8 SessionState (org.apache.hadoop.hive.ql.session.SessionState)4 IOException (java.io.IOException)3 Path (org.apache.hadoop.fs.Path)3 DAG (org.apache.tez.dag.api.DAG)3 DAGClient (org.apache.tez.dag.api.client.DAGClient)3 StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 HiveConf (org.apache.hadoop.hive.conf.HiveConf)2 Context (org.apache.hadoop.hive.ql.Context)2 QueryPlan (org.apache.hadoop.hive.ql.QueryPlan)2 TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask)2 MappingInput (org.apache.hadoop.hive.ql.exec.tez.UserPoolMapping.MappingInput)2 TezJobMonitor (org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)2 WmContext (org.apache.hadoop.hive.ql.wm.WmContext)2