use of org.apache.tez.common.counters.CounterGroup in project tez by apache.
the class DagTypeConverters method convertTezCountersToProto.
/**
 * Serializes a {@code TezCounters} instance into its protobuf form.
 *
 * <p>Every counter group becomes one {@code TezCounterGroupProto} carrying the group's name and
 * display name, and every counter inside it becomes one {@code TezCounterProto} carrying the
 * counter's name, display name and value. Groups and counters are emitted in iteration order.
 *
 * @param counters the counters to convert; iterated directly, so it must not be {@code null}
 * @return the populated {@code TezCountersProto}
 */
public static TezCountersProto convertTezCountersToProto(TezCounters counters) {
  TezCountersProto.Builder countersProto = TezCountersProto.newBuilder();
  for (CounterGroup group : counters) {
    TezCounterGroupProto.Builder groupProto = TezCounterGroupProto.newBuilder();
    groupProto.setName(group.getName());
    groupProto.setDisplayName(group.getDisplayName());
    for (TezCounter counter : group) {
      TezCounterProto.Builder counterProto = TezCounterProto.newBuilder();
      counterProto.setName(counter.getName());
      counterProto.setDisplayName(counter.getDisplayName());
      counterProto.setValue(counter.getValue());
      // Appending preserves iteration order, same as inserting at a running index.
      groupProto.addCounters(counterProto.build());
    }
    countersProto.addCounterGroups(groupProto.build());
  }
  return countersProto.build();
}
use of org.apache.tez.common.counters.CounterGroup in project hive by apache.
the class TezJobMonitor method getCounterValues.
/**
 * Builds a snapshot of the counters named in {@code desiredCounters}.
 *
 * <p>Three sources are consulted, in this order:
 * <ol>
 *   <li>the DAG counters, matched by exact name, by DAG-level name, by vertex-level name, or by
 *       name prefix (counters that carry a vertex name as suffix);</li>
 *   <li>the per-vertex {@code Progress} objects, for the total-task counters;</li>
 *   <li>the wall clock, for the execution-time counter (skipped once the DAG is done).</li>
 * </ol>
 *
 * @param dagCounters counters reported for the DAG; iterated directly, so must not be null
 * @param vertexNames vertex names, forwarded to the aggregation helpers
 * @param vertexProgressMap progress per vertex; may be null, in which case task-count counters
 *     are not produced
 * @param desiredCounters names of the counters the caller is interested in
 * @param done whether the DAG has already finished; when true the time counter is not updated
 * @return a new map from counter name to its current value
 */
private Map<String, Long> getCounterValues(final TezCounters dagCounters, final List<String> vertexNames, final Map<String, Progress> vertexProgressMap, final Set<String> desiredCounters, final boolean done) {
  final Map<String, Long> result = new HashMap<>();

  // --- DAG specific counters ---
  for (CounterGroup group : dagCounters) {
    for (TezCounter reported : group) {
      final String reportedName = reported.getName();
      for (String wanted : desiredCounters) {
        if (reportedName.equals(wanted)) {
          // Exact match: take the reported value as-is.
          result.put(reportedName, reported.getValue());
          continue;
        }
        if (isDagLevelCounter(wanted)) {
          // DAG-level request: sum the counter across all vertices, e.g. the DAG-wide
          // SHUFFLE_BYTES is the sum of every vertex's SHUFFLE_BYTES.
          // Use case: act when SHUFFLE_BYTES across the entire DAG exceeds a limit.
          aggregateCountersSum(result, vertexNames, getCounterFromDagCounter(wanted), wanted, reported);
          continue;
        }
        if (isVertexLevelCounter(wanted)) {
          // Vertex-level request: keep the max across vertices, since trigger validation only
          // cares whether *any* single vertex exceeds the limit.
          aggregateCountersMax(result, vertexNames, getCounterFromVertexCounter(wanted), wanted, reported);
          continue;
        }
        if (reportedName.startsWith(wanted)) {
          // Counters that carry the vertex name as a suffix:
          //   wanted   = INPUT_FILES
          //   counters = {INPUT_FILES_Map_1 : 5, INPUT_FILES_Map_4 : 10}
          //   outcome  = INPUT_FILES : 15
          aggregateCountersSum(result, vertexNames, wanted, wanted, reported);
        }
      }
    }
  }

  // --- Per-vertex counters that are only available through vertex Progress ---
  final String vertexTotalTasks = VertexCounterLimit.VertexCounter.VERTEX_TOTAL_TASKS.name();
  if (desiredCounters.contains(vertexTotalTasks) && vertexProgressMap != null) {
    for (Progress progress : vertexProgressMap.values()) {
      // Running maximum of the task count over all vertices (starting from 0).
      long maxSoFar = result.containsKey(vertexTotalTasks) ? result.get(vertexTotalTasks) : 0L;
      result.put(vertexTotalTasks, Math.max(maxSoFar, progress.getTotalTaskCount()));
    }
  }

  final String dagTotalTasks = VertexCounterLimit.VertexCounter.DAG_TOTAL_TASKS.name();
  if (desiredCounters.contains(dagTotalTasks) && vertexProgressMap != null) {
    for (Progress progress : vertexProgressMap.values()) {
      // Running sum of the task count over all vertices (starting from 0).
      long sumSoFar = result.containsKey(dagTotalTasks) ? result.get(dagTotalTasks) : 0L;
      result.put(dagTotalTasks, sumSoFar + progress.getTotalTaskCount());
    }
  }

  // --- Time based counters: frozen once the DAG is done ---
  if (done) {
    return result;
  }
  final String executionTime = TimeCounterLimit.TimeCounter.EXECUTION_TIME.name();
  if (desiredCounters.contains(executionTime) && executionStartTime > 0) {
    result.put(executionTime, System.currentTimeMillis() - executionStartTime);
  }
  return result;
}
use of org.apache.tez.common.counters.CounterGroup in project hive by apache.
the class PostExecTezSummaryPrinter method run.
/**
 * Post-execution hook that prints a per-task summary of selected Tez counters to the session
 * console.
 *
 * <p>Does nothing unless the configured execution engine is {@code "tez"} and a query plan is
 * available. For every Tez task found among the plan's root tasks, the following counter groups
 * are printed when present: the configured Hive counter group (minus counters whose name
 * contains {@code TIME}, which may differ across runs), the Hive input counters, the file
 * system counters whose name contains {@code HDFS}, the LLAP IO counters listed by
 * {@code LlapIOCounters.testSafeCounterNames()}, and the compile time counters.
 *
 * @param hookContext context of the finished query; must be of type {@code POST_EXEC_HOOK}
 * @throws Exception declared by the hook interface
 */
@Override
public void run(HookContext hookContext) throws Exception {
  assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
  HiveConf conf = hookContext.getConf();
  String engine = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  if (!"tez".equals(engine)) {
    return;
  }

  LOG.info("Executing post execution hook to print tez summary..");
  SessionState sessionState = SessionState.get();
  SessionState.LogHelper console = sessionState.getConsole();
  QueryPlan queryPlan = hookContext.getQueryPlan();
  if (queryPlan == null) {
    return;
  }

  for (TezTask tezTask : Utilities.getTezTasks(queryPlan.getRootTasks())) {
    LOG.info("Printing summary for tez task: " + tezTask.getName());
    TezCounters taskCounters = tezTask.getTezCounters();
    if (taskCounters == null) {
      continue;
    }

    String hiveCountersGroup = HiveConf.getVar(conf, HiveConf.ConfVars.HIVECOUNTERGROUP);
    for (CounterGroup group : taskCounters) {
      if (hiveCountersGroup.equals(group.getDisplayName())) {
        console.printInfo(tezTask.getId() + " HIVE COUNTERS:", false);
        for (TezCounter counter : group) {
          // Time counters (like HASHTABLE_LOAD_TIME_MS) may differ across runs; leave them out.
          if (counter.getName().contains("TIME")) {
            continue;
          }
          console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
        }
      } else if (group.getName().equals(HiveInputCounters.class.getName())) {
        console.printInfo(tezTask.getId() + " INPUT COUNTERS:", false);
        for (TezCounter counter : group) {
          console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
        }
      } else if (group.getName().equals(FileSystemCounter.class.getName())) {
        console.printInfo(tezTask.getId() + " FILE SYSTEM COUNTERS:", false);
        for (TezCounter counter : group) {
          // Only counters whose name mentions HDFS are printed; the rest are skipped.
          if (!counter.getName().contains("HDFS")) {
            continue;
          }
          console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
        }
      } else if (group.getName().equals(LlapIOCounters.class.getName())) {
        console.printInfo(tezTask.getId() + " LLAP IO COUNTERS:", false);
        List<String> printableNames = LlapIOCounters.testSafeCounterNames();
        for (TezCounter counter : group) {
          if (!printableNames.contains(counter.getDisplayName())) {
            continue;
          }
          console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
        }
      } else if (group.getName().equals(CompileTimeCounters.class.getName())) {
        console.printInfo(tezTask.getId() + " COMPILE TIME COUNTERS:", false);
        for (TezCounter counter : group) {
          console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
        }
      }
    }
  }
}
use of org.apache.tez.common.counters.CounterGroup in project hive by apache.
the class TezTask method execute.
/**
 * Executes this task's Tez work and returns its exit code.
 *
 * <p>Steps, in order: obtain (or create) the query {@code Context}; acquire a Tez session via
 * {@code WorkloadManagerFederation.getSession}; build the DAG from the work and submit it;
 * monitor it with a {@code TezJobMonitor}; fetch the DAG counters; return the session to its
 * manager; optionally print the workload-management summary and log the counters; finally clear
 * cached work state and call {@code close(work, rc, dagClient)}.
 *
 * <p>Exceptions thrown inside the main {@code try} block are caught and logged here rather than
 * propagated.
 *
 * @param driverContext supplies the {@code Context} to run in; when it has none, a temporary
 *     one is created and cleared before returning
 * @return the value reported by {@code monitorExecution()}, possibly replaced by the result of
 *     {@code close(...)}; {@code 1} if execution never reached the monitor
 */
@Override
public int execute(DriverContext driverContext) {
// Pessimistic default: stays 1 unless monitorExecution() below assigns a new value.
int rc = 1;
boolean cleanContext = false;
Context ctx = null;
// Holder for the current session; it is handed to getSession(...) and submit(...) and re-read
// afterwards. NOTE(review): presumably those calls may swap in a different session - confirm.
Ref<TezSessionState> sessionRef = Ref.from(null);
try {
// Get or create Context object. If we create it we have to clean it later as well.
ctx = driverContext.getCtx();
if (ctx == null) {
ctx = new Context(conf);
cleanContext = true;
// some DDL task that directly executes a TezTask does not setup Context and hence TriggerContext.
// Setting queryId is messed up. Some DDL tasks have executionId instead of proper queryId.
String queryId = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYID);
WmContext wmContext = new WmContext(System.currentTimeMillis(), queryId);
ctx.setWmContext(wmContext);
}
// Need to remove this static hack. But this is the way currently to get a session.
SessionState ss = SessionState.get();
// Note: given that we return pool sessions to the pool in the finally block below, and that
// we need to set the global to null to do that, this "reuse" may be pointless.
TezSessionState session = sessionRef.value = ss.getTezSession();
if (session != null && !session.isOpen()) {
LOG.warn("The session: " + session + " has not been opened");
}
// We only need a username for UGI to use for groups; getGroups will fetch the groups
// based on Hadoop configuration, as documented at
// https://hadoop.apache.org/docs/r2.8.0/hadoop-project-dist/hadoop-common/GroupsMapping.html
String userName = ss.getUserName();
List<String> groups = null;
if (userName == null) {
// No user on the session: fall back to a placeholder name and leave groups null.
userName = "anonymous";
} else {
groups = UserGroupInformation.createRemoteUser(ss.getUserName()).getGroups();
}
// Mapping input built from user, groups and the "wmpool" / "wmapp" Hive variables;
// it is passed to getSession(...) below.
MappingInput mi = new MappingInput(userName, groups, ss.getHiveVariables().get("wmpool"), ss.getHiveVariables().get("wmapp"));
WmContext wmContext = ctx.getWmContext();
// jobConf will hold all the configuration for hadoop, tez, and hive
JobConf jobConf = utils.createConfiguration(conf);
// Get all user jars from work (e.g. input format stuff).
String[] allNonConfFiles = work.configureJobConfAndExtractJars(jobConf);
// DAG scratch dir. We get a session from the pool so it may be different from Tez one.
// TODO: we could perhaps reuse the same directory for HiveResources?
Path scratchDir = utils.createTezDir(ctx.getMRScratchDir(), conf);
CallerContext callerContext = CallerContext.create("HIVE", queryPlan.getQueryId(), "HIVE_QUERY_ID", queryPlan.getQueryStr());
// Session acquisition is timed with the perf logger.
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
session = sessionRef.value = WorkloadManagerFederation.getSession(sessionRef.value, conf, mi, getWork().getLlapMode(), wmContext);
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_GET_SESSION);
// From here on a session is held; the finally block of this inner try returns it.
try {
ss.setTezSession(session);
LOG.info("Subscribed to counters: {} for queryId: {}", wmContext.getSubscribedCounters(), wmContext.getQueryId());
// Ensure the session is open and has the necessary local resources.
// This would refresh any conf resources and also local resources.
ensureSessionHasResources(session, allNonConfFiles);
// This is a combination of the jar stuff from conf, and not from conf.
List<LocalResource> allNonAppResources = session.getLocalizedResources();
logResources(allNonAppResources);
Map<String, LocalResource> allResources = DagUtils.createTezLrMap(session.getAppJarLr(), allNonAppResources);
// next we translate the TezWork to a Tez DAG
DAG dag = build(jobConf, work, scratchDir, ctx, allResources);
dag.setCallerContext(callerContext);
// Check isShutdown opportunistically; it's never unset.
if (this.isShutdown) {
throw new HiveException("Operation cancelled");
}
DAGClient dagClient = submit(jobConf, dag, sessionRef);
// Re-read the session from the holder after submit(...).
session = sessionRef.value;
boolean wasShutdown = false;
// Publish the client in this.dagClient under the lock, unless shutdown was already requested.
synchronized (dagClientLock) {
assert this.dagClient == null;
wasShutdown = this.isShutdown;
if (!wasShutdown) {
this.dagClient = dagClient;
}
}
// Shutdown was requested before the client could be published: close it here and abort.
if (wasShutdown) {
closeDagClientOnCancellation(dagClient);
throw new HiveException("Operation cancelled");
}
// finally monitor will print progress until the job is done
TezJobMonitor monitor = new TezJobMonitor(work.getAllWork(), dagClient, conf, dag, ctx);
rc = monitor.monitorExecution();
if (rc != 0) {
// Non-zero result: record the monitor's diagnostics as this task's exception.
this.setException(new HiveException(monitor.getDiagnostics()));
}
// fetch the counters
try {
Set<StatusGetOpts> statusGetOpts = EnumSet.of(StatusGetOpts.GET_COUNTERS);
counters = dagClient.getDAGStatus(statusGetOpts).getDAGCounters();
} catch (Exception err) {
// Don't fail execution due to counters - just don't print summary info
LOG.warn("Failed to get counters. Ignoring, summary info will be incomplete. " + err, err);
counters = null;
}
} finally {
// Note: due to TEZ-3846, the session may actually be invalid in case of some errors.
// Currently, reopen on an attempted reuse will take care of that; we cannot tell
// if the session is usable until we try.
// We return this to the pool even if it's unusable; reopen is supposed to handle this.
wmContext = ctx.getWmContext();
try {
if (sessionRef.value != null) {
sessionRef.value.returnToSessionManager();
}
} catch (Exception e) {
LOG.error("Failed to return session: {} to pool", session, e);
// Rethrown from inside a finally block: the summary printing below is skipped, and this
// exception replaces any exception already propagating out of the inner try.
throw e;
}
// Print the workload-management summary as JSON or text, unless the setting is "none".
if (!conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("none") && wmContext != null) {
if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("json")) {
wmContext.printJson(console);
} else if (conf.getVar(HiveConf.ConfVars.TEZ_SESSION_EVENTS_SUMMARY).equalsIgnoreCase("text")) {
wmContext.print(console);
}
}
}
// Log every fetched counter, but only when info logging is on and either the exec summary
// setting or perf-or-above logging is enabled.
if (LOG.isInfoEnabled() && counters != null && (HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_EXEC_SUMMARY) || Utilities.isPerfOrAboveLogging(conf))) {
for (CounterGroup group : counters) {
LOG.info(group.getDisplayName() + ":");
for (TezCounter counter : group) {
LOG.info(" " + counter.getDisplayName() + ": " + counter.getValue());
}
}
}
} catch (Exception e) {
LOG.error("Failed to execute tez graph.", e);
// rc will be 1 at this point indicating failure.
// NOTE(review): that only holds if the exception was thrown before monitorExecution()
// assigned rc; an exception raised afterwards (e.g. the rethrow when returning the session
// fails) leaves rc at the monitor's value, which may be 0 - confirm this is intended.
} finally {
Utilities.clearWork(conf);
// Clear gWorkMap
for (BaseWork w : work.getAllWork()) {
JobConf workCfg = workToConf.get(w);
if (workCfg != null) {
Utilities.clearWorkMapForConf(workCfg);
}
}
// Only clear the Context if this method created it.
if (cleanContext) {
try {
ctx.clear();
} catch (Exception e) {
/*best effort*/
LOG.warn("Failed to clean up after tez job", e);
}
}
// need to either move tmp files or remove them
DAGClient dagClient = null;
// Take the published client and reset the field to null, both under the lock.
synchronized (dagClientLock) {
dagClient = this.dagClient;
this.dagClient = null;
}
// DagClient as such should have no bearing on jobClose.
if (dagClient != null) {
// rc will only be overwritten if close errors out
rc = close(work, rc, dagClient);
}
}
return rc;
}
use of org.apache.tez.common.counters.CounterGroup in project tez by apache.
the class DAGUtils method convertCountersToATSMap.
/**
 * Converts Tez counters into the nested map layout used for ATS publishing.
 *
 * <p>Counters whose value is zero are omitted, and so are groups that end up with no counters.
 * A display name (for a counter or a group) is only included when it differs from the plain
 * name. The resulting list of group maps is handed to {@code putInto} under
 * {@code ATSConstants.COUNTER_GROUPS}.
 *
 * @param counters the counters to convert; may be {@code null}
 * @return a new insertion-ordered map; empty when {@code counters} is {@code null}
 */
public static Map<String, Object> convertCountersToATSMap(TezCounters counters) {
  Map<String, Object> atsMap = new LinkedHashMap<String, Object>();
  if (counters == null) {
    return atsMap;
  }

  ArrayList<Object> groupEntries = new ArrayList<Object>();
  for (CounterGroup group : counters) {
    ArrayList<Object> counterEntries = new ArrayList<Object>();
    for (TezCounter counter : group) {
      // Zero-valued counters are omitted.
      if (counter.getValue() == 0) {
        continue;
      }
      Map<String, Object> counterEntry = new LinkedHashMap<String, Object>();
      counterEntry.put(ATSConstants.COUNTER_NAME, counter.getName());
      boolean counterHasOwnDisplayName = !counter.getDisplayName().equals(counter.getName());
      if (counterHasOwnDisplayName) {
        counterEntry.put(ATSConstants.COUNTER_DISPLAY_NAME, counter.getDisplayName());
      }
      counterEntry.put(ATSConstants.COUNTER_VALUE, counter.getValue());
      counterEntries.add(counterEntry);
    }

    // A group with no surviving counters is dropped entirely.
    if (counterEntries.isEmpty()) {
      continue;
    }
    Map<String, Object> groupEntry = new LinkedHashMap<String, Object>();
    groupEntry.put(ATSConstants.COUNTER_GROUP_NAME, group.getName());
    boolean groupHasOwnDisplayName = !group.getDisplayName().equals(group.getName());
    if (groupHasOwnDisplayName) {
      groupEntry.put(ATSConstants.COUNTER_GROUP_DISPLAY_NAME, group.getDisplayName());
    }
    groupEntry.put(ATSConstants.COUNTERS, counterEntries);
    groupEntries.add(groupEntry);
  }

  putInto(atsMap, ATSConstants.COUNTER_GROUPS, groupEntries);
  return atsMap;
}
Aggregations