use of org.apache.tez.common.counters.TezCounters in project hive by apache.
the class TezJobMonitor method monitorExecution.
/**
 * Polls the DAG status in a loop until the DAG reaches a terminal state or monitoring gives up,
 * printing state transitions to the console and publishing subscribed counters to the
 * workload-management context when one is available.
 *
 * <p>Any exception raised while polling is treated as a transient failure and retried; the DAG is
 * killed (best effort) when the failure was caused by an interrupt, or once at least
 * {@code MAX_RETRY_FAILURES} consecutive failures have spanned more than
 * {@code MAX_RETRY_INTERVAL} milliseconds.
 *
 * @return 0 when the DAG succeeded, 1 when it was killed or monitoring was aborted after
 *         failures/interruption, 2 when the DAG ended in FAILED or ERROR
 */
public int monitorExecution() {
  boolean done = false;
  boolean success = false;
  int failedCounter = 0;
  final StopWatch failureTimer = new StopWatch();
  int rc = 0;
  DAGStatus status = null;
  Map<String, Progress> vertexProgressMap = null;
  long monitorStartTime = System.currentTimeMillis();
  // Make the client visible to the shutdown list for as long as the DAG is being monitored.
  synchronized (shutdownList) {
    shutdownList.add(dagClient);
  }
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
  DAGStatus.State lastState = null;
  boolean running = false;
  long checkInterval = HiveConf.getTimeVar(hiveConf, HiveConf.ConfVars.TEZ_DAG_STATUS_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
  WmContext wmContext = null;
  while (true) {
    try {
      if (context != null) {
        context.checkHeartbeaterLockException();
      }
      status = dagClient.getDAGStatus(EnumSet.of(StatusGetOpts.GET_COUNTERS), checkInterval);
      TezCounters dagCounters = status.getDAGCounters();
      vertexProgressMap = status.getVertexProgress();
      // The context is optional (see the guard above). Dereferencing it unconditionally would
      // throw on every poll, be mistaken for an unresponsive AM, and end with the DAG being killed.
      wmContext = (context != null) ? context.getWmContext() : null;
      if (dagCounters != null && wmContext != null) {
        Set<String> desiredCounters = wmContext.getSubscribedCounters();
        if (desiredCounters != null && !desiredCounters.isEmpty()) {
          // Only needed for counter lookup, so it is built only when counters are subscribed.
          List<String> vertexNames = vertexProgressMap.keySet().stream().map(k -> k.replaceAll(" ", "_")).collect(Collectors.toList());
          Map<String, Long> currentCounters = getCounterValues(dagCounters, vertexNames, vertexProgressMap, desiredCounters, done);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Requested DAG status. checkInterval: {}. currentCounters: {}", checkInterval, currentCounters);
          }
          wmContext.setCurrentCounters(currentCounters);
        }
      }
      DAGStatus.State state = status.getState();
      // AM is responsive again (recovery?)
      failedCounter = 0;
      failureTimer.reset();
      // RUNNING is re-processed on every poll so that progress keeps being refreshed.
      if (state != lastState || state == RUNNING) {
        lastState = state;
        switch(state) {
          case SUBMITTED:
            console.printInfo("Status: Submitted");
            break;
          case INITING:
            console.printInfo("Status: Initializing");
            this.executionStartTime = System.currentTimeMillis();
            break;
          case RUNNING:
            if (!running) {
              perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
              console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
              this.executionStartTime = System.currentTimeMillis();
              running = true;
            }
            updateFunction.update(status, vertexProgressMap);
            break;
          case SUCCEEDED:
            // RUNNING was never observed, so fall back to the time monitoring began.
            if (!running) {
              this.executionStartTime = monitorStartTime;
            }
            updateFunction.update(status, vertexProgressMap);
            success = true;
            running = false;
            done = true;
            break;
          case KILLED:
            if (!running) {
              this.executionStartTime = monitorStartTime;
            }
            updateFunction.update(status, vertexProgressMap);
            console.printInfo("Status: Killed");
            running = false;
            done = true;
            rc = 1;
            break;
          case FAILED:
          case ERROR:
            if (!running) {
              this.executionStartTime = monitorStartTime;
            }
            updateFunction.update(status, vertexProgressMap);
            console.printError("Status: Failed");
            running = false;
            done = true;
            rc = 2;
            break;
        }
      }
      if (wmContext != null && done) {
        wmContext.setQueryCompleted(true);
      }
    } catch (Exception e) {
      console.printInfo("Exception: " + e.getMessage());
      boolean isInterrupted = hasInterruptedException(e);
      // First failure of a streak: start measuring how long the streak lasts.
      if (failedCounter == 0) {
        failureTimer.reset();
        failureTimer.start();
      }
      // Give up immediately on interrupt; otherwise only after enough failures over a long enough span.
      if (isInterrupted || (++failedCounter >= MAX_RETRY_FAILURES && failureTimer.now(TimeUnit.MILLISECONDS) > MAX_RETRY_INTERVAL)) {
        try {
          if (isInterrupted) {
            console.printInfo("Killing DAG...");
          } else {
            console.printInfo(String.format("Killing DAG... after %d seconds", failureTimer.now(TimeUnit.SECONDS)));
          }
          dagClient.tryKillDAG();
        } catch (IOException | TezException tezException) {
          // best effort
        }
        console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e));
        rc = 1;
        done = true;
      } else {
        console.printInfo("Retrying...");
      }
      if (wmContext != null && done) {
        wmContext.setQueryCompleted(true);
      }
    } finally {
      if (done) {
        if (wmContext != null) {
          wmContext.setQueryCompleted(true);
        }
        // Surface the DAG diagnostics on any non-successful outcome, if a status was ever fetched.
        if (rc != 0 && status != null) {
          for (String diag : status.getDiagnostics()) {
            console.printError(diag);
            diagnostics.append(diag);
          }
        }
        synchronized (shutdownList) {
          shutdownList.remove(dagClient);
        }
        break;
      }
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
  printSummary(success, vertexProgressMap);
  return rc;
}
use of org.apache.tez.common.counters.TezCounters in project hive by apache.
the class LlapTaskReporter method registerTask.
/**
 * Registers a task for tracking. The task's counters are published for the current fragment and
 * a heartbeat callable is submitted so that heartbeats are sent out for the task (to fetch
 * events, etc.).
 *
 * @param task the runtime task to track
 * @param errorReporter handed to the callback attached to the heartbeat future
 */
@Override
public synchronized void registerTask(RuntimeTask task, ErrorReporter errorReporter) {
  // Publish the fragment's counters before the heartbeat starts.
  final TezCounters fragmentCounters = task.addAndGetTezCounter(fragmentId);
  FragmentCountersMap.registerCountersForFragment(fragmentId, fragmentCounters);
  LOG.info("Registered counters for fragment: {} vertexName: {}", fragmentId, task.getVertexName());

  // Keep a reference to the callable in the field, then hand it to the executor.
  currentCallable = new HeartbeatCallable(completionListener, task, umbilical, pollInterval, sendCounterInterval, maxEventsToGet, requestCounter, containerIdStr, initialEvent, fragmentRequestId);
  final ListenableFuture<Boolean> pendingHeartbeat = heartbeatExecutor.submit(currentCallable);
  Futures.addCallback(pendingHeartbeat, new HeartbeatCallback(errorReporter));
}
use of org.apache.tez.common.counters.TezCounters in project hive by apache.
the class PostExecOrcRowGroupCountPrinter method run.
/**
 * Post-execution hook that prints, for every Tez task among the plan's root tasks, the
 * {@code SELECTED_ROWGROUPS} counter from the LLAP IO counter group.
 *
 * <p>Returns without printing when the execution engine is not "tez" or when the hook context
 * carries no query plan.
 *
 * @param hookContext context of the finished query; expected to be a post-exec hook context
 * @throws Exception declared by the hook interface
 */
@Override
public void run(HookContext hookContext) throws Exception {
  assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);

  final HiveConf conf = hookContext.getConf();
  final String engine = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
  if (!"tez".equals(engine)) {
    return;
  }

  LOG.info("Executing post execution hook to print ORC row groups read counter..");
  final SessionState session = SessionState.get();
  final SessionState.LogHelper console = session.getConsole();

  final QueryPlan plan = hookContext.getQueryPlan();
  if (plan == null) {
    return;
  }

  final String llapIoGroupName = LlapIOCounters.class.getName();
  for (TezTask tezTask : Utilities.getTezTasks(plan.getRootTasks())) {
    LOG.info("Printing ORC row group counter for tez task: " + tezTask.getName());
    final TezCounters counters = tezTask.getTezCounters();
    if (counters == null) {
      continue;
    }
    for (CounterGroup group : counters) {
      // Only the LLAP IO group is of interest.
      if (!group.getName().equals(llapIoGroupName)) {
        continue;
      }
      console.printInfo(tezTask.getId() + " LLAP IO COUNTERS:", false);
      for (TezCounter counter : group) {
        final String displayName = counter.getDisplayName();
        if (displayName.equals(LlapIOCounters.SELECTED_ROWGROUPS.name())) {
          console.printInfo(" " + counter.getDisplayName() + ": " + counter.getValue(), false);
        }
      }
    }
  }
}
use of org.apache.tez.common.counters.TezCounters in project hive by apache.
the class HiveSplitGenerator method initialize.
/**
 * Generates the grouped input splits for this vertex and returns them as initializer events.
 *
 * <p>Steps, in order: run dynamic partition pruning if a pruner is set, compute raw splits from
 * the configured input format, publish split/directory/file counters when an initializer context
 * is present, optionally prune buckets, group the splits and build location hints.
 *
 * @return the event list built from the grouped splits
 * @throws RuntimeException when grouping is disabled in the user payload (not supported)
 * @throws Exception propagated from pruning, split generation or grouping
 */
@SuppressWarnings("unchecked")
@Override
public List<Event> initialize() throws Exception {
  // Pruning modifies the work instance (it may remove partitions), and the very same instance
  // must be used for split generation, so bind it to this thread up front.
  Utilities.setMapWork(jobConf, work);
  try {
    final boolean sendSerializedEvents = conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true);

    // Dynamic partition pruning.
    if (pruner != null) {
      pruner.prune();
    }

    final boolean generateConsistentSplits = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS);
    LOG.info("GenerateConsistentSplitsInHive=" + generateConsistentSplits);
    final String realInputFormatName = conf.get("mapred.input.format.class");

    if (!userPayloadProto.getGroupingEnabled()) {
      // If non-grouped splits are ever supported, grouping must also be disabled in the payload.
      throw new RuntimeException("HiveInputFormat does not support non-grouped splits, InputFormatName is: " + realInputFormatName);
    }

    // The real input format has to be instantiated to produce the raw splits.
    final InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(realInputFormatName), jobConf);

    final InputInitializerContext initializerContext = getContext();
    final int availableSlots;
    if (initializerContext == null) {
      // FIXME: without a context (llap), for now total resource == task resource, i.e. one slot.
      availableSlots = 1;
    } else {
      final int totalResource = initializerContext.getTotalAvailableResource().getMemory();
      final int taskResource = initializerContext.getVertexTaskResource().getMemory();
      availableSlots = totalResource / taskResource;
    }

    final long configuredMinSplit = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1);
    if (configuredMinSplit <= 1) {
      // Broken configuration from mapred-default.xml: derive a usable minimum split size.
      final long blockSize = conf.getLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT);
      final long minGrouping = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
      final long preferredSplitSize = Math.min(blockSize / 2, minGrouping);
      HiveConf.setLongVar(jobConf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, preferredSplitSize);
      LOG.info("The preferred split size is " + preferredSplitSize);
    }

    // Raw (un-grouped) splits, sorted so that the subsequent grouping is consistent.
    final float waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);
    InputSplit[] splits = inputFormat.getSplits(jobConf, (int) (availableSlots * waves));
    Arrays.sort(splits, new InputSplitComparator());
    LOG.info("Number of input splits: " + splits.length + ". " + availableSlots + " available slots, " + waves + " waves. Input format is: " + realInputFormatName);

    // Input counters are only collected when there is a context to publish them to.
    final boolean publishCounters = initializerContext != null;
    TezCounters tezCounters = null;
    String groupName = null;
    String vertexName = null;
    if (publishCounters) {
      tezCounters = new TezCounters();
      groupName = HiveInputCounters.class.getName();
      vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");

      final String rawSplitsCounter = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName);
      tezCounters.findCounter(groupName, rawSplitsCounter).increment(splits.length);

      final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
      final String directoriesCounter = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName);
      tezCounters.findCounter(groupName, directoriesCounter).increment(paths.size());

      // Distinct split paths. The path is assumed to be a file (ACID deltas are the only
      // exception); the isFile check is skipped for performance reasons.
      final Set<String> files = new HashSet<>();
      for (InputSplit inputSplit : splits) {
        if (inputSplit instanceof FileSplit) {
          files.add(((FileSplit) inputSplit).getPath().toString());
        }
      }
      final String filesCounter = Utilities.getVertexCounterName(HiveInputCounters.INPUT_FILES.name(), vertexName);
      tezCounters.findCounter(groupName, filesCounter).increment(files.size());
    }

    if (work.getIncludedBuckets() != null) {
      splits = pruneBuckets(work, splits);
    }

    // Group the splits and flatten the groups into a single array.
    final Multimap<Integer, InputSplit> groupedSplits = splitGrouper.generateGroupedSplits(jobConf, conf, splits, waves, availableSlots, splitLocationProvider);
    final InputSplit[] flatSplits = groupedSplits.values().toArray(new InputSplit[0]);
    LOG.info("Number of split groups: " + flatSplits.length);

    if (publishCounters) {
      final String groupedSplitsCounter = Utilities.getVertexCounterName(HiveInputCounters.GROUPED_INPUT_SPLITS.name(), vertexName);
      tezCounters.findCounter(groupName, groupedSplitsCounter).setValue(flatSplits.length);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Published tez counters: " + tezCounters);
      }
      initializerContext.addCounters(tezCounters);
    }

    final List<TaskLocationHint> locationHints = splitGrouper.createTaskLocationHints(flatSplits, generateConsistentSplits);
    final InputSplitInfoMem inputSplitInfo = new InputSplitInfoMem(flatSplits, locationHints, flatSplits.length, null, jobConf);
    return createEventList(sendSerializedEvents, inputSplitInfo);
  } finally {
    Utilities.clearWork(jobConf);
  }
}
use of org.apache.tez.common.counters.TezCounters in project hive by apache.
the class LlapWmSummary method print.
/**
 * Prints the LLAP summary: a title, then one summary line per vertex (in sorted vertex-name
 * order) for which counters could be obtained, then a closing separator.
 *
 * <p>The table header is emitted before the first vertex line, and only while the {@code first}
 * flag is still unset; the flag is set once the header has been printed.
 *
 * @param console helper used for all output
 */
@Override
public void print(SessionState.LogHelper console) {
  console.printInfo("");
  console.printInfo(LLAP_SUMMARY_TITLE);

  final SortedSet<String> sortedVertexNames = new TreeSet<>(progressMap.keySet());
  final Set<StatusGetOpts> counterOpts = new HashSet<>(1);
  counterOpts.add(StatusGetOpts.GET_COUNTERS);
  final String wmCounterGroup = LlapWmCounters.class.getName();

  for (String vertexName : sortedVertexNames) {
    final TezCounters vertexCounters = vertexCounter(counterOpts, vertexName);
    if (vertexCounters == null) {
      // No counters for this vertex: nothing to summarize.
      continue;
    }
    if (!first) {
      console.printInfo(SEPARATOR);
      console.printInfo(LLAP_SUMMARY_HEADER);
      console.printInfo(SEPARATOR);
      first = true;
    }
    console.printInfo(vertexSummary(vertexName, wmCounterGroup, vertexCounters));
  }

  console.printInfo(SEPARATOR);
  console.printInfo("");
}
Aggregations