Search in sources :

Example 6 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in the Apache Hive project.

Class TezJobMonitor, method monitorExecution.

/**
 * Polls the DAG status in a loop until the DAG reaches a terminal state or monitoring gives up,
 * reporting state changes and progress on the console.
 *
 * <p>Each iteration requests the DAG status together with its counters, hands the subscribed
 * counter values to the workload-management context (when one is available) and dispatches on
 * the DAG state. An exception while polling is retried; the DAG is killed when the exception is
 * an interruption, or once at least {@code MAX_RETRY_FAILURES} failures have accumulated since
 * the last successful poll and more than {@code MAX_RETRY_INTERVAL} milliseconds have elapsed
 * on the failure timer.
 *
 * @return 0 when the DAG succeeded, 1 when the DAG was killed or monitoring gave up after
 *         polling failures, 2 when the DAG failed or ended in error
 */
public int monitorExecution() {
    boolean done = false;
    boolean success = false;
    int failedCounter = 0;
    final StopWatch failureTimer = new StopWatch();
    int rc = 0;
    DAGStatus status = null;
    Map<String, Progress> vertexProgressMap = null;
    long monitorStartTime = System.currentTimeMillis();
    // Register the client so it can be reached through shutdownList while the DAG is monitored.
    synchronized (shutdownList) {
        shutdownList.add(dagClient);
    }
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
    DAGStatus.State lastState = null;
    boolean running = false;
    long checkInterval = HiveConf.getTimeVar(hiveConf, HiveConf.ConfVars.TEZ_DAG_STATUS_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
    WmContext wmContext = null;
    while (true) {
        try {
            if (context != null) {
                context.checkHeartbeaterLockException();
            }
            status = dagClient.getDAGStatus(EnumSet.of(StatusGetOpts.GET_COUNTERS), checkInterval);
            TezCounters dagCounters = status.getDAGCounters();
            vertexProgressMap = status.getVertexProgress();
            // context is treated as optional above, so it must not be dereferenced blindly here:
            // an NPE would be caught below and misreported as a DAG status failure.
            wmContext = (context != null) ? context.getWmContext() : null;
            List<String> vertexNames = vertexProgressMap.keySet().stream().map(k -> k.replaceAll(" ", "_")).collect(Collectors.toList());
            if (dagCounters != null && wmContext != null) {
                Set<String> desiredCounters = wmContext.getSubscribedCounters();
                if (desiredCounters != null && !desiredCounters.isEmpty()) {
                    Map<String, Long> currentCounters = getCounterValues(dagCounters, vertexNames, vertexProgressMap, desiredCounters, done);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Requested DAG status. checkInterval: {}. currentCounters: {}", checkInterval, currentCounters);
                    }
                    wmContext.setCurrentCounters(currentCounters);
                }
            }
            DAGStatus.State state = status.getState();
            // AM is responsive again (recovery?)
            failedCounter = 0;
            failureTimer.reset();
            // RUNNING is handled on every poll so that progress keeps being refreshed; any other
            // state is handled only when it differs from the previously seen one.
            if (state != lastState || state == RUNNING) {
                lastState = state;
                switch(state) {
                    case SUBMITTED:
                        console.printInfo("Status: Submitted");
                        break;
                    case INITING:
                        console.printInfo("Status: Initializing");
                        this.executionStartTime = System.currentTimeMillis();
                        break;
                    case RUNNING:
                        if (!running) {
                            // First poll that sees the DAG running.
                            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_SUBMIT_TO_RUNNING);
                            console.printInfo("Status: Running (" + dagClient.getExecutionContext() + ")\n");
                            this.executionStartTime = System.currentTimeMillis();
                            running = true;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        break;
                    case SUCCEEDED:
                        if (!running) {
                            // RUNNING was never observed; fall back to the monitor start time.
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        success = true;
                        running = false;
                        done = true;
                        break;
                    case KILLED:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printInfo("Status: Killed");
                        running = false;
                        done = true;
                        rc = 1;
                        break;
                    case FAILED:
                    case ERROR:
                        if (!running) {
                            this.executionStartTime = monitorStartTime;
                        }
                        updateFunction.update(status, vertexProgressMap);
                        console.printError("Status: Failed");
                        running = false;
                        done = true;
                        rc = 2;
                        break;
                }
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } catch (Exception e) {
            console.printInfo("Exception: " + e.getMessage());
            boolean isInterrupted = hasInterruptedException(e);
            if (failedCounter == 0) {
                // First failure since the last successful poll: start timing the outage.
                failureTimer.reset();
                failureTimer.start();
            }
            if (isInterrupted || (++failedCounter >= MAX_RETRY_FAILURES && failureTimer.now(TimeUnit.MILLISECONDS) > MAX_RETRY_INTERVAL)) {
                try {
                    if (isInterrupted) {
                        console.printInfo("Killing DAG...");
                    } else {
                        console.printInfo(String.format("Killing DAG... after %d seconds", failureTimer.now(TimeUnit.SECONDS)));
                    }
                    dagClient.tryKillDAG();
                } catch (IOException | TezException tezException) {
                // best effort
                }
                console.printError("Execution has failed. stack trace: " + ExceptionUtils.getStackTrace(e));
                rc = 1;
                done = true;
            } else {
                console.printInfo("Retrying...");
            }
            if (wmContext != null && done) {
                wmContext.setQueryCompleted(true);
            }
        } finally {
            if (done) {
                if (wmContext != null) {
                    wmContext.setQueryCompleted(true);
                }
                if (rc != 0 && status != null) {
                    for (String diag : status.getDiagnostics()) {
                        console.printError(diag);
                        diagnostics.append(diag);
                    }
                }
                synchronized (shutdownList) {
                    shutdownList.remove(dagClient);
                }
                // NOTE: leaving a finally block via break discards any Throwable still in flight
                // (i.e. one that is not an Exception handled by the catch above).
                break;
            }
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_RUN_DAG);
    printSummary(success, vertexProgressMap);
    return rc;
}
Also used : DAGClient(org.apache.tez.dag.api.client.DAGClient) StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TezCounter(org.apache.tez.common.counters.TezCounter) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) InterruptedIOException(java.io.InterruptedIOException) TimeCounterLimit(org.apache.hadoop.hive.ql.wm.TimeCounterLimit) VertexCounterLimit(org.apache.hadoop.hive.ql.wm.VertexCounterLimit) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) DAGStatus(org.apache.tez.dag.api.client.DAGStatus) ProgressMonitor(org.apache.hadoop.hive.common.log.ProgressMonitor) Map(java.util.Map) InPlaceUpdate(org.apache.hadoop.hive.common.log.InPlaceUpdate) Context(org.apache.hadoop.hive.ql.Context) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) CounterGroup(org.apache.tez.common.counters.CounterGroup) LinkedList(java.util.LinkedList) EnumSet(java.util.EnumSet) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) Progress(org.apache.tez.dag.api.client.Progress) Logger(org.slf4j.Logger) StringWriter(java.io.StringWriter) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Set(java.util.Set) StopWatch(org.apache.tez.util.StopWatch) IOException(java.io.IOException) DAG(org.apache.tez.dag.api.DAG) Collectors(java.util.stream.Collectors) SessionState(org.apache.hadoop.hive.ql.session.SessionState) RUNNING(org.apache.tez.dag.api.client.DAGStatus.State.RUNNING) ShutdownHookManager(org.apache.hive.common.util.ShutdownHookManager) LogHelper(org.apache.hadoop.hive.ql.session.SessionState.LogHelper) TezException(org.apache.tez.dag.api.TezException) TimeUnit(java.util.concurrent.TimeUnit) TezCounters(org.apache.tez.common.counters.TezCounters) List(java.util.List) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) Preconditions(com.google.common.base.Preconditions) TezSessionPoolManager(org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager) ExceptionUtils(org.apache.commons.lang3.exception.ExceptionUtils) 
Progress(org.apache.tez.dag.api.client.Progress) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) TezCounters(org.apache.tez.common.counters.TezCounters) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) TezException(org.apache.tez.dag.api.TezException) StopWatch(org.apache.tez.util.StopWatch) DAGStatus(org.apache.tez.dag.api.client.DAGStatus)

Example 7 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in the Apache Hive project.

Class LlapTaskReporter, method registerTask.

/**
 * Registers a task for tracking. The task's counters are recorded for the current fragment and
 * a heartbeat callable is submitted for it, so that heartbeats go out for this task to fetch
 * events, etc.
 *
 * @param task the task to track
 * @param errorReporter passed to the callback attached to the heartbeat future
 */
@Override
public synchronized void registerTask(RuntimeTask task, ErrorReporter errorReporter) {
    FragmentCountersMap.registerCountersForFragment(fragmentId, task.addAndGetTezCounter(fragmentId));
    LOG.info("Registered counters for fragment: {} vertexName: {}", fragmentId, task.getVertexName());
    currentCallable = new HeartbeatCallable(completionListener, task, umbilical, pollInterval, sendCounterInterval,
            maxEventsToGet, requestCounter, containerIdStr, initialEvent, fragmentRequestId);
    // Submit the heartbeat loop and attach the callback to the resulting future.
    Futures.addCallback(heartbeatExecutor.submit(currentCallable), new HeartbeatCallback(errorReporter));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 8 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in the Apache Hive project.

Class PostExecOrcRowGroupCountPrinter, method run.

/**
 * Prints, for every Tez task found among the query plan's root tasks, the LLAP IO
 * {@code SELECTED_ROWGROUPS} counter to the session console.
 *
 * <p>Returns without printing when the execution engine is not "tez" or when the hook context
 * carries no query plan.
 *
 * @param hookContext the hook context; asserted to be of type {@code POST_EXEC_HOOK}
 */
@Override
public void run(HookContext hookContext) throws Exception {
    assert (hookContext.getHookType() == HookContext.HookType.POST_EXEC_HOOK);
    final String engine = HiveConf.getVar(hookContext.getConf(), HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    if (!"tez".equals(engine)) {
        return;
    }
    LOG.info("Executing post execution hook to print ORC row groups read counter..");
    final SessionState.LogHelper console = SessionState.get().getConsole();
    final QueryPlan plan = hookContext.getQueryPlan();
    if (plan == null) {
        return;
    }
    for (TezTask tezTask : Utilities.getTezTasks(plan.getRootTasks())) {
        LOG.info("Printing ORC row group counter for tez task: " + tezTask.getName());
        final TezCounters taskCounters = tezTask.getTezCounters();
        if (taskCounters == null) {
            continue;
        }
        for (CounterGroup group : taskCounters) {
            // Only the LLAP IO counter group is of interest.
            if (!group.getName().equals(LlapIOCounters.class.getName())) {
                continue;
            }
            console.printInfo(tezTask.getId() + " LLAP IO COUNTERS:", false);
            for (TezCounter counter : group) {
                final String displayName = counter.getDisplayName();
                if (displayName.equals(LlapIOCounters.SELECTED_ROWGROUPS.name())) {
                    console.printInfo("   " + displayName + ": " + counter.getValue(), false);
                }
            }
        }
    }
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) LlapIOCounters(org.apache.hadoop.hive.llap.counters.LlapIOCounters) CounterGroup(org.apache.tez.common.counters.CounterGroup) HiveConf(org.apache.hadoop.hive.conf.HiveConf) TezCounter(org.apache.tez.common.counters.TezCounter) QueryPlan(org.apache.hadoop.hive.ql.QueryPlan) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) TezCounters(org.apache.tez.common.counters.TezCounters)

Example 9 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in the Apache Hive project.

Class HiveSplitGenerator, method initialize.

/**
 * Generates the grouped input splits for this vertex and returns them as an event list.
 *
 * <p>The map work is bound to the current thread for the duration of the call and cleared again
 * in all cases. When an initializer context is available, input counters (raw splits, input
 * directories, input files, grouped splits) are published through it.
 *
 * @return the events produced by {@code createEventList} for the generated split info
 * @throws RuntimeException when grouping is not enabled in the user payload
 */
@SuppressWarnings("unchecked")
@Override
public List<Event> initialize() throws Exception {
    // Bind the map work to this thread first. Pruning may have removed partitions from this work
    // instance, and split generation has to operate on that very same instance.
    Utilities.setMapWork(jobConf, work);
    try {
        final boolean sendSerializedEvents = conf.getBoolean("mapreduce.tez.input.initializer.serialize.event.payload", true);
        // Dynamic partition pruning, if a pruner is present.
        if (pruner != null) {
            pruner.prune();
        }
        final boolean generateConsistentSplits = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TEZ_GENERATE_CONSISTENT_SPLITS);
        LOG.info("GenerateConsistentSplitsInHive=" + generateConsistentSplits);
        final String realInputFormatName = conf.get("mapred.input.format.class");
        if (!userPayloadProto.getGroupingEnabled()) {
            // Should non-grouped splits ever be supported, grouping must also be disabled in the
            // payload (if it is not already).
            throw new RuntimeException("HiveInputFormat does not support non-grouped splits, InputFormatName is: " + realInputFormatName);
        }

        // Grouping needs an instance of the real input format.
        final InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(realInputFormatName), jobConf);

        // FIXME (kept from the original code): without a context a single slot is assumed
        // (for now, totalResource = taskResource for llap).
        final InputInitializerContext initializerContext = getContext();
        final int availableSlots;
        if (initializerContext == null) {
            availableSlots = 1;
        } else {
            final int totalResource = initializerContext.getTotalAvailableResource().getMemory();
            final int taskResource = initializerContext.getVertexTaskResource().getMemory();
            availableSlots = totalResource / taskResource;
        }

        if (HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, 1) <= 1) {
            // Broken configuration from mapred-default.xml: derive a usable minimum split size.
            final long blockSize = conf.getLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT);
            final long minGrouping = conf.getLong(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT);
            final long preferredSplitSize = Math.min(blockSize / 2, minGrouping);
            HiveConf.setLongVar(jobConf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, preferredSplitSize);
            LOG.info("The preferred split size is " + preferredSplitSize);
        }

        // Un-grouped (raw) splits, sorted so that the subsequent grouping is consistent.
        final float waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES, TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT);
        InputSplit[] splits = inputFormat.getSplits(jobConf, (int) (availableSlots * waves));
        Arrays.sort(splits, new InputSplitComparator());
        LOG.info("Number of input splits: " + splits.length + ". " + availableSlots + " available slots, " + waves + " waves. Input format is: " + realInputFormatName);

        // Input counters are only collected when there is a context to publish them to.
        TezCounters tezCounters = null;
        String groupName = null;
        String vertexName = null;
        if (initializerContext != null) {
            tezCounters = new TezCounters();
            groupName = HiveInputCounters.class.getName();
            vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");

            final String rawSplitsCounter = Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(), vertexName);
            tezCounters.findCounter(groupName, rawSplitsCounter).increment(splits.length);

            final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
            final String directoriesCounter = Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(), vertexName);
            tezCounters.findCounter(groupName, directoriesCounter).increment(paths.size());

            // Count distinct files. The path of a file split is assumed to be a file (the only
            // exception being ACID deltas); an isFile check is skipped for performance reasons.
            final Set<String> files = new HashSet<>();
            for (InputSplit inputSplit : splits) {
                if (inputSplit instanceof FileSplit) {
                    files.add(((FileSplit) inputSplit).getPath().toString());
                }
            }
            final String filesCounter = Utilities.getVertexCounterName(HiveInputCounters.INPUT_FILES.name(), vertexName);
            tezCounters.findCounter(groupName, filesCounter).increment(files.size());
        }

        if (work.getIncludedBuckets() != null) {
            splits = pruneBuckets(work, splits);
        }

        // Group the splits and flatten the groups into a single array.
        final Multimap<Integer, InputSplit> groupedSplits = splitGrouper.generateGroupedSplits(jobConf, conf, splits, waves, availableSlots, splitLocationProvider);
        final InputSplit[] flatSplits = groupedSplits.values().toArray(new InputSplit[0]);
        LOG.info("Number of split groups: " + flatSplits.length);

        if (initializerContext != null) {
            final String groupedSplitsCounter = Utilities.getVertexCounterName(HiveInputCounters.GROUPED_INPUT_SPLITS.name(), vertexName);
            tezCounters.findCounter(groupName, groupedSplitsCounter).setValue(flatSplits.length);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Published tez counters: " + tezCounters);
            }
            initializerContext.addCounters(tezCounters);
        }

        final List<TaskLocationHint> locationHints = splitGrouper.createTaskLocationHints(flatSplits, generateConsistentSplits);
        final InputSplitInfoMem inputSplitInfo = new InputSplitInfoMem(flatSplits, locationHints, flatSplits.length, null, jobConf);
        return createEventList(sendSerializedEvents, inputSplitInfo);
    } finally {
        Utilities.clearWork(jobConf);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSplit(org.apache.hadoop.mapred.FileSplit) InputInitializerContext(org.apache.tez.runtime.api.InputInitializerContext) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) VertexLocationHint(org.apache.tez.dag.api.VertexLocationHint) TezCounters(org.apache.tez.common.counters.TezCounters) TaskLocationHint(org.apache.tez.dag.api.TaskLocationHint) InputSplitInfoMem(org.apache.tez.mapreduce.hadoop.InputSplitInfoMem) InputFormat(org.apache.hadoop.mapred.InputFormat) InputSplit(org.apache.hadoop.mapred.InputSplit) HashSet(java.util.HashSet)

Example 10 with TezCounters

Use of org.apache.tez.common.counters.TezCounters in the Apache Hive project.

Class LlapWmSummary, method print.

/**
 * Prints the LLAP workload-management summary to the given console: a title, then one summary
 * line per vertex (in sorted vertex-name order) for which counters could be obtained, and a
 * closing separator.
 *
 * <p>The table header is printed just before the first vertex line, guarded by the
 * {@code first} field.
 *
 * @param console the console to print to
 */
@Override
public void print(SessionState.LogHelper console) {
    console.printInfo("");
    console.printInfo(LLAP_SUMMARY_TITLE);

    final SortedSet<String> sortedVertexNames = new TreeSet<>(progressMap.keySet());
    final Set<StatusGetOpts> counterOpts = new HashSet<>(1);
    counterOpts.add(StatusGetOpts.GET_COUNTERS);
    final String wmCounterGroup = LlapWmCounters.class.getName();

    for (String vertex : sortedVertexNames) {
        final TezCounters counters = vertexCounter(counterOpts, vertex);
        if (counters == null) {
            continue;
        }
        if (!first) {
            // Header has not been emitted yet.
            console.printInfo(SEPARATOR);
            console.printInfo(LLAP_SUMMARY_HEADER);
            console.printInfo(SEPARATOR);
            first = true;
        }
        console.printInfo(vertexSummary(vertex, wmCounterGroup, counters));
    }

    console.printInfo(SEPARATOR);
    console.printInfo("");
}
Also used : StatusGetOpts(org.apache.tez.dag.api.client.StatusGetOpts) TezCounters(org.apache.tez.common.counters.TezCounters)

Aggregations

TezCounters (org.apache.tez.common.counters.TezCounters)11 TezCounter (org.apache.tez.common.counters.TezCounter)5 StatusGetOpts (org.apache.tez.dag.api.client.StatusGetOpts)5 HiveConf (org.apache.hadoop.hive.conf.HiveConf)4 QueryPlan (org.apache.hadoop.hive.ql.QueryPlan)3 TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask)3 SessionState (org.apache.hadoop.hive.ql.session.SessionState)3 CounterGroup (org.apache.tez.common.counters.CounterGroup)3 List (java.util.List)2 BaseWork (org.apache.hadoop.hive.ql.plan.BaseWork)2 Preconditions (com.google.common.base.Preconditions)1 IOException (java.io.IOException)1 InterruptedIOException (java.io.InterruptedIOException)1 StringWriter (java.io.StringWriter)1 EnumSet (java.util.EnumSet)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 Set (java.util.Set)1