Search in sources :

Example 1 with TaskRunner

use of org.apache.hadoop.hive.ql.exec.TaskRunner in project hive by apache.

the class Driver method execute.

public int execute(boolean deferClose) throws CommandNeedRetryException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);
    boolean noName = StringUtils.isEmpty(conf.get(MRJobConfig.JOB_NAME));
    int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
    Metrics metrics = MetricsFactory.getInstance();
    String queryId = conf.getVar(HiveConf.ConfVars.HIVEQUERYID);
    // Get the query string from the conf file as the compileInternal() method might
    // hide sensitive information during query redaction.
    String queryStr = conf.getQueryString();
    lDrvState.stateLock.lock();
    try {
        // a combined compile/execute in runInternal, throws the error
        if (lDrvState.driverState != DriverState.COMPILED && lDrvState.driverState != DriverState.EXECUTING) {
            SQLState = "HY008";
            errorMessage = "FAILED: query " + queryStr + " has " + (lDrvState.driverState == DriverState.INTERRUPT ? "been cancelled" : "not been compiled.");
            console.printError(errorMessage);
            return 1000;
        } else {
            lDrvState.driverState = DriverState.EXECUTING;
        }
    } finally {
        lDrvState.stateLock.unlock();
    }
    maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
    HookContext hookContext = null;
    // Whether there's any error occurred during query execution. Used for query lifetime hook.
    boolean executionError = false;
    try {
        LOG.info("Executing command(queryId=" + queryId + "): " + queryStr);
        // compile and execute can get called from different threads in case of HS2
        // so clear timing in this thread's Hive object before proceeding.
        Hive.get().clearMetaCallTiming();
        plan.setStarted();
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
            SessionState.get().getHiveHistory().logPlanProgress(plan);
        }
        resStream = null;
        SessionState ss = SessionState.get();
        hookContext = new HookContext(plan, queryState, ctx.getPathToCS(), ss.getUserFromAuthenticator(), ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId, ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger);
        hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
        for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
            if (peh instanceof ExecuteWithHookContext) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
                ((ExecuteWithHookContext) peh).run(hookContext);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
            } else if (peh instanceof PreExecute) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
                ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), Utils.getUGI());
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
            }
        }
        // Trigger query hooks before query execution.
        if (queryHooks != null && !queryHooks.isEmpty()) {
            QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
            qhc.setHiveConf(conf);
            qhc.setCommand(queryStr);
            qhc.setHookContext(hookContext);
            for (QueryLifeTimeHook hook : queryHooks) {
                hook.beforeExecution(qhc);
            }
        }
        setQueryDisplays(plan.getRootTasks());
        int mrJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
        int jobs = mrJobs + Utilities.getTezTasks(plan.getRootTasks()).size() + Utilities.getSparkTasks(plan.getRootTasks()).size();
        if (jobs > 0) {
            logMrWarning(mrJobs);
            console.printInfo("Query ID = " + queryId);
            console.printInfo("Total jobs = " + jobs);
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
            SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
        }
        String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
        if (isInterrupted()) {
            return handleInterruption("before running tasks.");
        }
        DriverContext driverCxt = new DriverContext(ctx);
        driverCxt.prepare(plan);
        ctx.setHDFSCleanup(true);
        // for canceling the query (should be bound to session?)
        this.driverCxt = driverCxt;
        SessionState.get().setMapRedStats(new LinkedHashMap<String, MapRedStats>());
        SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
        SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());
        // Add root Tasks to runnable
        for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
            // incorrect results.
            assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
            driverCxt.addToRunnable(tsk);
            if (metrics != null) {
                tsk.updateTaskMetrics(metrics);
            }
        }
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS);
        // Loop while you either have tasks running, or tasks queued up
        while (driverCxt.isRunning()) {
            // Launch upto maxthreads tasks
            Task<? extends Serializable> task;
            while ((task = driverCxt.getRunnable(maxthreads)) != null) {
                TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt);
                if (!runner.isRunning()) {
                    break;
                }
            }
            // poll the Tasks to see which one completed
            TaskRunner tskRun = driverCxt.pollFinished();
            if (tskRun == null) {
                continue;
            }
            hookContext.addCompleteTask(tskRun);
            queryDisplay.setTaskResult(tskRun.getTask().getId(), tskRun.getTaskResult());
            Task<? extends Serializable> tsk = tskRun.getTask();
            TaskResult result = tskRun.getTaskResult();
            int exitVal = result.getExitVal();
            if (isInterrupted()) {
                return handleInterruption("when checking the execution result.");
            }
            if (exitVal != 0) {
                if (tsk.ifRetryCmdWhenFail()) {
                    driverCxt.shutdown();
                    // in case we decided to run everything in local mode, restore the
                    // the jobtracker setting to its initial value
                    ctx.restoreOriginalTracker();
                    throw new CommandNeedRetryException();
                }
                Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
                if (backupTask != null) {
                    setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
                    console.printError(errorMessage);
                    errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
                    console.printError(errorMessage);
                    // add backup task to runnable
                    if (DriverContext.isLaunchable(backupTask)) {
                        driverCxt.addToRunnable(backupTask);
                    }
                    continue;
                } else {
                    setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
                    invokeFailureHooks(perfLogger, hookContext, errorMessage + Strings.nullToEmpty(tsk.getDiagnosticsMessage()), result.getTaskError());
                    SQLState = "08S01";
                    console.printError(errorMessage);
                    driverCxt.shutdown();
                    // in case we decided to run everything in local mode, restore the
                    // the jobtracker setting to its initial value
                    ctx.restoreOriginalTracker();
                    return exitVal;
                }
            }
            driverCxt.finished(tskRun);
            if (SessionState.get() != null) {
                SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
                SessionState.get().getHiveHistory().endTask(queryId, tsk);
            }
            if (tsk.getChildTasks() != null) {
                for (Task<? extends Serializable> child : tsk.getChildTasks()) {
                    if (DriverContext.isLaunchable(child)) {
                        driverCxt.addToRunnable(child);
                    }
                }
            }
        }
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS);
        // in case we decided to run everything in local mode, restore the
        // the jobtracker setting to its initial value
        ctx.restoreOriginalTracker();
        if (driverCxt.isShutdown()) {
            SQLState = "HY008";
            errorMessage = "FAILED: Operation cancelled";
            invokeFailureHooks(perfLogger, hookContext, errorMessage, null);
            console.printError(errorMessage);
            return 1000;
        }
        // remove incomplete outputs.
        // Some incomplete outputs may be added at the beginning, for eg: for dynamic partitions.
        // remove them
        HashSet<WriteEntity> remOutputs = new LinkedHashSet<WriteEntity>();
        for (WriteEntity output : plan.getOutputs()) {
            if (!output.isComplete()) {
                remOutputs.add(output);
            }
        }
        for (WriteEntity output : remOutputs) {
            plan.getOutputs().remove(output);
        }
        hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
        // Get all the post execution hooks and execute them.
        for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
            if (peh instanceof ExecuteWithHookContext) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
                ((ExecuteWithHookContext) peh).run(hookContext);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
            } else if (peh instanceof PostExecute) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
                ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo() : null), Utils.getUGI());
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
            }
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
            SessionState.get().getHiveHistory().printRowCount(queryId);
        }
        releasePlan(plan);
    } catch (CommandNeedRetryException e) {
        executionError = true;
        throw e;
    } catch (Throwable e) {
        executionError = true;
        if (isInterrupted()) {
            return handleInterruption("during query execution: \n" + e.getMessage());
        }
        ctx.restoreOriginalTracker();
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
        }
        // TODO: do better with handling types of Exception here
        errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
        if (hookContext != null) {
            try {
                invokeFailureHooks(perfLogger, hookContext, errorMessage, e);
            } catch (Exception t) {
                LOG.warn("Failed to invoke failure hook", t);
            }
        }
        SQLState = "08S01";
        downstreamError = e;
        console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return (12);
    } finally {
        // Trigger query hooks after query completes its execution.
        try {
            if (queryHooks != null && !queryHooks.isEmpty()) {
                QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
                qhc.setHiveConf(conf);
                qhc.setCommand(queryStr);
                qhc.setHookContext(hookContext);
                for (QueryLifeTimeHook hook : queryHooks) {
                    hook.afterExecution(qhc, executionError);
                }
            }
        } catch (Exception e) {
            LOG.warn("Failed when invoking query after execution hook", e);
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().endQuery(queryId);
        }
        if (noName) {
            conf.set(MRJobConfig.JOB_NAME, "");
        }
        double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE) / 1000.00;
        ImmutableMap<String, Long> executionHMSTimings = dumpMetaCallTimingWithoutEx("execution");
        queryDisplay.setHmsTimings(QueryDisplay.Phase.EXECUTION, executionHMSTimings);
        Map<String, MapRedStats> stats = SessionState.get().getMapRedStats();
        if (stats != null && !stats.isEmpty()) {
            long totalCpu = 0;
            console.printInfo("MapReduce Jobs Launched: ");
            for (Map.Entry<String, MapRedStats> entry : stats.entrySet()) {
                console.printInfo("Stage-" + entry.getKey() + ": " + entry.getValue());
                totalCpu += entry.getValue().getCpuMSec();
            }
            console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
        }
        boolean isInterrupted = isInterrupted();
        if (isInterrupted && !deferClose) {
            closeInProcess(true);
        }
        lDrvState.stateLock.lock();
        try {
            if (isInterrupted) {
                if (!deferClose) {
                    lDrvState.driverState = DriverState.ERROR;
                }
            } else {
                lDrvState.driverState = executionError ? DriverState.ERROR : DriverState.EXECUTED;
            }
        } finally {
            lDrvState.stateLock.unlock();
        }
        if (isInterrupted) {
            LOG.info("Executing command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
        } else {
            LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
        }
    }
    if (console != null) {
        console.printInfo("OK");
    }
    return (0);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SessionState(org.apache.hadoop.hive.ql.session.SessionState) QueryLifeTimeHookContextImpl(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContextImpl) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ExecuteWithHookContext(org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext) HookContext(org.apache.hadoop.hive.ql.hooks.HookContext) QueryLifeTimeHookContext(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext) HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) QueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook) MetricsQueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.MetricsQueryLifeTimeHook) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner) Metrics(org.apache.hadoop.hive.common.metrics.common.Metrics) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) LinkedList(java.util.LinkedList) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) HiveSemanticAnalyzerHook(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook) QueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook) Hook(org.apache.hadoop.hive.ql.hooks.Hook) MetricsQueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.MetricsQueryLifeTimeHook) PreExecute(org.apache.hadoop.hive.ql.hooks.PreExecute) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) PostExecute(org.apache.hadoop.hive.ql.hooks.PostExecute) ExecuteWithHookContext(org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext) TaskResult(org.apache.hadoop.hive.ql.exec.TaskResult) QueryLifeTimeHookContext(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 2 with TaskRunner

use of org.apache.hadoop.hive.ql.exec.TaskRunner in project hive by apache.

the class TaskQueue method shutdown.

/**
 * Cleans up remaining tasks in case of failure.
 */
public synchronized void shutdown() {
    LOG.debug("Shutting down query " + ctx.getCmd());
    shutdown = true;
    for (TaskRunner runner : running) {
        if (runner.isRunning()) {
            Task<?> task = runner.getTask();
            LOG.warn("Shutting down task : " + task);
            try {
                task.shutdown();
            } catch (Exception e) {
                CONSOLE.printError("Exception on shutting down task " + task.getId() + ": " + e);
            }
            Thread thread = runner.getRunner();
            if (thread != null) {
                thread.interrupt();
            }
        }
    }
    running.clear();
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner)

Example 3 with TaskRunner

use of org.apache.hadoop.hive.ql.exec.TaskRunner in project hive by apache.

the class Executor method launchTask.

private TaskRunner launchTask(Task<?> task, boolean noName, String jobName, int jobCount) throws HiveException {
    SessionState.get().getHiveHistory().startTask(driverContext.getQueryId(), task, task.getClass().getName());
    if (task.isMapRedTask() && !(task instanceof ConditionalTask)) {
        if (noName) {
            driverContext.getConf().set(MRJobConfig.JOB_NAME, jobName + " (" + task.getId() + ")");
        }
        taskQueue.incCurJobNo(1);
        CONSOLE.printInfo("Launching Job " + taskQueue.getCurJobNo() + " out of " + jobCount);
    }
    task.initialize(driverContext.getQueryState(), driverContext.getPlan(), taskQueue, context);
    TaskRunner taskRun = new TaskRunner(task, taskQueue);
    taskQueue.launching(taskRun);
    if (HiveConf.getBoolVar(task.getConf(), HiveConf.ConfVars.EXECPARALLEL) && task.canExecuteInParallel()) {
        LOG.info("Starting task [" + task + "] in parallel");
        taskRun.start();
    } else {
        LOG.info("Starting task [" + task + "] in serial mode");
        taskRun.runSequential();
    }
    return taskRun;
}
Also used : ConditionalTask(org.apache.hadoop.hive.ql.exec.ConditionalTask) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner)

Example 4 with TaskRunner

use of org.apache.hadoop.hive.ql.exec.TaskRunner in project hive by apache.

the class Executor method launchTasks.

private void launchTasks(boolean noName, int jobCount, String jobName) throws HiveException {
    // Launch upto maxthreads tasks
    Task<?> task;
    int maxthreads = HiveConf.getIntVar(driverContext.getConf(), HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
    while ((task = taskQueue.getRunnable(maxthreads)) != null) {
        TaskRunner runner = launchTask(task, noName, jobName, jobCount);
        if (!runner.isRunning()) {
            break;
        }
    }
}
Also used : TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner)

Example 5 with TaskRunner

use of org.apache.hadoop.hive.ql.exec.TaskRunner in project hive by apache.

the class Executor method handleFinished.

private void handleFinished() throws Exception {
    // poll the Tasks to see which one completed
    TaskRunner taskRun = taskQueue.pollFinished();
    if (taskRun == null) {
        return;
    }
    /*
      This should be removed eventually. HIVE-17814 gives more detail
      explanation of whats happening and HIVE-17815 as to why this is done.
      Briefly for replication the graph is huge and so memory pressure is going to be huge if
      we keep a lot of references around.
    */
    String opName = driverContext.getPlan().getOperationName();
    boolean isReplicationOperation = opName.equals(HiveOperation.REPLDUMP.getOperationName()) || opName.equals(HiveOperation.REPLLOAD.getOperationName());
    if (!isReplicationOperation) {
        hookContext.addCompleteTask(taskRun);
    }
    driverContext.getQueryDisplay().setTaskResult(taskRun.getTask().getId(), taskRun.getTaskResult());
    Task<?> task = taskRun.getTask();
    TaskResult result = taskRun.getTaskResult();
    int exitVal = result.getExitVal();
    SessionState.get().getHiveHistory().setTaskProperty(driverContext.getQueryId(), task.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
    SessionState.get().getHiveHistory().endTask(driverContext.getQueryId(), task);
    DriverUtils.checkInterrupted(driverState, driverContext, "when checking the execution result.", hookContext, SessionState.getPerfLogger());
    if (exitVal != 0) {
        handleTaskFailure(task, result, exitVal);
        return;
    }
    taskQueue.finished(taskRun);
    if (task.getChildTasks() != null) {
        for (Task<?> child : task.getChildTasks()) {
            if (TaskQueue.isLaunchable(child)) {
                taskQueue.addToRunnable(child);
            }
        }
    }
}
Also used : TaskResult(org.apache.hadoop.hive.ql.exec.TaskResult) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner)

Aggregations

TaskRunner (org.apache.hadoop.hive.ql.exec.TaskRunner)13 TaskResult (org.apache.hadoop.hive.ql.exec.TaskResult)4 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)4 HiveConf (org.apache.hadoop.hive.conf.HiveConf)3 SessionState (org.apache.hadoop.hive.ql.session.SessionState)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 LinkedHashMap (java.util.LinkedHashMap)2 LinkedHashSet (java.util.LinkedHashSet)2 Map (java.util.Map)2 Metrics (org.apache.hadoop.hive.common.metrics.common.Metrics)2 QueryPlan (org.apache.hadoop.hive.ql.QueryPlan)2 ConditionalTask (org.apache.hadoop.hive.ql.exec.ConditionalTask)2 HookContext (org.apache.hadoop.hive.ql.hooks.HookContext)2 WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity)2 LockException (org.apache.hadoop.hive.ql.lockmgr.LockException)2 PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger)2 AuthorizationException (org.apache.hadoop.hive.ql.metadata.AuthorizationException)2 HiveSemanticAnalyzerHookContext (org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext)2