Search in sources :

Example 1 with PerfLogger

use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

the class Hive method loadPartition.

/**
   * Load a directory into a Hive Table Partition - Alters existing content of
   * the partition with the contents of loadPath. - If the partition does not
   * exist - one is created - files in loadPath are moved into Hive. But the
   * directory itself is not removed.
   *
   * @param loadPath
   *          Directory containing files to load into Table
   * @param  tbl
   *          name of table to be loaded.
   * @param partSpec
   *          defines which partition needs to be loaded
   * @param replace
   *          if true - replace files in the partition, otherwise add files to
   *          the partition
   * @param inheritTableSpecs if true, on [re]creating the partition, take the
   *          location/inputformat/outputformat/serde details from table spec
   * @param isSrcLocal
   *          If the source directory is LOCAL
   * @param isAcid true if this is an ACID operation
   */
public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
    Path tblDataLocationPath = tbl.getDataLocation();
    try {
        Partition oldPart = getPartition(tbl, partSpec, false);
        /**
       * Move files before creating the partition since down stream processes
       * check for existence of partition in metadata before accessing the data.
       * If partition is created before data is moved, downstream waiting
       * processes might move forward with partial data
       */
        Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
        Path newPartPath = null;
        if (inheritTableSpecs) {
            Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec));
            newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath());
            if (oldPart != null) {
                /*
           * If we are moving the partition across filesystem boundaries
           * inherit from the table properties. Otherwise (same filesystem) use the
           * original partition location.
           *
           * See: HIVE-1707 and HIVE-2117 for background
           */
                FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
                FileSystem loadPathFS = loadPath.getFileSystem(getConf());
                if (FileUtils.equalsFileSystem(oldPartPathFS, loadPathFS)) {
                    newPartPath = oldPartPath;
                }
            }
        } else {
            newPartPath = oldPartPath;
        }
        List<Path> newFiles = null;
        PerfLogger perfLogger = SessionState.getPerfLogger();
        perfLogger.PerfLogBegin("MoveTask", "FileMoves");
        if (replace || (oldPart == null && !isAcid)) {
            replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(), isSrcLocal);
        } else {
            if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null) {
                newFiles = Collections.synchronizedList(new ArrayList<Path>());
            }
            FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
            Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
        }
        perfLogger.PerfLogEnd("MoveTask", "FileMoves");
        Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
        alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
        validatePartition(newTPart);
        if ((null != newFiles) || replace) {
            fireInsertEvent(tbl, partSpec, newFiles);
        } else {
            LOG.debug("No new files were created, and is not a replace. Skipping generating INSERT event.");
        }
        //column stats will be inaccurate
        StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
        // recreate the partition if it existed before
        if (isSkewedStoreAsSubdir) {
            org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
            SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
            /* Construct list bucketing location mappings from sub-directory name. */
            Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(newPartPath, skewedInfo);
            /* Add list bucketing location mappings. */
            skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
            newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
        }
        if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
            StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
        }
        if (oldPart == null) {
            newTPart.getTPartition().setParameters(new HashMap<String, String>());
            if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                StatsSetupConst.setBasicStatsStateForCreateTable(newTPart.getParameters(), StatsSetupConst.TRUE);
            }
            MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters());
            try {
                LOG.debug("Adding new partition " + newTPart.getSpec());
                getSychronizedMSC().add_partition(newTPart.getTPartition());
            } catch (AlreadyExistsException aee) {
                // With multiple users concurrently issuing insert statements on the same partition has
                // a side effect that some queries may not see a partition at the time when they're issued,
                // but will realize the partition is actually there when it is trying to add such partition
                // to the metastore and thus get AlreadyExistsException, because some earlier query just created it (race condition).
                // For example, imagine such a table is created:
                //  create table T (name char(50)) partitioned by (ds string);
                // and the following two queries are launched at the same time, from different sessions:
                //  insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
                //  insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException
                // In that case, we want to retry with alterPartition.
                LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
                setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
            }
        } else {
            setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
        }
        return newTPart;
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (MetaException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (InvalidOperationException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (TException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TException(org.apache.thrift.TException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) FileSystem(org.apache.hadoop.fs.FileSystem) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) ArrayList(java.util.ArrayList) List(java.util.List) LinkedList(java.util.LinkedList) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveMetaException(org.apache.hadoop.hive.metastore.HiveMetaException)

Example 2 with PerfLogger

use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

the class Driver method execute.

public int execute(boolean deferClose) throws CommandNeedRetryException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_EXECUTE);
    boolean noName = StringUtils.isEmpty(conf.get(MRJobConfig.JOB_NAME));
    int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
    Metrics metrics = MetricsFactory.getInstance();
    String queryId = conf.getVar(HiveConf.ConfVars.HIVEQUERYID);
    // Get the query string from the conf file as the compileInternal() method might
    // hide sensitive information during query redaction.
    String queryStr = conf.getQueryString();
    lDrvState.stateLock.lock();
    try {
        // a combined compile/execute in runInternal, throws the error
        if (lDrvState.driverState != DriverState.COMPILED && lDrvState.driverState != DriverState.EXECUTING) {
            SQLState = "HY008";
            errorMessage = "FAILED: query " + queryStr + " has " + (lDrvState.driverState == DriverState.INTERRUPT ? "been cancelled" : "not been compiled.");
            console.printError(errorMessage);
            return 1000;
        } else {
            lDrvState.driverState = DriverState.EXECUTING;
        }
    } finally {
        lDrvState.stateLock.unlock();
    }
    maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
    HookContext hookContext = null;
    // Whether there's any error occurred during query execution. Used for query lifetime hook.
    boolean executionError = false;
    try {
        LOG.info("Executing command(queryId=" + queryId + "): " + queryStr);
        // compile and execute can get called from different threads in case of HS2
        // so clear timing in this thread's Hive object before proceeding.
        Hive.get().clearMetaCallTiming();
        plan.setStarted();
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
            SessionState.get().getHiveHistory().logPlanProgress(plan);
        }
        resStream = null;
        SessionState ss = SessionState.get();
        hookContext = new HookContext(plan, queryState, ctx.getPathToCS(), ss.getUserFromAuthenticator(), ss.getUserIpAddress(), InetAddress.getLocalHost().getHostAddress(), operationId, ss.getSessionId(), Thread.currentThread().getName(), ss.isHiveServerQuery(), perfLogger);
        hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
        for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
            if (peh instanceof ExecuteWithHookContext) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
                ((ExecuteWithHookContext) peh).run(hookContext);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
            } else if (peh instanceof PreExecute) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
                ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), Utils.getUGI());
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PRE_HOOK + peh.getClass().getName());
            }
        }
        // Trigger query hooks before query execution.
        if (queryHooks != null && !queryHooks.isEmpty()) {
            QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
            qhc.setHiveConf(conf);
            qhc.setCommand(queryStr);
            qhc.setHookContext(hookContext);
            for (QueryLifeTimeHook hook : queryHooks) {
                hook.beforeExecution(qhc);
            }
        }
        setQueryDisplays(plan.getRootTasks());
        int mrJobs = Utilities.getMRTasks(plan.getRootTasks()).size();
        int jobs = mrJobs + Utilities.getTezTasks(plan.getRootTasks()).size() + Utilities.getSparkTasks(plan.getRootTasks()).size();
        if (jobs > 0) {
            logMrWarning(mrJobs);
            console.printInfo("Query ID = " + queryId);
            console.printInfo("Total jobs = " + jobs);
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
            SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
        }
        String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
        if (isInterrupted()) {
            return handleInterruption("before running tasks.");
        }
        DriverContext driverCxt = new DriverContext(ctx);
        driverCxt.prepare(plan);
        ctx.setHDFSCleanup(true);
        // for canceling the query (should be bound to session?)
        this.driverCxt = driverCxt;
        SessionState.get().setMapRedStats(new LinkedHashMap<String, MapRedStats>());
        SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
        SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());
        // Add root Tasks to runnable
        for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
            // incorrect results.
            assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
            driverCxt.addToRunnable(tsk);
            if (metrics != null) {
                tsk.updateTaskMetrics(metrics);
            }
        }
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RUN_TASKS);
        // Loop while you either have tasks running, or tasks queued up
        while (driverCxt.isRunning()) {
            // Launch upto maxthreads tasks
            Task<? extends Serializable> task;
            while ((task = driverCxt.getRunnable(maxthreads)) != null) {
                TaskRunner runner = launchTask(task, queryId, noName, jobname, jobs, driverCxt);
                if (!runner.isRunning()) {
                    break;
                }
            }
            // poll the Tasks to see which one completed
            TaskRunner tskRun = driverCxt.pollFinished();
            if (tskRun == null) {
                continue;
            }
            hookContext.addCompleteTask(tskRun);
            queryDisplay.setTaskResult(tskRun.getTask().getId(), tskRun.getTaskResult());
            Task<? extends Serializable> tsk = tskRun.getTask();
            TaskResult result = tskRun.getTaskResult();
            int exitVal = result.getExitVal();
            if (isInterrupted()) {
                return handleInterruption("when checking the execution result.");
            }
            if (exitVal != 0) {
                if (tsk.ifRetryCmdWhenFail()) {
                    driverCxt.shutdown();
                    // in case we decided to run everything in local mode, restore the
                    // the jobtracker setting to its initial value
                    ctx.restoreOriginalTracker();
                    throw new CommandNeedRetryException();
                }
                Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
                if (backupTask != null) {
                    setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
                    console.printError(errorMessage);
                    errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
                    console.printError(errorMessage);
                    // add backup task to runnable
                    if (DriverContext.isLaunchable(backupTask)) {
                        driverCxt.addToRunnable(backupTask);
                    }
                    continue;
                } else {
                    setErrorMsgAndDetail(exitVal, result.getTaskError(), tsk);
                    invokeFailureHooks(perfLogger, hookContext, errorMessage + Strings.nullToEmpty(tsk.getDiagnosticsMessage()), result.getTaskError());
                    SQLState = "08S01";
                    console.printError(errorMessage);
                    driverCxt.shutdown();
                    // in case we decided to run everything in local mode, restore the
                    // the jobtracker setting to its initial value
                    ctx.restoreOriginalTracker();
                    return exitVal;
                }
            }
            driverCxt.finished(tskRun);
            if (SessionState.get() != null) {
                SessionState.get().getHiveHistory().setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
                SessionState.get().getHiveHistory().endTask(queryId, tsk);
            }
            if (tsk.getChildTasks() != null) {
                for (Task<? extends Serializable> child : tsk.getChildTasks()) {
                    if (DriverContext.isLaunchable(child)) {
                        driverCxt.addToRunnable(child);
                    }
                }
            }
        }
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RUN_TASKS);
        // in case we decided to run everything in local mode, restore the
        // the jobtracker setting to its initial value
        ctx.restoreOriginalTracker();
        if (driverCxt.isShutdown()) {
            SQLState = "HY008";
            errorMessage = "FAILED: Operation cancelled";
            invokeFailureHooks(perfLogger, hookContext, errorMessage, null);
            console.printError(errorMessage);
            return 1000;
        }
        // remove incomplete outputs.
        // Some incomplete outputs may be added at the beginning, for eg: for dynamic partitions.
        // remove them
        HashSet<WriteEntity> remOutputs = new LinkedHashSet<WriteEntity>();
        for (WriteEntity output : plan.getOutputs()) {
            if (!output.isComplete()) {
                remOutputs.add(output);
            }
        }
        for (WriteEntity output : remOutputs) {
            plan.getOutputs().remove(output);
        }
        hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
        // Get all the post execution hooks and execute them.
        for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
            if (peh instanceof ExecuteWithHookContext) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
                ((ExecuteWithHookContext) peh).run(hookContext);
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
            } else if (peh instanceof PostExecute) {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
                ((PostExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo() : null), Utils.getUGI());
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.POST_HOOK + peh.getClass().getName());
            }
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
            SessionState.get().getHiveHistory().printRowCount(queryId);
        }
        releasePlan(plan);
    } catch (CommandNeedRetryException e) {
        executionError = true;
        throw e;
    } catch (Throwable e) {
        executionError = true;
        if (isInterrupted()) {
            return handleInterruption("during query execution: \n" + e.getMessage());
        }
        ctx.restoreOriginalTracker();
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
        }
        // TODO: do better with handling types of Exception here
        errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
        if (hookContext != null) {
            try {
                invokeFailureHooks(perfLogger, hookContext, errorMessage, e);
            } catch (Exception t) {
                LOG.warn("Failed to invoke failure hook", t);
            }
        }
        SQLState = "08S01";
        downstreamError = e;
        console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        return (12);
    } finally {
        // Trigger query hooks after query completes its execution.
        try {
            if (queryHooks != null && !queryHooks.isEmpty()) {
                QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
                qhc.setHiveConf(conf);
                qhc.setCommand(queryStr);
                qhc.setHookContext(hookContext);
                for (QueryLifeTimeHook hook : queryHooks) {
                    hook.afterExecution(qhc, executionError);
                }
            }
        } catch (Exception e) {
            LOG.warn("Failed when invoking query after execution hook", e);
        }
        if (SessionState.get() != null) {
            SessionState.get().getHiveHistory().endQuery(queryId);
        }
        if (noName) {
            conf.set(MRJobConfig.JOB_NAME, "");
        }
        double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DRIVER_EXECUTE) / 1000.00;
        ImmutableMap<String, Long> executionHMSTimings = dumpMetaCallTimingWithoutEx("execution");
        queryDisplay.setHmsTimings(QueryDisplay.Phase.EXECUTION, executionHMSTimings);
        Map<String, MapRedStats> stats = SessionState.get().getMapRedStats();
        if (stats != null && !stats.isEmpty()) {
            long totalCpu = 0;
            console.printInfo("MapReduce Jobs Launched: ");
            for (Map.Entry<String, MapRedStats> entry : stats.entrySet()) {
                console.printInfo("Stage-" + entry.getKey() + ": " + entry.getValue());
                totalCpu += entry.getValue().getCpuMSec();
            }
            console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
        }
        boolean isInterrupted = isInterrupted();
        if (isInterrupted && !deferClose) {
            closeInProcess(true);
        }
        lDrvState.stateLock.lock();
        try {
            if (isInterrupted) {
                if (!deferClose) {
                    lDrvState.driverState = DriverState.ERROR;
                }
            } else {
                lDrvState.driverState = executionError ? DriverState.ERROR : DriverState.EXECUTED;
            }
        } finally {
            lDrvState.stateLock.unlock();
        }
        if (isInterrupted) {
            LOG.info("Executing command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
        } else {
            LOG.info("Completed executing command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
        }
    }
    if (console != null) {
        console.printInfo("OK");
    }
    return (0);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SessionState(org.apache.hadoop.hive.ql.session.SessionState) QueryLifeTimeHookContextImpl(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContextImpl) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) ExecuteWithHookContext(org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext) HookContext(org.apache.hadoop.hive.ql.hooks.HookContext) QueryLifeTimeHookContext(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext) HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) QueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook) MetricsQueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.MetricsQueryLifeTimeHook) TaskRunner(org.apache.hadoop.hive.ql.exec.TaskRunner) Metrics(org.apache.hadoop.hive.common.metrics.common.Metrics) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) List(java.util.List) LinkedList(java.util.LinkedList) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) HiveSemanticAnalyzerHook(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook) QueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHook) Hook(org.apache.hadoop.hive.ql.hooks.Hook) MetricsQueryLifeTimeHook(org.apache.hadoop.hive.ql.hooks.MetricsQueryLifeTimeHook) PreExecute(org.apache.hadoop.hive.ql.hooks.PreExecute) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) PostExecute(org.apache.hadoop.hive.ql.hooks.PostExecute) ExecuteWithHookContext(org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext) TaskResult(org.apache.hadoop.hive.ql.exec.TaskResult) QueryLifeTimeHookContext(org.apache.hadoop.hive.ql.hooks.QueryLifeTimeHookContext) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 3 with PerfLogger

use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

the class Driver method compile.

// deferClose indicates if the close/destroy should be deferred when the process has been
// interrupted, it should be set to true if the compile is called within another method like
// runInternal, which defers the close to the called in that method.
private void compile(String command, boolean resetTaskIds, boolean deferClose) throws CommandProcessorResponse {
    PerfLogger perfLogger = SessionState.getPerfLogger(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
    lDrvState.stateLock.lock();
    try {
        lDrvState.driverState = DriverState.COMPILING;
    } finally {
        lDrvState.stateLock.unlock();
    }
    command = new VariableSubstitution(new HiveVariableSource() {

        @Override
        public Map<String, String> getHiveVariable() {
            return SessionState.get().getHiveVariables();
        }
    }).substitute(conf, command);
    String queryStr = command;
    try {
        // command should be redacted to avoid to logging sensitive data
        queryStr = HookUtils.redactLogString(conf, command);
    } catch (Exception e) {
        LOG.warn("WARNING! Query command could not be redacted." + e);
    }
    checkInterrupted("at beginning of compilation.", null, null);
    if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
        // close the existing ctx etc before compiling a new query, but does not destroy driver
        closeInProcess(false);
    }
    if (resetTaskIds) {
        TaskFactory.resetId();
    }
    LockedDriverState.setLockedDriverState(lDrvState);
    String queryId = queryState.getQueryId();
    if (ctx != null) {
        setTriggerContext(queryId);
    }
    // save some info for webUI for use after plan is freed
    this.queryDisplay.setQueryStr(queryStr);
    this.queryDisplay.setQueryId(queryId);
    LOG.info("Compiling command(queryId=" + queryId + "): " + queryStr);
    conf.setQueryString(queryStr);
    // FIXME: sideeffect will leave the last query set at the session level
    SessionState.get().getConf().setQueryString(queryStr);
    SessionState.get().setupQueryCurrentTimestamp();
    // Whether any error occurred during query compilation. Used for query lifetime hook.
    boolean compileError = false;
    boolean parseError = false;
    try {
        // Initialize the transaction manager.  This must be done before analyze is called.
        if (initTxnMgr != null) {
            queryTxnMgr = initTxnMgr;
        } else {
            queryTxnMgr = SessionState.get().initTxnMgr(conf);
        }
        if (queryTxnMgr instanceof Configurable) {
            ((Configurable) queryTxnMgr).setConf(conf);
        }
        queryState.setTxnManager(queryTxnMgr);
        // In case when user Ctrl-C twice to kill Hive CLI JVM, we want to release locks
        // if compile is being called multiple times, clear the old shutdownhook
        ShutdownHookManager.removeShutdownHook(shutdownRunner);
        final HiveTxnManager txnMgr = queryTxnMgr;
        shutdownRunner = new Runnable() {

            @Override
            public void run() {
                try {
                    releaseLocksAndCommitOrRollback(false, txnMgr);
                } catch (LockException e) {
                    LOG.warn("Exception when releasing locks in ShutdownHook for Driver: " + e.getMessage());
                }
            }
        };
        ShutdownHookManager.addShutdownHook(shutdownRunner, SHUTDOWN_HOOK_PRIORITY);
        checkInterrupted("before parsing and analysing the query", null, null);
        if (ctx == null) {
            ctx = new Context(conf);
            setTriggerContext(queryId);
        }
        ctx.setRuntimeStatsSource(runtimeStatsSource);
        ctx.setCmd(command);
        ctx.setHDFSCleanup(true);
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
        // Trigger query hook before compilation
        hookRunner.runBeforeParseHook(command);
        ASTNode tree;
        try {
            tree = ParseUtils.parse(command, ctx);
        } catch (ParseException e) {
            parseError = true;
            throw e;
        } finally {
            hookRunner.runAfterParseHook(command, parseError);
        }
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);
        hookRunner.runBeforeCompileHook(command);
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
        // Flush the metastore cache.  This assures that we don't pick up objects from a previous
        // query running in this same thread.  This has to be done after we get our semantic
        // analyzer (this is when the connection to the metastore is made) but before we analyze,
        // because at that point we need access to the objects.
        Hive.get().getMSC().flushCache();
        BaseSemanticAnalyzer sem;
        // Do semantic analysis and plan generation
        if (hookRunner.hasPreAnalyzeHooks()) {
            HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
            hookCtx.setConf(conf);
            hookCtx.setUserName(userName);
            hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
            hookCtx.setCommand(command);
            hookCtx.setHiveOperation(queryState.getHiveOperation());
            tree = hookRunner.runPreAnalyzeHooks(hookCtx, tree);
            sem = SemanticAnalyzerFactory.get(queryState, tree);
            openTransaction();
            sem.analyze(tree, ctx);
            hookCtx.update(sem);
            hookRunner.runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
        } else {
            sem = SemanticAnalyzerFactory.get(queryState, tree);
            openTransaction();
            sem.analyze(tree, ctx);
        }
        LOG.info("Semantic Analysis Completed");
        // Retrieve information about cache usage for the query.
        if (conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
            cacheUsage = sem.getCacheUsage();
        }
        // validate the plan
        sem.validate();
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
        checkInterrupted("after analyzing query.", null, null);
        // get the output schema
        schema = getSchema(sem, conf);
        plan = new QueryPlan(queryStr, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId, queryState.getHiveOperation(), schema);
        conf.set("mapreduce.workflow.id", "hive_" + queryId);
        conf.set("mapreduce.workflow.name", queryStr);
        // initialize FetchTask right here
        if (plan.getFetchTask() != null) {
            plan.getFetchTask().initialize(queryState, plan, null, ctx.getOpContext());
        }
        // do the authorization check
        if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
            try {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
                doAuthorization(queryState.getHiveOperation(), sem, command);
            } catch (AuthorizationException authExp) {
                console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details.");
                errorMessage = authExp.getMessage();
                SQLState = "42000";
                throw createProcessorResponse(403);
            } finally {
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
            }
        }
        if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
            String explainOutput = getExplainOutput(sem, plan, tree);
            if (explainOutput != null) {
                LOG.info("EXPLAIN output for queryid " + queryId + " : " + explainOutput);
                if (conf.isWebUiQueryInfoCacheEnabled()) {
                    queryDisplay.setExplainPlan(explainOutput);
                }
            }
        }
    } catch (CommandProcessorResponse cpr) {
        throw cpr;
    } catch (Exception e) {
        checkInterrupted("during query compilation: " + e.getMessage(), null, null);
        compileError = true;
        ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
        errorMessage = "FAILED: " + e.getClass().getSimpleName();
        if (error != ErrorMsg.GENERIC_ERROR) {
            errorMessage += " [Error " + error.getErrorCode() + "]:";
        }
        // HIVE-4889
        if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
            errorMessage += " " + e.getCause().getMessage();
        } else {
            errorMessage += " " + e.getMessage();
        }
        if (error == ErrorMsg.TXNMGR_NOT_ACID) {
            errorMessage += ". Failed command: " + queryStr;
        }
        SQLState = error.getSQLState();
        downstreamError = e;
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw createProcessorResponse(error.getErrorCode());
    } finally {
        // before/after execution hook will never be executed.
        if (!parseError) {
            try {
                hookRunner.runAfterCompilationHook(command, compileError);
            } catch (Exception e) {
                LOG.warn("Failed when invoking query after-compilation hook.", e);
            }
        }
        double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00;
        ImmutableMap<String, Long> compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation");
        queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings);
        boolean isInterrupted = lDrvState.isAborted();
        if (isInterrupted && !deferClose) {
            closeInProcess(true);
        }
        lDrvState.stateLock.lock();
        try {
            if (isInterrupted) {
                lDrvState.driverState = deferClose ? DriverState.EXECUTING : DriverState.ERROR;
            } else {
                lDrvState.driverState = compileError ? DriverState.ERROR : DriverState.COMPILED;
            }
        } finally {
            lDrvState.stateLock.unlock();
        }
        if (isInterrupted) {
            LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
        } else {
            LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
        }
    }
}
Also used : HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) HiveVariableSource(org.apache.hadoop.hive.conf.HiveVariableSource) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) Configurable(org.apache.hadoop.conf.Configurable) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) PrivateHookContext(org.apache.hadoop.hive.ql.hooks.PrivateHookContext) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) HiveAuthzContext(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) HookContext(org.apache.hadoop.hive.ql.hooks.HookContext) HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) VariableSubstitution(org.apache.hadoop.hive.conf.VariableSubstitution) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) HiveSemanticAnalyzerHookContextImpl(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 4 with PerfLogger

use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

the class Driver method postExecutionCacheActions.

private void postExecutionCacheActions() throws Exception {
    if (cacheUsage != null) {
        if (cacheUsage.getStatus() == CacheUsage.CacheStatus.QUERY_USING_CACHE) {
            // Using a previously cached result.
            CacheEntry cacheEntry = cacheUsage.getCacheEntry();
            // Reader count already incremented during cache lookup.
            // Save to usedCacheEntry to ensure reader is released after query.
            this.usedCacheEntry = cacheEntry;
        } else if (cacheUsage.getStatus() == CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS && cacheUsage.getCacheEntry() != null && plan.getFetchTask() != null) {
            // Save results to the cache for future queries to use.
            PerfLogger perfLogger = SessionState.getPerfLogger();
            perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE);
            boolean savedToCache = QueryResultsCache.getInstance().setEntryValid(cacheUsage.getCacheEntry(), plan.getFetchTask().getWork());
            LOG.info("savedToCache: {}", savedToCache);
            if (savedToCache) {
                useFetchFromCache(cacheUsage.getCacheEntry());
                // setEntryValid() already increments the reader count. Set usedCacheEntry so it gets released.
                this.usedCacheEntry = cacheUsage.getCacheEntry();
            }
            perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE);
        }
    }
}
Also used : PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) CacheEntry(org.apache.hadoop.hive.ql.cache.results.QueryResultsCache.CacheEntry)

Example 5 with PerfLogger

use of org.apache.hadoop.hive.ql.log.PerfLogger in project hive by apache.

the class Driver method releaseLocksAndCommitOrRollback.

/**
 * @param commit if there is an open transaction and if true, commit,
 *               if false rollback.  If there is no open transaction this parameter is ignored.
 * @param txnManager an optional existing transaction manager retrieved earlier from the session
 */
@VisibleForTesting
public void releaseLocksAndCommitOrRollback(boolean commit, HiveTxnManager txnManager) throws LockException {
    PerfLogger perfLogger = SessionState.getPerfLogger();
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
    HiveTxnManager txnMgr;
    if (txnManager == null) {
        // Default to driver's txn manager if no txn manager specified
        txnMgr = queryTxnMgr;
    } else {
        txnMgr = txnManager;
    }
    // If we've opened a transaction we need to commit or rollback rather than explicitly
    // releasing the locks.
    conf.unset(ValidTxnList.VALID_TXNS_KEY);
    conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
    if (!checkConcurrency()) {
        return;
    }
    if (txnMgr.isTxnOpen()) {
        if (commit) {
            if (conf.getBoolVar(ConfVars.HIVE_IN_TEST) && conf.getBoolVar(ConfVars.HIVETESTMODEROLLBACKTXN)) {
                txnMgr.rollbackTxn();
            } else {
                // both commit & rollback clear ALL locks for this tx
                txnMgr.commitTxn();
            }
        } else {
            txnMgr.rollbackTxn();
        }
    } else {
        // since there is no tx, we only have locks for current query (if any)
        if (ctx != null && ctx.getHiveLocks() != null) {
            hiveLocks.addAll(ctx.getHiveLocks());
        }
        txnMgr.releaseLocks(hiveLocks);
    }
    hiveLocks.clear();
    if (ctx != null) {
        ctx.setHiveLocks(null);
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.RELEASE_LOCKS);
}
Also used : PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger)55 IOException (java.io.IOException)20 ArrayList (java.util.ArrayList)18 LockException (org.apache.hadoop.hive.ql.lockmgr.LockException)16 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)12 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)11 TException (org.apache.thrift.TException)11 ExecutionException (java.util.concurrent.ExecutionException)10 HiveMetaException (org.apache.hadoop.hive.metastore.HiveMetaException)10 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)10 Path (org.apache.hadoop.fs.Path)9 AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException)9 FileNotFoundException (java.io.FileNotFoundException)8 UnknownHostException (java.net.UnknownHostException)8 JDODataStoreException (javax.jdo.JDODataStoreException)8 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)8 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)8 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)8 TApplicationException (org.apache.thrift.TApplicationException)8 LinkedList (java.util.LinkedList)7