Search in sources :

Example 1 with HiveSemanticAnalyzerHookContext

use of org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext in project hive by apache.

the class Driver method compile.

// deferClose indicates if the close/destroy should be deferred when the process has been
// interrupted, it should be set to true if the compile is called within another method like
// runInternal, which defers the close to the called in that method.
private void compile(String command, boolean resetTaskIds, boolean deferClose) throws CommandProcessorResponse {
    PerfLogger perfLogger = SessionState.getPerfLogger(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
    lDrvState.stateLock.lock();
    try {
        lDrvState.driverState = DriverState.COMPILING;
    } finally {
        lDrvState.stateLock.unlock();
    }
    command = new VariableSubstitution(new HiveVariableSource() {

        @Override
        public Map<String, String> getHiveVariable() {
            return SessionState.get().getHiveVariables();
        }
    }).substitute(conf, command);
    String queryStr = command;
    try {
        // command should be redacted to avoid to logging sensitive data
        queryStr = HookUtils.redactLogString(conf, command);
    } catch (Exception e) {
        LOG.warn("WARNING! Query command could not be redacted." + e);
    }
    checkInterrupted("at beginning of compilation.", null, null);
    if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
        // close the existing ctx etc before compiling a new query, but does not destroy driver
        closeInProcess(false);
    }
    if (resetTaskIds) {
        TaskFactory.resetId();
    }
    LockedDriverState.setLockedDriverState(lDrvState);
    String queryId = queryState.getQueryId();
    if (ctx != null) {
        setTriggerContext(queryId);
    }
    // save some info for webUI for use after plan is freed
    this.queryDisplay.setQueryStr(queryStr);
    this.queryDisplay.setQueryId(queryId);
    LOG.info("Compiling command(queryId=" + queryId + "): " + queryStr);
    conf.setQueryString(queryStr);
    // FIXME: sideeffect will leave the last query set at the session level
    SessionState.get().getConf().setQueryString(queryStr);
    SessionState.get().setupQueryCurrentTimestamp();
    // Whether any error occurred during query compilation. Used for query lifetime hook.
    boolean compileError = false;
    boolean parseError = false;
    try {
        // Initialize the transaction manager.  This must be done before analyze is called.
        if (initTxnMgr != null) {
            queryTxnMgr = initTxnMgr;
        } else {
            queryTxnMgr = SessionState.get().initTxnMgr(conf);
        }
        if (queryTxnMgr instanceof Configurable) {
            ((Configurable) queryTxnMgr).setConf(conf);
        }
        queryState.setTxnManager(queryTxnMgr);
        // In case when user Ctrl-C twice to kill Hive CLI JVM, we want to release locks
        // if compile is being called multiple times, clear the old shutdownhook
        ShutdownHookManager.removeShutdownHook(shutdownRunner);
        final HiveTxnManager txnMgr = queryTxnMgr;
        shutdownRunner = new Runnable() {

            @Override
            public void run() {
                try {
                    releaseLocksAndCommitOrRollback(false, txnMgr);
                } catch (LockException e) {
                    LOG.warn("Exception when releasing locks in ShutdownHook for Driver: " + e.getMessage());
                }
            }
        };
        ShutdownHookManager.addShutdownHook(shutdownRunner, SHUTDOWN_HOOK_PRIORITY);
        checkInterrupted("before parsing and analysing the query", null, null);
        if (ctx == null) {
            ctx = new Context(conf);
            setTriggerContext(queryId);
        }
        ctx.setRuntimeStatsSource(runtimeStatsSource);
        ctx.setCmd(command);
        ctx.setHDFSCleanup(true);
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
        // Trigger query hook before compilation
        hookRunner.runBeforeParseHook(command);
        ASTNode tree;
        try {
            tree = ParseUtils.parse(command, ctx);
        } catch (ParseException e) {
            parseError = true;
            throw e;
        } finally {
            hookRunner.runAfterParseHook(command, parseError);
        }
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);
        hookRunner.runBeforeCompileHook(command);
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
        // Flush the metastore cache.  This assures that we don't pick up objects from a previous
        // query running in this same thread.  This has to be done after we get our semantic
        // analyzer (this is when the connection to the metastore is made) but before we analyze,
        // because at that point we need access to the objects.
        Hive.get().getMSC().flushCache();
        BaseSemanticAnalyzer sem;
        // Do semantic analysis and plan generation
        if (hookRunner.hasPreAnalyzeHooks()) {
            HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
            hookCtx.setConf(conf);
            hookCtx.setUserName(userName);
            hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
            hookCtx.setCommand(command);
            hookCtx.setHiveOperation(queryState.getHiveOperation());
            tree = hookRunner.runPreAnalyzeHooks(hookCtx, tree);
            sem = SemanticAnalyzerFactory.get(queryState, tree);
            openTransaction();
            sem.analyze(tree, ctx);
            hookCtx.update(sem);
            hookRunner.runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
        } else {
            sem = SemanticAnalyzerFactory.get(queryState, tree);
            openTransaction();
            sem.analyze(tree, ctx);
        }
        LOG.info("Semantic Analysis Completed");
        // Retrieve information about cache usage for the query.
        if (conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
            cacheUsage = sem.getCacheUsage();
        }
        // validate the plan
        sem.validate();
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
        checkInterrupted("after analyzing query.", null, null);
        // get the output schema
        schema = getSchema(sem, conf);
        plan = new QueryPlan(queryStr, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId, queryState.getHiveOperation(), schema);
        conf.set("mapreduce.workflow.id", "hive_" + queryId);
        conf.set("mapreduce.workflow.name", queryStr);
        // initialize FetchTask right here
        if (plan.getFetchTask() != null) {
            plan.getFetchTask().initialize(queryState, plan, null, ctx.getOpContext());
        }
        // do the authorization check
        if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
            try {
                perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
                doAuthorization(queryState.getHiveOperation(), sem, command);
            } catch (AuthorizationException authExp) {
                console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details.");
                errorMessage = authExp.getMessage();
                SQLState = "42000";
                throw createProcessorResponse(403);
            } finally {
                perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
            }
        }
        if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
            String explainOutput = getExplainOutput(sem, plan, tree);
            if (explainOutput != null) {
                LOG.info("EXPLAIN output for queryid " + queryId + " : " + explainOutput);
                if (conf.isWebUiQueryInfoCacheEnabled()) {
                    queryDisplay.setExplainPlan(explainOutput);
                }
            }
        }
    } catch (CommandProcessorResponse cpr) {
        throw cpr;
    } catch (Exception e) {
        checkInterrupted("during query compilation: " + e.getMessage(), null, null);
        compileError = true;
        ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
        errorMessage = "FAILED: " + e.getClass().getSimpleName();
        if (error != ErrorMsg.GENERIC_ERROR) {
            errorMessage += " [Error " + error.getErrorCode() + "]:";
        }
        // HIVE-4889
        if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
            errorMessage += " " + e.getCause().getMessage();
        } else {
            errorMessage += " " + e.getMessage();
        }
        if (error == ErrorMsg.TXNMGR_NOT_ACID) {
            errorMessage += ". Failed command: " + queryStr;
        }
        SQLState = error.getSQLState();
        downstreamError = e;
        console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
        throw createProcessorResponse(error.getErrorCode());
    } finally {
        // before/after execution hook will never be executed.
        if (!parseError) {
            try {
                hookRunner.runAfterCompilationHook(command, compileError);
            } catch (Exception e) {
                LOG.warn("Failed when invoking query after-compilation hook.", e);
            }
        }
        double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00;
        ImmutableMap<String, Long> compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation");
        queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings);
        boolean isInterrupted = lDrvState.isAborted();
        if (isInterrupted && !deferClose) {
            closeInProcess(true);
        }
        lDrvState.stateLock.lock();
        try {
            if (isInterrupted) {
                lDrvState.driverState = deferClose ? DriverState.EXECUTING : DriverState.ERROR;
            } else {
                lDrvState.driverState = compileError ? DriverState.ERROR : DriverState.COMPILED;
            }
        } finally {
            lDrvState.stateLock.unlock();
        }
        if (isInterrupted) {
            LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
        } else {
            LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
        }
    }
}
Also used : HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) HiveVariableSource(org.apache.hadoop.hive.conf.HiveVariableSource) CommandProcessorResponse(org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) PerfLogger(org.apache.hadoop.hive.ql.log.PerfLogger) Configurable(org.apache.hadoop.conf.Configurable) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) ASTNode(org.apache.hadoop.hive.ql.parse.ASTNode) PrivateHookContext(org.apache.hadoop.hive.ql.hooks.PrivateHookContext) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) HiveAuthzContext(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext) WmContext(org.apache.hadoop.hive.ql.wm.WmContext) HookContext(org.apache.hadoop.hive.ql.hooks.HookContext) HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) VariableSubstitution(org.apache.hadoop.hive.conf.VariableSubstitution) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AuthorizationException(org.apache.hadoop.hive.ql.metadata.AuthorizationException) HiveSemanticAnalyzerHookContextImpl(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) ParseException(org.apache.hadoop.hive.ql.parse.ParseException) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 2 with HiveSemanticAnalyzerHookContext

use of org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext in project hive by apache.

the class AccurateEstimatesCheckerHook method postAnalyze.

@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
    HiveSemanticAnalyzerHookContext hookContext = context;
    HiveConf conf = (HiveConf) hookContext.getConf();
    absErr = conf.getDouble("accurate.estimate.checker.absolute.error", 3.0);
    relErr = conf.getDouble("accurate.estimate.checker.relative.error", .1);
    List<Node> rootOps = Lists.newArrayList();
    List<Task<?>> roots = rootTasks;
    for (Task<?> task0 : roots) {
        if (task0 instanceof ExplainTask) {
            ExplainTask explainTask = (ExplainTask) task0;
            ExplainWork w = explainTask.getWork();
            List<Task<?>> explainRoots = w.getRootTasks();
            for (Task<?> task : explainRoots) {
                Object work = task.getWork();
                if (work instanceof MapredWork) {
                    MapredWork mapredWork = (MapredWork) work;
                    MapWork mapWork = mapredWork.getMapWork();
                    if (mapWork != null) {
                        rootOps.addAll(mapWork.getAllRootOperators());
                    }
                    ReduceWork reduceWork = mapredWork.getReduceWork();
                    if (reduceWork != null) {
                        rootOps.addAll(reduceWork.getAllRootOperators());
                    }
                }
                if (work instanceof TezWork) {
                    for (BaseWork bw : ((TezWork) work).getAllWorkUnsorted()) {
                        rootOps.addAll(bw.getAllRootOperators());
                    }
                }
            }
        }
    }
    if (rootOps.isEmpty()) {
        return;
    }
    SemanticDispatcher disp = new DefaultRuleDispatcher(new EstimateCheckerHook(), new HashMap<>(), null);
    SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
    HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
    ogw.startWalking(rootOps, nodeOutput);
}
Also used : HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) Task(org.apache.hadoop.hive.ql.exec.Task) ExplainTask(org.apache.hadoop.hive.ql.exec.ExplainTask) ExplainTask(org.apache.hadoop.hive.ql.exec.ExplainTask) DefaultRuleDispatcher(org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher) HashMap(java.util.HashMap) Node(org.apache.hadoop.hive.ql.lib.Node) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) ExplainWork(org.apache.hadoop.hive.ql.plan.ExplainWork) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) HiveConf(org.apache.hadoop.hive.conf.HiveConf) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork) TezWork(org.apache.hadoop.hive.ql.plan.TezWork)

Example 3 with HiveSemanticAnalyzerHookContext

use of org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext in project hive by apache.

the class Compiler method analyze.

private BaseSemanticAnalyzer analyze() throws Exception {
    perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
    driverContext.getHookRunner().runBeforeCompileHook(context.getCmd());
    // clear CurrentFunctionsInUse set, to capture new set of functions
    // that SemanticAnalyzer finds are in use
    SessionState.get().getCurrentFunctionsInUse().clear();
    // Flush the metastore cache.  This assures that we don't pick up objects from a previous
    // query running in this same thread.  This has to be done after we get our semantic
    // analyzer (this is when the connection to the metastore is made) but before we analyze,
    // because at that point we need access to the objects.
    Hive.get().getMSC().flushCache();
    driverContext.setBackupContext(new Context(context));
    boolean executeHooks = driverContext.getHookRunner().hasPreAnalyzeHooks();
    HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
    if (executeHooks) {
        hookCtx.setConf(driverContext.getConf());
        hookCtx.setUserName(SessionState.get().getUserName());
        hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
        hookCtx.setCommand(context.getCmd());
        hookCtx.setHiveOperation(driverContext.getQueryState().getHiveOperation());
        tree = driverContext.getHookRunner().runPreAnalyzeHooks(hookCtx, tree);
    }
    // SemanticAnalyzerFactory also sets the hive operation in query state
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(driverContext.getQueryState(), tree);
    if (!driverContext.isRetrial()) {
        if (HiveOperation.REPLDUMP.equals(driverContext.getQueryState().getHiveOperation())) {
            setLastReplIdForDump(driverContext.getQueryState().getConf());
        }
        driverContext.setTxnType(AcidUtils.getTxnType(driverContext.getConf(), tree));
        openTransaction(driverContext.getTxnType());
        generateValidTxnList();
    }
    // Do semantic analysis and plan generation
    try {
        sem.startAnalysis();
        sem.analyze(tree, context);
    } finally {
        sem.endAnalysis();
    }
    if (executeHooks) {
        hookCtx.update(sem);
        driverContext.getHookRunner().runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
    }
    LOG.info("Semantic Analysis Completed (retrial = {})", driverContext.isRetrial());
    // Retrieve information about cache usage for the query.
    if (driverContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
        driverContext.setCacheUsage(sem.getCacheUsage());
    }
    // validate the plan
    sem.validate();
    perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
    return sem;
}
Also used : HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) HiveSemanticAnalyzerHookContext(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext) BaseSemanticAnalyzer(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer) HiveSemanticAnalyzerHookContextImpl(org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl)

Aggregations

HiveSemanticAnalyzerHookContext (org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext)3 HashMap (java.util.HashMap)2 BaseSemanticAnalyzer (org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer)2 HiveSemanticAnalyzerHookContextImpl (org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 IOException (java.io.IOException)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Configurable (org.apache.hadoop.conf.Configurable)1 HiveConf (org.apache.hadoop.hive.conf.HiveConf)1 HiveVariableSource (org.apache.hadoop.hive.conf.HiveVariableSource)1 VariableSubstitution (org.apache.hadoop.hive.conf.VariableSubstitution)1 ExplainTask (org.apache.hadoop.hive.ql.exec.ExplainTask)1 Task (org.apache.hadoop.hive.ql.exec.Task)1 HookContext (org.apache.hadoop.hive.ql.hooks.HookContext)1 PrivateHookContext (org.apache.hadoop.hive.ql.hooks.PrivateHookContext)1 DefaultGraphWalker (org.apache.hadoop.hive.ql.lib.DefaultGraphWalker)1 DefaultRuleDispatcher (org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher)1 Node (org.apache.hadoop.hive.ql.lib.Node)1 SemanticDispatcher (org.apache.hadoop.hive.ql.lib.SemanticDispatcher)1