use of org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext in project hive by apache.
the class Driver method compile.
// deferClose indicates if the close/destroy should be deferred when the process has been
// interrupted, it should be set to true if the compile is called within another method like
// runInternal, which defers the close to the called in that method.
private void compile(String command, boolean resetTaskIds, boolean deferClose) throws CommandProcessorResponse {
PerfLogger perfLogger = SessionState.getPerfLogger(true);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
lDrvState.stateLock.lock();
try {
lDrvState.driverState = DriverState.COMPILING;
} finally {
lDrvState.stateLock.unlock();
}
command = new VariableSubstitution(new HiveVariableSource() {
@Override
public Map<String, String> getHiveVariable() {
return SessionState.get().getHiveVariables();
}
}).substitute(conf, command);
String queryStr = command;
try {
// command should be redacted to avoid to logging sensitive data
queryStr = HookUtils.redactLogString(conf, command);
} catch (Exception e) {
LOG.warn("WARNING! Query command could not be redacted." + e);
}
checkInterrupted("at beginning of compilation.", null, null);
if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
// close the existing ctx etc before compiling a new query, but does not destroy driver
closeInProcess(false);
}
if (resetTaskIds) {
TaskFactory.resetId();
}
LockedDriverState.setLockedDriverState(lDrvState);
String queryId = queryState.getQueryId();
if (ctx != null) {
setTriggerContext(queryId);
}
// save some info for webUI for use after plan is freed
this.queryDisplay.setQueryStr(queryStr);
this.queryDisplay.setQueryId(queryId);
LOG.info("Compiling command(queryId=" + queryId + "): " + queryStr);
conf.setQueryString(queryStr);
// FIXME: sideeffect will leave the last query set at the session level
SessionState.get().getConf().setQueryString(queryStr);
SessionState.get().setupQueryCurrentTimestamp();
// Whether any error occurred during query compilation. Used for query lifetime hook.
boolean compileError = false;
boolean parseError = false;
try {
// Initialize the transaction manager. This must be done before analyze is called.
if (initTxnMgr != null) {
queryTxnMgr = initTxnMgr;
} else {
queryTxnMgr = SessionState.get().initTxnMgr(conf);
}
if (queryTxnMgr instanceof Configurable) {
((Configurable) queryTxnMgr).setConf(conf);
}
queryState.setTxnManager(queryTxnMgr);
// In case when user Ctrl-C twice to kill Hive CLI JVM, we want to release locks
// if compile is being called multiple times, clear the old shutdownhook
ShutdownHookManager.removeShutdownHook(shutdownRunner);
final HiveTxnManager txnMgr = queryTxnMgr;
shutdownRunner = new Runnable() {
@Override
public void run() {
try {
releaseLocksAndCommitOrRollback(false, txnMgr);
} catch (LockException e) {
LOG.warn("Exception when releasing locks in ShutdownHook for Driver: " + e.getMessage());
}
}
};
ShutdownHookManager.addShutdownHook(shutdownRunner, SHUTDOWN_HOOK_PRIORITY);
checkInterrupted("before parsing and analysing the query", null, null);
if (ctx == null) {
ctx = new Context(conf);
setTriggerContext(queryId);
}
ctx.setRuntimeStatsSource(runtimeStatsSource);
ctx.setCmd(command);
ctx.setHDFSCleanup(true);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
// Trigger query hook before compilation
hookRunner.runBeforeParseHook(command);
ASTNode tree;
try {
tree = ParseUtils.parse(command, ctx);
} catch (ParseException e) {
parseError = true;
throw e;
} finally {
hookRunner.runAfterParseHook(command, parseError);
}
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);
hookRunner.runBeforeCompileHook(command);
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
// Flush the metastore cache. This assures that we don't pick up objects from a previous
// query running in this same thread. This has to be done after we get our semantic
// analyzer (this is when the connection to the metastore is made) but before we analyze,
// because at that point we need access to the objects.
Hive.get().getMSC().flushCache();
BaseSemanticAnalyzer sem;
// Do semantic analysis and plan generation
if (hookRunner.hasPreAnalyzeHooks()) {
HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
hookCtx.setConf(conf);
hookCtx.setUserName(userName);
hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
hookCtx.setCommand(command);
hookCtx.setHiveOperation(queryState.getHiveOperation());
tree = hookRunner.runPreAnalyzeHooks(hookCtx, tree);
sem = SemanticAnalyzerFactory.get(queryState, tree);
openTransaction();
sem.analyze(tree, ctx);
hookCtx.update(sem);
hookRunner.runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
} else {
sem = SemanticAnalyzerFactory.get(queryState, tree);
openTransaction();
sem.analyze(tree, ctx);
}
LOG.info("Semantic Analysis Completed");
// Retrieve information about cache usage for the query.
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
cacheUsage = sem.getCacheUsage();
}
// validate the plan
sem.validate();
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
checkInterrupted("after analyzing query.", null, null);
// get the output schema
schema = getSchema(sem, conf);
plan = new QueryPlan(queryStr, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId, queryState.getHiveOperation(), schema);
conf.set("mapreduce.workflow.id", "hive_" + queryId);
conf.set("mapreduce.workflow.name", queryStr);
// initialize FetchTask right here
if (plan.getFetchTask() != null) {
plan.getFetchTask().initialize(queryState, plan, null, ctx.getOpContext());
}
// do the authorization check
if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
try {
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
doAuthorization(queryState.getHiveOperation(), sem, command);
} catch (AuthorizationException authExp) {
console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details.");
errorMessage = authExp.getMessage();
SQLState = "42000";
throw createProcessorResponse(403);
} finally {
perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
}
}
if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
String explainOutput = getExplainOutput(sem, plan, tree);
if (explainOutput != null) {
LOG.info("EXPLAIN output for queryid " + queryId + " : " + explainOutput);
if (conf.isWebUiQueryInfoCacheEnabled()) {
queryDisplay.setExplainPlan(explainOutput);
}
}
}
} catch (CommandProcessorResponse cpr) {
throw cpr;
} catch (Exception e) {
checkInterrupted("during query compilation: " + e.getMessage(), null, null);
compileError = true;
ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
errorMessage = "FAILED: " + e.getClass().getSimpleName();
if (error != ErrorMsg.GENERIC_ERROR) {
errorMessage += " [Error " + error.getErrorCode() + "]:";
}
// HIVE-4889
if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
errorMessage += " " + e.getCause().getMessage();
} else {
errorMessage += " " + e.getMessage();
}
if (error == ErrorMsg.TXNMGR_NOT_ACID) {
errorMessage += ". Failed command: " + queryStr;
}
SQLState = error.getSQLState();
downstreamError = e;
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
throw createProcessorResponse(error.getErrorCode());
} finally {
// before/after execution hook will never be executed.
if (!parseError) {
try {
hookRunner.runAfterCompilationHook(command, compileError);
} catch (Exception e) {
LOG.warn("Failed when invoking query after-compilation hook.", e);
}
}
double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00;
ImmutableMap<String, Long> compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation");
queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings);
boolean isInterrupted = lDrvState.isAborted();
if (isInterrupted && !deferClose) {
closeInProcess(true);
}
lDrvState.stateLock.lock();
try {
if (isInterrupted) {
lDrvState.driverState = deferClose ? DriverState.EXECUTING : DriverState.ERROR;
} else {
lDrvState.driverState = compileError ? DriverState.ERROR : DriverState.COMPILED;
}
} finally {
lDrvState.stateLock.unlock();
}
if (isInterrupted) {
LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
} else {
LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
}
}
}
use of org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext in project hive by apache.
the class AccurateEstimatesCheckerHook method postAnalyze.
@Override
public void postAnalyze(HiveSemanticAnalyzerHookContext context, List<Task<?>> rootTasks) throws SemanticException {
HiveSemanticAnalyzerHookContext hookContext = context;
HiveConf conf = (HiveConf) hookContext.getConf();
absErr = conf.getDouble("accurate.estimate.checker.absolute.error", 3.0);
relErr = conf.getDouble("accurate.estimate.checker.relative.error", .1);
List<Node> rootOps = Lists.newArrayList();
List<Task<?>> roots = rootTasks;
for (Task<?> task0 : roots) {
if (task0 instanceof ExplainTask) {
ExplainTask explainTask = (ExplainTask) task0;
ExplainWork w = explainTask.getWork();
List<Task<?>> explainRoots = w.getRootTasks();
for (Task<?> task : explainRoots) {
Object work = task.getWork();
if (work instanceof MapredWork) {
MapredWork mapredWork = (MapredWork) work;
MapWork mapWork = mapredWork.getMapWork();
if (mapWork != null) {
rootOps.addAll(mapWork.getAllRootOperators());
}
ReduceWork reduceWork = mapredWork.getReduceWork();
if (reduceWork != null) {
rootOps.addAll(reduceWork.getAllRootOperators());
}
}
if (work instanceof TezWork) {
for (BaseWork bw : ((TezWork) work).getAllWorkUnsorted()) {
rootOps.addAll(bw.getAllRootOperators());
}
}
}
}
}
if (rootOps.isEmpty()) {
return;
}
SemanticDispatcher disp = new DefaultRuleDispatcher(new EstimateCheckerHook(), new HashMap<>(), null);
SemanticGraphWalker ogw = new DefaultGraphWalker(disp);
HashMap<Node, Object> nodeOutput = new HashMap<Node, Object>();
ogw.startWalking(rootOps, nodeOutput);
}
use of org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext in project hive by apache.
the class Compiler method analyze.
private BaseSemanticAnalyzer analyze() throws Exception {
perfLogger.perfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
driverContext.getHookRunner().runBeforeCompileHook(context.getCmd());
// clear CurrentFunctionsInUse set, to capture new set of functions
// that SemanticAnalyzer finds are in use
SessionState.get().getCurrentFunctionsInUse().clear();
// Flush the metastore cache. This assures that we don't pick up objects from a previous
// query running in this same thread. This has to be done after we get our semantic
// analyzer (this is when the connection to the metastore is made) but before we analyze,
// because at that point we need access to the objects.
Hive.get().getMSC().flushCache();
driverContext.setBackupContext(new Context(context));
boolean executeHooks = driverContext.getHookRunner().hasPreAnalyzeHooks();
HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
if (executeHooks) {
hookCtx.setConf(driverContext.getConf());
hookCtx.setUserName(SessionState.get().getUserName());
hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
hookCtx.setCommand(context.getCmd());
hookCtx.setHiveOperation(driverContext.getQueryState().getHiveOperation());
tree = driverContext.getHookRunner().runPreAnalyzeHooks(hookCtx, tree);
}
// SemanticAnalyzerFactory also sets the hive operation in query state
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(driverContext.getQueryState(), tree);
if (!driverContext.isRetrial()) {
if (HiveOperation.REPLDUMP.equals(driverContext.getQueryState().getHiveOperation())) {
setLastReplIdForDump(driverContext.getQueryState().getConf());
}
driverContext.setTxnType(AcidUtils.getTxnType(driverContext.getConf(), tree));
openTransaction(driverContext.getTxnType());
generateValidTxnList();
}
// Do semantic analysis and plan generation
try {
sem.startAnalysis();
sem.analyze(tree, context);
} finally {
sem.endAnalysis();
}
if (executeHooks) {
hookCtx.update(sem);
driverContext.getHookRunner().runPostAnalyzeHooks(hookCtx, sem.getAllRootTasks());
}
LOG.info("Semantic Analysis Completed (retrial = {})", driverContext.isRetrial());
// Retrieve information about cache usage for the query.
if (driverContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED)) {
driverContext.setCacheUsage(sem.getCacheUsage());
}
// validate the plan
sem.validate();
perfLogger.perfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
return sem;
}
Aggregations