Use of org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer in project hive by apache: class ExplainTask, method collectAuthRelatedEntities.
private JSONObject collectAuthRelatedEntities(PrintStream out, ExplainWork work) throws Exception {
  BaseSemanticAnalyzer analyzer = work.getAnalyzer();
  HiveOperation operation = queryState.getHiveOperation();
  JSONObject object = new JSONObject(new LinkedHashMap<>());
  Object jsonInput = toJson("INPUTS", toString(analyzer.getInputs()), out, work);
  if (work.isFormatted()) {
    object.put("INPUTS", jsonInput);
  }
  Object jsonOutput = toJson("OUTPUTS", toString(analyzer.getOutputs()), out, work);
  if (work.isFormatted()) {
    object.put("OUTPUTS", jsonOutput);
  }
  String userName = SessionState.get().getAuthenticator().getUserName();
  Object jsonUser = toJson("CURRENT_USER", userName, out, work);
  if (work.isFormatted()) {
    object.put("CURRENT_USER", jsonUser);
  }
  Object jsonOperation = toJson("OPERATION", operation.name(), out, work);
  if (work.isFormatted()) {
    object.put("OPERATION", jsonOperation);
  }
  if (analyzer.skipAuthorization()) {
    return object;
  }
  final List<String> exceptions = new ArrayList<String>();
  Object delegate = SessionState.get().getActiveAuthorizer();
  if (delegate != null) {
    Class itface = SessionState.get().getAuthorizerInterface();
    Object authorizer = AuthorizationFactory.create(delegate, itface, new AuthorizationFactory.AuthorizationExceptionHandler() {
      public void exception(Exception exception) {
        exceptions.add(exception.getMessage());
      }
    });
    SessionState.get().setActiveAuthorizer(authorizer);
    try {
      Driver.doAuthorization(queryState.getHiveOperation(), analyzer, "");
    } finally {
      SessionState.get().setActiveAuthorizer(delegate);
    }
  }
  if (!exceptions.isEmpty()) {
    Object jsonFails = toJson("AUTHORIZATION_FAILURES", exceptions, out, work);
    if (work.isFormatted()) {
      object.put("AUTHORIZATION_FAILURES", jsonFails);
    }
  }
  return object;
}
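The snippet above swaps in a wrapper built by AuthorizationFactory.create so that authorization failures are collected into the exceptions list instead of aborting the EXPLAIN, then restores the original authorizer in the finally block. Below is a minimal, self-contained sketch of that wrapping idea using only the JDK's dynamic proxies; the Checker interface and collectInsteadOfThrow helper are hypothetical illustrations, not part of Hive or of AuthorizationFactory.

import java.lang.reflect.InvocationHandler;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Proxy;
import java.util.ArrayList;
import java.util.List;

public class CollectingProxyDemo {

  // Stand-in for an authorizer interface; one method that may fail.
  interface Checker {
    void check(String resource) throws Exception;
  }

  // Wraps 'delegate' so exceptions thrown by its methods are recorded in 'sink'
  // rather than propagated -- the same role the exception handler plays above.
  @SuppressWarnings("unchecked")
  static <T> T collectInsteadOfThrow(final T delegate, Class<T> iface, final List<String> sink) {
    InvocationHandler handler = (proxy, method, args) -> {
      try {
        return method.invoke(delegate, args);
      } catch (InvocationTargetException e) {
        sink.add(e.getCause().getMessage()); // record the failure, do not rethrow
        return null;
      }
    };
    return (T) Proxy.newProxyInstance(iface.getClassLoader(), new Class<?>[] { iface }, handler);
  }

  public static void main(String[] args) throws Exception {
    List<String> failures = new ArrayList<>();
    Checker real = resource -> {
      throw new Exception("no SELECT privilege on " + resource);
    };
    Checker quiet = collectInsteadOfThrow(real, Checker.class, failures);
    quiet.check("default.src"); // does not throw
    System.out.println(failures); // [no SELECT privilege on default.src]
  }
}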
Use of org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer in project hive by apache: class RewriteParseContextGenerator, method generateOperatorTree.
/**
 * Parse the input {@link String} command and generate an operator tree.
 * @param queryState the query state used to drive semantic analysis
 * @param command the command to parse
 * @return the root operator of the generated operator tree
 * @throws SemanticException
 */
public static Operator<? extends OperatorDesc> generateOperatorTree(QueryState queryState, String command) throws SemanticException {
  Operator<? extends OperatorDesc> operatorTree;
  try {
    Context ctx = new Context(queryState.getConf());
    ASTNode tree = ParseUtils.parse(command, ctx);
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
    assert (sem instanceof SemanticAnalyzer);
    operatorTree = doSemanticAnalysis((SemanticAnalyzer) sem, tree, ctx);
    LOG.info("Sub-query Semantic Analysis Completed");
  } catch (IOException e) {
    LOG.error("IOException in generating the operator tree for input command - " + command + " ", e);
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  } catch (ParseException e) {
    LOG.error("ParseException in generating the operator tree for input command - " + command + " ", e);
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  } catch (SemanticException e) {
    LOG.error("SemanticException in generating the operator tree for input command - " + command + " ", e);
    LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
    throw new SemanticException(e.getMessage(), e);
  }
  return operatorTree;
}
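The three catch blocks above differ only in the exception type; each logs the failure and rethrows it wrapped in a SemanticException. On Java 7 and later they could be collapsed into a single multi-catch clause. A sketch of that alternative (not the project's actual code):

} catch (IOException | ParseException | SemanticException e) {
  LOG.error("Exception in generating the operator tree for input command - " + command, e);
  throw new SemanticException(e.getMessage(), e);
}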
Use of org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer in project hive by apache: class Driver, method doAuthorization.
/**
 * Do authorization using post-semantic-analysis information in the semantic analyzer.
 * The original command is also passed so that the authorization interface can provide
 * more useful information in logs.
 * @param op the Hive operation being authorized
 * @param sem SemanticAnalyzer used to parse the input query
 * @param command the input query
 * @throws HiveException
 * @throws AuthorizationException
 */
public static void doAuthorization(HiveOperation op, BaseSemanticAnalyzer sem, String command) throws HiveException, AuthorizationException {
  SessionState ss = SessionState.get();
  Hive db = sem.getDb();
  Set<ReadEntity> additionalInputs = new HashSet<ReadEntity>();
  for (Entity e : sem.getInputs()) {
    if (e.getType() == Entity.Type.PARTITION) {
      additionalInputs.add(new ReadEntity(e.getTable()));
    }
  }
  Set<WriteEntity> additionalOutputs = new HashSet<WriteEntity>();
  for (WriteEntity e : sem.getOutputs()) {
    if (e.getType() == Entity.Type.PARTITION) {
      additionalOutputs.add(new WriteEntity(e.getTable(), e.getWriteType()));
    }
  }
  // The following union operation returns a view that traverses the first set
  // once and then each element of the second set, in order, that is not already
  // contained in the first. It does not replace anything in the first set, so
  // for the outputs list the WriteType of each WriteEntity in the first set is
  // preserved.
  Set<ReadEntity> inputs = Sets.union(sem.getInputs(), additionalInputs);
  Set<WriteEntity> outputs = Sets.union(sem.getOutputs(), additionalOutputs);
  if (ss.isAuthorizationModeV2()) {
    // get mapping of tables to columns used
    ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo();
    // colAccessInfo is set only in case of SemanticAnalyzer
    Map<String, List<String>> selectTab2Cols = colAccessInfo != null ? colAccessInfo.getTableToColumnAccessMap() : null;
    Map<String, List<String>> updateTab2Cols = sem.getUpdateColumnAccessInfo() != null ? sem.getUpdateColumnAccessInfo().getTableToColumnAccessMap() : null;
    doAuthorizationV2(ss, op, inputs, outputs, command, selectTab2Cols, updateTab2Cols);
    return;
  }
  if (op == null) {
    throw new HiveException("Operation should not be null");
  }
  HiveAuthorizationProvider authorizer = ss.getAuthorizer();
  if (op.equals(HiveOperation.CREATEDATABASE)) {
    authorizer.authorize(op.getInputRequiredPrivileges(), op.getOutputRequiredPrivileges());
  } else if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) {
    authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
  } else {
    if (op.equals(HiveOperation.IMPORT)) {
      ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
      if (!isa.existsTable()) {
        authorizer.authorize(db.getDatabase(SessionState.get().getCurrentDatabase()), null, HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
      }
    }
  }
  if (outputs != null && outputs.size() > 0) {
    for (WriteEntity write : outputs) {
      if (write.isDummy() || write.isPathType()) {
        continue;
      }
      if (write.getType() == Entity.Type.DATABASE) {
        if (!op.equals(HiveOperation.IMPORT)) {
          // We skip DB check for import here because we already handle it above
          // as a CTAS check.
          authorizer.authorize(write.getDatabase(), null, op.getOutputRequiredPrivileges());
        }
        continue;
      }
      if (write.getType() == WriteEntity.Type.PARTITION) {
        Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false);
        if (part != null) {
          authorizer.authorize(write.getPartition(), null, op.getOutputRequiredPrivileges());
          continue;
        }
      }
      if (write.getTable() != null) {
        authorizer.authorize(write.getTable(), null, op.getOutputRequiredPrivileges());
      }
    }
  }
  if (inputs != null && inputs.size() > 0) {
    Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
    Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
    // determine if partition level privileges should be checked for input tables
    Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
    for (ReadEntity read : inputs) {
      if (read.isDummy() || read.isPathType() || read.getType() == Entity.Type.DATABASE) {
        continue;
      }
      Table tbl = read.getTable();
      if ((read.getPartition() != null) || (tbl != null && tbl.isPartitioned())) {
        String tblName = tbl.getTableName();
        if (tableUsePartLevelAuth.get(tblName) == null) {
          boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE".equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))));
          if (usePartLevelPriv) {
            tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
          } else {
            tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
          }
        }
      }
    }
    // column authorization is checked through table scan operators.
    getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth);
    // cache the results for table authorization
    Set<String> tableAuthChecked = new HashSet<String>();
    for (ReadEntity read : inputs) {
      // if the read is not direct, we do not need to check its authorization.
      if (read.isDummy() || read.isPathType() || !read.isDirect()) {
        continue;
      }
      if (read.getType() == Entity.Type.DATABASE) {
        authorizer.authorize(read.getDatabase(), op.getInputRequiredPrivileges(), null);
        continue;
      }
      Table tbl = read.getTable();
      if (tbl.isView() && sem instanceof SemanticAnalyzer) {
        tab2Cols.put(tbl, sem.getColumnAccessInfo().getTableToColumnAccessMap().get(tbl.getCompleteName()));
      }
      if (read.getPartition() != null) {
        Partition partition = read.getPartition();
        tbl = partition.getTable();
        // use partition level authorization
        if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
          List<String> cols = part2Cols.get(partition);
          if (cols != null && cols.size() > 0) {
            authorizer.authorize(partition.getTable(), partition, cols, op.getInputRequiredPrivileges(), null);
          } else {
            authorizer.authorize(partition, op.getInputRequiredPrivileges(), null);
          }
          continue;
        }
      }
      // table-level authorization (skipped when partition-level auth already covered it above)
      if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) && !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) {
        List<String> cols = tab2Cols.get(tbl);
        if (cols != null && cols.size() > 0) {
          authorizer.authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
        } else {
          authorizer.authorize(tbl, op.getInputRequiredPrivileges(), null);
        }
        tableAuthChecked.add(tbl.getTableName());
      }
    }
  }
}
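The comment before the Sets.union calls is the key to why the partitions' parent tables can be added without disturbing the original outputs: Guava's union view iterates the first set in full and then only those elements of the second set not already present. A small, self-contained illustration, assuming Guava on the classpath (which Hive already has); the Tagged class is a hypothetical stand-in for WriteEntity and its WriteType, not a Hive type.

import com.google.common.collect.Sets;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

public class UnionViewDemo {

  // Equality is on 'key' only, so two Tagged objects with the same key but
  // different tags collide -- just as two WriteEntity objects for the same
  // table could carry different WriteTypes.
  static final class Tagged {
    final String key;
    final String tag;

    Tagged(String key, String tag) {
      this.key = key;
      this.tag = tag;
    }

    @Override
    public boolean equals(Object o) {
      return o instanceof Tagged && ((Tagged) o).key.equals(key);
    }

    @Override
    public int hashCode() {
      return key.hashCode();
    }

    @Override
    public String toString() {
      return key + "(" + tag + ")";
    }
  }

  public static void main(String[] args) {
    Set<Tagged> original = new LinkedHashSet<>(Arrays.asList(new Tagged("t1", "INSERT")));
    Set<Tagged> additional = new LinkedHashSet<>(Arrays.asList(new Tagged("t1", "DDL"), new Tagged("t2", "DDL")));
    // Prints [t1(INSERT), t2(DDL)]: the t1 element from 'original' is kept,
    // the duplicate from 'additional' is skipped.
    System.out.println(Sets.union(original, additional));
  }
}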
Use of org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer in project hive by apache: class Driver, method getTablePartitionUsedColumns.
private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem, Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols, Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
  // table to columns mapping (tab2Cols)
  if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
    SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
    ParseContext parseCtx = querySem.getParseContext();
    for (Map.Entry<String, TableScanOperator> topOpMap : querySem.getParseContext().getTopOps().entrySet()) {
      TableScanOperator tableScanOp = topOpMap.getValue();
      if (!tableScanOp.isInsideView()) {
        Table tbl = tableScanOp.getConf().getTableMetadata();
        List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
        List<FieldSchema> columns = tbl.getCols();
        List<String> cols = new ArrayList<String>();
        for (int i = 0; i < neededColumnIds.size(); i++) {
          cols.add(columns.get(neededColumnIds.get(i)).getName());
        }
        // table permission
        if (tbl.isPartitioned() && Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
          String alias_id = topOpMap.getKey();
          PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp, parseCtx, alias_id);
          Set<Partition> parts = partsList.getPartitions();
          for (Partition part : parts) {
            List<String> existingCols = part2Cols.get(part);
            if (existingCols == null) {
              existingCols = new ArrayList<String>();
            }
            existingCols.addAll(cols);
            part2Cols.put(part, existingCols);
          }
        } else {
          List<String> existingCols = tab2Cols.get(tbl);
          if (existingCols == null) {
            existingCols = new ArrayList<String>();
          }
          existingCols.addAll(cols);
          tab2Cols.put(tbl, existingCols);
        }
      }
    }
  }
}
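The two get/null-check/put sequences above are the classic pre-Java-8 multimap idiom. On Java 8 and later the same merging can be written with Map.computeIfAbsent; a sketch of the equivalent lines (not the project's actual code):

// partition-level case
part2Cols.computeIfAbsent(part, k -> new ArrayList<String>()).addAll(cols);
// table-level case
tab2Cols.computeIfAbsent(tbl, k -> new ArrayList<String>()).addAll(cols);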
Use of org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer in project hive by apache: class Driver, method compile.
// deferClose indicates whether the close/destroy should be deferred when the process has been
// interrupted. It should be set to true if compile is called within another method, such as
// runInternal, which then handles the close itself.
public int compile(String command, boolean resetTaskIds, boolean deferClose) {
  PerfLogger perfLogger = SessionState.getPerfLogger(true);
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
  lDrvState.stateLock.lock();
  try {
    lDrvState.driverState = DriverState.COMPILING;
  } finally {
    lDrvState.stateLock.unlock();
  }
  command = new VariableSubstitution(new HiveVariableSource() {
    @Override
    public Map<String, String> getHiveVariable() {
      return SessionState.get().getHiveVariables();
    }
  }).substitute(conf, command);
  String queryStr = command;
  try {
    // command should be redacted to avoid logging sensitive data
    queryStr = HookUtils.redactLogString(conf, command);
  } catch (Exception e) {
    LOG.warn("WARNING! Query command could not be redacted." + e);
  }
  if (isInterrupted()) {
    // indicate whether resources need to be cleaned up
    return handleInterruption("at beginning of compilation.");
  }
  if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
    // close the existing ctx etc before compiling a new query, but does not destroy driver
    closeInProcess(false);
  }
  if (resetTaskIds) {
    TaskFactory.resetId();
  }
  String queryId = conf.getVar(HiveConf.ConfVars.HIVEQUERYID);
  // save some info for webUI for use after plan is freed
  this.queryDisplay.setQueryStr(queryStr);
  this.queryDisplay.setQueryId(queryId);
  LOG.info("Compiling command(queryId=" + queryId + "): " + queryStr);
  SessionState.get().setupQueryCurrentTimestamp();
  // Whether any error occurred during query compilation. Used for query lifetime hook.
  boolean compileError = false;
  try {
    // Initialize the transaction manager. This must be done before analyze is called.
    final HiveTxnManager txnManager = SessionState.get().initTxnMgr(conf);
    // In case the user hits Ctrl-C twice to kill the Hive CLI JVM, we want to release locks.
    // If compile is being called multiple times, clear the old shutdown hook first.
    ShutdownHookManager.removeShutdownHook(shutdownRunner);
    shutdownRunner = new Runnable() {
      @Override
      public void run() {
        try {
          releaseLocksAndCommitOrRollback(false, txnManager);
        } catch (LockException e) {
          LOG.warn("Exception when releasing locks in ShutdownHook for Driver: " + e.getMessage());
        }
      }
    };
    ShutdownHookManager.addShutdownHook(shutdownRunner, SHUTDOWN_HOOK_PRIORITY);
    if (isInterrupted()) {
      return handleInterruption("before parsing and analysing the query");
    }
    if (ctx == null) {
      ctx = new Context(conf);
    }
    ctx.setTryCount(getTryCount());
    ctx.setCmd(command);
    ctx.setHDFSCleanup(true);
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARSE);
    ASTNode tree = ParseUtils.parse(command, ctx);
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARSE);
    // Trigger query hook before compilation
    queryHooks = loadQueryHooks();
    if (queryHooks != null && !queryHooks.isEmpty()) {
      QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
      qhc.setHiveConf(conf);
      qhc.setCommand(command);
      for (QueryLifeTimeHook hook : queryHooks) {
        hook.beforeCompile(qhc);
      }
    }
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.ANALYZE);
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);
    List<HiveSemanticAnalyzerHook> saHooks = getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK, HiveSemanticAnalyzerHook.class);
    // Flush the metastore cache. This assures that we don't pick up objects from a previous
    // query running in this same thread. This has to be done after we get our semantic
    // analyzer (this is when the connection to the metastore is made) but before we analyze,
    // because at that point we need access to the objects.
    Hive.get().getMSC().flushCache();
    // Do semantic analysis and plan generation
    if (saHooks != null && !saHooks.isEmpty()) {
      HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
      hookCtx.setConf(conf);
      hookCtx.setUserName(userName);
      hookCtx.setIpAddress(SessionState.get().getUserIpAddress());
      hookCtx.setCommand(command);
      for (HiveSemanticAnalyzerHook hook : saHooks) {
        tree = hook.preAnalyze(hookCtx, tree);
      }
      sem.analyze(tree, ctx);
      hookCtx.update(sem);
      for (HiveSemanticAnalyzerHook hook : saHooks) {
        hook.postAnalyze(hookCtx, sem.getAllRootTasks());
      }
    } else {
      sem.analyze(tree, ctx);
    }
    // Record any ACID compliant FileSinkOperators we saw so we can add our transaction ID to
    // them later.
    acidSinks = sem.getAcidFileSinks();
    LOG.info("Semantic Analysis Completed");
    // validate the plan
    sem.validate();
    acidInQuery = sem.hasAcidInQuery();
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
    if (isInterrupted()) {
      return handleInterruption("after analyzing query.");
    }
    // get the output schema
    schema = getSchema(sem, conf);
    plan = new QueryPlan(queryStr, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN), queryId, queryState.getHiveOperation(), schema);
    conf.setQueryString(queryStr);
    conf.set("mapreduce.workflow.id", "hive_" + queryId);
    conf.set("mapreduce.workflow.name", queryStr);
    // initialize FetchTask right here
    if (plan.getFetchTask() != null) {
      plan.getFetchTask().initialize(queryState, plan, null, ctx.getOpContext());
    }
    // do the authorization check
    if (!sem.skipAuthorization() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
      try {
        perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
        doAuthorization(queryState.getHiveOperation(), sem, command);
      } catch (AuthorizationException authExp) {
        console.printError("Authorization failed:" + authExp.getMessage() + ". Use SHOW GRANT to get more details.");
        errorMessage = authExp.getMessage();
        SQLState = "42000";
        return 403;
      } finally {
        perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.DO_AUTHORIZATION);
      }
    }
    if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
      String explainOutput = getExplainOutput(sem, plan, tree);
      if (explainOutput != null) {
        if (conf.getBoolVar(ConfVars.HIVE_LOG_EXPLAIN_OUTPUT)) {
          LOG.info("EXPLAIN output for queryid " + queryId + " : " + explainOutput);
        }
        if (conf.isWebUiQueryInfoCacheEnabled()) {
          queryDisplay.setExplainPlan(explainOutput);
        }
      }
    }
    return 0;
  } catch (Exception e) {
    if (isInterrupted()) {
      return handleInterruption("during query compilation: " + e.getMessage());
    }
    compileError = true;
    ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
    errorMessage = "FAILED: " + e.getClass().getSimpleName();
    if (error != ErrorMsg.GENERIC_ERROR) {
      errorMessage += " [Error " + error.getErrorCode() + "]:";
    }
    // HIVE-4889
    if ((e instanceof IllegalArgumentException) && e.getMessage() == null && e.getCause() != null) {
      errorMessage += " " + e.getCause().getMessage();
    } else {
      errorMessage += " " + e.getMessage();
    }
    if (error == ErrorMsg.TXNMGR_NOT_ACID) {
      errorMessage += ". Failed command: " + queryStr;
    }
    SQLState = error.getSQLState();
    downstreamError = e;
    console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
    // todo: this is bad if returned as cmd shell exit
    // since it exceeds valid range of shell return values
    return error.getErrorCode();
  } finally {
    // Run the after-compilation query hooks; if compilation failed here, the
    // before/after execution hooks will never be executed.
    try {
      if (queryHooks != null && !queryHooks.isEmpty()) {
        QueryLifeTimeHookContext qhc = new QueryLifeTimeHookContextImpl();
        qhc.setHiveConf(conf);
        qhc.setCommand(command);
        for (QueryLifeTimeHook hook : queryHooks) {
          hook.afterCompile(qhc, compileError);
        }
      }
    } catch (Exception e) {
      LOG.warn("Failed when invoking query after-compilation hook.", e);
    }
    double duration = perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.COMPILE) / 1000.00;
    ImmutableMap<String, Long> compileHMSTimings = dumpMetaCallTimingWithoutEx("compilation");
    queryDisplay.setHmsTimings(QueryDisplay.Phase.COMPILATION, compileHMSTimings);
    boolean isInterrupted = isInterrupted();
    if (isInterrupted && !deferClose) {
      closeInProcess(true);
    }
    lDrvState.stateLock.lock();
    try {
      if (isInterrupted) {
        lDrvState.driverState = deferClose ? DriverState.EXECUTING : DriverState.ERROR;
      } else {
        lDrvState.driverState = compileError ? DriverState.ERROR : DriverState.COMPILED;
      }
    } finally {
      lDrvState.stateLock.unlock();
    }
    if (isInterrupted) {
      LOG.info("Compiling command(queryId=" + queryId + ") has been interrupted after " + duration + " seconds");
    } else {
      LOG.info("Completed compiling command(queryId=" + queryId + "); Time taken: " + duration + " seconds");
    }
  }
}
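The compile method above removes the previous shutdown hook before registering a fresh one, so repeated compiles in the same session never accumulate stale lock-release hooks. A minimal, self-contained sketch of that replace-the-hook pattern using plain JDK shutdown hooks rather than Hadoop's ShutdownHookManager; HookReplaceDemo and installHook are hypothetical names used only for illustration.

import java.util.concurrent.atomic.AtomicReference;

public class HookReplaceDemo {

  private static final AtomicReference<Thread> CURRENT = new AtomicReference<>();

  // Register 'cleanup' to run at JVM exit, dropping whatever hook was installed before.
  static void installHook(Runnable cleanup) {
    Thread next = new Thread(cleanup, "cleanup-hook");
    Thread previous = CURRENT.getAndSet(next);
    if (previous != null) {
      Runtime.getRuntime().removeShutdownHook(previous); // clear the old hook, as compile() does
    }
    Runtime.getRuntime().addShutdownHook(next);
  }

  public static void main(String[] args) {
    installHook(() -> System.out.println("release locks for query 1"));
    installHook(() -> System.out.println("release locks for query 2"));
    // At JVM exit only "release locks for query 2" is printed; the first hook was replaced.
  }
}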