use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.
the class SemanticAnalyzer method analyzeInternal.
void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
// 1. Generate Resolved Parse tree from syntax tree
LOG.info("Starting Semantic Analysis");
// change the location of position alias process here
processPositionAlias(ast);
PlannerContext plannerCtx = pcf.create();
if (!genResolvedParseTree(ast, plannerCtx)) {
return;
}
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
for (String alias : qb.getSubqAliases()) {
removeOBInSubQuery(qb.getSubqForAlias(alias));
}
}
// Check query results cache.
// If no masking/filtering required, then we can check the cache now, before
// generating the operator tree and going through CBO.
// Otherwise we have to wait until after the masking/filtering step.
boolean isCacheEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED);
QueryResultsCache.LookupInfo lookupInfo = null;
boolean needsTransform = needsTransform();
if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo)) {
return;
}
}
// 2. Gen OP Tree from resolved Parse Tree
Operator sinkOp = genOPTree(ast, plannerCtx);
if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) {
// Here we rewrite the * and also the masking table
ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), ctx, db, tabNameToTabObject, ignoredTokens);
if (tree != ast) {
plannerCtx = pcf.create();
ctx.setSkipTableMasking(true);
init(true);
// change the location of position alias process here
processPositionAlias(tree);
genResolvedParseTree(tree, plannerCtx);
if (this instanceof CalcitePlanner) {
((CalcitePlanner) this).resetCalciteConfiguration();
}
sinkOp = genOPTree(tree, plannerCtx);
}
}
// here, after applying the masking/filtering rewrite rules to the AST.
if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo)) {
return;
}
}
// 3. Deduce Resultset Schema
if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
} else {
// succeeds.
if (resultSchema == null) {
resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
}
}
// 4. Generate Parse Context for Optimizer & Physical compiler
copyInfoToQueryProperties(queryProperties);
ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
// Set the semijoin hints in parse context
pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
// Set the mapjoin hint if it needs to be disabled.
pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
// 5. Take care of view creation
if (createVwDesc != null) {
if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
return;
}
if (!ctx.isCboSucceeded()) {
saveViewDefinition();
}
// validate the create view statement at this point, the createVwDesc gets
// all the information for semanticcheck
validateCreateView();
if (createVwDesc.isMaterialized()) {
createVwDesc.setTablesUsed(getTablesUsed(pCtx));
} else {
// Since we're only creating a view (not executing it), we don't need to
// optimize or translate the plan (and in fact, those procedures can
// interfere with the view creation). So skip the rest of this method.
ctx.setResDir(null);
ctx.setResFile(null);
try {
PlanUtils.addInputsForView(pCtx);
} catch (HiveException e) {
throw new SemanticException(e);
}
// Generate lineage info for create view statements
// if LineageLogger hook is configured.
// Add the transformation that computes the lineage information.
Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
ArrayList<Transform> transformations = new ArrayList<Transform>();
transformations.add(new HiveOpConverterPostProc());
transformations.add(new Generator(postExecHooks));
for (Transform t : transformations) {
pCtx = t.transform(pCtx);
}
// we just use view name as location.
queryState.getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
}
return;
}
}
// 6. Generate table access stats if required
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
}
// 7. Perform Logical optimization
if (LOG.isDebugEnabled()) {
LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
Optimizer optm = new Optimizer();
optm.setPctx(pCtx);
optm.initialize(conf);
pCtx = optm.optimize();
if (pCtx.getColumnAccessInfo() != null) {
// set ColumnAccessInfo for view column authorization
setColumnAccessInfo(pCtx.getColumnAccessInfo());
}
if (LOG.isDebugEnabled()) {
LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
// 8. Generate column access stats if required - wait until column pruning
// takes place during optimization
boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
// view column access info is carried by this.getColumnAccessInfo().
setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
}
// TEZ..)
if (!ctx.getExplainLogical()) {
TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
compiler.init(queryState, console, db);
compiler.compile(pCtx, rootTasks, inputs, outputs);
fetchTask = pCtx.getFetchTask();
}
// find all Acid FileSinkOperatorS
QueryPlanPostProcessor qp = new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId());
LOG.info("Completed plan generation");
// 10. put accessed columns to readEntity
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
}
if (isCacheEnabled && lookupInfo != null) {
if (queryCanBeCached()) {
QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo);
// Specify that the results of this query can be cached.
setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo));
}
}
}
use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.
the class Optimizer method initialize.
/**
* Create the list of transformations.
*
* @param hiveConf
*/
public void initialize(HiveConf hiveConf) {
boolean isTezExecEngine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
boolean isSparkExecEngine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark");
boolean bucketMapJoinOptimizer = false;
transformations = new ArrayList<Transform>();
// Add the additional postprocessing transformations needed if
// we are translating Calcite operators into Hive operators.
transformations.add(new HiveOpConverterPostProc());
// Add the transformation that computes the lineage information.
Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(hiveConf, HiveConf.ConfVars.POSTEXECHOOKS))));
if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_LINEAGE_INFO) || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
transformations.add(new Generator(postExecHooks));
}
// Try to transform OR predicates in Filter into simpler IN clauses first
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) && !pctx.getContext().isCboSucceeded()) {
final int min = HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
transformations.add(new PointLookupOptimizer(min));
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPARTITIONCOLUMNSEPARATOR)) {
transformations.add(new PartitionColumnsSeparator());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) && !pctx.getContext().isCboSucceeded()) {
transformations.add(new PredicateTransitivePropagate());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
transformations.add(new ConstantPropagate());
}
transformations.add(new SyntheticJoinPredicate());
transformations.add(new PredicatePushDown());
} else if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) && pctx.getContext().isCboSucceeded()) {
transformations.add(new SyntheticJoinPredicate());
transformations.add(new SimplePredicatePushDown());
transformations.add(new RedundantDynamicPruningConditionsRemoval());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) && !pctx.getContext().isCboSucceeded()) {
// We run constant propagation twice because after predicate pushdown, filter expressions
// are combined and may become eligible for reduction (like is not null filter).
transformations.add(new ConstantPropagate());
}
transformations.add(new SortedDynPartitionTimeGranularityOptimizer());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)) {
transformations.add(new PartitionPruner());
transformations.add(new PartitionConditionRemover());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
/* Add list bucketing pruner. */
transformations.add(new ListBucketingPruner());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) && !pctx.getContext().isCboSucceeded()) {
// PartitionPruner may create more folding opportunities, run ConstantPropagate again.
transformations.add(new ConstantPropagate());
}
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) || HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT)) {
transformations.add(new GroupByOptimizer());
}
transformations.add(new ColumnPruner());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVECOUNTDISTINCTOPTIMIZER) && (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_IN_TEST) || isTezExecEngine)) {
transformations.add(new CountDistinctRewriteProc());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME)) {
if (!isTezExecEngine) {
transformations.add(new SkewJoinOptimizer());
} else {
LOG.warn("Skew join is currently not supported in tez! Disabling the skew join optimization.");
}
}
transformations.add(new SamplePruner());
MapJoinProcessor mapJoinProcessor = isSparkExecEngine ? new SparkMapJoinProcessor() : new MapJoinProcessor();
transformations.add(mapJoinProcessor);
if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) && !isTezExecEngine && !isSparkExecEngine) {
transformations.add(new BucketMapJoinOptimizer());
bucketMapJoinOptimizer = true;
}
// BucketMapJoinOptimizer and SortedMergeBucketMapJoinOptimizer
if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN)) && !isTezExecEngine && !isSparkExecEngine) {
if (!bucketMapJoinOptimizer) {
// No need to add BucketMapJoinOptimizer twice
transformations.add(new BucketMapJoinOptimizer());
}
transformations.add(new SortedMergeBucketMapJoinOptimizer());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING)) {
transformations.add(new BucketingSortingReduceSinkOptimizer());
}
transformations.add(new UnionProcessor());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.NWAYJOINREORDER)) {
transformations.add(new JoinReorder());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.TEZ_OPTIMIZE_BUCKET_PRUNING) && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
final boolean compatMode = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.TEZ_OPTIMIZE_BUCKET_PRUNING_COMPAT);
transformations.add(new FixedBucketPruningOptimizer(compatMode));
}
transformations.add(new BucketVersionPopulator());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) && !isTezExecEngine) {
transformations.add(new ReduceSinkDeDuplication());
}
transformations.add(new NonBlockingOpDeDupProc());
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
transformations.add(new IdentityProjectRemover());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) {
transformations.add(new GlobalLimitOptimizer());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCORRELATION) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME) && !isTezExecEngine && !isSparkExecEngine) {
transformations.add(new CorrelationOptimizer());
}
if (HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE) > 0) {
transformations.add(new LimitPushdownOptimizer());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT)) {
transformations.add(new OrderlessLimitPushDownOptimizer());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES)) {
transformations.add(new StatsOptimizer());
}
if (pctx.getContext().isExplainSkipExecution() && !isTezExecEngine && !isSparkExecEngine) {
transformations.add(new AnnotateWithStatistics());
transformations.add(new AnnotateWithOpTraits());
}
if (!HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSION).equals("none")) {
// must be called last
transformations.add(new SimpleFetchOptimizer());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) {
transformations.add(new SimpleFetchAggregation());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_TABLE_PROPERTIES_FROM_SERDE)) {
transformations.add(new TablePropertyEnrichmentOptimizer());
}
}
use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.
the class SemanticAnalyzer method analyzeInternal.
void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
// 1. Generate Resolved Parse tree from syntax tree
LOG.info("Starting Semantic Analysis");
//change the location of position alias process here
processPositionAlias(ast);
if (!genResolvedParseTree(ast, plannerCtx)) {
return;
}
// 2. Gen OP Tree from resolved Parse Tree
Operator sinkOp = genOPTree(ast, plannerCtx);
if (!unparseTranslator.isEnabled() && tableMask.isEnabled()) {
// Here we rewrite the * and also the masking table
ASTNode tree = rewriteASTWithMaskAndFilter(ast);
if (tree != ast) {
ctx.setSkipTableMasking(true);
init(true);
//change the location of position alias process here
processPositionAlias(tree);
genResolvedParseTree(tree, plannerCtx);
if (this instanceof CalcitePlanner) {
((CalcitePlanner) this).resetCalciteConfiguration();
}
sinkOp = genOPTree(tree, plannerCtx);
}
}
// 3. Deduce Resultset Schema
if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
} else {
// succeeds.
if (resultSchema == null) {
resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
}
}
// 4. Generate Parse Context for Optimizer & Physical compiler
copyInfoToQueryProperties(queryProperties);
ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
// 5. Take care of view creation
if (createVwDesc != null) {
if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
return;
}
if (!ctx.isCboSucceeded()) {
saveViewDefinition();
}
// validate the create view statement at this point, the createVwDesc gets
// all the information for semanticcheck
validateCreateView();
if (!createVwDesc.isMaterialized()) {
// Since we're only creating a view (not executing it), we don't need to
// optimize or translate the plan (and in fact, those procedures can
// interfere with the view creation). So skip the rest of this method.
ctx.setResDir(null);
ctx.setResFile(null);
try {
PlanUtils.addInputsForView(pCtx);
} catch (HiveException e) {
throw new SemanticException(e);
}
// Generate lineage info for create view statements
// if LineageLogger hook is configured.
// Add the transformation that computes the lineage information.
Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
ArrayList<Transform> transformations = new ArrayList<Transform>();
transformations.add(new HiveOpConverterPostProc());
transformations.add(new Generator());
for (Transform t : transformations) {
pCtx = t.transform(pCtx);
}
// we just use view name as location.
SessionState.get().getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
}
return;
}
}
// 6. Generate table access stats if required
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
}
// 7. Perform Logical optimization
if (LOG.isDebugEnabled()) {
LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
Optimizer optm = new Optimizer();
optm.setPctx(pCtx);
optm.initialize(conf);
pCtx = optm.optimize();
if (pCtx.getColumnAccessInfo() != null) {
// set ColumnAccessInfo for view column authorization
setColumnAccessInfo(pCtx.getColumnAccessInfo());
}
FetchTask origFetchTask = pCtx.getFetchTask();
if (LOG.isDebugEnabled()) {
LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
// 8. Generate column access stats if required - wait until column pruning
// takes place during optimization
boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
// view column access info is carried by this.getColumnAccessInfo().
setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
}
// TEZ..)
if (!ctx.getExplainLogical()) {
TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
compiler.init(queryState, console, db);
compiler.compile(pCtx, rootTasks, inputs, outputs);
fetchTask = pCtx.getFetchTask();
}
LOG.info("Completed plan generation");
// 10. put accessed columns to readEntity
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
}
// 11. if desired check we're not going over partition scan limits
if (!ctx.isExplainSkipExecution()) {
enforceScanLimits(pCtx, origFetchTask);
}
return;
}
use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.
the class SemanticAnalyzer method analyzeInternal.
@SuppressWarnings("checkstyle:methodlength")
void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticException {
LOG.info("Starting Semantic Analysis");
// 1. Generate Resolved Parse tree from syntax tree
boolean needsTransform = needsTransform();
// change the location of position alias process here
processPositionAlias(ast);
cacheTableHelper.populateCache(ctx.getParsedTables(), conf, getTxnMgr());
PlannerContext plannerCtx = pcf.get();
if (!genResolvedParseTree(ast, plannerCtx)) {
return;
}
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
for (String alias : qb.getSubqAliases()) {
removeOBInSubQuery(qb.getSubqForAlias(alias));
}
}
final String llapIOETLSkipFormat = HiveConf.getVar(conf, ConfVars.LLAP_IO_ETL_SKIP_FORMAT);
if (qb.getParseInfo().hasInsertTables() || qb.isCTAS()) {
if (llapIOETLSkipFormat.equalsIgnoreCase("encode")) {
conf.setBoolean(ConfVars.LLAP_IO_ENCODE_ENABLED.varname, false);
LOG.info("Disabling LLAP IO encode as ETL query is detected");
} else if (llapIOETLSkipFormat.equalsIgnoreCase("all")) {
conf.setBoolean(ConfVars.LLAP_IO_ENABLED.varname, false);
LOG.info("Disabling LLAP IO as ETL query is detected");
}
}
// Check query results cache.
// If no masking/filtering required, then we can check the cache now, before
// generating the operator tree and going through CBO.
// Otherwise we have to wait until after the masking/filtering step.
boolean isCacheEnabled = isResultsCacheEnabled();
QueryResultsCache.LookupInfo lookupInfo = null;
if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo, false)) {
return;
}
}
ASTNode astForMasking;
if (isCBOExecuted() && needsTransform && (qb.isCTAS() || forViewCreation || qb.isMaterializedView() || qb.isMultiDestQuery())) {
// If we use CBO and we may apply masking/filtering policies, we create a copy of the ast.
// The reason is that the generation of the operator tree may modify the initial ast,
// but if we need to parse for a second time, we would like to parse the unmodified ast.
astForMasking = (ASTNode) ParseDriver.adaptor.dupTree(ast);
} else {
astForMasking = ast;
}
// 2. Gen OP Tree from resolved Parse Tree
sinkOp = genOPTree(ast, plannerCtx);
boolean usesMasking = false;
if (!forViewCreation && ast.getToken().getType() != HiveParser.TOK_CREATE_MATERIALIZED_VIEW && (tableMask.isEnabled() && analyzeRewrite == null)) {
// Here we rewrite the * and also the masking table
ParseResult rewrittenResult = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(), ctx, db);
ASTNode rewrittenAST = rewrittenResult.getTree();
if (astForMasking != rewrittenAST) {
usesMasking = true;
plannerCtx = pcf.get();
ctx.setSkipTableMasking(true);
ctx.setTokenRewriteStream(rewrittenResult.getTokenRewriteStream());
init(true);
// change the location of position alias process here
processPositionAlias(rewrittenAST);
genResolvedParseTree(rewrittenAST, plannerCtx);
if (this instanceof CalcitePlanner) {
((CalcitePlanner) this).resetCalciteConfiguration();
}
sinkOp = genOPTree(rewrittenAST, plannerCtx);
}
}
// validate if this sink operation is allowed for non-native tables
if (sinkOp instanceof FileSinkOperator) {
FileSinkOperator fileSinkOperator = (FileSinkOperator) sinkOp;
Optional<HiveStorageHandler> handler = Optional.ofNullable(fileSinkOperator).map(FileSinkOperator::getConf).map(FileSinkDesc::getTable).map(Table::getStorageHandler);
if (handler.isPresent()) {
handler.get().validateSinkDesc(fileSinkOperator.getConf());
}
}
// TODO: Enable caching for queries with masking/filtering
if (isCacheEnabled && needsTransform && !usesMasking && queryTypeCanUseCache()) {
lookupInfo = createLookupInfoForQuery(ast);
if (checkResultsCache(lookupInfo, false)) {
return;
}
}
// 3. Deduce Resultset Schema
if ((forViewCreation || createVwDesc != null) && !this.ctx.isCboSucceeded()) {
resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
} else {
// succeeds.
if (resultSchema == null) {
resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
}
}
// 4. Generate Parse Context for Optimizer & Physical compiler
copyInfoToQueryProperties(queryProperties);
ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, queryProperties, viewProjectToTableSchema);
// Set the semijoin hints in parse context
pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
// Set the mapjoin hint if it needs to be disabled.
pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
if (forViewCreation) {
// Generate lineage info if LineageLogger hook is configured.
// Add the transformation that computes the lineage information.
Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
List<Transform> transformations = new ArrayList<Transform>();
transformations.add(new HiveOpConverterPostProc());
transformations.add(new Generator(postExecHooks));
for (Transform t : transformations) {
pCtx = t.transform(pCtx);
}
}
}
// 5. Take care of view creation
if (createVwDesc != null) {
if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
return;
}
if (!ctx.isCboSucceeded()) {
saveViewDefinition();
}
// validate the create view statement at this point, the createVwDesc gets
// all the information for semanticcheck
validateCreateView();
createVwDesc.setTablesUsed(pCtx.getTablesUsed());
}
// it means that in step 2, the ColumnAccessInfo was already created
if (!forViewCreation || getColumnAccessInfo() == null) {
// 6. Generate table access stats if required
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
}
AuxOpTreeSignature.linkAuxSignatures(pCtx);
// 7. Perform Logical optimization
if (LOG.isDebugEnabled()) {
LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
Optimizer optm = new Optimizer();
optm.setPctx(pCtx);
optm.initialize(conf);
pCtx = optm.optimize();
if (pCtx.getColumnAccessInfo() != null) {
// set ColumnAccessInfo for view column authorization
setColumnAccessInfo(pCtx.getColumnAccessInfo());
}
if (LOG.isDebugEnabled()) {
LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
}
// 8. Generate column access stats if required - wait until column pruning
// takes place during optimization
boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
// view column access info is carried by this.getColumnAccessInfo().
setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
}
}
if (forViewCreation) {
return;
}
// 9. Optimize Physical op tree & Translate to target execution engine (MR,
// TEZ..)
compilePlan(pCtx);
// find all Acid FileSinkOperatorS
new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId());
// 10. Attach CTAS/Insert-Commit-hooks for Storage Handlers
final Optional<TezTask> optionalTezTask = rootTasks.stream().filter(task -> task instanceof TezTask).map(task -> (TezTask) task).findFirst();
if (optionalTezTask.isPresent()) {
final TezTask tezTask = optionalTezTask.get();
rootTasks.stream().filter(task -> task.getWork() instanceof DDLWork).map(task -> (DDLWork) task.getWork()).filter(ddlWork -> ddlWork.getDDLDesc() instanceof PreInsertTableDesc).map(ddlWork -> (PreInsertTableDesc) ddlWork.getDDLDesc()).map(desc -> new InsertCommitHookDesc(desc.getTable(), desc.isOverwrite())).forEach(insertCommitHookDesc -> tezTask.addDependentTask(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), insertCommitHookDesc), conf)));
}
LOG.info("Completed plan generation");
// 11. put accessed columns to readEntity
if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
}
if (isCacheEnabled && lookupInfo != null) {
if (queryCanBeCached()) {
// requires SemanticAnalyzer state to be reset.
if (checkResultsCache(lookupInfo, true)) {
LOG.info("Cached result found on second lookup");
} else {
QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo);
// Specify that the results of this query can be cached.
setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo));
}
}
}
}
Aggregations