Examples with Generator - org.apache.hadoop.hive.ql.optimizer.lineage.Generator

Example 1 with Generator

use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.

Found in class SemanticAnalyzer, method analyzeInternal.

/**
 * Drives semantic analysis of one statement: resolves the parse tree, builds the operator
 * tree, derives the result schema, builds a {@code ParseContext}, handles view creation,
 * runs the logical {@code Optimizer} and finally the task compiler.
 *
 * <p>The method returns early when {@code genResolvedParseTree} reports {@code false},
 * when a results-cache check succeeds, when a view is being created while
 * {@code ctx.getExplainAnalyze()} is {@code AnalyzeState.RUNNING}, and after the
 * non-materialized view path has finished.
 *
 * @param ast root of the syntax tree to analyze
 * @param pcf factory for the {@code PlannerContext}; a second context is created from it
 *            if the AST is rewritten for masking/filtering
 * @throws SemanticException wraps a {@code HiveException} raised by
 *         {@code PlanUtils.addInputsForView}; presumably also propagated from the helper
 *         calls made here (not visible in this excerpt)
 */
void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
    // 1. Generate Resolved Parse tree from syntax tree
    LOG.info("Starting Semantic Analysis");
    // change the location of position alias process here
    processPositionAlias(ast);
    PlannerContext plannerCtx = pcf.create();
    if (!genResolvedParseTree(ast, plannerCtx)) {
        return;
    }
    // Optionally strip ORDER BY from every aliased subquery of the query block.
    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
        for (String alias : qb.getSubqAliases()) {
            removeOBInSubQuery(qb.getSubqForAlias(alias));
        }
    }
    // Check query results cache.
    // If no masking/filtering required, then we can check the cache now, before
    // generating the operator tree and going through CBO.
    // Otherwise we have to wait until after the masking/filtering step.
    boolean isCacheEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED);
    QueryResultsCache.LookupInfo lookupInfo = null;
    boolean needsTransform = needsTransform();
    if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
        lookupInfo = createLookupInfoForQuery(ast);
        if (checkResultsCache(lookupInfo)) {
            return;
        }
    }
    // 2. Gen OP Tree from resolved Parse Tree
    Operator sinkOp = genOPTree(ast, plannerCtx);
    // The rewrite is attempted only when the unparse translator is disabled, table masking
    // is enabled and no analyzeRewrite is set.
    if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) {
        // Here we rewrite the * and also the masking table
        ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), ctx, db, tabNameToTabObject, ignoredTokens);
        // Reference comparison: a different node means the AST was rewritten, so the
        // analysis is redone on the new tree with a fresh planner context.
        if (tree != ast) {
            plannerCtx = pcf.create();
            ctx.setSkipTableMasking(true);
            init(true);
            // change the location of position alias process here
            processPositionAlias(tree);
            // NOTE(review): unlike step 1, the boolean result is ignored here - confirm intended.
            genResolvedParseTree(tree, plannerCtx);
            if (this instanceof CalcitePlanner) {
                ((CalcitePlanner) this).resetCalciteConfiguration();
            }
            sinkOp = genOPTree(tree, plannerCtx);
        }
    }
    // Check query results cache for the needsTransform case: the lookup was deferred until
    // here, after applying the masking/filtering rewrite rules to the AST.
    // NOTE(review): the lookup is built from the original `ast`, not the rewritten `tree` - confirm.
    if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) {
        lookupInfo = createLookupInfoForQuery(ast);
        if (checkResultsCache(lookupInfo)) {
            return;
        }
    }
    // 3. Deduce Resultset Schema
    if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
        resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
        // Not a view creation, or CBO succeeded: compute the schema only if none was set yet.
        if (resultSchema == null) {
            resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
        }
    }
    // 4. Generate Parse Context for Optimizer & Physical compiler
    copyInfoToQueryProperties(queryProperties);
    // The join and SMB-join key sets are copied into new HashSets before being handed over.
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
    // Set the semijoin hints in parse context
    pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
    // Set the mapjoin hint if it needs to be disabled.
    pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
    // 5. Take care of view creation
    if (createVwDesc != null) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
            return;
        }
        if (!ctx.isCboSucceeded()) {
            saveViewDefinition();
        }
        // validate the create view statement at this point, the createVwDesc gets
        // all the information for semanticcheck
        validateCreateView();
        if (createVwDesc.isMaterialized()) {
            // Materialized views record the tables they use and then fall through to the
            // remaining steps below.
            createVwDesc.setTablesUsed(getTablesUsed(pCtx));
        } else {
            // Since we're only creating a view (not executing it), we don't need to
            // optimize or translate the plan (and in fact, those procedures can
            // interfere with the view creation). So skip the rest of this method.
            ctx.setResDir(null);
            ctx.setResFile(null);
            try {
                PlanUtils.addInputsForView(pCtx);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            // Generate lineage info for create view statements
            // if LineageLogger hook is configured.
            // Add the transformation that computes the lineage information.
            // The hook list is the comma-separated POSTEXECHOOKS value, trimmed, with empty
            // entries dropped; a null value is treated as empty.
            Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
            if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
                ArrayList<Transform> transformations = new ArrayList<Transform>();
                transformations.add(new HiveOpConverterPostProc());
                transformations.add(new Generator(postExecHooks));
                // Each transform receives the context returned by the previous one.
                for (Transform t : transformations) {
                    pCtx = t.transform(pCtx);
                }
                // we just use view name as location.
                queryState.getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
            }
            return;
        }
    }
    // 6. Generate table access stats if required
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
        TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
        setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
    }
    // 7. Perform Logical optimization
    if (LOG.isDebugEnabled()) {
        LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    Optimizer optm = new Optimizer();
    optm.setPctx(pCtx);
    optm.initialize(conf);
    // From here on pCtx is the context returned by the optimizer.
    pCtx = optm.optimize();
    if (pCtx.getColumnAccessInfo() != null) {
        // set ColumnAccessInfo for view column authorization
        setColumnAccessInfo(pCtx.getColumnAccessInfo());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    // 8. Generate column access stats if required - wait until column pruning
    // takes place during optimization
    boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
    if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
        // view column access info is carried by this.getColumnAccessInfo().
        setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
    }
    // 9. (step number inferred from the surrounding numbering) Compile the optimized plan
    // into tasks; skipped when ctx.getExplainLogical() is true.
    if (!ctx.getExplainLogical()) {
        TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
        compiler.init(queryState, console, db);
        compiler.compile(pCtx, rootTasks, inputs, outputs);
        fetchTask = pCtx.getFetchTask();
    }
    // find all Acid FileSinkOperatorS
    // NOTE(review): `qp` is never read in this method; presumably the constructor does the
    // work as a side effect - confirm.
    QueryPlanPostProcessor qp = new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId());
    LOG.info("Completed plan generation");
    // 10. put accessed columns to readEntity
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }
    // Cache usage is recorded only when a lookup was created by one of the cache checks above.
    if (isCacheEnabled && lookupInfo != null) {
        if (queryCanBeCached()) {
            QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo);
            // Specify that the results of this query can be cached.
            setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo));
        }
    }
}

Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) QueryPlanPostProcessor(org.apache.hadoop.hive.ql.optimizer.QueryPlanPostProcessor) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) ArrayList(java.util.ArrayList) QueryResultsCache(org.apache.hadoop.hive.ql.cache.results.QueryResultsCache) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage) Path(org.apache.hadoop.fs.Path) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)

Example 2 with Generator

use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.

Found in class Optimizer, method initialize.

/**
 * Builds the ordered list of plan transformations for the given configuration.
 *
 * <p>Each transformation is appended only when its configuration switch allows it; some
 * additionally depend on the execution engine or on values read from the parse context
 * ({@code pctx}). Entries are stored in the order in which they are appended here.
 *
 * @param hiveConf configuration consulted for every optimizer switch
 */
public void initialize(HiveConf hiveConf) {
    final String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    final boolean onTez = engine.equals("tez");
    final boolean onSpark = engine.equals("spark");
    final boolean onNeitherTezNorSpark = !onTez && !onSpark;

    transformations = new ArrayList<Transform>();

    // Post-processing needed when Calcite operators are translated into Hive operators.
    transformations.add(new HiveOpConverterPostProc());

    // Lineage: enabled by its own switch or by any of three known post-execution hooks.
    final String hooksCsv = Strings.nullToEmpty(HiveConf.getVar(hiveConf, HiveConf.ConfVars.POSTEXECHOOKS));
    Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(hooksCsv));
    final boolean lineageWanted = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_LINEAGE_INFO)
        || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter")
        || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger")
        || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook");
    if (lineageWanted) {
        transformations.add(new Generator(postExecHooks));
    }

    // Rewrite OR predicates in filters into IN clauses before anything else looks at them.
    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER) && !pctx.getContext().isCboSucceeded()) {
        transformations.add(new PointLookupOptimizer(HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN)));
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEPARTITIONCOLUMNSEPARATOR)) {
        transformations.add(new PartitionColumnsSeparator());
    }

    // Predicate pushdown comes in two flavours, chosen by whether CBO succeeded.
    final boolean ppdEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD);
    final boolean constantPropagationEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION);
    if (ppdEnabled) {
        if (pctx.getContext().isCboSucceeded()) {
            transformations.add(new SyntheticJoinPredicate());
            transformations.add(new SimplePredicatePushDown());
            transformations.add(new RedundantDynamicPruningConditionsRemoval());
        } else {
            transformations.add(new PredicateTransitivePropagate());
            if (constantPropagationEnabled) {
                transformations.add(new ConstantPropagate());
            }
            transformations.add(new SyntheticJoinPredicate());
            transformations.add(new PredicatePushDown());
        }
    }

    if (constantPropagationEnabled && !pctx.getContext().isCboSucceeded()) {
        // Second constant-propagation pass: after predicate pushdown, filter expressions
        // are combined and may become eligible for reduction (like is not null filter).
        transformations.add(new ConstantPropagate());
    }

    transformations.add(new SortedDynPartitionTimeGranularityOptimizer());

    if (ppdEnabled) {
        transformations.add(new PartitionPruner());
        transformations.add(new PartitionConditionRemover());
        if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) {
            // List bucketing pruner.
            transformations.add(new ListBucketingPruner());
        }
        if (constantPropagationEnabled && !pctx.getContext().isCboSucceeded()) {
            // PartitionPruner may create more folding opportunities, so fold once more.
            transformations.add(new ConstantPropagate());
        }
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) || HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_MAP_GROUPBY_SORT)) {
        transformations.add(new GroupByOptimizer());
    }

    transformations.add(new ColumnPruner());

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVECOUNTDISTINCTOPTIMIZER) && (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_IN_TEST) || onTez)) {
        transformations.add(new CountDistinctRewriteProc());
    }

    final boolean skewJoinAtCompileTime = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME);
    if (skewJoinAtCompileTime) {
        if (onTez) {
            LOG.warn("Skew join is currently not supported in tez! Disabling the skew join optimization.");
        } else {
            transformations.add(new SkewJoinOptimizer());
        }
    }

    transformations.add(new SamplePruner());

    // Spark gets its own map-join processor; every other engine uses the default one.
    final MapJoinProcessor joinProcessor;
    if (onSpark) {
        joinProcessor = new SparkMapJoinProcessor();
    } else {
        joinProcessor = new MapJoinProcessor();
    }
    transformations.add(joinProcessor);

    // BucketMapJoinOptimizer and SortedMergeBucketMapJoinOptimizer: neither runs on Tez or
    // Spark. The bucket map join optimizer is added once if either switch asks for it, and
    // always ahead of the sorted-merge variant.
    final boolean bucketMapJoinWanted = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN);
    final boolean sortMergeBucketMapJoinWanted = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN);
    if (onNeitherTezNorSpark) {
        if (bucketMapJoinWanted || sortMergeBucketMapJoinWanted) {
            transformations.add(new BucketMapJoinOptimizer());
        }
        if (sortMergeBucketMapJoinWanted) {
            transformations.add(new SortedMergeBucketMapJoinOptimizer());
        }
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEBUCKETINGSORTING)) {
        transformations.add(new BucketingSortingReduceSinkOptimizer());
    }

    transformations.add(new UnionProcessor());

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.NWAYJOINREORDER)) {
        transformations.add(new JoinReorder());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.TEZ_OPTIMIZE_BUCKET_PRUNING) && ppdEnabled && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
        transformations.add(new FixedBucketPruningOptimizer(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.TEZ_OPTIMIZE_BUCKET_PRUNING_COMPAT)));
    }

    transformations.add(new BucketVersionPopulator());

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION) && !onTez) {
        transformations.add(new ReduceSinkDeDuplication());
    }

    transformations.add(new NonBlockingOpDeDupProc());

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
        transformations.add(new IdentityProjectRemover());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) {
        transformations.add(new GlobalLimitOptimizer());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCORRELATION) && !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEGROUPBYSKEW) && !skewJoinAtCompileTime && onNeitherTezNorSpark) {
        transformations.add(new CorrelationOptimizer());
    }

    if (HiveConf.getFloatVar(hiveConf, HiveConf.ConfVars.HIVELIMITPUSHDOWNMEMORYUSAGE) > 0) {
        transformations.add(new LimitPushdownOptimizer());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT)) {
        transformations.add(new OrderlessLimitPushDownOptimizer());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES)) {
        transformations.add(new StatsOptimizer());
    }

    if (pctx.getContext().isExplainSkipExecution() && onNeitherTezNorSpark) {
        transformations.add(new AnnotateWithStatistics());
        transformations.add(new AnnotateWithOpTraits());
    }

    final String fetchConversion = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKCONVERSION);
    if (!fetchConversion.equals("none")) {
        // must be called last
        // NOTE(review): two more conditional transformations are still appended after this
        // one - confirm what "last" is meant to cover.
        transformations.add(new SimpleFetchOptimizer());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEFETCHTASKAGGR)) {
        transformations.add(new SimpleFetchAggregation());
    }

    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_OPTIMIZE_TABLE_PROPERTIES_FROM_SERDE)) {
        transformations.add(new TablePropertyEnrichmentOptimizer());
    }
}

Also used : ReduceSinkDeDuplication(org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication) AnnotateWithOpTraits(org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits) PredicateTransitivePropagate(org.apache.hadoop.hive.ql.ppd.PredicateTransitivePropagate) PartitionConditionRemover(org.apache.hadoop.hive.ql.optimizer.pcr.PartitionConditionRemover) UnionProcessor(org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcessor) SimplePredicatePushDown(org.apache.hadoop.hive.ql.ppd.SimplePredicatePushDown) SimplePredicatePushDown(org.apache.hadoop.hive.ql.ppd.SimplePredicatePushDown) PredicatePushDown(org.apache.hadoop.hive.ql.ppd.PredicatePushDown) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) PartitionPruner(org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner) ListBucketingPruner(org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPruner) SyntheticJoinPredicate(org.apache.hadoop.hive.ql.ppd.SyntheticJoinPredicate) CorrelationOptimizer(org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer) AnnotateWithStatistics(org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)

Example 3 with Generator

use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.

Found in class SemanticAnalyzer, method analyzeInternal.

/**
 * Drives semantic analysis of one statement using a caller-supplied planner context:
 * resolves the parse tree, builds the operator tree, derives the result schema, builds a
 * {@code ParseContext}, handles view creation, runs the logical {@code Optimizer}, the task
 * compiler, and finally the scan-limit check.
 *
 * <p>The method returns early when {@code genResolvedParseTree} reports {@code false},
 * when a view is being created while {@code ctx.getExplainAnalyze()} is
 * {@code AnalyzeState.RUNNING}, and after the non-materialized view path has finished.
 *
 * @param ast root of the syntax tree to analyze
 * @param plannerCtx planner context passed to parse-tree resolution and operator-tree
 *                   generation; the same instance is reused if the AST is rewritten
 * @throws SemanticException wraps a {@code HiveException} raised by
 *         {@code PlanUtils.addInputsForView}; presumably also propagated from the helper
 *         calls made here (not visible in this excerpt)
 */
void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
    // 1. Generate Resolved Parse tree from syntax tree
    LOG.info("Starting Semantic Analysis");
    //change the location of position alias process here
    processPositionAlias(ast);
    if (!genResolvedParseTree(ast, plannerCtx)) {
        return;
    }
    // 2. Gen OP Tree from resolved Parse Tree
    Operator sinkOp = genOPTree(ast, plannerCtx);
    // The rewrite is attempted only when the unparse translator is disabled and table
    // masking is enabled.
    if (!unparseTranslator.isEnabled() && tableMask.isEnabled()) {
        // Here we rewrite the * and also the masking table
        ASTNode tree = rewriteASTWithMaskAndFilter(ast);
        // Reference comparison: a different node means the AST was rewritten, so the
        // analysis is redone on the new tree.
        if (tree != ast) {
            ctx.setSkipTableMasking(true);
            init(true);
            //change the location of position alias process here
            processPositionAlias(tree);
            // NOTE(review): unlike step 1, the boolean result is ignored here - confirm intended.
            genResolvedParseTree(tree, plannerCtx);
            if (this instanceof CalcitePlanner) {
                ((CalcitePlanner) this).resetCalciteConfiguration();
            }
            sinkOp = genOPTree(tree, plannerCtx);
        }
    }
    // 3. Deduce Resultset Schema
    if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
        resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
        // Not a view creation, or CBO succeeded: compute the schema only if none was set yet.
        if (resultSchema == null) {
            resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
        }
    }
    // 4. Generate Parse Context for Optimizer & Physical compiler
    copyInfoToQueryProperties(queryProperties);
    // The join and SMB-join key sets are copied into new HashSets before being handed over.
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, queryProperties, viewProjectToTableSchema, acidFileSinks);
    // 5. Take care of view creation
    if (createVwDesc != null) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
            return;
        }
        if (!ctx.isCboSucceeded()) {
            saveViewDefinition();
        }
        // validate the create view statement at this point, the createVwDesc gets
        // all the information for semanticcheck
        validateCreateView();
        // Materialized views skip this branch and continue with the remaining steps below.
        if (!createVwDesc.isMaterialized()) {
            // Since we're only creating a view (not executing it), we don't need to
            // optimize or translate the plan (and in fact, those procedures can
            // interfere with the view creation). So skip the rest of this method.
            ctx.setResDir(null);
            ctx.setResFile(null);
            try {
                PlanUtils.addInputsForView(pCtx);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            // Generate lineage info for create view statements
            // if LineageLogger hook is configured.
            // Add the transformation that computes the lineage information.
            // The hook list is the comma-separated POSTEXECHOOKS value, trimmed, with empty
            // entries dropped; a null value is treated as empty.
            Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
            if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
                ArrayList<Transform> transformations = new ArrayList<Transform>();
                transformations.add(new HiveOpConverterPostProc());
                transformations.add(new Generator());
                // Each transform receives the context returned by the previous one.
                for (Transform t : transformations) {
                    pCtx = t.transform(pCtx);
                }
                // we just use view name as location.
                SessionState.get().getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
            }
            return;
        }
    }
    // 6. Generate table access stats if required
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
        TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
        setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
    }
    // 7. Perform Logical optimization
    if (LOG.isDebugEnabled()) {
        LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    Optimizer optm = new Optimizer();
    optm.setPctx(pCtx);
    optm.initialize(conf);
    // From here on pCtx is the context returned by the optimizer.
    pCtx = optm.optimize();
    if (pCtx.getColumnAccessInfo() != null) {
        // set ColumnAccessInfo for view column authorization
        setColumnAccessInfo(pCtx.getColumnAccessInfo());
    }
    // Fetch task as it stands after logical optimization and before task compilation;
    // it is handed to enforceScanLimits in step 11.
    FetchTask origFetchTask = pCtx.getFetchTask();
    if (LOG.isDebugEnabled()) {
        LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    // 8. Generate column access stats if required - wait until column pruning
    // takes place during optimization
    boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
    if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
        // view column access info is carried by this.getColumnAccessInfo().
        setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
    }
    // 9. (step number inferred from the surrounding numbering) Compile the optimized plan
    // into tasks; skipped when ctx.getExplainLogical() is true.
    if (!ctx.getExplainLogical()) {
        TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
        compiler.init(queryState, console, db);
        compiler.compile(pCtx, rootTasks, inputs, outputs);
        fetchTask = pCtx.getFetchTask();
    }
    LOG.info("Completed plan generation");
    // 10. put accessed columns to readEntity
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }
    // 11. if desired check we're not going over partition scan limits
    if (!ctx.isExplainSkipExecution()) {
        enforceScanLimits(pCtx, origFetchTask);
    }
    return;
}

Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) ArrayList(java.util.ArrayList) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)

Example 4 with Generator

use of org.apache.hadoop.hive.ql.optimizer.lineage.Generator in project hive by apache.

Found in class SemanticAnalyzer, method analyzeInternal.

/**
 * Runs semantic analysis on a query syntax tree and, unless it returns early, compiles the
 * result into an executable plan.
 *
 * <p>The visible flow is: resolve the parse tree, optionally consult the query results cache,
 * generate the operator tree (regenerating it from a rewritten AST when masking/filtering
 * changed the tree), deduce the result schema, build a {@code ParseContext}, run lineage
 * transforms for view creation, handle create-view bookkeeping, run logical optimization,
 * compile the physical plan, attach insert-commit hooks, and finally mark the query as
 * cacheable when applicable.
 *
 * <p>The method returns early when: {@code genResolvedParseTree} returns {@code false}; a
 * results-cache lookup succeeds before plan generation; {@code createVwDesc} is set and
 * explain-analyze is in the {@code RUNNING} state; or {@code forViewCreation} is set (after
 * logical optimization).
 *
 * @param ast root of the syntax tree to analyze
 * @param pcf supplier of {@code PlannerContext} instances; invoked once up front and once more
 *            if the AST is rewritten for masking/filtering
 * @throws SemanticException declared by this method; presumably raised by the analysis and
 *         compilation steps it calls
 */
@SuppressWarnings("checkstyle:methodlength")
void analyzeInternal(ASTNode ast, Supplier<PlannerContext> pcf) throws SemanticException {
    LOG.info("Starting Semantic Analysis");
    // 1. Generate Resolved Parse tree from syntax tree
    // Evaluated once up front; used below to decide at which point the results cache may be consulted.
    boolean needsTransform = needsTransform();
    // change the location of position alias process here
    processPositionAlias(ast);
    // NOTE(review): presumably warms a table cache for the tables recorded in ctx - confirm.
    cacheTableHelper.populateCache(ctx.getParsedTables(), conf, getTxnMgr());
    PlannerContext plannerCtx = pcf.get();
    if (!genResolvedParseTree(ast, plannerCtx)) {
        // Nothing further to do when the parse tree could not (or need not) be resolved.
        return;
    }
    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
        for (String alias : qb.getSubqAliases()) {
            removeOBInSubQuery(qb.getSubqForAlias(alias));
        }
    }
    // For queries that insert into tables or are CTAS, turn off LLAP IO in conf according to
    // LLAP_IO_ETL_SKIP_FORMAT: "encode" disables only the encode path, "all" disables LLAP IO.
    final String llapIOETLSkipFormat = HiveConf.getVar(conf, ConfVars.LLAP_IO_ETL_SKIP_FORMAT);
    if (qb.getParseInfo().hasInsertTables() || qb.isCTAS()) {
        if (llapIOETLSkipFormat.equalsIgnoreCase("encode")) {
            conf.setBoolean(ConfVars.LLAP_IO_ENCODE_ENABLED.varname, false);
            LOG.info("Disabling LLAP IO encode as ETL query is detected");
        } else if (llapIOETLSkipFormat.equalsIgnoreCase("all")) {
            conf.setBoolean(ConfVars.LLAP_IO_ENABLED.varname, false);
            LOG.info("Disabling LLAP IO as ETL query is detected");
        }
    }
    // Check query results cache.
    // If no masking/filtering required, then we can check the cache now, before
    // generating the operator tree and going through CBO.
    // Otherwise we have to wait until after the masking/filtering step.
    boolean isCacheEnabled = isResultsCacheEnabled();
    // Stays null unless one of the cache lookups below is attempted.
    QueryResultsCache.LookupInfo lookupInfo = null;
    if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
        lookupInfo = createLookupInfoForQuery(ast);
        if (checkResultsCache(lookupInfo, false)) {
            return;
        }
    }
    ASTNode astForMasking;
    if (isCBOExecuted() && needsTransform && (qb.isCTAS() || forViewCreation || qb.isMaterializedView() || qb.isMultiDestQuery())) {
        // If we use CBO and we may apply masking/filtering policies, we create a copy of the ast.
        // The reason is that the generation of the operator tree may modify the initial ast,
        // but if we need to parse for a second time, we would like to parse the unmodified ast.
        astForMasking = (ASTNode) ParseDriver.adaptor.dupTree(ast);
    } else {
        astForMasking = ast;
    }
    // 2. Gen OP Tree from resolved Parse Tree
    sinkOp = genOPTree(ast, plannerCtx);
    // Set to true only when the masking/filtering rewrite below produced a different AST.
    boolean usesMasking = false;
    if (!forViewCreation && ast.getToken().getType() != HiveParser.TOK_CREATE_MATERIALIZED_VIEW && (tableMask.isEnabled() && analyzeRewrite == null)) {
        // Here we rewrite the * and also the masking table
        ParseResult rewrittenResult = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(), ctx, db);
        ASTNode rewrittenAST = rewrittenResult.getTree();
        // Reference comparison: a different tree instance means the rewrite changed the query,
        // so analysis is restarted on the rewritten AST with a fresh PlannerContext.
        if (astForMasking != rewrittenAST) {
            usesMasking = true;
            plannerCtx = pcf.get();
            ctx.setSkipTableMasking(true);
            ctx.setTokenRewriteStream(rewrittenResult.getTokenRewriteStream());
            init(true);
            // change the location of position alias process here
            processPositionAlias(rewrittenAST);
            // NOTE(review): the boolean result is ignored here, unlike the first call above.
            genResolvedParseTree(rewrittenAST, plannerCtx);
            if (this instanceof CalcitePlanner) {
                ((CalcitePlanner) this).resetCalciteConfiguration();
            }
            sinkOp = genOPTree(rewrittenAST, plannerCtx);
        }
    }
    // validate if this sink operation is allowed for non-native tables
    if (sinkOp instanceof FileSinkOperator) {
        FileSinkOperator fileSinkOperator = (FileSinkOperator) sinkOp;
        // Null-safe walk: operator -> FileSinkDesc -> Table -> storage handler; empty if any link is null.
        Optional<HiveStorageHandler> handler = Optional.ofNullable(fileSinkOperator).map(FileSinkOperator::getConf).map(FileSinkDesc::getTable).map(Table::getStorageHandler);
        if (handler.isPresent()) {
            handler.get().validateSinkDesc(fileSinkOperator.getConf());
        }
    }
    // Deferred cache lookup for the needsTransform case: only attempted when the
    // masking/filtering step left the AST unchanged.
    // TODO: Enable caching for queries with masking/filtering
    if (isCacheEnabled && needsTransform && !usesMasking && queryTypeCanUseCache()) {
        lookupInfo = createLookupInfoForQuery(ast);
        if (checkResultsCache(lookupInfo, false)) {
            return;
        }
    }
    // 3. Deduce Resultset Schema
    if ((forViewCreation || createVwDesc != null) && !this.ctx.isCboSucceeded()) {
        resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
        // Only derive the result-set schema when it has not been set already.
        // NOTE(review): the original comment here was truncated to "succeeds."; presumably
        // resultSchema is pre-populated when CBO succeeds - confirm.
        if (resultSchema == null) {
            resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
        }
    }
    // 4. Generate Parse Context for Optimizer & Physical compiler
    copyInfoToQueryProperties(queryProperties);
    // The join-operator sets are passed as fresh HashSet copies of the context maps' key sets.
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps, new HashSet<JoinOperator>(joinContext.keySet()), new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()), loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap, destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject, opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner, viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc, createVwDesc, materializedViewUpdateDesc, queryProperties, viewProjectToTableSchema);
    // Set the semijoin hints in parse context
    pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
    // Set the mapjoin hint if it needs to be disabled.
    pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
    if (forViewCreation) {
        // Generate lineage info if LineageLogger hook is configured.
        // Add the transformation that computes the lineage information.
        // POSTEXECHOOKS is parsed as a comma-separated list; entries are trimmed, empties dropped,
        // and a null setting is treated as the empty string.
        Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
        if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
            List<Transform> transformations = new ArrayList<Transform>();
            transformations.add(new HiveOpConverterPostProc());
            transformations.add(new Generator(postExecHooks));
            // Transforms are applied in insertion order, each one receiving the previous one's output.
            for (Transform t : transformations) {
                pCtx = t.transform(pCtx);
            }
        }
    }
    // 5. Take care of view creation
    if (createVwDesc != null) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
            return;
        }
        if (!ctx.isCboSucceeded()) {
            saveViewDefinition();
        }
        // validate the create view statement at this point, the createVwDesc gets
        // all the information for semanticcheck
        validateCreateView();
        createVwDesc.setTablesUsed(pCtx.getTablesUsed());
    }
    // Steps 6-8 are skipped only when analyzing for view creation and a ColumnAccessInfo
    // already exists; per the original (truncated) comment, it means that in step 2, the
    // ColumnAccessInfo was already created.
    if (!forViewCreation || getColumnAccessInfo() == null) {
        // 6. Generate table access stats if required
        if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
            TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
            setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
        }
        AuxOpTreeSignature.linkAuxSignatures(pCtx);
        // 7. Perform Logical optimization
        if (LOG.isDebugEnabled()) {
            LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
        }
        Optimizer optm = new Optimizer();
        optm.setPctx(pCtx);
        optm.initialize(conf);
        // The optimizer returns the ParseContext to use from here on.
        pCtx = optm.optimize();
        if (pCtx.getColumnAccessInfo() != null) {
            // set ColumnAccessInfo for view column authorization
            setColumnAccessInfo(pCtx.getColumnAccessInfo());
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
        }
        // 8. Generate column access stats if required - wait until column pruning
        // takes place during optimization
        boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
        if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
            ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
            // view column access info is carried by this.getColumnAccessInfo().
            setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
        }
    }
    // View-creation analysis stops here: no physical plan is compiled.
    if (forViewCreation) {
        return;
    }
    // 9. Optimize Physical op tree & Translate to target execution engine (MR,
    // TEZ..)
    compilePlan(pCtx);
    // find all Acid FileSinkOperatorS
    // NOTE(review): the instance is discarded, so the work presumably happens in the constructor - confirm.
    new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId());
    // 10. Attach CTAS/Insert-Commit-hooks for Storage Handlers
    // Only the first TezTask among the root tasks (if any) receives the commit-hook tasks.
    final Optional<TezTask> optionalTezTask = rootTasks.stream().filter(task -> task instanceof TezTask).map(task -> (TezTask) task).findFirst();
    if (optionalTezTask.isPresent()) {
        final TezTask tezTask = optionalTezTask.get();
        // For every root DDLWork carrying a PreInsertTableDesc, add a dependent DDL task that
        // carries the matching InsertCommitHookDesc (same table and overwrite flag).
        rootTasks.stream().filter(task -> task.getWork() instanceof DDLWork).map(task -> (DDLWork) task.getWork()).filter(ddlWork -> ddlWork.getDDLDesc() instanceof PreInsertTableDesc).map(ddlWork -> (PreInsertTableDesc) ddlWork.getDDLDesc()).map(desc -> new InsertCommitHookDesc(desc.getTable(), desc.isOverwrite())).forEach(insertCommitHookDesc -> tezTask.addDependentTask(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), insertCommitHookDesc), conf)));
    }
    LOG.info("Completed plan generation");
    // 11. put accessed columns to readEntity
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }
    // lookupInfo is non-null only if one of the earlier cache lookups was attempted (and missed).
    if (isCacheEnabled && lookupInfo != null) {
        if (queryCanBeCached()) {
            // Second lookup, after plan generation. Per the original (truncated) comment, using a
            // cached result at this point requires SemanticAnalyzer state to be reset.
            // NOTE(review): the `true` argument presumably requests that reset - confirm.
            if (checkResultsCache(lookupInfo, true)) {
                LOG.info("Cached result found on second lookup");
            } else {
                QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo);
                // Specify that the results of this query can be cached.
                setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo));
            }
        }
    }
}

Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) QueryPlanPostProcessor(org.apache.hadoop.hive.ql.optimizer.QueryPlanPostProcessor) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ArrayListMultimap(com.google.common.collect.ArrayListMultimap) CombineHiveInputFormat(org.apache.hadoop.hive.ql.io.CombineHiveInputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) FileStatus(org.apache.hadoop.fs.FileStatus) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) StringUtils(org.apache.commons.lang3.StringUtils) TypeCheckCtx(org.apache.hadoop.hive.ql.parse.type.TypeCheckCtx) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) IntMath(com.google.common.math.IntMath) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) ArchiveUtils(org.apache.hadoop.hive.ql.exec.ArchiveUtils) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) HadoopShims(org.apache.hadoop.hive.shims.HadoopShims) QueryProperties(org.apache.hadoop.hive.ql.QueryProperties) OrderExpression(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression) UDTFDesc(org.apache.hadoop.hive.ql.plan.UDTFDesc) GenericUDFSurrogateKey(org.apache.hadoop.hive.ql.udf.generic.GenericUDFSurrogateKey) TokenRewriteStream(org.antlr.runtime.TokenRewriteStream) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) 
HivePrivilegeObject(org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject) AbstractCreateViewAnalyzer.validateTablesUsed(org.apache.hadoop.hive.ql.ddl.view.create.AbstractCreateViewAnalyzer.validateTablesUsed) MetadataTypedColumnsetSerDe(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe) FileUtils(org.apache.hadoop.hive.common.FileUtils) AuxOpTreeSignature(org.apache.hadoop.hive.ql.plan.mapper.AuxOpTreeSignature) ErrorMsg(org.apache.hadoop.hive.ql.ErrorMsg) ScriptDesc(org.apache.hadoop.hive.ql.plan.ScriptDesc) TypeInfoUtils(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) Supplier(java.util.function.Supplier) LinkedHashMap(java.util.LinkedHashMap) ExprNodeFieldDesc(org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc) Strings(com.google.common.base.Strings) ConstraintsUtils(org.apache.hadoop.hive.ql.ddl.table.constraint.ConstraintsUtils) Lists(com.google.common.collect.Lists) SemanticDispatcher(org.apache.hadoop.hive.ql.lib.SemanticDispatcher) SemanticGraphWalker(org.apache.hadoop.hive.ql.lib.SemanticGraphWalker) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) TypeCheckProcFactory(org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) Constants(org.apache.hadoop.hive.conf.Constants) PartitionedTableFunctionSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec) NullOrdering(org.apache.hadoop.hive.ql.util.NullOrdering) SubQueryType(org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType) HiveUtils(org.apache.hadoop.hive.ql.metadata.HiveUtils) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) PTFQueryInputType(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputType) IOException(java.io.IOException) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) 
LoadFileType(org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Partition(org.apache.hadoop.hive.ql.metadata.Partition) TreeMap(java.util.TreeMap) DirectionUtils(org.apache.hadoop.hive.ql.util.DirectionUtils) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) ReflectionUtils(org.apache.hadoop.util.ReflectionUtils) PTFExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef) SortedSet(java.util.SortedSet) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) AcidInputFormat(org.apache.hadoop.hive.ql.io.AcidInputFormat) WindowFrameSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) Description(org.apache.hadoop.hive.ql.exec.Description) RecordWriter(org.apache.hadoop.hive.ql.exec.RecordWriter) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) PartitionExpression(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression) HiveIgnoreKeyTextOutputFormat(org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat) Token(org.antlr.runtime.Token) ReduceField(org.apache.hadoop.hive.ql.exec.Utilities.ReduceField) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) UnionProcContext(org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext) UnionDesc(org.apache.hadoop.hive.ql.plan.UnionDesc) WriteEntity(org.apache.hadoop.hive.ql.hooks.WriteEntity) PartitioningSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec) 
Collection(java.util.Collection) Order(org.apache.hadoop.hive.metastore.api.Order) UUID(java.util.UUID) SQLPrimaryKey(org.apache.hadoop.hive.metastore.api.SQLPrimaryKey) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) LongMath(com.google.common.math.LongMath) HiveOutputFormat(org.apache.hadoop.hive.ql.io.HiveOutputFormat) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) SessionState(org.apache.hadoop.hive.ql.session.SessionState) ASTBuilder(org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder) Entry(java.util.Map.Entry) Queue(java.util.Queue) Objects.nonNull(java.util.Objects.nonNull) ISubQueryJoinInfo(org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) DynamicPartitionCtx(org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx) ResultFileFormat(org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) HIVE_DEFAULT_STORAGE_HANDLER(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DEFAULT_STORAGE_HANDLER) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) TABLE_IS_CTAS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.TABLE_IS_CTAS) DefaultGraphWalker(org.apache.hadoop.hive.ql.lib.DefaultGraphWalker) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) HashSet(java.util.HashSet) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) OrderExpressionDef(org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef) Utils(org.apache.hadoop.hive.shims.Utils) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) LinkedList(java.util.LinkedList) 
TreeVisitorAction(org.antlr.runtime.tree.TreeVisitorAction) NullStructSerDe(org.apache.hadoop.hive.serde2.NullStructSerDe) SerDeUtils(org.apache.hadoop.hive.serde2.SerDeUtils) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) WriteType(org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ForwardDesc(org.apache.hadoop.hive.ql.plan.ForwardDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) StrictChecks(org.apache.hadoop.hive.conf.HiveConf.StrictChecks) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ExprNodeDescUtils(org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils) ExprNodeColumnListDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc) MetricsConstant(org.apache.hadoop.hive.common.metrics.common.MetricsConstant) WindowFunctionInfo(org.apache.hadoop.hive.ql.exec.WindowFunctionInfo) Arrays(java.util.Arrays) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) GenericUDFMurmurHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFMurmurHash) PreInsertTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) TaskFactory(org.apache.hadoop.hive.ql.exec.TaskFactory) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) StatDB(org.apache.hadoop.hive.common.StatsSetupConst.StatDB) ExprNodeEvaluatorFactory(org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory) ListBucketingCtx(org.apache.hadoop.hive.ql.plan.ListBucketingCtx) 
org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) Set(java.util.Set) FunctionInfo(org.apache.hadoop.hive.ql.exec.FunctionInfo) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) AlterTableUnsetPropertiesDesc(org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) GenericUDFOPOr(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr) ArrowColumnarBatchSerDe(org.apache.hadoop.hive.ql.io.arrow.ArrowColumnarBatchSerDe) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) GenericUDFArray(org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray) HiveOperation(org.apache.hadoop.hive.ql.plan.HiveOperation) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ClassicToken(org.antlr.runtime.ClassicToken) GenericUDFHash(org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash) QueryResultsCache(org.apache.hadoop.hive.ql.cache.results.QueryResultsCache) Direction(org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction) SQLForeignKey(org.apache.hadoop.hive.metastore.api.SQLForeignKey) TreeSet(java.util.TreeSet) FsAction(org.apache.hadoop.fs.permission.FsAction) ArrayList(java.util.ArrayList) Task(org.apache.hadoop.hive.ql.exec.Task) UnsupportedFeature(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) BoundarySpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec) CreateMaterializedViewDesc(org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc) StringInternUtils(org.apache.hadoop.hive.common.StringInternUtils) PlanUtils(org.apache.hadoop.hive.ql.plan.PlanUtils) DYNAMICPARTITIONCONVERT(org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONVERT) 
WindowSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec) LateralViewJoinDesc(org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) Table(org.apache.hadoop.hive.ql.metadata.Table) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Node(org.apache.hadoop.hive.ql.lib.Node) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) IOUtils(org.apache.hadoop.io.IOUtils) MaterializedViewUpdateDesc(org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc) ValidTxnWriteIdList(org.apache.hadoop.hive.common.ValidTxnWriteIdList) OperatorFactory(org.apache.hadoop.hive.ql.exec.OperatorFactory) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) PartitionSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec) TableType(org.apache.hadoop.hive.metastore.TableType) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) ArrayDeque(java.util.ArrayDeque) CommonToken(org.antlr.runtime.CommonToken) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ExprNodeTypeCheck(org.apache.hadoop.hive.ql.parse.type.ExprNodeTypeCheck) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ResourceDownloader(org.apache.hadoop.hive.ql.util.ResourceDownloader) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) DummyPartition(org.apache.hadoop.hive.ql.metadata.DummyPartition) PTFInputSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFInputSpec) HIVESTATSDBCLASS(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS) PartitionedTableFunctionDef(org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef) 
FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) PTFDesc(org.apache.hadoop.hive.ql.plan.PTFDesc) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Splitter(com.google.common.base.Splitter) OutputFormat(org.apache.hadoop.mapred.OutputFormat) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) MetaStoreUtils(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils) PatternSyntaxException(java.util.regex.PatternSyntaxException) TreeVisitor(org.antlr.runtime.tree.TreeVisitor) ImmutableMap(com.google.common.collect.ImmutableMap) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) FileNotFoundException(java.io.FileNotFoundException) Sets(com.google.common.collect.Sets) SkewedTableUtils(org.apache.hadoop.hive.ql.ddl.table.storage.skewed.SkewedTableUtils) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) DelimitedJSONSerDe(org.apache.hadoop.hive.serde2.DelimitedJSONSerDe) List(java.util.List) VirtualColumn(org.apache.hadoop.hive.ql.metadata.VirtualColumn) DbTxnManager(org.apache.hadoop.hive.ql.lockmgr.DbTxnManager) DFSUtilClient(org.apache.hadoop.hdfs.DFSUtilClient) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) HIVEARCHIVEENABLED(org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEARCHIVEENABLED) WindowExpressionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec) GenericUDAFEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator) AccessControlException(java.security.AccessControlException) Optional(java.util.Optional) SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) Pattern(java.util.regex.Pattern) SortedMap(java.util.SortedMap) WindowFunctionSpec(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec) 
InsertCommitHookDesc(org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc) GenericUDTF(org.apache.hadoop.hive.ql.udf.generic.GenericUDTF) HashMap(java.util.HashMap) Deque(java.util.Deque) Multimap(com.google.common.collect.Multimap) ReadEntity(org.apache.hadoop.hive.ql.hooks.ReadEntity) GenericUDFCardinalityViolation(org.apache.hadoop.hive.ql.udf.generic.GenericUDFCardinalityViolation) TransactionalValidationListener(org.apache.hadoop.hive.metastore.TransactionalValidationListener) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) AnalyzeState(org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState) CollectionUtils(org.apache.commons.collections.CollectionUtils) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) QueryState(org.apache.hadoop.hive.ql.QueryState) CreateTableLikeDesc(org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc) SessionHiveMetaStoreClient(org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient) TableName(org.apache.hadoop.hive.common.TableName) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) Entity(org.apache.hadoop.hive.ql.hooks.Entity) FileInputFormat(org.apache.hadoop.mapreduce.lib.input.FileInputFormat) GenericUDF(org.apache.hadoop.hive.ql.udf.generic.GenericUDF) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) WindowType(org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowType) SampleDesc(org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc) Hive(org.apache.hadoop.hive.ql.metadata.Hive) Iterator(java.util.Iterator) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) NullRowsInputFormat(org.apache.hadoop.hive.ql.io.NullRowsInputFormat) PTFQueryInputSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PTFQueryInputSpec) ResourceType(org.apache.hadoop.hive.ql.session.SessionState.ResourceType) 
QueryPlanPostProcessor(org.apache.hadoop.hive.ql.optimizer.QueryPlanPostProcessor) SpecType(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec.SpecType) Tree(org.antlr.runtime.tree.Tree) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) GenericUDTFInline(org.apache.hadoop.hive.ql.udf.generic.GenericUDTFInline) LateralViewForwardDesc(org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc) OrderSpec(org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec) NON_FK_FILTERED(org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.NON_FK_FILTERED) LazyBinarySerDe2(org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe2) Collections(java.util.Collections) Database(org.apache.hadoop.hive.metastore.api.Database) RecordReader(org.apache.hadoop.hive.ql.exec.RecordReader) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) InsertCommitHookDesc(org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) ArrayList(java.util.ArrayList) QueryResultsCache(org.apache.hadoop.hive.ql.cache.results.QueryResultsCache) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) PreInsertTableDesc(org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage) HiveStorageHandler(org.apache.hadoop.hive.ql.metadata.HiveStorageHandler) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) Table(org.apache.hadoop.hive.ql.metadata.Table) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TezTask(org.apache.hadoop.hive.ql.exec.tez.TezTask) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)

Aggregations

HiveOpConverterPostProc (org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc)4 Generator (org.apache.hadoop.hive.ql.optimizer.lineage.Generator)4 ArrayList (java.util.ArrayList)3 Path (org.apache.hadoop.fs.Path)3 AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator)3 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)3 FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator)3 GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator)3 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)3 Operator (org.apache.hadoop.hive.ql.exec.Operator)3 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)3 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)3 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)3 TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator)3 UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator)3 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)3 Optimizer (org.apache.hadoop.hive.ql.optimizer.Optimizer)3 Transform (org.apache.hadoop.hive.ql.optimizer.Transform)3 CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)3 FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask)2