Search in sources:

Example 1 with CacheUsage

use of org.apache.hadoop.hive.ql.cache.results.CacheUsage in project hive by apache.

the class Driver method useFetchFromCache.

private void useFetchFromCache(CacheEntry cacheEntry) {
    // Change query FetchTask to use new location specified in results cache.
    FetchTask fetchTaskFromCache = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
    fetchTaskFromCache.initialize(queryState, plan, null, ctx.getOpContext());
    plan.setFetchTask(fetchTaskFromCache);
    cacheUsage = new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry);
}
Also used : FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage)
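
As these examples use it, CacheUsage pairs a CacheStatus with either a resolved cache entry or (as in Example 3 below) a pending QueryInfo. The following is a minimal sketch of how a caller might branch on the two statuses that appear on this page; the accessor name getStatus() and the overall handler shape are assumptions for illustration, not necessarily the actual Hive API.

// Illustrative sketch only -- getStatus() is assumed and may differ in Hive.
void handleCacheUsage(CacheUsage cacheUsage) {
    if (cacheUsage == null) {
        return;
    }
    if (cacheUsage.getStatus() == CacheUsage.CacheStatus.QUERY_USING_CACHE) {
        // The plan's FetchTask already points at a cached result directory;
        // the query is answered without recompiling or re-executing it.
    } else if (cacheUsage.getStatus() == CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS) {
        // The query runs normally; after a successful run its results may be
        // added to the results cache using the recorded QueryInfo.
    }
}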

Example 2 with CacheUsage

use of org.apache.hadoop.hive.ql.cache.results.CacheUsage in project hive by apache.

the class SemanticAnalyzer method useCachedResult.

/**
 * Set the query plan to use the cache entry passed in to return the query results.
 * @param cacheEntry The results cache entry that will be used to resolve the query.
 */
private void useCachedResult(QueryResultsCache.CacheEntry cacheEntry) {
    // Change query FetchTask to use new location specified in results cache.
    FetchTask fetchTask = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
    setFetchTask(fetchTask);
    queryState.setCommandType(cacheEntry.getQueryInfo().getHiveOperation());
    resultSchema = cacheEntry.getQueryInfo().getResultSchema();
    setTableAccessInfo(cacheEntry.getQueryInfo().getTableAccessInfo());
    setColumnAccessInfo(cacheEntry.getQueryInfo().getColumnAccessInfo());
    inputs.addAll(cacheEntry.getQueryInfo().getInputs());
    // Set recursive traversal in case the cached query was UNION generated by Tez.
    conf.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);
    // Indicate that the query will use a cached result.
    setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry));
}
Also used : FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage)
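
A note on the FileInputFormat.INPUT_DIR_RECURSIVE line above: a cached query that Tez compiled as a UNION writes its output into subdirectories of the result directory, so the fetch must list input recursively or it would only see the empty top level. A minimal sketch of the setting follows; the subdirectory layout named in the comment is an assumption for illustration.

// Cached UNION output may be laid out as <cache-dir>/1/000000_0, <cache-dir>/2/000000_0
// (layout shown for illustration only). Recursive listing lets the FetchTask
// pick up result files below the top-level cache directory.
conf.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);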

Example 3 with CacheUsage

use of org.apache.hadoop.hive.ql.cache.results.CacheUsage in project hive by apache.

the class SemanticAnalyzer method analyzeInternal.

void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
    // 1. Generate Resolved Parse tree from syntax tree
    LOG.info("Starting Semantic Analysis");
    // change the location of position alias process here
    processPositionAlias(ast);
    PlannerContext plannerCtx = pcf.create();
    if (!genResolvedParseTree(ast, plannerCtx)) {
        return;
    }
    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
        for (String alias : qb.getSubqAliases()) {
            removeOBInSubQuery(qb.getSubqForAlias(alias));
        }
    }
    // Check query results cache.
    // If no masking/filtering required, then we can check the cache now, before
    // generating the operator tree and going through CBO.
    // Otherwise we have to wait until after the masking/filtering step.
    boolean isCacheEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED);
    QueryResultsCache.LookupInfo lookupInfo = null;
    boolean needsTransform = needsTransform();
    if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
        lookupInfo = createLookupInfoForQuery(ast);
        if (checkResultsCache(lookupInfo)) {
            return;
        }
    }
    // 2. Gen OP Tree from resolved Parse Tree
    Operator sinkOp = genOPTree(ast, plannerCtx);
    if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) {
        // Here we rewrite the * and also the masking table
        ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), ctx, db, tabNameToTabObject, ignoredTokens);
        if (tree != ast) {
            plannerCtx = pcf.create();
            ctx.setSkipTableMasking(true);
            init(true);
            // change the location of position alias process here
            processPositionAlias(tree);
            genResolvedParseTree(tree, plannerCtx);
            if (this instanceof CalcitePlanner) {
                ((CalcitePlanner) this).resetCalciteConfiguration();
            }
            sinkOp = genOPTree(tree, plannerCtx);
        }
    }
    // Check the query results cache for queries that needed masking/filtering
    // here, after applying the masking/filtering rewrite rules to the AST.
    if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) {
        lookupInfo = createLookupInfoForQuery(ast);
        if (checkResultsCache(lookupInfo)) {
            return;
        }
    }
    // 3. Deduce Resultset Schema
    if (createVwDesc != null && !this.ctx.isCboSucceeded()) {
        resultSchema = convertRowSchemaToViewSchema(opParseCtx.get(sinkOp).getRowResolver());
    } else {
        // resultSchema may already have been set by CBO (via the new return path) when it
        // succeeds; only derive it from the sink operator's row resolver if it is still null.
        if (resultSchema == null) {
            resultSchema = convertRowSchemaToResultSetSchema(opParseCtx.get(sinkOp).getRowResolver(), HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
        }
    }
    // 4. Generate Parse Context for Optimizer & Physical compiler
    copyInfoToQueryProperties(queryProperties);
    ParseContext pCtx = new ParseContext(queryState, opToPartPruner, opToPartList, topOps,
            new HashSet<JoinOperator>(joinContext.keySet()),
            new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
            loadTableWork, loadFileWork, columnStatsAutoGatherContexts, ctx, idToTableNameMap,
            destTableId, uCtx, listMapJoinOpsNoReducer, prunedPartitions, tabNameToTabObject,
            opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
            opToPartToSkewedPruner, viewAliasToInput,
            reduceSinkOperatorsAddedByEnforceBucketingSorting, analyzeRewrite, tableDesc,
            createVwDesc, materializedViewUpdateDesc, queryProperties, viewProjectToTableSchema,
            acidFileSinks);
    // Set the semijoin hints in parse context
    pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
    // Set the mapjoin hint if it needs to be disabled.
    pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
    // 5. Take care of view creation
    if (createVwDesc != null) {
        if (ctx.getExplainAnalyze() == AnalyzeState.RUNNING) {
            return;
        }
        if (!ctx.isCboSucceeded()) {
            saveViewDefinition();
        }
        // validate the create view statement at this point; the createVwDesc has
        // all the information needed for the semantic check
        validateCreateView();
        if (createVwDesc.isMaterialized()) {
            createVwDesc.setTablesUsed(getTablesUsed(pCtx));
        } else {
            // Since we're only creating a view (not executing it), we don't need to
            // optimize or translate the plan (and in fact, those procedures can
            // interfere with the view creation). So skip the rest of this method.
            ctx.setResDir(null);
            ctx.setResFile(null);
            try {
                PlanUtils.addInputsForView(pCtx);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            // Generate lineage info for create view statements
            // if LineageLogger hook is configured.
            // Add the transformation that computes the lineage information.
            Set<String> postExecHooks = Sets.newHashSet(Splitter.on(",").trimResults().omitEmptyStrings().split(Strings.nullToEmpty(HiveConf.getVar(conf, HiveConf.ConfVars.POSTEXECHOOKS))));
            if (postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.PostExecutePrinter") || postExecHooks.contains("org.apache.hadoop.hive.ql.hooks.LineageLogger") || postExecHooks.contains("org.apache.atlas.hive.hook.HiveHook")) {
                ArrayList<Transform> transformations = new ArrayList<Transform>();
                transformations.add(new HiveOpConverterPostProc());
                transformations.add(new Generator(postExecHooks));
                for (Transform t : transformations) {
                    pCtx = t.transform(pCtx);
                }
                // A view has no real output location, so we just use the view name as the location.
                queryState.getLineageState().mapDirToOp(new Path(createVwDesc.getViewName()), sinkOp);
            }
            return;
        }
    }
    // 6. Generate table access stats if required
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_TABLEKEYS)) {
        TableAccessAnalyzer tableAccessAnalyzer = new TableAccessAnalyzer(pCtx);
        setTableAccessInfo(tableAccessAnalyzer.analyzeTableAccess());
    }
    // 7. Perform Logical optimization
    if (LOG.isDebugEnabled()) {
        LOG.debug("Before logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    Optimizer optm = new Optimizer();
    optm.setPctx(pCtx);
    optm.initialize(conf);
    pCtx = optm.optimize();
    if (pCtx.getColumnAccessInfo() != null) {
        // set ColumnAccessInfo for view column authorization
        setColumnAccessInfo(pCtx.getColumnAccessInfo());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("After logical optimization\n" + Operator.toString(pCtx.getTopOps().values()));
    }
    // 8. Generate column access stats if required - wait until column pruning
    // takes place during optimization
    boolean isColumnInfoNeedForAuth = SessionState.get().isAuthorizationModeV2() && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED);
    if (isColumnInfoNeedForAuth || HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        ColumnAccessAnalyzer columnAccessAnalyzer = new ColumnAccessAnalyzer(pCtx);
        // view column access info is carried by this.getColumnAccessInfo().
        setColumnAccessInfo(columnAccessAnalyzer.analyzeColumnAccess(this.getColumnAccessInfo()));
    }
    // 9. Optimize Physical op tree & Translate to target execution engine (MR, TEZ, ...)
    if (!ctx.getExplainLogical()) {
        TaskCompiler compiler = TaskCompilerFactory.getCompiler(conf, pCtx);
        compiler.init(queryState, console, db);
        compiler.compile(pCtx, rootTasks, inputs, outputs);
        fetchTask = pCtx.getFetchTask();
    }
    // find all ACID FileSinkOperators
    QueryPlanPostProcessor qp = new QueryPlanPostProcessor(rootTasks, acidFileSinks, ctx.getExecutionId());
    LOG.info("Completed plan generation");
    // 10. put accessed columns to readEntity
    if (HiveConf.getBoolVar(this.conf, HiveConf.ConfVars.HIVE_STATS_COLLECT_SCANCOLS)) {
        putAccessedColumnsToReadEntity(inputs, columnAccessInfo);
    }
    if (isCacheEnabled && lookupInfo != null) {
        if (queryCanBeCached()) {
            QueryResultsCache.QueryInfo queryInfo = createCacheQueryInfoForQuery(lookupInfo);
            // Specify that the results of this query can be cached.
            setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS, queryInfo));
        }
    }
}
Also used : AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) AbstractMapJoinOperator(org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) QueryPlanPostProcessor(org.apache.hadoop.hive.ql.optimizer.QueryPlanPostProcessor) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Optimizer(org.apache.hadoop.hive.ql.optimizer.Optimizer) ArrayList(java.util.ArrayList) QueryResultsCache(org.apache.hadoop.hive.ql.cache.results.QueryResultsCache) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) HiveOpConverterPostProc(org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage) Path(org.apache.hadoop.fs.Path) Transform(org.apache.hadoop.hive.ql.optimizer.Transform) Generator(org.apache.hadoop.hive.ql.optimizer.lineage.Generator)
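
Distilled from analyzeInternal above, the results-cache logic follows one flow: build a lookup key from the AST, try to serve the query from the cache (before CBO when no masking/filtering is needed, otherwise after the masking rewrite), and if there is no hit, compile normally and mark the query as cacheable. Below is a simplified outline of that flow with the surrounding compilation steps elided; it only rearranges calls that appear in the example above.

// Simplified outline of the cache flow in analyzeInternal (details elided).
boolean cacheEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_QUERY_RESULTS_CACHE_ENABLED);
QueryResultsCache.LookupInfo lookupInfo = null;

if (cacheEnabled && !needsTransform() && queryTypeCanUseCache()) {
    lookupInfo = createLookupInfoForQuery(ast);
    if (checkResultsCache(lookupInfo)) {
        return;                       // cache hit: the plan now fetches from the cache
    }
}
// ... generate operator tree, apply masking/filtering rewrite if required ...
if (cacheEnabled && needsTransform() && queryTypeCanUseCache()) {
    lookupInfo = createLookupInfoForQuery(ast);
    if (checkResultsCache(lookupInfo)) {
        return;                       // cache hit after the masking/filtering rewrite
    }
}
// ... logical optimization and task compilation ...
if (cacheEnabled && lookupInfo != null && queryCanBeCached()) {
    // No hit, but the query qualifies: record that its results may be cached.
    setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.CAN_CACHE_QUERY_RESULTS,
            createCacheQueryInfoForQuery(lookupInfo)));
}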

Example 4 with CacheUsage

use of org.apache.hadoop.hive.ql.cache.results.CacheUsage in project hive by apache.

the class SemanticAnalyzer method useCachedResult.

/**
 * Set the query plan to use the cache entry passed in to return the query results.
 * @param cacheEntry The results cache entry that will be used to resolve the query.
 */
private void useCachedResult(QueryResultsCache.CacheEntry cacheEntry, boolean needsReset) {
    if (needsReset) {
        reset(true);
        inputs.clear();
    }
    // Change query FetchTask to use new location specified in results cache.
    FetchTask fetchTask = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
    setFetchTask(fetchTask);
    queryState.setCommandType(cacheEntry.getQueryInfo().getHiveOperation());
    resultSchema = cacheEntry.getQueryInfo().getResultSchema();
    setTableAccessInfo(cacheEntry.getQueryInfo().getTableAccessInfo());
    setColumnAccessInfo(cacheEntry.getQueryInfo().getColumnAccessInfo());
    inputs.addAll(cacheEntry.getQueryInfo().getInputs());
    // Set recursive traversal in case the cached query was UNION generated by Tez.
    conf.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);
    // Indicate that the query will use a cached result.
    setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry));
}
Also used : FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage)
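
The needsReset flag in this overload matters because the cache check can happen at two points in analyzeInternal (Example 3): before the operator tree is generated, when nothing needs to be undone, or after the masking/filtering rewrite, when partially built analyzer state must be discarded before redirecting the plan to the cached FetchTask. A hypothetical caller shape is sketched below purely for illustration; lookupCacheEntry and the parameter wiring are assumptions, not the actual Hive call site.

// Hypothetical caller -- not the actual Hive code.
private boolean tryUseCachedResult(QueryResultsCache.LookupInfo lookupInfo,
        boolean opTreeAlreadyGenerated) {
    QueryResultsCache.CacheEntry entry = lookupCacheEntry(lookupInfo);  // assumed helper
    if (entry == null) {
        return false;
    }
    // If an operator tree was already generated, clear the partially built
    // analyzer state (see reset(true)/inputs.clear() above) before switching
    // the plan over to the cached results.
    useCachedResult(entry, /* needsReset */ opTreeAlreadyGenerated);
    return true;
}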

Example 5 with CacheUsage

use of org.apache.hadoop.hive.ql.cache.results.CacheUsage in project hive by apache.

the class Executor method useFetchFromCache.

private void useFetchFromCache(CacheEntry cacheEntry) {
    // Change query FetchTask to use new location specified in results cache.
    FetchTask fetchTaskFromCache = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
    fetchTaskFromCache.initialize(driverContext.getQueryState(), driverContext.getPlan(), null, context);
    driverContext.getPlan().setFetchTask(fetchTaskFromCache);
    driverContext.setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry));
}
Also used : FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage)

Aggregations

CacheUsage (org.apache.hadoop.hive.ql.cache.results.CacheUsage)6 FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask)5 ArrayList (java.util.ArrayList)2 Splitter (com.google.common.base.Splitter)1 Strings (com.google.common.base.Strings)1 ArrayListMultimap (com.google.common.collect.ArrayListMultimap)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Lists (com.google.common.collect.Lists)1 Multimap (com.google.common.collect.Multimap)1 Sets (com.google.common.collect.Sets)1 IntMath (com.google.common.math.IntMath)1 LongMath (com.google.common.math.LongMath)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 AccessControlException (java.security.AccessControlException)1 ArrayDeque (java.util.ArrayDeque)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 Deque (java.util.Deque)1