Example 51 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

From the class ColumnStatsAutoGatherContext, method genSelOpForAnalyze:

@SuppressWarnings("rawtypes")
private Operator genSelOpForAnalyze(String analyzeCommand, Context origCtx) throws IOException, ParseException, SemanticException {
    // 0. initialization
    Context ctx = new Context(conf);
    ctx.setExplainConfig(origCtx.getExplainConfig());
    ASTNode tree = ParseUtils.parse(analyzeCommand, ctx);
    // 1. get the ColumnStatsSemanticAnalyzer
    QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
    BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, tree);
    ColumnStatsSemanticAnalyzer colSem = (ColumnStatsSemanticAnalyzer) baseSem;
    // 2. get the rewritten AST
    ASTNode ast = colSem.rewriteAST(tree, this);
    baseSem = SemanticAnalyzerFactory.get(queryState, ast);
    SemanticAnalyzer sem = (SemanticAnalyzer) baseSem;
    QB qb = new QB(null, null, false);
    ASTNode child = ast;
    ParseContext subPCtx = sem.getParseContext();
    subPCtx.setContext(ctx);
    sem.initParseCtx(subPCtx);
    sem.doPhase1(child, qb, sem.initPhase1Ctx(), null);
    // This will trigger new calls to metastore to collect metadata
    // TODO: cache the information from the metastore
    sem.getMetaData(qb);
    Operator<?> operator = sem.genPlan(qb);
    // 3. populate the load file work so that ColumnStatsTask can work
    loadFileWork.addAll(sem.getLoadFileWork());
    // 4. The ANALYZE statement has exactly one TableScan (TS), so fetch it directly.
    if (sem.topOps.values().size() != 1) {
        throw new SemanticException("ColumnStatsAutoGatherContext is expecting exactly one TS, but finds " + sem.topOps.values().size());
    }
    operator = sem.topOps.values().iterator().next();
    // 5. get the first SEL after TS
    while (!(operator instanceof SelectOperator)) {
        operator = operator.getChildOperators().get(0);
    }
    return operator;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) QueryState(org.apache.hadoop.hive.ql.QueryState)
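
The core pattern above in isolation: build a fresh Context from the HiveConf, parse the generated command into an AST, and let the factory pick a matching semantic analyzer. A minimal sketch assuming a configured HiveConf named conf is in scope; the ANALYZE command string is a hypothetical example.

Context ctx = new Context(conf);
// Parse the command text into an AST (the command string here is hypothetical).
ASTNode tree = ParseUtils.parse("analyze table t compute statistics for columns", ctx);
// A QueryState carries the conf into semantic analysis.
QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
// The factory returns the analyzer matching the statement type.
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, tree);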

Example 52 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

From the class ExplainSemanticAnalyzer, method analyzeInternal:

@SuppressWarnings("unchecked")
@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
    final int childCount = ast.getChildCount();
    // Child 0 is the statement being explained; children 1..n are EXPLAIN options.
    int i = 1;
    while (i < childCount) {
        int explainOptions = ast.getChild(i).getType();
        if (explainOptions == HiveParser.KW_FORMATTED) {
            config.setFormatted(true);
        } else if (explainOptions == HiveParser.KW_EXTENDED) {
            config.setExtended(true);
        } else if (explainOptions == HiveParser.KW_DEPENDENCY) {
            config.setDependency(true);
        } else if (explainOptions == HiveParser.KW_LOGICAL) {
            config.setLogical(true);
        } else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
            config.setAuthorize(true);
        } else if (explainOptions == HiveParser.KW_ANALYZE) {
            config.setAnalyze(AnalyzeState.RUNNING);
            config.setExplainRootPath(ctx.getMRTmpPath());
        } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
            config.setVectorization(true);
            if (i + 1 < childCount) {
                int vectorizationOption = ast.getChild(i + 1).getType();
                // [ONLY]
                if (vectorizationOption == HiveParser.TOK_ONLY) {
                    config.setVectorizationOnly(true);
                    i++;
                    if (i + 1 >= childCount) {
                        break;
                    }
                    vectorizationOption = ast.getChild(i + 1).getType();
                }
                // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
                if (vectorizationOption == HiveParser.TOK_SUMMARY) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
                    i++;
                }
            }
        } else {
        // UNDONE: UNKNOWN OPTION?
        }
        i++;
    }
    ctx.setExplainConfig(config);
    ctx.setExplainPlan(true);
    ASTNode input = (ASTNode) ast.getChild(0);
    // step 2 (ANALYZE_STATE.ANALYZING), explain the query and provide the runtime #rows collected.
    if (config.getAnalyze() == AnalyzeState.RUNNING) {
        String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(), input.getTokenStopIndex());
        LOG.info("Explain analyze (running phase) for query " + query);
        Context runCtx = null;
        try {
            runCtx = new Context(conf);
            // runCtx and ctx share the configuration, but not isExplainPlan()
            runCtx.setExplainConfig(config);
            Driver driver = new Driver(conf, runCtx, queryState.getLineageState());
            CommandProcessorResponse ret = driver.run(query);
            if (ret.getResponseCode() == 0) {
                // The query succeeded; drain and discard all result rows.
                while (driver.getResults(new ArrayList<String>())) {
                }
            } else {
                throw new SemanticException(ret.getErrorMessage(), ret.getException());
            }
            config.setOpIdToRuntimeNumRows(aggregateStats(config.getExplainRootPath()));
        } catch (IOException e1) {
            throw new SemanticException(e1);
        }
        ctx.resetOpContext();
        ctx.resetStream();
        TaskFactory.resetId();
        LOG.info("Explain analyze (analyzing phase) for query " + query);
        config.setAnalyze(AnalyzeState.ANALYZING);
    }
    // Creating new QueryState unfortunately causes all .q.out to change - do this in a separate ticket
    // Sharing QueryState between generating the plan and executing the query seems bad
    // BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(new QueryState(queryState.getConf()), input);
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, input);
    sem.analyze(input, ctx);
    sem.validate();
    ctx.setResFile(ctx.getLocalTmpPath());
    List<Task<? extends Serializable>> tasks = sem.getAllRootTasks();
    if (tasks == null) {
        tasks = Collections.emptyList();
    }
    FetchTask fetchTask = sem.getFetchTask();
    if (fetchTask != null) {
        // Initialize fetch work such that operator tree will be constructed.
        fetchTask.getWork().initializeForFetch(ctx.getOpContext());
    }
    ParseContext pCtx = null;
    if (sem instanceof SemanticAnalyzer) {
        pCtx = ((SemanticAnalyzer) sem).getParseContext();
    }
    config.setUserLevelExplain(!config.isExtended()
            && !config.isFormatted()
            && !config.isDependency()
            && !config.isLogical()
            && !config.isAuthorize()
            && ((HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_EXPLAIN_USER)
                    && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))
                || (HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_SPARK_EXPLAIN_USER)
                    && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"))));
    ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, fetchTask, sem, config, ctx.getCboInfo());
    work.setAppendTaskType(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
    ExplainTask explTask = (ExplainTask) TaskFactory.get(work);
    fieldList = explTask.getResultSchema();
    rootTasks.add(explTask);
}
Also used : StatsCollectionContext (org.apache.hadoop.hive.ql.stats.StatsCollectionContext) Context (org.apache.hadoop.hive.ql.Context) Task (org.apache.hadoop.hive.ql.exec.Task) FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask) ExplainTask (org.apache.hadoop.hive.ql.exec.ExplainTask) Serializable (java.io.Serializable) CommandProcessorResponse (org.apache.hadoop.hive.ql.processors.CommandProcessorResponse) Driver (org.apache.hadoop.hive.ql.Driver) ExplainWork (org.apache.hadoop.hive.ql.plan.ExplainWork) IOException (java.io.IOException)
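
The running phase above reduces to one reusable pattern: execute the query on a private Context through the Driver, fail on a non-zero response code, then drain the results. A minimal sketch assuming conf, config, queryState, and a query string are in scope, exactly as in the method above.

Context runCtx = new Context(conf);
// runCtx shares the configuration with the outer context but gets its own explain config.
runCtx.setExplainConfig(config);
Driver driver = new Driver(conf, runCtx, queryState.getLineageState());
CommandProcessorResponse ret = driver.run(query);
if (ret.getResponseCode() != 0) {
    throw new SemanticException(ret.getErrorMessage(), ret.getException());
}
// Drain and discard the result rows; only the collected runtime stats matter here.
while (driver.getResults(new ArrayList<String>())) {
}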

Example 53 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

From the class MapReduceCompiler, method decideExecMode:

@Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx, GlobalLimitCtx globalLimitCtx) throws SemanticException {
    // bypass for explain queries for now
    if (ctx.isExplainSkipExecution()) {
        return;
    }
    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }
    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {

        @Override
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, estimate the input
                // data actually needed from the limit rows:
                // estimatedInput = (globalOffset + globalLimit) * sizePerRow * (estimatedNumMap + 1)
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + "," + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: " + estimatedInput);
            }
            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput, inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }
    if (!hasNonLocalJob) {
        // Entire query can be run locally.
        // Save the current tracker value and restore it when done.
        ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
        ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
        console.printInfo("Automatically selecting local only mode for query");
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) GenMRProcContext(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext) Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) ContentSummary(org.apache.hadoop.fs.ContentSummary) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) IOException(java.io.IOException)
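
The per-task eligibility check, extracted from the loop above. A sketch assuming ctx, conf, and an ExecDriver mrtask are in scope, and that getNumberOfReducers is the helper used by the class above; note that isEligibleForLocalMode returns null when nothing disqualifies the task from local mode.

// Exclude Hive's own MR scratch files from the input size estimate.
PathFilter nonTmpFilter = new PathFilter() {
    @Override
    public boolean accept(Path file) {
        return !ctx.isMRTmpFileURI(file.toUri().getPath());
    }
};
ContentSummary summary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), nonTmpFilter);
int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
// A null return means no blocker was found, so the task may run locally.
if (MapRedTask.isEligibleForLocalMode(conf, numReducers, summary.getLength(), summary.getFileCount()) == null) {
    mrtask.setLocalMode(true);
}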

Example 54 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

From the class GenSparkUtils, method createMoveTask:

/**
 * Create and add any dependent move tasks.
 *
 * This is forked from {@link GenMapRedUtils}. The difference is that it doesn't check
 * 'isLinkedFileSink' and does not set parent dir for the linked file sinks.
 */
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
    Path dest = null;
    FileSinkDesc fileSinkDesc = fsOp.getConf();
    if (chDir) {
        dest = fsOp.getConf().getFinalDirName();
        // generate the temporary file
        // it must be on the same file system as the current destination
        Context baseCtx = parseCtx.getContext();
        Path tmpDir = baseCtx.getExternalTmpPath(dest);
        // Change all the linked file sink descriptors
        if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
            for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
                fsConf.setDirName(tmpDir);
            }
        } else {
            fileSinkDesc.setDirName(tmpDir);
        }
    }
    Task<MoveWork> mvTask = null;
    if (!chDir) {
        mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false);
    }
    // Set the move task to be dependent on the current task
    if (mvTask != null) {
        GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
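
The key move in the chDir branch above: stage the file sink's output in a temporary directory on the same file system as its final destination. A minimal sketch using only calls that appear in the method; fsOp and parseCtx are assumed to be in scope.

Context baseCtx = parseCtx.getContext();
Path finalDir = fsOp.getConf().getFinalDirName();
// getExternalTmpPath places the temporary directory on the same file system as finalDir.
Path tmpDir = baseCtx.getExternalTmpPath(finalDir);
fsOp.getConf().setDirName(tmpDir);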

Example 55 with Context

Use of org.apache.hadoop.hive.ql.Context in project hive by apache.

From the class TestDbTxnManager2, method setUp:

@Before
public void setUp() throws Exception {
    SessionState.start(conf);
    ctx = new Context(conf);
    driver = new Driver(new QueryState.Builder().withHiveConf(conf).nonIsolated().build(), null);
    TxnDbUtil.cleanDb(conf);
    TxnDbUtil.prepDb(conf);
    SessionState ss = SessionState.get();
    ss.initTxnMgr(conf);
    txnMgr = ss.getTxnMgr();
    Assert.assertTrue(txnMgr instanceof DbTxnManager);
    txnHandler = TxnUtils.getTxnStore(conf);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) SessionState(org.apache.hadoop.hive.ql.session.SessionState) Driver(org.apache.hadoop.hive.ql.Driver) QueryState(org.apache.hadoop.hive.ql.QueryState) Before(org.junit.Before)
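
A hedged sketch of a test that could follow this setup: issue a statement through the Driver and assert success. The test name and DDL string are hypothetical; driver.run and getResponseCode are used exactly as in Example 52.

@Test
public void testCreateTable() throws Exception {
    // Hypothetical statement; any command accepted by the Driver would do.
    CommandProcessorResponse ret = driver.run("create table t (a int)");
    Assert.assertEquals("statement should succeed", 0, ret.getResponseCode());
}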

Aggregations

Context (org.apache.hadoop.hive.ql.Context): 57
Path (org.apache.hadoop.fs.Path): 25
IOException (java.io.IOException): 19
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 16
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 14
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 12
FileSystem (org.apache.hadoop.fs.FileSystem): 11
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 10
JobConf (org.apache.hadoop.mapred.JobConf): 10
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 9
Serializable (java.io.Serializable): 8
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 8
Test (org.junit.Test): 8
Task (org.apache.hadoop.hive.ql.exec.Task): 7
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 7
ArrayList (java.util.ArrayList): 6
Table (org.apache.hadoop.hive.ql.metadata.Table): 6
DAG (org.apache.tez.dag.api.DAG): 6
HashMap (java.util.HashMap): 5
LinkedHashMap (java.util.LinkedHashMap): 5