
Example 86 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class GenMapRedUtils method createMoveTask.

/**
 * Create and add any dependent move tasks.
 *
 * @param currTask the task whose output the move depends on
 * @param chDir whether to redirect the FileSinkOperator output to a temporary (merge input) directory
 * @param fsOp the FileSinkOperator whose output is being moved
 * @param parseCtx the parse context of the current query
 * @param mvTasks the move tasks already generated for this query
 * @param hconf the Hive configuration
 * @param dependencyTask optional dependency collection task that the chained move tasks should also depend on
 * @return the merge input directory when chDir is true, otherwise null
 */
public static Path createMoveTask(Task<?> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
    Path dest = null;
    FileSinkDesc fileSinkDesc = fsOp.getConf();
    boolean isMmTable = fileSinkDesc.isMmTable();
    boolean isDirectInsert = fileSinkDesc.isDirectInsert();
    if (chDir) {
        dest = fileSinkDesc.getMergeInputDirName();
        /**
         * Skip temporary file generation for:
         * 1. MM Tables
         * 2. INSERT operation on full ACID table
         */
        if (!isMmTable && !isDirectInsert) {
            // generate the temporary file
            // it must be on the same file system as the current destination
            Context baseCtx = parseCtx.getContext();
            // Create the required temporary file in the HDFS location if the destination
            // path of the FileSinkOperator table is a blobstore path.
            Path tmpDir = baseCtx.getTempDirForFinalJobPath(fileSinkDesc.getDestPath());
            // Change all the linked file sink descriptors
            if (fileSinkDesc.isLinkedFileSink()) {
                for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
                    fsConf.setDirName(new Path(tmpDir, fsConf.getDirName().getName()));
                    if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                        Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir for LinkedFileSink chDir " + fsConf.getDirName() + "; dest was " + fileSinkDesc.getDestPath());
                    }
                }
            } else {
                fileSinkDesc.setDirName(tmpDir);
                if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) {
                    Utilities.FILE_OP_LOGGER.trace("createMoveTask setting tmpDir chDir " + tmpDir + "; dest was " + fileSinkDesc.getDestPath());
                }
            }
        }
    }
    Task<MoveWork> mvTask = null;
    if (!chDir) {
        mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fsOp.getConf().getFinalDirName(), isMmTable, isDirectInsert, fsOp.getConf().getMoveTaskId(), fsOp.getConf().getAcidOperation());
    }
    // Set the move task to be dependent on the current task
    if (mvTask != null) {
        GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
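A hedged call-site sketch of the two modes (variable names mirror the parameters above and are illustrative, not lifted from the actual caller in the Hive planner):

// chDir == true: redirect the FileSinkOperator to a temporary merge-input directory
// and keep the returned path so a merge task can later be planned over it.
Path mergeInputDir = GenMapRedUtils.createMoveTask(currTask, true, fsOp, parseCtx, mvTasks, hconf, dependencyTask);
// chDir == false: no redirection; any matching MoveTask from mvTasks is simply made
// dependent on currTask, and the method returns null.
GenMapRedUtils.createMoveTask(currTask, false, fsOp, parseCtx, mvTasks, hconf, dependencyTask);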

Example 87 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class ColumnStatsAutoGatherContext method genSelOp.

private Operator genSelOp(String command, boolean rewritten, Context origCtx) throws ParseException, SemanticException {
    // 1. initialization
    Context ctx = new Context(conf);
    origCtx.addSubContext(ctx);
    ctx.setOpContext(origCtx.getOpContext());
    ctx.setExplainConfig(origCtx.getExplainConfig());
    // 2. parse tree and create semantic analyzer. if we need to rewrite the analyze
    // statement, we do it now
    final ASTNode ast;
    final SemanticAnalyzer sem;
    final QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
    if (rewritten) {
        // Create the context object that is needed to store the column stats
        this.analyzeRewrite = ColumnStatsSemanticAnalyzer.genAnalyzeRewriteContext(conf, tbl);
        // The analyze statement has already been rewritten, we just need to create the AST
        // and the corresponding semantic analyzer
        ast = ParseUtils.parse(command, ctx);
        BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, ast);
        sem = (SemanticAnalyzer) baseSem;
    } else {
        // We need to rewrite the analyze command and get the rewritten AST
        ASTNode analyzeTree = ParseUtils.parse(command, ctx);
        BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, analyzeTree);
        ColumnStatsSemanticAnalyzer colSem = (ColumnStatsSemanticAnalyzer) baseSem;
        ast = colSem.rewriteAST(analyzeTree, this);
        // Obtain the context object that is needed to store the column stats
        this.analyzeRewrite = colSem.getAnalyzeRewriteContext();
        // Analyze the rewritten statement
        baseSem = SemanticAnalyzerFactory.get(queryState, ast);
        sem = (SemanticAnalyzer) baseSem;
    }
    QB qb = new QB(null, null, false);
    ASTNode child = ast;
    ParseContext subPCtx = sem.getParseContext();
    subPCtx.setContext(ctx);
    sem.initParseCtx(subPCtx);
    sem.doPhase1(child, qb, sem.initPhase1Ctx(), null);
    // This will trigger new calls to metastore to collect metadata
    // TODO: cache the information from the metastore
    sem.getMetaData(qb);
    sem.genPlan(qb);
    // 3. populate the load file work so that ColumnStatsTask can work
    loadFileWork.addAll(sem.getLoadFileWork());
    // 4. because there is only one TS (table scan) for the analyze statement, we can get it directly.
    if (sem.topOps.values().size() != 1) {
        throw new SemanticException("ColumnStatsAutoGatherContext is expecting exactly one TS, but finds " + sem.topOps.values().size());
    }
    Operator<?> operator = sem.topOps.values().iterator().next();
    // for the rewritten statement, descend to the operator below the UDTF; otherwise, get the first SEL after TS
    if (rewritten) {
        while (!(operator instanceof UDTFOperator)) {
            operator = operator.getChildOperators().get(0);
        }
        operator = operator.getChildOperators().get(0);
    } else {
        while (!(operator instanceof SelectOperator)) {
            operator = operator.getChildOperators().get(0);
        }
    }
    return operator;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) UDTFOperator(org.apache.hadoop.hive.ql.exec.UDTFOperator) QueryState(org.apache.hadoop.hive.ql.QueryState) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator)
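For orientation, the operator shapes the two branches expect when walking down from the single TS (an assumption inferred from the walk logic above, not from Hive documentation):

// rewritten == true  : TS -> ... -> UDTF -> <returned operator>  (the first child below the UDTF)
// rewritten == false : TS -> ... -> SEL                          (the first SelectOperator itself is returned)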

Example 88 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class MapReduceCompiler method decideExecMode.

@Override
protected void decideExecMode(List<Task<?>> rootTasks, Context ctx, GlobalLimitCtx globalLimitCtx) throws SemanticException {
    // bypass for explain queries for now
    if (ctx.isExplainSkipExecution()) {
        return;
    }
    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }
    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {

        @Override
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
                // 
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + "," + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: " + estimatedInput);
            }
            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput, inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }
    if (!hasNonLocalJob) {
        // Entire query can be run locally.
        // Save the current tracker value and restore it when done.
        ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
        ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
        console.printInfo("Automatically selecting local only mode for query");
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) GenMRProcContext(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext) Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) ContentSummary(org.apache.hadoop.fs.ContentSummary) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) IOException(java.io.IOException)
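To make the limit-based estimate concrete, here is a small worked calculation with made-up inputs; the two config values stand in for HIVELIMITMAXROWSIZE and MAPREDMINSPLITSIZE and are assumptions, not Hive defaults:

public class LocalModeEstimateSketch {
    public static void main(String[] args) {
        long globalOffset = 0, globalLimit = 100;       // the query had LIMIT 100
        long sizePerRow = 10_000;                       // hypothetical HIVELIMITMAXROWSIZE
        long inputLength = 1_000_000_000L;              // bytes reported by getInputSummary
        long minSplitSize = 128L * 1024 * 1024;         // hypothetical MAPREDMINSPLITSIZE

        long estimatedInput = (globalOffset + globalLimit) * sizePerRow; // 1,000,000
        long estimatedNumMap = inputLength / minSplitSize + 1;           // 7 + 1 = 8
        estimatedInput = estimatedInput * (estimatedNumMap + 1);         // 9,000,000 bytes

        // A ~9 MB estimate is far below the ~1 GB raw input, so the job would
        // usually pass the isEligibleForLocalMode check and be marked local.
        System.out.println("estimatedInput = " + estimatedInput);
    }
}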

Example 89 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class ExplainSemanticAnalyzer method analyzeInternal.

@Override
public void analyzeInternal(ASTNode ast) throws SemanticException {
    final int childCount = ast.getChildCount();
    // Skip TOK_QUERY.
    int i = 1;
    while (i < childCount) {
        int explainOptions = ast.getChild(i).getType();
        if (explainOptions == HiveParser.KW_FORMATTED) {
            config.setFormatted(true);
        } else if (explainOptions == HiveParser.KW_EXTENDED) {
            config.setExtended(true);
        } else if (explainOptions == HiveParser.KW_DEPENDENCY) {
            config.setDependency(true);
        } else if (explainOptions == HiveParser.KW_CBO) {
            config.setCbo(true);
        } else if (explainOptions == HiveParser.KW_COST) {
            config.setCboCost(true);
        } else if (explainOptions == HiveParser.KW_JOINCOST) {
            config.setCboJoinCost(true);
        } else if (explainOptions == HiveParser.KW_LOGICAL) {
            config.setLogical(true);
        } else if (explainOptions == HiveParser.KW_AUTHORIZATION) {
            config.setAuthorize(true);
        } else if (explainOptions == HiveParser.KW_ANALYZE) {
            config.setAnalyze(AnalyzeState.RUNNING);
            config.setExplainRootPath(ctx.getMRTmpPath());
        } else if (explainOptions == HiveParser.KW_VECTORIZATION) {
            config.setVectorization(true);
            if (i + 1 < childCount) {
                int vectorizationOption = ast.getChild(i + 1).getType();
                // [ONLY]
                if (vectorizationOption == HiveParser.TOK_ONLY) {
                    config.setVectorizationOnly(true);
                    i++;
                    if (i + 1 >= childCount) {
                        break;
                    }
                    vectorizationOption = ast.getChild(i + 1).getType();
                }
                // [SUMMARY|OPERATOR|EXPRESSION|DETAIL]
                if (vectorizationOption == HiveParser.TOK_SUMMARY) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.SUMMARY);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_OPERATOR) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.OPERATOR);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_EXPRESSION) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.EXPRESSION);
                    i++;
                } else if (vectorizationOption == HiveParser.TOK_DETAIL) {
                    config.setVectorizationDetailLevel(VectorizationDetailLevel.DETAIL);
                    i++;
                }
            }
        } else if (explainOptions == HiveParser.KW_LOCKS) {
            config.setLocks(true);
        } else if (explainOptions == HiveParser.KW_AST) {
            config.setAst(true);
        } else if (explainOptions == HiveParser.KW_DEBUG) {
            config.setDebug(true);
        } else if (explainOptions == HiveParser.KW_DDL) {
            config.setDDL(true);
            config.setCbo(true);
            config.setVectorization(true);
        } else {
        // UNDONE: UNKNOWN OPTION?
        }
        i++;
    }
    ctx.setExplainConfig(config);
    ctx.setExplainPlan(true);
    ASTNode input = (ASTNode) ast.getChild(0);
    // step 2 (ANALYZE_STATE.ANALYZING), explain the query and provide the runtime #rows collected.
    if (config.getAnalyze() == AnalyzeState.RUNNING) {
        String query = ctx.getTokenRewriteStream().toString(input.getTokenStartIndex(), input.getTokenStopIndex());
        LOG.info("Explain analyze (running phase) for query " + query);
        conf.unset(ValidTxnList.VALID_TXNS_KEY);
        conf.unset(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
        Context runCtx = null;
        try {
            runCtx = new Context(conf);
            // runCtx and ctx share the configuration, but not isExplainPlan()
            runCtx.setExplainConfig(config);
            try (Driver driver = new Driver(conf, runCtx, queryState.getLineageState())) {
                driver.run(query);
                while (driver.getResults(new ArrayList<String>())) {
                }
            } catch (CommandProcessorException e) {
                if (e.getCause() instanceof ReCompileException) {
                    throw (ReCompileException) e.getCause();
                } else {
                    throw new SemanticException(e.getMessage(), e);
                }
            }
            config.setOpIdToRuntimeNumRows(aggregateStats(config.getExplainRootPath()));
        } catch (IOException e1) {
            throw new SemanticException(e1);
        }
        ctx.resetOpContext();
        ctx.resetStream();
        TaskFactory.resetId();
        LOG.info("Explain analyze (analyzing phase) for query " + query);
        config.setAnalyze(AnalyzeState.ANALYZING);
    }
    // Creating new QueryState unfortunately causes all .q.out to change - do this in a separate ticket
    // Sharing QueryState between generating the plan and executing the query seems bad
    // BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(new QueryState(queryState.getConf()), input);
    BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(queryState, input);
    sem.analyze(input, ctx);
    sem.validate();
    inputs = sem.getInputs();
    outputs = sem.getOutputs();
    ctx.setResFile(ctx.getLocalTmpPath());
    List<Task<?>> tasks = sem.getAllRootTasks();
    if (tasks == null) {
        tasks = Collections.emptyList();
    }
    FetchTask fetchTask = sem.getFetchTask();
    if (fetchTask != null) {
        // Initialize fetch work such that operator tree will be constructed.
        fetchTask.getWork().initializeForFetch(ctx.getOpContext());
    }
    if (sem instanceof SemanticAnalyzer) {
        pCtx = sem.getParseContext();
    }
    config.setUserLevelExplain(!config.isExtended() && !config.isFormatted() && !config.isDependency() && !config.isCbo() && !config.isLogical() && !config.isVectorization() && !config.isAuthorize() && ((HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_EXPLAIN_USER) && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) || (HiveConf.getBoolVar(ctx.getConf(), HiveConf.ConfVars.HIVE_SPARK_EXPLAIN_USER) && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark"))));
    ExplainWork work = new ExplainWork(ctx.getResFile(), pCtx, tasks, fetchTask, input, sem, config, ctx.getCboInfo(), ctx.getOptimizedSql(), ctx.getCalcitePlan());
    work.setAppendTaskType(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES));
    ExplainTask explTask = (ExplainTask) TaskFactory.get(work);
    fieldList = ExplainTask.getResultSchema();
    rootTasks.add(explTask);
}
Also used : StatsCollectionContext(org.apache.hadoop.hive.ql.stats.StatsCollectionContext) Context(org.apache.hadoop.hive.ql.Context) Task(org.apache.hadoop.hive.ql.exec.Task) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) ExplainTask(org.apache.hadoop.hive.ql.exec.ExplainTask) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) Driver(org.apache.hadoop.hive.ql.Driver) ExplainWork(org.apache.hadoop.hive.ql.plan.ExplainWork) ReCompileException(org.apache.hadoop.hive.ql.reexec.ReCompileException) IOException(java.io.IOException)
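Each branch above corresponds to one keyword in the EXPLAIN grammar. A few illustrative statements (not taken from Hive's test suite) and the branch each would reach:

public class ExplainVariantsSketch {
    // Hypothetical EXPLAIN statements; each comment names the option branch in
    // analyzeInternal that the statement's AST children would hit.
    static final String[] EXPLAIN_VARIANTS = {
        "EXPLAIN FORMATTED SELECT key FROM src",                        // KW_FORMATTED
        "EXPLAIN DEPENDENCY SELECT key FROM src",                       // KW_DEPENDENCY
        "EXPLAIN AUTHORIZATION SELECT key FROM src",                    // KW_AUTHORIZATION
        "EXPLAIN VECTORIZATION ONLY SUMMARY SELECT count(1) FROM src",  // KW_VECTORIZATION + TOK_ONLY + TOK_SUMMARY
        "EXPLAIN ANALYZE SELECT key FROM src"                           // KW_ANALYZE -> AnalyzeState.RUNNING, the query is executed first
    };
}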

Example 90 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class TestDummyTxnManager method setUp.

@Before
public void setUp() throws Exception {
    conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
    conf.setVar(HiveConf.ConfVars.HIVE_TXN_MANAGER, DummyTxnManager.class.getName());
    conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    SessionState.start(conf);
    ctx = new Context(conf);
    txnMgr = TxnManagerFactory.getTxnManagerFactory().getTxnManager(conf);
    Assert.assertTrue(txnMgr instanceof DummyTxnManager);
    // Use reflection to set the lock manager field directly, since DummyTxnManager
    // creates its lock manager via reflection and would not pick up the mocked object
    Field field = DummyTxnManager.class.getDeclaredField("lockMgr");
    field.setAccessible(true);
    field.set(txnMgr, mockLockManager);
    Field field2 = DummyTxnManager.class.getDeclaredField("lockManagerCtx");
    field2.setAccessible(true);
    field2.set(txnMgr, mockLockManagerCtx);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) Field(java.lang.reflect.Field) Before(org.junit.Before)
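The mockLockManager and mockLockManagerCtx fields referenced above are declared elsewhere in the test class. A plausible surrounding fixture is sketched below; the Mockito annotations and field types are assumptions based on the field names, not a copy of the actual Hive test:

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;

@RunWith(MockitoJUnitRunner.class)
public class TestDummyTxnManager {
    private final HiveConf conf = new HiveConf();
    private Context ctx;
    private HiveTxnManager txnMgr;

    @Mock
    private HiveLockManager mockLockManager;       // pushed into DummyTxnManager.lockMgr via reflection in setUp()
    @Mock
    private HiveLockManagerCtx mockLockManagerCtx; // pushed into DummyTxnManager.lockManagerCtx in setUp()

    // setUp() as shown above
}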

Aggregations

Context (org.apache.hadoop.hive.ql.Context): 103
Path (org.apache.hadoop.fs.Path): 45
IOException (java.io.IOException): 26
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 20
Test (org.junit.Test): 19
FileSystem (org.apache.hadoop.fs.FileSystem): 16
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 16
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 15
HashMap (java.util.HashMap): 13
HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager): 13
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 13
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 13
ArrayList (java.util.ArrayList): 12
Task (org.apache.hadoop.hive.ql.exec.Task): 12
Table (org.apache.hadoop.hive.ql.metadata.Table): 12
JobConf (org.apache.hadoop.mapred.JobConf): 12
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 9
QueryState (org.apache.hadoop.hive.ql.QueryState): 8
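Across these examples the recurring entry point is constructing a Context directly from a HiveConf and then handing it to the parser, planner, or transaction manager. A minimal sketch of that construction pattern, assuming a default configuration and an initialized SessionState (most of the calls below appear verbatim in the snippets above):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.session.SessionState;

public class ContextConstructionSketch {
    public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        SessionState.start(conf);               // as in TestDummyTxnManager.setUp
        Context ctx = new Context(conf);        // the constructor used in Examples 87, 89 and 90
        Path localTmp = ctx.getLocalTmpPath();  // scratch locations handed out by the Context,
        Path mrTmp = ctx.getMRTmpPath();        // as in ExplainSemanticAnalyzer.analyzeInternal
        System.out.println(localTmp + " , " + mrTmp);
    }
}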