Example 66 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class DDLTask method unlockTable.

/**
 * Unlock the table/partition specified
 * @param db
 *          the Hive object used to interact with the metastore
 *
 * @param unlockTbl
 *          the table/partition to be unlocked
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 */
private int unlockTable(Hive db, UnlockTableDesc unlockTbl) throws HiveException {
    Context ctx = driverContext.getCtx();
    HiveTxnManager txnManager = ctx.getHiveTxnManager();
    return txnManager.unlockTable(db, unlockTbl);
}
Also used : EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Context(org.apache.hadoop.hive.ql.Context) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) DriverContext(org.apache.hadoop.hive.ql.DriverContext) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager)
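
A minimal sketch of how the DDL path above could be driven. The table name is illustrative and the UnlockTableDesc constructor arguments are an assumption, not taken from the example; the classes are those listed in the "Also used" line.

// Hypothetical call site inside DDLTask.execute(); "default.web_logs" is an illustrative table name.
UnlockTableDesc unlockTbl = new UnlockTableDesc("default.web_logs", null); // assumed (tableName, partSpec) constructor; null means the whole table
int rc = unlockTable(db, unlockTbl); // delegates to HiveTxnManager.unlockTable(); 0 on success, > 0 on failure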

Example 67 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class UpdateDeleteSemanticAnalyzer method parseRewrittenQuery.

/**
 * Parse the newly generated SQL statement to get a new AST
 */
private ReparseResult parseRewrittenQuery(StringBuilder rewrittenQueryStr, String originalQuery) throws SemanticException {
    // Set dynamic partitioning to nonstrict so that queries do not need any partition
    // references.
    // todo: this may be a perf issue as it prevents the optimizer.. or not
    HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    // Parse the rewritten query string
    Context rewrittenCtx;
    try {
        rewrittenCtx = new Context(conf);
        // We keep track of all the contexts that are created by this query
        // so we can clear them when we finish execution
        ctx.addRewrittenStatementContext(rewrittenCtx);
    } catch (IOException e) {
        throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg());
    }
    rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
    rewrittenCtx.setIsUpdateDeleteMerge(true);
    rewrittenCtx.setCmd(rewrittenQueryStr.toString());
    ASTNode rewrittenTree;
    try {
        LOG.info("Going to reparse <" + originalQuery + "> as \n<" + rewrittenQueryStr.toString() + ">");
        rewrittenTree = ParseUtils.parse(rewrittenQueryStr.toString(), rewrittenCtx);
    } catch (ParseException e) {
        throw new SemanticException(ErrorMsg.UPDATEDELETE_PARSE_ERROR.getMsg(), e);
    }
    return new ReparseResult(rewrittenTree, rewrittenCtx);
}
Also used : Context(org.apache.hadoop.hive.ql.Context) IOException(java.io.IOException)
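
Condensed, the reparse step above amounts to the following sketch; the rewritten query string is illustrative, and the calls are the same ones used in the method body.

HiveConf conf = new HiveConf();
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
Context rewrittenCtx = new Context(conf);                                      // may throw IOException
rewrittenCtx.setCmd("insert into target select * from source");               // illustrative rewritten statement
ASTNode rewrittenTree = ParseUtils.parse(rewrittenCtx.getCmd(), rewrittenCtx); // may throw ParseException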

Example 68 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class ColumnStatsAutoGatherContext method genSelOpForAnalyze.

@SuppressWarnings("rawtypes")
private Operator genSelOpForAnalyze(String analyzeCommand, Context origCtx) throws IOException, ParseException, SemanticException {
    // 0. initialization
    Context ctx = new Context(conf);
    ctx.setExplainConfig(origCtx.getExplainConfig());
    ASTNode tree = ParseUtils.parse(analyzeCommand, ctx);
    // 1. get the ColumnStatsSemanticAnalyzer
    QueryState queryState = new QueryState.Builder().withHiveConf(conf).build();
    BaseSemanticAnalyzer baseSem = SemanticAnalyzerFactory.get(queryState, tree);
    ColumnStatsSemanticAnalyzer colSem = (ColumnStatsSemanticAnalyzer) baseSem;
    // 2. get the rewritten AST
    ASTNode ast = colSem.rewriteAST(tree, this);
    baseSem = SemanticAnalyzerFactory.get(queryState, ast);
    SemanticAnalyzer sem = (SemanticAnalyzer) baseSem;
    QB qb = new QB(null, null, false);
    ASTNode child = ast;
    ParseContext subPCtx = ((SemanticAnalyzer) sem).getParseContext();
    subPCtx.setContext(ctx);
    ((SemanticAnalyzer) sem).initParseCtx(subPCtx);
    sem.doPhase1(child, qb, sem.initPhase1Ctx(), null);
    // This will trigger new calls to metastore to collect metadata
    // TODO: cache the information from the metastore
    sem.getMetaData(qb);
    Operator<?> operator = sem.genPlan(qb);
    // 3. populate the load file work so that ColumnStatsTask can work
    loadFileWork.addAll(sem.getLoadFileWork());
    // 4. because there is only one TS for analyze statement, we can get it.
    if (sem.topOps.values().size() != 1) {
        throw new SemanticException("ColumnStatsAutoGatherContext is expecting exactly one TS, but finds " + sem.topOps.values().size());
    }
    operator = sem.topOps.values().iterator().next();
    // 5. get the first SEL after TS
    while (!(operator instanceof SelectOperator)) {
        operator = operator.getChildOperators().get(0);
    }
    return operator;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) QueryState(org.apache.hadoop.hive.ql.QueryState)
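
A minimal sketch of how the helper above might be invoked; the analyze statement, table, and column names are illustrative, and the call site is an assumption.

// Hypothetical caller inside ColumnStatsAutoGatherContext; the statement follows the usual
// "analyze table ... compute statistics for columns ..." form.
String analyzeCommand = "analyze table web_logs compute statistics for columns ip, url";
Operator selOp = genSelOpForAnalyze(analyzeCommand, origCtx); // first SelectOperator below the single TableScan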

Example 69 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class MapReduceCompiler method decideExecMode.

@Override
protected void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx, GlobalLimitCtx globalLimitCtx) throws SemanticException {
    // bypass for explain queries for now
    if (ctx.isExplainSkipExecution()) {
        return;
    }
    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }
    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {

        @Override
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);
    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, mrtask.getWork().getMapWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);
            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
                // 
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = (globalLimitCtx.getGlobalOffset() + globalLimitCtx.getGlobalLimit()) * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + "," + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: " + estimatedInput);
            }
            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput, inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }
    if (!hasNonLocalJob) {
        // Entire query can be run locally.
        // Save the current tracker value and restore it when done.
        ctx.setOriginalTracker(ShimLoader.getHadoopShims().getJobLauncherRpcAddress(conf));
        ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, "local");
        console.printInfo("Automatically selecting local only mode for query");
    }
}
Also used : Context(org.apache.hadoop.hive.ql.Context) PhysicalContext(org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext) GenMRProcContext(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext) Path(org.apache.hadoop.fs.Path) PathFilter(org.apache.hadoop.fs.PathFilter) ContentSummary(org.apache.hadoop.fs.ContentSummary) ExecDriver(org.apache.hadoop.hive.ql.exec.mr.ExecDriver) IOException(java.io.IOException)
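
To make the size heuristic above concrete, a worked example with illustrative numbers (none of these values come from the Hive sources):

long sizePerRow      = 100_000L;                 // HIVELIMITMAXROWSIZE (illustrative)
long offsetPlusLimit = 10L;                      // globalOffset + globalLimit, e.g. LIMIT 10
long inputLength     = 1_000_000_000L;           // ContentSummary.getLength() for the job input
long minSplitSize    = 128L * 1024 * 1024;       // MAPREDMINSPLITSIZE (illustrative)
long estimatedNumMap = inputLength / minSplitSize + 1;                        // 7 + 1 = 8
long estimatedInput  = offsetPlusLimit * sizePerRow * (estimatedNumMap + 1);  // 9,000,000 bytes
// Roughly 9 MB of estimated input is small, so isEligibleForLocalMode would typically return null
// and the task would be marked setLocalMode(true).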

Example 70 with Context

use of org.apache.hadoop.hive.ql.Context in project hive by apache.

the class GenSparkUtils method createMoveTask.

/**
 * Create and add any dependent move tasks.
 *
 * This is forked from {@link GenMapRedUtils}. The difference is that it doesn't check
 * 'isLinkedFileSink' and does not set parent dir for the linked file sinks.
 */
public static Path createMoveTask(Task<? extends Serializable> currTask, boolean chDir, FileSinkOperator fsOp, ParseContext parseCtx, List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
    Path dest = null;
    FileSinkDesc fileSinkDesc = fsOp.getConf();
    if (chDir) {
        dest = fsOp.getConf().getFinalDirName();
        // generate the temporary file
        // it must be on the same file system as the current destination
        Context baseCtx = parseCtx.getContext();
        Path tmpDir = baseCtx.getExternalTmpPath(dest);
        // Change all the linked file sink descriptors
        if (fileSinkDesc.getLinkedFileSinkDesc() != null) {
            for (FileSinkDesc fsConf : fileSinkDesc.getLinkedFileSinkDesc()) {
                fsConf.setDirName(tmpDir);
            }
        } else {
            fileSinkDesc.setDirName(tmpDir);
        }
    }
    Task<MoveWork> mvTask = null;
    if (!chDir) {
        mvTask = GenMapRedUtils.findMoveTaskForFsopOutput(mvTasks, fileSinkDesc.getFinalDirName(), false);
    }
    // Set the move task to be dependent on the current task
    if (mvTask != null) {
        GenMapRedUtils.addDependentMoveTasks(mvTask, hconf, currTask, dependencyTask);
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc)
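
The staging-directory rewrite above can be read as the following small sketch; both paths are hypothetical.

// When chDir is true, the FileSink is redirected to a temporary directory on the same
// file system as the final destination; the dependent MoveTask later promotes the staged data.
Path dest   = fsOp.getConf().getFinalDirName();               // e.g. hdfs://nn/warehouse/db.db/t (hypothetical)
Path tmpDir = parseCtx.getContext().getExternalTmpPath(dest); // e.g. a .hive-staging_... dir under dest (hypothetical)
fsOp.getConf().setDirName(tmpDir);                            // the FileSink now writes to the staging dir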

Aggregations

Context (org.apache.hadoop.hive.ql.Context): 103 usages
Path (org.apache.hadoop.fs.Path): 45 usages
IOException (java.io.IOException): 26 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 21 usages
CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 20 usages
Test (org.junit.Test): 19 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 16 usages
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 16 usages
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 16 usages
DriverContext (org.apache.hadoop.hive.ql.DriverContext): 15 usages
HashMap (java.util.HashMap): 13 usages
HiveTxnManager (org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager): 13 usages
ParseContext (org.apache.hadoop.hive.ql.parse.ParseContext): 13 usages
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 13 usages
ArrayList (java.util.ArrayList): 12 usages
Task (org.apache.hadoop.hive.ql.exec.Task): 12 usages
Table (org.apache.hadoop.hive.ql.metadata.Table): 12 usages
JobConf (org.apache.hadoop.mapred.JobConf): 12 usages
DDLWork (org.apache.hadoop.hive.ql.ddl.DDLWork): 9 usages
QueryState (org.apache.hadoop.hive.ql.QueryState): 8 usages