
Example 6 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class SemanticAnalyzer method useCachedResult.

/**
 * Set the query plan to use the cache entry passed in to return the query results.
 * @param cacheEntry The results cache entry that will be used to resolve the query.
 */
private void useCachedResult(QueryResultsCache.CacheEntry cacheEntry, boolean needsReset) {
    if (needsReset) {
        reset(true);
        inputs.clear();
    }
    // Change query FetchTask to use new location specified in results cache.
    FetchTask fetchTask = (FetchTask) TaskFactory.get(cacheEntry.getFetchWork());
    setFetchTask(fetchTask);
    queryState.setCommandType(cacheEntry.getQueryInfo().getHiveOperation());
    resultSchema = cacheEntry.getQueryInfo().getResultSchema();
    setTableAccessInfo(cacheEntry.getQueryInfo().getTableAccessInfo());
    setColumnAccessInfo(cacheEntry.getQueryInfo().getColumnAccessInfo());
    inputs.addAll(cacheEntry.getQueryInfo().getInputs());
    // Set recursive traversal in case the cached query was UNION generated by Tez.
    conf.setBoolean(FileInputFormat.INPUT_DIR_RECURSIVE, true);
    // Indicate that the query will use a cached result.
    setCacheUsage(new CacheUsage(CacheUsage.CacheStatus.QUERY_USING_CACHE, cacheEntry));
}
Also used : FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) CacheUsage(org.apache.hadoop.hive.ql.cache.results.CacheUsage)
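
A quick note on the idiom in play: the cache hit is consumed by re-materializing the cached FetchWork as a fresh FetchTask through TaskFactory.get and swapping it into the plan. A minimal sketch of just that step, using only calls shown in the example above; the wrapper method name installCachedFetch is hypothetical.

// Hypothetical helper condensing the swap-to-cache step from useCachedResult above.
private void installCachedFetch(QueryResultsCache.CacheEntry cacheEntry) {
    FetchWork cachedWork = cacheEntry.getFetchWork();            // results already materialized by the cache
    FetchTask cachedFetch = (FetchTask) TaskFactory.get(cachedWork);
    setFetchTask(cachedFetch);                                   // the query now fetches from the cache location
}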

Example 7 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class TaskCompiler method compile.

@SuppressWarnings("nls")
public void compile(final ParseContext pCtx, final List<Task<?>> rootTasks, final Set<ReadEntity> inputs, final Set<WriteEntity> outputs) throws SemanticException {
    Context ctx = pCtx.getContext();
    GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
    List<Task<MoveWork>> mvTask = new ArrayList<>();
    List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
    List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
    boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
    boolean directInsertCtas = false;
    if (pCtx.getCreateTable() != null && pCtx.getCreateTable().getStorageHandler() != null) {
        try {
            directInsertCtas = HiveUtils.getStorageHandler(conf, pCtx.getCreateTable().getStorageHandler()).directInsertCTAS();
        } catch (HiveException e) {
            throw new SemanticException("Failed to load storage handler:  " + e.getMessage());
        }
    }
    if (pCtx.getFetchTask() != null) {
        if (pCtx.getFetchTask().getTblDesc() == null) {
            return;
        }
        pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
        TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
        // If the result serde is not ThriftJDBCBinarySerDe,
        // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
        if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
            if (SessionState.get().isHiveServerQuery()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
            } else {
                String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
                if (formatterName == null || formatterName.isEmpty()) {
                    conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
                }
            }
        }
        return;
    }
    if (pCtx.getQueryProperties().isAnalyzeCommand()) {
        // Analyze commands do not need the operator-plan optimizations.
        LOG.debug("Skipping optimize operator plan for analyze command.");
    } else {
        optimizeOperatorPlan(pCtx);
    }
    /*
     * In case of a select, use a fetch task instead of a move task.
     * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
     * a column stats task later.
     */
    if (pCtx.getQueryProperties().isQuery() && !isCStats) {
        if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
            throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
        }
        LoadFileDesc loadFileDesc = loadFileWork.get(0);
        String cols = loadFileDesc.getColumns();
        String colTypes = loadFileDesc.getColumnTypes();
        TableDesc resultTab = pCtx.getFetchTableDesc();
        boolean shouldSetOutputFormatter = false;
        if (resultTab == null) {
            ResultFileFormat resFileFormat = conf.getResultFileFormat();
            String fileFormat;
            Class<? extends Deserializer> serdeClass;
            if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && resFileFormat == ResultFileFormat.SEQUENCEFILE) {
                fileFormat = resFileFormat.toString();
                serdeClass = ThriftJDBCBinarySerDe.class;
                shouldSetOutputFormatter = true;
            } else if (resFileFormat == ResultFileFormat.SEQUENCEFILE) {
                // The file format is changed so that, if the file sink provides a list of files to fetch
                // from (instead of a whole directory), listStatus is done on those files, which is what
                // HiveSequenceFileInputFormat does.
                fileFormat = "HiveSequenceFile";
                serdeClass = LazySimpleSerDe.class;
            } else {
                // All other cases we use the defined file format and LazySimpleSerde
                fileFormat = resFileFormat.toString();
                serdeClass = LazySimpleSerDe.class;
            }
            resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat, serdeClass);
        } else {
            shouldSetOutputFormatter = resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB).equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName());
        }
        if (shouldSetOutputFormatter) {
            // Set the fetch formatter to be a no-op for the ListSinkOperator, since we will
            // read formatted thrift objects from the output SequenceFile written by Tasks.
            conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
        }
        FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
        boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
        fetch.setHiveServerQuery(isHiveServerQuery);
        fetch.setSource(pCtx.getFetchSource());
        fetch.setSink(pCtx.getFetchSink());
        if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
            fetch.setIsUsingThriftJDBCBinarySerDe(true);
        } else {
            fetch.setIsUsingThriftJDBCBinarySerDe(false);
        }
        // The idea here is to keep an object reference to the list of files to fetch in both the FileSink
        // and the FetchTask. During job close the file sink populates the list, and the fetch task later
        // uses it to fetch the results.
        Collection<Operator<?>> tableScanOps = Lists.<Operator<?>>newArrayList(pCtx.getTopOps().values());
        Set<FileSinkOperator> fsOps = OperatorUtils.findOperators(tableScanOps, FileSinkOperator.class);
        if (fsOps != null && fsOps.size() == 1) {
            FileSinkOperator op = fsOps.iterator().next();
            Set<FileStatus> filesToFetch = new HashSet<>();
            op.getConf().setFilesToFetch(filesToFetch);
            fetch.setFilesToFetch(filesToFetch);
        }
        pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch));
        // For the FetchTask, the limit optimization requires we fetch all the rows
        // in memory and count how many rows we get. It's not practical if the
        // limit factor is too big
        int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
            LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization.");
            globalLimitCtx.disableOpt();
        }
        if (outerQueryLimit == 0) {
            // Believe it or not, some tools do generate queries with LIMIT 0 and then expect
            // the query to run quickly. Let's meet their requirement.
            LOG.info("Limit 0. No query execution needed.");
            return;
        }
    } else if (!isCStats) {
        for (LoadTableDesc ltd : loadTableWork) {
            Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false));
            mvTask.add(tsk);
        }
        boolean oneLoadFileForCtas = true;
        for (LoadFileDesc lfd : loadFileWork) {
            if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
                if (!oneLoadFileForCtas) {
                    // should not have more than 1 load file for CTAS.
                    throw new SemanticException("One query is not expected to contain multiple CTAS loads statements");
                }
                setLoadFileLocation(pCtx, lfd);
                oneLoadFileForCtas = false;
            }
            mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false)));
        }
    }
    generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
    // For each task, set the key descriptor for the reducer
    for (Task<?> rootTask : rootTasks) {
        GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
    }
    // If a task contains an operator which instructs bucketizedhiveinputformat
    // to be used, please do so
    for (Task<?> rootTask : rootTasks) {
        setInputFormat(rootTask);
    }
    optimizeTaskPlan(rootTasks, pCtx, ctx);
    /*
     * If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     * As per HIVE-15903, we will also collect table stats when user computes column stats.
     * That means, if isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()
     * We need to collect table stats
     * if isCStats, we need to include a basic stats task
     * else it is ColumnStatsAutoGather, which should have a move task with a stats task already.
     */
    if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
        // map from tablename to task (ColumnStatsTask which includes a BasicStatsTask)
        Map<String, StatsTask> map = new LinkedHashMap<>();
        if (isCStats) {
            if (rootTasks == null || rootTasks.size() != 1 || pCtx.getTopOps() == null || pCtx.getTopOps().size() != 1) {
                throw new SemanticException("Can not find correct root task!");
            }
            try {
                Task<?> root = rootTasks.iterator().next();
                StatsTask tsk = (StatsTask) genTableStats(pCtx, pCtx.getTopOps().values().iterator().next(), root, outputs);
                root.addDependentTask(tsk);
                map.put(extractTableFullName(tsk), tsk);
            } catch (HiveException e) {
                throw new SemanticException(e);
            }
            genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, map, outerQueryLimit, 0);
        } else {
            Set<Task<?>> leafTasks = new LinkedHashSet<Task<?>>();
            getLeafTasks(rootTasks, leafTasks);
            List<Task<?>> nonStatsLeafTasks = new ArrayList<>();
            for (Task<?> tsk : leafTasks) {
                // map table name to the correct ColumnStatsTask
                if (tsk instanceof StatsTask) {
                    map.put(extractTableFullName((StatsTask) tsk), (StatsTask) tsk);
                } else {
                    nonStatsLeafTasks.add(tsk);
                }
            }
            // add cStatsTask as a dependent of all the nonStatsLeafTasks
            for (Task<?> tsk : nonStatsLeafTasks) {
                for (Task<?> cStatsTask : map.values()) {
                    tsk.addDependentTask(cStatsTask);
                }
            }
            for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx.getColumnStatsAutoGatherContexts()) {
                if (!columnStatsAutoGatherContext.isInsertInto()) {
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, 0);
                } else {
                    int numBitVector;
                    try {
                        numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
                    } catch (Exception e) {
                        throw new SemanticException(e.getMessage());
                    }
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, numBitVector);
                }
            }
        }
    }
    decideExecMode(rootTasks, ctx, globalLimitCtx);
    // For a direct-insert CTAS, no CREATE TABLE task is needed because the table is created
    // ahead of time by the non-native table
    if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization() && !directInsertCtas) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateTableDesc crtTblDesc = pCtx.getCreateTable();
        crtTblDesc.validate(conf);
        Task<?> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc));
        patchUpAfterCTASorMaterializedView(rootTasks, inputs, outputs, crtTblTask, CollectionUtils.isEmpty(crtTblDesc.getPartColNames()));
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateMaterializedViewDesc viewDesc = pCtx.getCreateViewDesc();
        Task<?> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc));
        patchUpAfterCTASorMaterializedView(rootTasks, inputs, outputs, crtViewTask, CollectionUtils.isEmpty(viewDesc.getPartColNames()));
    } else if (pCtx.getMaterializedViewUpdateDesc() != null) {
        // If there is a materialized view update desc, we introduce it at the end
        // of the task tree.
        MaterializedViewUpdateDesc materializedViewDesc = pCtx.getMaterializedViewUpdateDesc();
        DDLWork ddlWork = new DDLWork(inputs, outputs, materializedViewDesc);
        Set<Task<?>> leafTasks = new LinkedHashSet<Task<?>>();
        getLeafTasks(rootTasks, leafTasks);
        Task<?> materializedViewTask = TaskFactory.get(ddlWork, conf);
        for (Task<?> task : leafTasks) {
            task.addDependentTask(materializedViewTask);
        }
    }
    if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
        LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
        pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }
    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
        LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
        globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
    }
    Interner<TableDesc> interner = Interners.newStrongInterner();
    // Perform Final chores on generated Map works
    // 1.  Intern the table descriptors
    // 2.  Derive final explain attributes based on previous compilation.
    GenMapRedUtils.finalMapWorkChores(rootTasks, pCtx.getConf(), interner);
}
Also used : FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) MoveWork(org.apache.hadoop.hive.ql.plan.MoveWork) LinkedHashSet(java.util.LinkedHashSet) DDLTask(org.apache.hadoop.hive.ql.ddl.DDLTask) BasicStatsNoJobTask(org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) Task(org.apache.hadoop.hive.ql.exec.Task) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) MoveTask(org.apache.hadoop.hive.ql.exec.MoveTask) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FileStatus(org.apache.hadoop.fs.FileStatus) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) StatsTask(org.apache.hadoop.hive.ql.exec.StatsTask) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) ThriftFormatter(org.apache.hadoop.hive.serde2.thrift.ThriftFormatter) ThriftJDBCBinarySerDe(org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe) NoOpFetchFormatter(org.apache.hadoop.hive.serde2.NoOpFetchFormatter) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Context(org.apache.hadoop.hive.ql.Context) AnalyzeRewriteContext(org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext) LoadFileDesc(org.apache.hadoop.hive.ql.plan.LoadFileDesc) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) CreateMaterializedViewDesc(org.apache.hadoop.hive.ql.ddl.view.create.CreateMaterializedViewDesc) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ResultFileFormat(org.apache.hadoop.hive.conf.HiveConf.ResultFileFormat) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc) MaterializedViewUpdateDesc(org.apache.hadoop.hive.ql.ddl.view.materialized.update.MaterializedViewUpdateDesc) DDLWork(org.apache.hadoop.hive.ql.ddl.DDLWork) DefaultFetchFormatter(org.apache.hadoop.hive.serde2.DefaultFetchFormatter) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) LoadTableDesc(org.apache.hadoop.hive.ql.plan.LoadTableDesc) CreateTableDesc(org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc)
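
For the common case of a plain SELECT with exactly one load-file output, the long compile method above reduces to building a single FetchWork over that output and registering it as the query's FetchTask. A condensed sketch of that path, assuming conf, pCtx, loadFileDesc, resultTab and outerQueryLimit are in scope as in the example; every call used here appears in the code above.

// Condensed from the isQuery() branch of compile().
FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
fetch.setHiveServerQuery(SessionState.get().isHiveServerQuery());
fetch.setSource(pCtx.getFetchSource());   // operator tree that produces the rows
fetch.setSink(pCtx.getFetchSink());       // ListSinkOperator that formats rows for the client
pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch));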

Example 8 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class GenSparkUtils method processFileSink.

public void processFileSink(GenSparkProcContext context, FileSinkOperator fileSink) throws SemanticException {
    ParseContext parseContext = context.parseContext;
    // is INSERT OVERWRITE TABLE
    boolean isInsertTable = GenMapRedUtils.isInsertInto(parseContext, fileSink);
    HiveConf hconf = parseContext.getConf();
    boolean chDir = GenMapRedUtils.isMergeRequired(context.moveTask, hconf, fileSink, context.currentTask, isInsertTable);
    // Set stats config for FileSinkOperators which are cloned from the fileSink
    List<FileSinkOperator> fileSinkList = context.fileSinkMap.get(fileSink);
    if (fileSinkList != null) {
        for (FileSinkOperator fsOp : fileSinkList) {
            fsOp.getConf().setGatherStats(fileSink.getConf().isGatherStats());
            fsOp.getConf().setStatsReliable(fileSink.getConf().isStatsReliable());
        }
    }
    Path finalName = createMoveTask(context.currentTask, chDir, fileSink, parseContext, context.moveTask, hconf, context.dependencyTask);
    if (chDir) {
        // Merge the files in the destination table/partitions by creating Map-only merge job
        // If underlying data is RCFile a RCFileBlockMerge task would be created.
        LOG.info("using CombineHiveInputformat for the merge job");
        GenMapRedUtils.createMRWorkForMergingFiles(fileSink, finalName, context.dependencyTask, context.moveTask, hconf, context.currentTask, parseContext.getQueryState().getLineageState());
    }
    FetchTask fetchTask = parseContext.getFetchTask();
    if (fetchTask != null && context.currentTask.getNumChild() == 0) {
        if (fetchTask.isFetchFrom(fileSink.getConf())) {
            context.currentTask.setFetchSource(true);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) HiveConf(org.apache.hadoop.hive.conf.HiveConf) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
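
The last few lines above are the fetch-source wiring: a leaf task whose FileSinkOperator feeds the FetchTask is flagged so its output can be fetched directly. The MR compiler in Example 10 below uses the same idiom. A minimal sketch of the check, assuming fetchTask, leafTask and fileSinkDesc are in scope; all calls appear in Examples 8 and 10.

// Shared fetch-source wiring, as in Examples 8 and 10.
if (fetchTask != null && leafTask.getNumChild() == 0       // only a leaf task can feed the fetch
        && fetchTask.isFetchFrom(fileSinkDesc)) {          // and only if the fetch reads this sink's output
    leafTask.setFetchSource(true);
}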

Example 9 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class Driver method releasePlan.

private void releasePlan() {
    try {
        if (driverContext.getPlan() != null) {
            FetchTask fetchTask = driverContext.getPlan().getFetchTask();
            if (fetchTask != null) {
                fetchTask.setTaskQueue(null);
                fetchTask.setQueryPlan(null);
            }
            driverContext.setFetchTask(fetchTask);
        }
        driverContext.setPlan(null);
    } catch (Exception e) {
        LOG.debug("Exception while clearing the Fetch task", e);
    }
}
Also used : SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) LockException(org.apache.hadoop.hive.ql.lockmgr.LockException) IOException(java.io.IOException) CommandProcessorException(org.apache.hadoop.hive.ql.processors.CommandProcessorException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask)
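
The point of releasePlan is that the FetchTask outlives the rest of the query plan: the driver keeps it so results can still be streamed to the client, but first detaches its back-references so the released plan can be garbage collected. A minimal sketch of that detach-then-retain step, assuming plan stands for driverContext.getPlan(); only setters shown in the example are used.

// Detach-then-retain, condensed from releasePlan above.
FetchTask fetchTask = plan.getFetchTask();
if (fetchTask != null) {
    fetchTask.setTaskQueue(null);        // drop the link to the executing plan's task queue
    fetchTask.setQueryPlan(null);        // and to the plan itself
}
driverContext.setFetchTask(fetchTask);   // keep only the fetch task for result retrieval
driverContext.setPlan(null);             // release everything else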

Example 10 with FetchTask

use of org.apache.hadoop.hive.ql.exec.FetchTask in project hive by apache.

the class GenMRFileSink1 method process.

/**
 * File Sink Operator encountered.
 *
 * @param nd
 *          the file sink operator encountered
 * @param opProcCtx
 *          context
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ParseContext parseCtx = ctx.getParseCtx();
    boolean chDir = false;
    // we should take the parent of fsOp's task as the current task.
    FileSinkOperator fsOp = (FileSinkOperator) nd;
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    GenMapRedCtx mapredCtx = mapCurrCtx.get(fsOp.getParentOperators().get(0));
    Task<?> currTask = mapredCtx.getCurrTask();
    ctx.setCurrTask(currTask);
    ctx.addRootIfPossible(currTask);
    // is INSERT OVERWRITE TABLE
    boolean isInsertTable = GenMapRedUtils.isInsertInto(parseCtx, fsOp);
    HiveConf hconf = parseCtx.getConf();
    // Mark this task as a final map reduce task (ignoring the optional merge task)
    ((MapredWork) currTask.getWork()).setFinalMapRed(true);
    // If this file sink desc has been processed due to a linked file sink desc,
    // use that task
    Map<FileSinkDesc, Task<?>> fileSinkDescs = ctx.getLinkedFileDescTasks();
    if (fileSinkDescs != null) {
        Task<?> childTask = fileSinkDescs.get(fsOp.getConf());
        processLinkedFileDesc(ctx, childTask);
        return true;
    }
    // If this file sink operator has already been seen, its output has already been handled.
    // So, no need to attempt to merge the files again.
    if ((ctx.getSeenFileSinkOps() == null) || (!ctx.getSeenFileSinkOps().contains(nd))) {
        chDir = GenMapRedUtils.isMergeRequired(ctx.getMvTask(), hconf, fsOp, currTask, isInsertTable);
    }
    Path finalName = processFS(fsOp, stack, opProcCtx, chDir);
    if (chDir) {
        // Merge the files in the destination table/partitions by creating Map-only merge job
        // If underlying data is RCFile or OrcFile, RCFileBlockMerge task or
        // OrcFileStripeMerge task would be created.
        LOG.info("using CombineHiveInputformat for the merge job");
        GenMapRedUtils.createMRWorkForMergingFiles(fsOp, finalName, ctx.getDependencyTaskForMultiInsert(), ctx.getMvTask(), hconf, currTask, parseCtx.getQueryState().getLineageState());
    }
    FileSinkDesc fileSinkDesc = fsOp.getConf();
    // There are linked file sink operators and child tasks are present
    if (fileSinkDesc.isLinkedFileSink() && (currTask.getChildTasks() != null) && (currTask.getChildTasks().size() == 1)) {
        Map<FileSinkDesc, Task<?>> linkedFileDescTasks = ctx.getLinkedFileDescTasks();
        if (linkedFileDescTasks == null) {
            linkedFileDescTasks = new HashMap<FileSinkDesc, Task<?>>();
            ctx.setLinkedFileDescTasks(linkedFileDescTasks);
        }
        for (FileSinkDesc fileDesc : fileSinkDesc.getLinkedFileSinkDesc()) {
            linkedFileDescTasks.put(fileDesc, currTask.getChildTasks().get(0));
        }
    }
    FetchTask fetchTask = parseCtx.getFetchTask();
    if (fetchTask != null && currTask.getNumChild() == 0) {
        if (fetchTask.isFetchFrom(fileSinkDesc)) {
            currTask.setFetchSource(true);
        }
    }
    return true;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Path(org.apache.hadoop.fs.Path) Task(org.apache.hadoop.hive.ql.exec.Task) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) HiveConf(org.apache.hadoop.hive.conf.HiveConf) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)

Aggregations

FetchTask (org.apache.hadoop.hive.ql.exec.FetchTask): 27
ArrayList (java.util.ArrayList): 11
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8
List (java.util.List): 7
ValidTxnList (org.apache.hadoop.hive.common.ValidTxnList): 7
Path (org.apache.hadoop.fs.Path): 6
FetchWork (org.apache.hadoop.hive.ql.plan.FetchWork): 6
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 6
Test (org.junit.Test): 6
IOException (java.io.IOException): 5
HiveConf (org.apache.hadoop.hive.conf.HiveConf): 5
Context (org.apache.hadoop.hive.ql.Context): 4
CacheUsage (org.apache.hadoop.hive.ql.cache.results.CacheUsage): 4
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 4
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 4
Operator (org.apache.hadoop.hive.ql.exec.Operator): 3
Task (org.apache.hadoop.hive.ql.exec.Task): 3
CommandProcessorException (org.apache.hadoop.hive.ql.processors.CommandProcessorException): 3
LinkedHashMap (java.util.LinkedHashMap): 2
LinkedHashSet (java.util.LinkedHashSet): 2