
Example 31 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMRUnion1 method processSubQueryUnionCreateIntermediate.

/**
 * Process the union when the parent is a map-reduce job. Create a temporary
 * output, and let the union task read from the temporary output.
 *
 * The files created for all the inputs are kept in the union context and later
 * used to initialize the union plan.
 *
 * @param parent
 * @param child
 * @param uTask
 * @param ctx
 * @param uCtxTask
 */
private void processSubQueryUnionCreateIntermediate(Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child, Task<?> uTask, GenMRProcContext ctx, GenMRUnionCtx uCtxTask) {
    ParseContext parseCtx = ctx.getParseCtx();
    TableDesc tt_desc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(parent.getSchema(), "temporarycol"));
    // generate the temporary file
    Context baseCtx = parseCtx.getContext();
    Path taskTmpDir = baseCtx.getMRTmpPath();
    // Create the temporary file, its corresponding FileSinkOperator, and
    // its corresponding TableScanOperator.
    TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(parent, child, taskTmpDir, tt_desc, parseCtx);
    // Add the path to alias mapping
    uCtxTask.addTaskTmpDir(taskTmpDir.toUri().toString());
    uCtxTask.addTTDesc(tt_desc);
    uCtxTask.addListTopOperators(tableScanOp);
    // The union task is empty. The files created for all the inputs are
    // assembled in the union context and later used to initialize the union
    // plan
    Task<?> currTask = ctx.getCurrTask();
    currTask.addDependentTask(uTask);
    if (ctx.getRootTasks().contains(uTask)) {
        ctx.getRootTasks().remove(uTask);
        if (!ctx.getRootTasks().contains(currTask) && shouldBeRootTask(currTask)) {
            ctx.getRootTasks().add(currTask);
        }
    }
}
Also used : ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Context(org.apache.hadoop.hive.ql.Context) UnionProcContext(org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext) UnionParseContext(org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext.UnionParseContext) Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc)
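
The root-task bookkeeping at the end of the example is the part most easily misread: once the union task becomes a dependent of the current task it can no longer be a root task, and the current task may need to take its place. Below is a minimal, self-contained sketch of just that logic, using hypothetical stand-in classes (SimpleTask, shouldBeRootTask) rather than Hive's real Task and GenMRProcContext APIs.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Hypothetical stand-in; not Hive's Task class.
class SimpleTask {
    final String id;
    final Set<SimpleTask> parents = new HashSet<>();
    final List<SimpleTask> children = new ArrayList<>();

    SimpleTask(String id) { this.id = id; }

    // Mirrors Task.addDependentTask: 'child' now runs after 'this'.
    void addDependentTask(SimpleTask child) {
        children.add(child);
        child.parents.add(this);
    }
}

public class UnionRootTaskSketch {
    // A task should be a root only if nothing feeds into it.
    static boolean shouldBeRootTask(SimpleTask t) {
        return t.parents.isEmpty();
    }

    public static void main(String[] args) {
        SimpleTask currTask = new SimpleTask("curr");
        SimpleTask uTask = new SimpleTask("union");
        List<SimpleTask> rootTasks = new ArrayList<>();
        rootTasks.add(uTask); // the union task was registered as a root earlier

        // Same bookkeeping as in processSubQueryUnionCreateIntermediate:
        currTask.addDependentTask(uTask);
        if (rootTasks.contains(uTask)) {
            rootTasks.remove(uTask);               // uTask now has a parent, so it is no longer a root
            if (!rootTasks.contains(currTask) && shouldBeRootTask(currTask)) {
                rootTasks.add(currTask);           // promote the feeding task instead
            }
        }
        System.out.println(rootTasks.size() == 1 && rootTasks.get(0) == currTask); // true
    }
}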

Example 32 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GlobalLimitOptimizer method transform.

@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
    Context ctx = pctx.getContext();
    Map<String, TableScanOperator> topOps = pctx.getTopOps();
    GlobalLimitCtx globalLimitCtx = pctx.getGlobalLimitCtx();
    Map<String, SplitSample> nameToSplitSample = pctx.getNameToSplitSample();
    // The optimization only applies when there is exactly one top operator,
    // no TRANSFORM or UDTF, and no block sampling is used.
    if (topOps.size() == 1 && !globalLimitCtx.ifHasTransformOrUDTF() && nameToSplitSample.isEmpty()) {
        // Here we recursively check:
        // 1. whether there is exactly one LIMIT in the query
        // 2. whether there is no aggregation, group by, distinct, sort by,
        // distribute by, or table sampling in any of the sub-queries.
        // The query only qualifies if both conditions are satisfied.
        // 
        // Example qualified queries:
        // CREATE TABLE ... AS SELECT col1, col2 FROM tbl LIMIT ..
        // INSERT OVERWRITE TABLE ... SELECT col1, hash(col2), split(col1)
        // FROM ... LIMIT...
        // SELECT * FROM (SELECT col1 AS col2 FROM (SELECT * FROM ...) t1 LIMIT ...) t2
        // 
        TableScanOperator ts = topOps.values().iterator().next();
        Table tab = ts.getConf().getTableMetadata();
        if (tab.isNonNative()) {
            LOG.info("Not enabling limit optimization on non native table: " + tab.getTableName());
            return pctx;
        }
        // InputFormat.getSplits won't be called if there is no input path, and the TS vertex will have 0 task parallelism
        if (tab.getStorageHandler() == null) {
            LimitOperator tempGlobalLimit = checkQbpForGlobalLimit(ts);
            // the query qualifies for the optimization
            if (tempGlobalLimit != null) {
                LimitDesc tempGlobalLimitDesc = tempGlobalLimit.getConf();
                Set<FilterOperator> filterOps = OperatorUtils.findOperators(ts, FilterOperator.class);
                if (!tab.isPartitioned()) {
                    if (filterOps.size() == 0) {
                        Integer tempOffset = tempGlobalLimitDesc.getOffset();
                        globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                    }
                } else {
                    // check if the pruner only contains partition columns
                    if (onlyContainsPartnCols(tab, filterOps)) {
                        String alias = (String) topOps.keySet().toArray()[0];
                        PrunedPartitionList partsList = pctx.getPrunedPartitions(alias, ts);
                        // only enable the optimization when there are no unknown partitions;
                        // otherwise the filter would still have to run at execution time
                        // to prune correctly
                        if (!partsList.hasUnknownPartitions()) {
                            Integer tempOffset = tempGlobalLimitDesc.getOffset();
                            globalLimitCtx.enableOpt(tempGlobalLimitDesc.getLimit(), (tempOffset == null) ? 0 : tempOffset);
                        }
                    }
                }
                if (globalLimitCtx.isEnable()) {
                    LOG.info("Qualify the optimize that reduces input size for 'offset' for offset " + globalLimitCtx.getGlobalOffset());
                    LOG.info("Qualify the optimize that reduces input size for 'limit' for limit " + globalLimitCtx.getGlobalLimit());
                }
            }
        }
    }
    return pctx;
}
Also used : Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Table(org.apache.hadoop.hive.ql.metadata.Table) SplitSample(org.apache.hadoop.hive.ql.parse.SplitSample) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) LimitOperator(org.apache.hadoop.hive.ql.exec.LimitOperator) GlobalLimitCtx(org.apache.hadoop.hive.ql.parse.GlobalLimitCtx)
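
The example hinges on two small pieces of logic: the outer gate that decides whether the query may qualify at all, and the defaulting of a missing OFFSET to 0 before enableOpt is called. The following is a simplified, standalone restatement of both, using plain booleans and Integer instead of Hive's ParseContext and GlobalLimitCtx; the class and method names are illustrative only.

// Hypothetical, simplified restatement of the guard conditions above;
// it does not use Hive's ParseContext or GlobalLimitCtx.
public class GlobalLimitGateSketch {

    // The outer gate: exactly one top TableScan, no TRANSFORM/UDTF, no block sampling.
    static boolean mayQualify(int topOpCount, boolean hasTransformOrUdtf, boolean hasSplitSample) {
        return topOpCount == 1 && !hasTransformOrUdtf && !hasSplitSample;
    }

    // A missing OFFSET defaults to 0, mirroring the (tempOffset == null) ? 0 : tempOffset pattern.
    static int effectiveOffset(Integer offset) {
        return (offset == null) ? 0 : offset;
    }

    public static void main(String[] args) {
        System.out.println(mayQualify(1, false, false)); // true: single scan, no transform, no sampling
        System.out.println(mayQualify(2, false, false)); // false: more than one top operator
        System.out.println(effectiveOffset(null));       // 0
        System.out.println(effectiveOffset(5));          // 5
    }
}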

Example 33 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMRFileSink1 method processFS.

/**
 * Process the FileSink operator to generate a MoveTask if necessary.
 *
 * @param fsOp
 *          current FileSink operator
 * @param stack
 *          parent operators
 * @param opProcCtx
 * @param chDir
 *          whether the operator's output should first be written to a tmp dir and then
 *          merged into the final dir later
 * @return the final file name to which the FileSinkOperator should write.
 * @throws SemanticException
 */
private Path processFS(FileSinkOperator fsOp, Stack<Node> stack, NodeProcessorCtx opProcCtx, boolean chDir) throws SemanticException {
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    Task<?> currTask = ctx.getCurrTask();
    // If the directory needs to be changed, send the new directory
    Path dest = null;
    List<FileSinkOperator> seenFSOps = ctx.getSeenFileSinkOps();
    if (seenFSOps == null) {
        seenFSOps = new ArrayList<FileSinkOperator>();
    }
    if (!seenFSOps.contains(fsOp)) {
        seenFSOps.add(fsOp);
    }
    ctx.setSeenFileSinkOps(seenFSOps);
    dest = GenMapRedUtils.createMoveTask(ctx.getCurrTask(), chDir, fsOp, ctx.getParseCtx(), ctx.getMvTask(), ctx.getConf(), ctx.getDependencyTaskForMultiInsert());
    TableScanOperator currTopOp = ctx.getCurrTopOp();
    String currAliasId = ctx.getCurrAliasId();
    HashMap<Operator<? extends OperatorDesc>, Task<?>> opTaskMap = ctx.getOpTaskMap();
    // If it is a map-only job, the task needs to be processed
    if (currTopOp != null) {
        Task<?> mapTask = opTaskMap.get(null);
        if (mapTask == null) {
            if (!ctx.isSeenOp(currTask, currTopOp)) {
                GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currTask, false, ctx);
            }
            opTaskMap.put(null, currTask);
        } else {
            if (!ctx.isSeenOp(currTask, currTopOp)) {
                GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, mapTask, false, ctx);
            } else {
                UnionOperator currUnionOp = ctx.getCurrUnionOp();
                if (currUnionOp != null) {
                    opTaskMap.put(null, currTask);
                    ctx.setCurrTopOp(null);
                    GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false);
                    return dest;
                }
            }
        // mapTask and currTask should be merged by a join/union operator
        // (e.g., GenMRUnion1) which has multiple topOps.
        // assert mapTask == currTask : "mapTask.id = " + mapTask.getId()
        // + "; currTask.id = " + currTask.getId();
        }
        return dest;
    }
    UnionOperator currUnionOp = ctx.getCurrUnionOp();
    if (currUnionOp != null) {
        opTaskMap.put(null, currTask);
        GenMapRedUtils.initUnionPlan(ctx, currUnionOp, currTask, false);
        return dest;
    }
    return dest;
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) UnionOperator(org.apache.hadoop.hive.ql.exec.UnionOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Task(org.apache.hadoop.hive.ql.exec.Task) FetchTask(org.apache.hadoop.hive.ql.exec.FetchTask) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
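
A detail worth noting is the use of a null key in opTaskMap to mean "the map-only task for this plan": processFS first looks up opTaskMap.get(null) and registers currTask under that key if nothing is there yet. A minimal sketch of that convention with a plain HashMap follows; TaskHandle is a hypothetical stand-in, not Hive's Task type.

import java.util.HashMap;
import java.util.Map;

// Hypothetical stand-in for a task handle.
class TaskHandle {
    final String name;
    TaskHandle(String name) { this.name = name; }
}

public class NullKeyTaskMapSketch {
    public static void main(String[] args) {
        // The null key marks the map-only task, as opTaskMap.get(null) does in processFS.
        Map<String, TaskHandle> opTaskMap = new HashMap<>();

        TaskHandle currTask = new TaskHandle("map-only stage");
        TaskHandle mapTask = opTaskMap.get(null);     // no map-only task registered yet
        if (mapTask == null) {
            opTaskMap.put(null, currTask);            // register the current task as the map-only task
        }
        System.out.println(opTaskMap.get(null).name); // "map-only stage"
    }
}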

Example 34 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class ConvertJoinMapJoin method checkConvertJoinSMBJoin.

/*
   * This method tries to convert a join to an SMB join. This is done based on
   * traits. If the sort-by columns are the same as the join columns, then we
   * can convert the join to an SMB join. Otherwise we retain the bucket map join, as it
   * is still more efficient than a regular join.
   */
private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException {
    ReduceSinkOperator bigTableRS = (ReduceSinkOperator) joinOp.getParentOperators().get(bigTablePosition);
    int numBuckets = bigTableRS.getParentOperators().get(0).getOpTraits().getNumBuckets();
    int size = -1;
    boolean shouldCheckExternalTables = context.conf.getBoolVar(HiveConf.ConfVars.HIVE_DISABLE_UNSAFE_EXTERNALTABLE_OPERATIONS);
    StringBuilder sb = new StringBuilder();
    for (Operator<?> parentOp : joinOp.getParentOperators()) {
        if (shouldCheckExternalTables && hasExternalTableAncestor(parentOp, sb)) {
            LOG.debug("External table {} found in join - disabling SMB join.", sb.toString());
            return false;
        }
        // either both sides have upstream ReduceSinks or neither does; if the sides are
        // unbalanced, we cannot convert.
        // This is a workaround for now. The right fix would be to refactor the code in
        // MapRecordProcessor and ReduceRecordProcessor with respect to the sources.
        Set<ReduceSinkOperator> set = OperatorUtils.findOperatorsUpstream(parentOp.getParentOperators(), ReduceSinkOperator.class);
        if (size < 0) {
            size = set.size();
            continue;
        }
        if (((size > 0) && (set.size() > 0)) || ((size == 0) && (set.size() == 0))) {
            continue;
        } else {
            return false;
        }
    }
    // the sort and bucket cols have to match on both sides for this
    // transformation of the join operation
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        if (!(parentOp instanceof ReduceSinkOperator)) {
            // could be mux/demux operators. Currently not supported
            LOG.debug("Found correlation optimizer operators. Cannot convert to SMB at this time.");
            return false;
        }
        ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp;
        List<ExprNodeDesc> keyCols = rsOp.getConf().getKeyCols();
        // For SMB, the key column(s) in the RS should be the same as the bucket column(s) and sort column(s)
        List<String> sortCols = rsOp.getOpTraits().getSortCols().get(0);
        List<String> bucketCols = rsOp.getOpTraits().getBucketColNames().get(0);
        if (sortCols.size() != keyCols.size() || bucketCols.size() != keyCols.size()) {
            return false;
        }
        // Check columns.
        for (int i = 0; i < sortCols.size(); i++) {
            ExprNodeDesc sortCol = rsOp.getColumnExprMap().get(sortCols.get(i));
            ExprNodeDesc bucketCol = rsOp.getColumnExprMap().get(bucketCols.get(i));
            if (!(sortCol.isSame(keyCols.get(i)) && bucketCol.isSame(keyCols.get(i)))) {
                return false;
            }
        }
        // check that the parent's traits are the same as the RS's
        OpTraits parentTraits = rsOp.getParentOperators().get(0).getOpTraits();
        if (null == parentTraits) {
            // programming error - shouldn't be null
            return false;
        }
        if (!checkColEquality(parentTraits.getSortCols(), rsOp.getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) {
            LOG.info("We cannot convert to SMB because the sort column names do not match.");
            return false;
        }
        if (!checkColEquality(parentTraits.getBucketColNames(), rsOp.getOpTraits().getBucketColNames(), rsOp.getColumnExprMap(), true)) {
            LOG.info("We cannot convert to SMB because bucket column names do not match.");
            return false;
        }
    }
    if (numBuckets < 0) {
        numBuckets = bigTableRS.getConf().getNumReducers();
    }
    tezBucketJoinProcCtx.setNumBuckets(numBuckets);
    // Bucketing uses two different versions: version 1 for existing
    // tables and version 2 for new tables. All the inputs to the SMB join must be
    // from the same version. This only applies to tables read directly, not to
    // intermediate outputs of joins/group-bys.
    int bucketingVersion = -1;
    for (Operator<? extends OperatorDesc> parentOp : joinOp.getParentOperators()) {
        // Check if the parent is coming from a table scan and, if so, what its bucketing version is.
        assert parentOp.getParentOperators() != null && parentOp.getParentOperators().size() == 1;
        Operator<?> op = parentOp;
        while (op != null && !(op instanceof TableScanOperator || op instanceof ReduceSinkOperator || op instanceof CommonJoinOperator)) {
            // If op has parents, it is guaranteed to have exactly one.
            List<Operator<?>> parents = op.getParentOperators();
            Preconditions.checkState(parents.size() == 0 || parents.size() == 1);
            op = parents.size() == 1 ? parents.get(0) : null;
        }
        if (op instanceof TableScanOperator) {
            int localVersion = ((TableScanOperator) op).getConf().getTableMetadata().getBucketingVersion();
            if (bucketingVersion == -1) {
                bucketingVersion = localVersion;
            } else if (bucketingVersion != localVersion) {
                // versions don't match, return false.
                LOG.debug("SMB Join can't be performed due to bucketing version mismatch");
                return false;
            }
        }
    }
    LOG.info("We can convert the join to an SMB join.");
    return true;
}
Also used : CommonMergeJoinOperator(org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) MuxOperator(org.apache.hadoop.hive.ql.exec.MuxOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) TezDummyStoreOperator(org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator) AppMasterEventOperator(org.apache.hadoop.hive.ql.exec.AppMasterEventOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) DummyStoreOperator(org.apache.hadoop.hive.ql.exec.DummyStoreOperator) OpTraits(org.apache.hadoop.hive.ql.plan.OpTraits) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
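
The final loop enforces that every join input that is read directly from a table uses the same bucketing version, by walking up each parent chain to the nearest table scan. Below is a self-contained sketch of that walk under simplified assumptions (single-parent chains, a Node stand-in class that carries a version only when it represents a table scan); it illustrates the idea and is not Hive's operator API.

import java.util.Arrays;
import java.util.List;

// Hypothetical stand-in for a single-parent operator chain.
class Node {
    final Node parent;              // at most one parent in this simplified chain
    final Integer bucketingVersion; // non-null only for "table scan" nodes
    Node(Node parent, Integer bucketingVersion) {
        this.parent = parent;
        this.bucketingVersion = bucketingVersion;
    }
}

public class BucketingVersionSketch {
    // Returns false if two branches resolve to table scans with different versions.
    static boolean versionsConsistent(List<Node> joinParents) {
        int seen = -1;
        for (Node op : joinParents) {
            // walk up until we hit a node carrying a version (a "table scan") or run out of parents
            while (op != null && op.bucketingVersion == null) {
                op = op.parent;
            }
            if (op != null) {
                int local = op.bucketingVersion;
                if (seen == -1) {
                    seen = local;
                } else if (seen != local) {
                    return false; // versions don't match, SMB conversion is not safe
                }
            }
        }
        return true;
    }

    public static void main(String[] args) {
        Node scanV1 = new Node(null, 1);
        Node scanV2 = new Node(null, 2);
        Node branchA = new Node(scanV1, null);
        Node branchB = new Node(scanV2, null);
        System.out.println(versionsConsistent(Arrays.asList(branchA, new Node(scanV1, null)))); // true
        System.out.println(versionsConsistent(Arrays.asList(branchA, branchB)));                // false
    }
}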

Example 35 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class SparkSkewJoinProcFactory method splitTask.

/**
 * If the join is not in a leaf ReduceWork, the Spark task has to be split into two tasks.
 */
private static void splitTask(SparkTask currentTask, ReduceWork reduceWork, ParseContext parseContext) throws SemanticException {
    SparkWork currentWork = currentTask.getWork();
    Set<Operator<?>> reduceSinkSet = OperatorUtils.getOp(reduceWork, ReduceSinkOperator.class);
    if (currentWork.getChildren(reduceWork).size() == 1 && canSplit(currentWork) && reduceSinkSet.size() == 1) {
        ReduceSinkOperator reduceSink = (ReduceSinkOperator) reduceSinkSet.iterator().next();
        BaseWork childWork = currentWork.getChildren(reduceWork).get(0);
        SparkEdgeProperty originEdge = currentWork.getEdgeProperty(reduceWork, childWork);
        // disconnect the reduce work from its child; this should produce two isolated sub-graphs
        currentWork.disconnect(reduceWork, childWork);
        // move works following the current reduce work into a new spark work
        SparkWork newWork = new SparkWork(parseContext.getConf().getVar(HiveConf.ConfVars.HIVEQUERYID));
        newWork.add(childWork);
        copyWorkGraph(currentWork, newWork, childWork);
        // remove them from current spark work
        for (BaseWork baseWork : newWork.getAllWorkUnsorted()) {
            currentWork.remove(baseWork);
            currentWork.getCloneToWork().remove(baseWork);
        }
        // create TS to read intermediate data
        Context baseCtx = parseContext.getContext();
        Path taskTmpDir = baseCtx.getMRTmpPath();
        Operator<? extends OperatorDesc> rsParent = reduceSink.getParentOperators().get(0);
        TableDesc tableDesc = PlanUtils.getIntermediateFileTableDesc(PlanUtils.getFieldSchemasFromRowSchema(rsParent.getSchema(), "temporarycol"));
        // this will insert FS and TS between the RS and its parent
        TableScanOperator tableScanOp = GenMapRedUtils.createTemporaryFile(rsParent, reduceSink, taskTmpDir, tableDesc, parseContext);
        // create new MapWork
        MapWork mapWork = PlanUtils.getMapRedWork().getMapWork();
        mapWork.setName("Map " + GenSparkUtils.getUtils().getNextSeqNumber());
        newWork.add(mapWork);
        newWork.connect(mapWork, childWork, originEdge);
        // setup the new map work
        String streamDesc = taskTmpDir.toUri().toString();
        if (GenMapRedUtils.needsTagging((ReduceWork) childWork)) {
            Operator<? extends OperatorDesc> childReducer = ((ReduceWork) childWork).getReducer();
            String id = null;
            if (childReducer instanceof JoinOperator) {
                if (parseContext.getJoinOps().contains(childReducer)) {
                    id = ((JoinOperator) childReducer).getConf().getId();
                }
            } else if (childReducer instanceof MapJoinOperator) {
                if (parseContext.getMapJoinOps().contains(childReducer)) {
                    id = ((MapJoinOperator) childReducer).getConf().getId();
                }
            } else if (childReducer instanceof SMBMapJoinOperator) {
                if (parseContext.getSmbMapJoinOps().contains(childReducer)) {
                    id = ((SMBMapJoinOperator) childReducer).getConf().getId();
                }
            }
            if (id != null) {
                streamDesc = id + ":$INTNAME";
            } else {
                streamDesc = "$INTNAME";
            }
            String origStreamDesc = streamDesc;
            int pos = 0;
            while (mapWork.getAliasToWork().get(streamDesc) != null) {
                streamDesc = origStreamDesc.concat(String.valueOf(++pos));
            }
        }
        GenMapRedUtils.setTaskPlan(taskTmpDir, streamDesc, tableScanOp, mapWork, false, tableDesc);
        // insert the new task between current task and its child
        @SuppressWarnings("unchecked") Task<?> newTask = TaskFactory.get(newWork);
        List<Task<?>> childTasks = currentTask.getChildTasks();
        // must have at most one child
        if (childTasks != null && childTasks.size() > 0) {
            Task<?> childTask = childTasks.get(0);
            currentTask.removeDependentTask(childTask);
            newTask.addDependentTask(childTask);
        }
        currentTask.addDependentTask(newTask);
        newTask.setFetchSource(currentTask.isFetchSource());
    }
}
Also used : ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) MapJoinOperator(org.apache.hadoop.hive.ql.exec.MapJoinOperator) SMBMapJoinOperator(org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator) JoinOperator(org.apache.hadoop.hive.ql.exec.JoinOperator) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Context(org.apache.hadoop.hive.ql.Context) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) Path(org.apache.hadoop.fs.Path) SparkTask(org.apache.hadoop.hive.ql.exec.spark.SparkTask) Task(org.apache.hadoop.hive.ql.exec.Task) SparkWork(org.apache.hadoop.hive.ql.plan.SparkWork) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) MapWork(org.apache.hadoop.hive.ql.plan.MapWork) SparkEdgeProperty(org.apache.hadoop.hive.ql.plan.SparkEdgeProperty) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) BaseWork(org.apache.hadoop.hive.ql.plan.BaseWork)
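
One small reusable pattern in this example is the loop that makes the stream alias unique by appending an increasing numeric suffix until aliasToWork no longer contains it. A standalone sketch of that pattern with a plain Map is shown below; the method name uniqueAlias is illustrative.

import java.util.HashMap;
import java.util.Map;

public class StreamAliasSketch {
    // Append 1, 2, 3, ... to the base alias until the map does not contain it,
    // mirroring the streamDesc loop in splitTask.
    static String uniqueAlias(String base, Map<String, Object> aliasToWork) {
        String alias = base;
        int pos = 0;
        while (aliasToWork.get(alias) != null) {
            alias = base.concat(String.valueOf(++pos));
        }
        return alias;
    }

    public static void main(String[] args) {
        Map<String, Object> aliasToWork = new HashMap<>();
        aliasToWork.put("$INTNAME", new Object());
        aliasToWork.put("$INTNAME1", new Object());
        System.out.println(uniqueAlias("$INTNAME", aliasToWork)); // $INTNAME2
    }
}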

Aggregations

TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 133 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 52 uses
ArrayList (java.util.ArrayList): 47 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 44 uses
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 36 uses
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 35 uses
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 32 uses
HashMap (java.util.HashMap): 30 uses
Path (org.apache.hadoop.fs.Path): 30 uses
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 29 uses
Table (org.apache.hadoop.hive.ql.metadata.Table): 26 uses
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 25 uses
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 24 uses
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 24 uses
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 23 uses
LinkedHashMap (java.util.LinkedHashMap): 22 uses
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 22 uses
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 22 uses
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 22 uses
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 21 uses