
Example 96 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class DynamicPartitionPruningOptimization method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    ParseContext parseContext;
    if (procCtx instanceof OptimizeTezProcContext) {
        parseContext = ((OptimizeTezProcContext) procCtx).parseContext;
    } else if (procCtx instanceof OptimizeSparkProcContext) {
        parseContext = ((OptimizeSparkProcContext) procCtx).getParseContext();
    } else {
        throw new IllegalArgumentException("expected parseContext to be either " + "OptimizeTezProcContext or OptimizeSparkProcContext, but found " + procCtx.getClass().getName());
    }
    FilterOperator filter = (FilterOperator) nd;
    FilterDesc desc = filter.getConf();
    if (!parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING) && !parseContext.getConf().isSparkDPPAny()) {
        // nothing to do when the optimization is off
        return null;
    }
    TableScanOperator ts = null;
    if (filter.getParentOperators().size() == 1 && filter.getParentOperators().get(0) instanceof TableScanOperator) {
        ts = (TableScanOperator) filter.getParentOperators().get(0);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Parent: " + filter.getParentOperators().get(0));
        LOG.debug("Filter: " + desc.getPredicateString());
        LOG.debug("TableScan: " + ts);
    }
    DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext();
    // collect the dynamic pruning conditions
    removerContext.dynLists.clear();
    GenTezUtils.collectDynamicPruningConditions(desc.getPredicate(), removerContext);
    if (ts == null) {
        // Replace the synthetic predicate with true and bail out
        for (DynamicListContext ctx : removerContext) {
            ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
            replaceExprNode(ctx, desc, constNode);
        }
        return false;
    }
    boolean semiJoin = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);
    if (HiveConf.getVar(parseContext.getConf(), HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
        // TODO HIVE-16862: Implement a similar feature like "hive.tez.dynamic.semijoin.reduction" in hive on spark
        semiJoin = false;
    }
    List<ExprNodeDesc> newBetweenNodes = new ArrayList<>();
    List<ExprNodeDesc> newBloomFilterNodes = new ArrayList<>();
    for (DynamicListContext ctx : removerContext) {
        if (ctx.desc.getTypeInfo().getCategory() != ObjectInspector.Category.PRIMITIVE) {
            // https://issues.apache.org/jira/browse/HIVE-24988
            continue;
        }
        String column = ExprNodeDescUtils.extractColName(ctx.parent);
        boolean semiJoinAttempted = false;
        ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
        if (column != null) {
            // Need unique IDs to refer to each min/max key value in the DynamicValueRegistry
            String keyBaseAlias = "";
            Table table = ts.getConf().getTableMetadata();
            boolean nonEquiJoin = isNonEquiJoin(ctx.parent);
            if (table != null && table.isPartitionKey(column) && !nonEquiJoin) {
                String columnType = table.getPartColByName(column).getType();
                String alias = ts.getConf().getAlias();
                PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("alias: " + alias);
                    LOG.debug("pruned partition list: ");
                    if (plist != null) {
                        for (Partition p : plist.getPartitions()) {
                            LOG.debug(p.getCompleteName());
                        }
                    }
                }
                // if the pruned partition list is missing or still non-empty, dynamic pruning is worthwhile;
                // an empty list means all partitions have been already filtered statically
                if (plist == null || plist.getPartitions().size() != 0) {
                    LOG.info("Dynamic partitioning: " + table.getCompleteName() + "." + column);
                    generateEventOperatorPlan(ctx, parseContext, ts, column, columnType, null);
                } else {
                    // all partitions have been statically removed
                    LOG.debug("No partition pruning necessary.");
                }
            } else if (table.isNonNative() && table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) {
                generateEventOperatorPlan(ctx, parseContext, ts, column, table.getCols().stream().filter(e -> e.getName().equals(column)).map(e -> e.getType()).findFirst().get(), ctx.parent);
            } else {
                // semijoin
                LOG.debug("Column " + column + " is not a partition column");
                if (semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx) && ts.getConf().getFilterExpr() != null && !nonEquiJoin) {
                    LOG.debug("Initiate semijoin reduction for " + column + " (" + ts.getConf().getFilterExpr().getExprString());
                    StringBuilder internalColNameBuilder = new StringBuilder();
                    StringBuilder colNameBuilder = new StringBuilder();
                    // Apply best effort to fetch the correct table alias. If not
                    // found, fallback to old logic.
                    StringBuilder tabAliasBuilder = new StringBuilder();
                    if (getColumnInfo(ctx, internalColNameBuilder, colNameBuilder, tabAliasBuilder)) {
                        String colName = colNameBuilder.toString();
                        String tableAlias;
                        if (tabAliasBuilder.length() > 0) {
                            tableAlias = tabAliasBuilder.toString();
                        } else {
                            // falling back
                            Operator<?> op = ctx.generator;
                            while (!(op == null || op instanceof TableScanOperator)) {
                                op = op.getParentOperators().get(0);
                            }
                            tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias());
                        }
                        // Use the tableAlias to generate keyBaseAlias
                        keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + colName;
                        Map<String, List<SemiJoinHint>> hints = parseContext.getSemiJoinHints();
                        if (hints != null) {
                            // Create semijoin optimizations ONLY for hinted columns
                            semiJoinAttempted = processSemiJoinHints(parseContext, ctx, hints, tableAlias, internalColNameBuilder.toString(), colName, ts, keyBaseAlias);
                        } else {
                            // fallback to regular logic
                            semiJoinAttempted = generateSemiJoinOperatorPlan(ctx, parseContext, ts, keyBaseAlias, internalColNameBuilder.toString(), colName, null);
                        }
                    }
                }
            }
            // we always remove the condition by replacing it with "true"
            if (semiJoinAttempted) {
                List<ExprNodeDesc> betweenArgs = new ArrayList<ExprNodeDesc>();
                // Do not invert between result
                betweenArgs.add(new ExprNodeConstantDesc(Boolean.FALSE));
                // add column expression here
                betweenArgs.add(ctx.parent.getChildren().get(0));
                betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_min", ctx.desc.getTypeInfo())));
                betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_max", ctx.desc.getTypeInfo())));
                ExprNodeDesc betweenNode = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("between").getGenericUDF(), betweenArgs);
                // add column expression for bloom filter
                List<ExprNodeDesc> bloomFilterArgs = new ArrayList<ExprNodeDesc>();
                bloomFilterArgs.add(ctx.parent.getChildren().get(0));
                bloomFilterArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_bloom_filter", TypeInfoFactory.binaryTypeInfo)));
                ExprNodeDesc bloomFilterNode = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in_bloom_filter").getGenericUDF(), bloomFilterArgs);
                newBetweenNodes.add(betweenNode);
                newBloomFilterNodes.add(bloomFilterNode);
            }
        }
        replaceExprNode(ctx, desc, constNode);
    }
    if (!newBetweenNodes.isEmpty()) {
        // We need to add the new nodes: first the between nodes, then the bloom filters
        if (FunctionRegistry.isOpAnd(desc.getPredicate())) {
            // AND
            desc.getPredicate().getChildren().addAll(newBetweenNodes);
            desc.getPredicate().getChildren().addAll(newBloomFilterNodes);
        } else {
            List<ExprNodeDesc> andArgs = new ArrayList<>();
            andArgs.add(desc.getPredicate());
            andArgs.addAll(newBetweenNodes);
            andArgs.addAll(newBloomFilterNodes);
            ExprNodeGenericFuncDesc andExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and").getGenericUDF(), andArgs);
            // Also pass in filter as tableScan filterExpr
            ts.getConf().setFilterExpr(andExpr);
            desc.setPredicate(andExpr);
        }
    }
    // if we pushed the predicate into the table scan we need to remove the
    // synthetic conditions there.
    cleanTableScanFilters(ts);
    return false;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Arrays(java.util.Arrays) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) CombineEquivalentWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) OptimizeTezProcContext(org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext) RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo) Map(java.util.Map) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) SemiJoinBranchInfo(org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) EnumSet(java.util.EnumSet) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MetaStoreUtils(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils) DynamicPartitionPrunerContext(org.apache.hadoop.hive.ql.parse.GenTezUtils.DynamicPartitionPrunerContext) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) SparkUtilities(org.apache.hadoop.hive.ql.exec.spark.SparkUtilities) List(java.util.List) DynamicValue(org.apache.hadoop.hive.ql.plan.DynamicValue) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) GenericUDAFBloomFilterEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) OptimizeSparkProcContext(org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) GenericUDFIn(org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn) HashMap(java.util.HashMap) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) Stack(java.util.Stack) SemiJoinHint(org.apache.hadoop.hive.ql.parse.SemiJoinHint) ArrayList(java.util.ArrayList) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) PlanUtils(org.apache.hadoop.hive.ql.plan.PlanUtils) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) NullOrdering(org.apache.hadoop.hive.ql.util.NullOrdering) Logger(org.slf4j.Logger) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Table(org.apache.hadoop.hive.ql.metadata.Table) GenTezUtils(org.apache.hadoop.hive.ql.parse.GenTezUtils) 
SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Node(org.apache.hadoop.hive.ql.lib.Node) Partition(org.apache.hadoop.hive.ql.metadata.Partition) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) DynamicListContext(org.apache.hadoop.hive.ql.parse.GenTezUtils.DynamicListContext) OperatorFactory(org.apache.hadoop.hive.ql.exec.OperatorFactory) Preconditions(com.google.common.base.Preconditions) ExprNodeDescUtils(org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils) Collections(java.util.Collections)
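
As a usage sketch: the two switches consulted at the top of process() are ordinary HiveConf booleans, so enabling the optimization on a Tez session can look like the snippet below. The class and method names are illustrative only. With both flags on, the synthetic IN-subquery predicate is replaced with true and the filter gains a BETWEEN over the keyBaseAlias _min/_max dynamic values plus an in_bloom_filter probe, exactly as built in the loop above.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class DynamicPruningConfSketch {
    // Sketch only: the flags consulted at the top of process() above.
    public static HiveConf enableTezDynamicPruning() {
        HiveConf conf = new HiveConf();
        // partition-column case: generates the dynamic pruning event operator plan
        conf.setBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING, true);
        // non-partition-column case: min/max + bloom-filter semijoin reduction
        // ("hive.tez.dynamic.semijoin.reduction", see the Spark TODO above)
        conf.setBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION, true);
        return conf;
    }
}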

Example 97 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMRTableScan1 method process.

/**
 * Table scan encountered.
 * @param nd
 *          the table scan operator encountered
 * @param opProcCtx
 *          context
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx opProcCtx, Object... nodeOutputs) throws SemanticException {
    TableScanOperator op = (TableScanOperator) nd;
    GenMRProcContext ctx = (GenMRProcContext) opProcCtx;
    ctx.reset();
    ParseContext parseCtx = ctx.getParseCtx();
    Table table = op.getConf().getTableMetadata();
    Class<? extends InputFormat> inputFormat = table.getInputFormatClass();
    Map<Operator<? extends OperatorDesc>, GenMapRedCtx> mapCurrCtx = ctx.getMapCurrCtx();
    // create a dummy MapReduce task
    MapredWork currWork = GenMapRedUtils.getMapRedWork(parseCtx);
    MapRedTask currTask = (MapRedTask) TaskFactory.get(currWork);
    ctx.setCurrTask(currTask);
    ctx.setCurrTopOp(op);
    for (String alias : parseCtx.getTopOps().keySet()) {
        Operator<? extends OperatorDesc> currOp = parseCtx.getTopOps().get(alias);
        if (currOp == op) {
            String currAliasId = alias;
            ctx.setCurrAliasId(currAliasId);
            mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId));
            if (parseCtx.getQueryProperties().isAnalyzeCommand()) {
                boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand();
                if (BasicStatsNoJobTask.canUseBasicStats(table, inputFormat)) {
                    // For ORC and Parquet, all the following statements are the same
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
                    // There will not be any MR or Tez job above this task
                    StatsWork statWork = new StatsWork(table, parseCtx.getConf());
                    statWork.setFooterScan();
                    // If partition is specified, get pruned partition list
                    Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(op);
                    if (confirmedParts.size() > 0) {
                        List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
                        PrunedPartitionList partList = new PrunedPartitionList(table, confirmedParts, partCols, false);
                        statWork.addInputPartitions(partList.getPartitions());
                    }
                    Task<StatsWork> snjTask = TaskFactory.get(statWork);
                    ctx.setCurrTask(snjTask);
                    ctx.setCurrTopOp(null);
                    ctx.getRootTasks().clear();
                    ctx.getRootTasks().add(snjTask);
                } else {
                    // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS;
                    // The plan consists of a simple MapRedTask followed by a StatsTask.
                    // The MR task is just a simple TableScanOperator
                    BasicStatsWork statsWork = new BasicStatsWork(table.getTableSpec());
                    statsWork.setIsExplicitAnalyze(true);
                    statsWork.setNoScanAnalyzeCommand(noScan);
                    StatsWork columnStatsWork = new StatsWork(table, statsWork, parseCtx.getConf());
                    columnStatsWork.collectStatsFromAggregator(op.getConf());
                    columnStatsWork.setSourceTask(currTask);
                    Task<StatsWork> columnStatsTask = TaskFactory.get(columnStatsWork);
                    currTask.addDependentTask(columnStatsTask);
                    if (!ctx.getRootTasks().contains(currTask)) {
                        ctx.getRootTasks().add(currTask);
                    }
                    // The plan consists of a StatsTask only.
                    if (noScan) {
                        columnStatsTask.setParentTasks(null);
                        ctx.getRootTasks().remove(currTask);
                        ctx.getRootTasks().add(columnStatsTask);
                    }
                    currWork.getMapWork().setGatheringStats(true);
                    if (currWork.getReduceWork() != null) {
                        currWork.getReduceWork().setGatheringStats(true);
                    }
                    // NOTE: here we should use the new partition predicate pushdown API to get a list of
                    // pruned list,
                    // and pass it to setTaskPlan as the last parameter
                    Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(op);
                    if (confirmedPartns.size() > 0) {
                        List<String> partCols = GenMapRedUtils.getPartitionColumns(op);
                        PrunedPartitionList partList = new PrunedPartitionList(table, confirmedPartns, partCols, false);
                        GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx, partList);
                    } else {
                        // non-partitioned table
                        GenMapRedUtils.setTaskPlan(currAliasId, op, currTask, false, ctx);
                    }
                }
            }
            return true;
        }
    }
    assert false;
    return null;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Partition(org.apache.hadoop.hive.ql.metadata.Partition) Table(org.apache.hadoop.hive.ql.metadata.Table) MapRedTask(org.apache.hadoop.hive.ql.exec.mr.MapRedTask) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) MapredWork(org.apache.hadoop.hive.ql.plan.MapredWork) StatsWork(org.apache.hadoop.hive.ql.plan.StatsWork) BasicStatsWork(org.apache.hadoop.hive.ql.plan.BasicStatsWork) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) GenMapRedCtx(org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
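
For context, GenMRTableScan1 is not called directly: the MapReduce compiler registers it against a rule that matches table scan operators ("TS%") and fires it through a graph walker. The sketch below shows that wiring in rough form; it uses the generic DefaultGraphWalker rather than Hive's specialized GenMapRedWalker, the interface names follow recent releases (older releases use Rule/NodeProcessor/Dispatcher instead of the Semantic* names), and the wrapper method is hypothetical.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.SemanticDispatcher;
import org.apache.hadoop.hive.ql.lib.SemanticGraphWalker;
import org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor;
import org.apache.hadoop.hive.ql.lib.SemanticRule;
import org.apache.hadoop.hive.ql.optimizer.GenMROperator;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext;
import org.apache.hadoop.hive.ql.optimizer.GenMRTableScan1;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

public class GenMRTableScanWiringSketch {
    // Sketch only: fire GenMRTableScan1 for every operator tree rooted at a TableScanOperator ("TS%").
    static void walkTopOps(ParseContext pCtx, GenMRProcContext procCtx) throws SemanticException {
        Map<SemanticRule, SemanticNodeProcessor> opRules = new LinkedHashMap<>();
        opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"), new GenMRTableScan1());
        // GenMROperator is the default processor for any node no rule matches
        SemanticDispatcher disp = new DefaultRuleDispatcher(new GenMROperator(), opRules, procCtx);
        SemanticGraphWalker walker = new DefaultGraphWalker(disp);
        ArrayList<Node> topNodes = new ArrayList<>(pCtx.getTopOps().values());
        walker.startWalking(topNodes, null);
    }
}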

Example 98 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMapRedUtils method setTaskPlan.

/**
 * set the current task in the mapredWork.
 *
 * @param path
 *          the data path to register for the alias
 * @param alias
 *          current alias
 * @param topOp
 *          the top operator of the stack
 * @param plan
 *          current plan
 * @param local
 *          whether you need to add to map-reduce or local work
 * @param ttDesc
 *          table descriptor
 * @throws SemanticException
 */
public static void setTaskPlan(Path path, String alias, Operator<? extends OperatorDesc> topOp, MapWork plan, boolean local, TableDesc ttDesc) throws SemanticException {
    if (path == null || alias == null) {
        return;
    }
    if (topOp instanceof TableScanOperator) {
        try {
            Utilities.addSchemaEvolutionToTableScanOperator((StructObjectInspector) ttDesc.getSerDe().getObjectInspector(), (TableScanOperator) topOp);
        } catch (Exception e) {
            throw new SemanticException(e);
        }
    }
    if (!local) {
        plan.addPathToAlias(path, alias);
        plan.addPathToPartitionInfo(path, new PartitionDesc(ttDesc, null));
        plan.getAliasToWork().put(alias, topOp);
    } else {
        // populate local work if needed
        MapredLocalWork localPlan = plan.getMapRedLocalWork();
        if (localPlan == null) {
            localPlan = new MapredLocalWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(), new LinkedHashMap<String, FetchWork>());
        }
        assert localPlan.getAliasToWork().get(alias) == null;
        assert localPlan.getAliasToFetchWork().get(alias) == null;
        localPlan.getAliasToWork().put(alias, topOp);
        localPlan.getAliasToFetchWork().put(alias, new FetchWork(new Path(alias), ttDesc));
        plan.setMapRedLocalWork(localPlan);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) MapredLocalWork(org.apache.hadoop.hive.ql.plan.MapredLocalWork) FetchWork(org.apache.hadoop.hive.ql.plan.FetchWork) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LinkedHashMap(java.util.LinkedHashMap)

Example 99 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMapRedUtils method createTemporaryTableScanOperator.

public static TableScanOperator createTemporaryTableScanOperator(CompilationOpContext ctx, RowSchema rowSchema) {
    TableScanOperator tableScanOp = (TableScanOperator) OperatorFactory.get(ctx, new TableScanDesc(null), rowSchema);
    // Set needed columns for this dummy TableScanOperator
    List<Integer> neededColumnIds = new ArrayList<Integer>();
    List<String> neededColumnNames = new ArrayList<String>();
    List<ColumnInfo> parentColumnInfos = rowSchema.getSignature();
    for (int i = 0; i < parentColumnInfos.size(); i++) {
        neededColumnIds.add(i);
        neededColumnNames.add(parentColumnInfos.get(i).getInternalName());
    }
    tableScanOp.setNeededColumnIDs(neededColumnIds);
    tableScanOp.setNeededColumns(neededColumnNames);
    tableScanOp.setReferencedColumns(neededColumnNames);
    return tableScanOp;
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo)
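
Examples 98 and 99 are typically used in tandem when the compiler splits a plan at an intermediate file: a dummy scan is created from the parent operator's row schema and then registered in the new MapWork under the intermediate path. A minimal, hedged sketch of that pairing follows; the wrapper method and the "intermediate" alias are illustrative, and all arguments come from the surrounding compiler context.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;

public class IntermediateScanSketch {
    // Sketch only: create a temporary scan over an intermediate directory and wire it into a MapWork.
    static TableScanOperator wireIntermediateScan(CompilationOpContext opCtx, RowSchema parentSchema,
            Path taskTmpDir, TableDesc intermediateDesc, MapWork mapWork) throws SemanticException {
        // Example 99: dummy TableScanOperator that forwards every column of the parent schema
        TableScanOperator tempScan = GenMapRedUtils.createTemporaryTableScanOperator(opCtx, parentSchema);
        // Example 98: register the path -> alias -> operator mapping in the (non-local) map work
        GenMapRedUtils.setTaskPlan(taskTmpDir, "intermediate", tempScan, mapWork, false, intermediateDesc);
        return tempScan;
    }
}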

Example 100 with TableScanOperator

use of org.apache.hadoop.hive.ql.exec.TableScanOperator in project hive by apache.

the class GenMRUnion1 method processSubQueryUnionMerge.

/**
 * Union Operator encountered. A map-only query is encountered at the given
 * position. However, at least one sub-query is a map-reduce job. Copy the
 * information from the current top operator to the union context.
 *
 * @param ctx
 * @param uCtxTask
 * @param union
 * @param stack
 * @throws SemanticException
 */
private void processSubQueryUnionMerge(GenMRProcContext ctx, GenMRUnionCtx uCtxTask, UnionOperator union, Stack<Node> stack) throws SemanticException {
    // The current plan can be thrown away after being merged with the union
    // plan
    Task<?> uTask = uCtxTask.getUTask();
    ctx.setCurrTask(uTask);
    TableScanOperator topOp = ctx.getCurrTopOp();
    if (topOp != null && !ctx.isSeenOp(uTask, topOp)) {
        GenMapRedUtils.setTaskPlan(ctx.getCurrAliasId(), ctx.getCurrTopOp(), uTask, false, ctx);
    }
}
Also used : TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator)

Aggregations

TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 133
Operator (org.apache.hadoop.hive.ql.exec.Operator): 52
ArrayList (java.util.ArrayList): 47
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 44
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 36
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 35
FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 32
HashMap (java.util.HashMap): 30
Path (org.apache.hadoop.fs.Path): 30
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 29
Table (org.apache.hadoop.hive.ql.metadata.Table): 26
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 25
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 24
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 24
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 23
LinkedHashMap (java.util.LinkedHashMap): 22
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 22
MapWork (org.apache.hadoop.hive.ql.plan.MapWork): 22
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 22
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 21