
Example 16 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class TestOperatorCmp method testUnrelatedFiltersAreNotMatched1.

@Test
public void testUnrelatedFiltersAreNotMatched1() throws ParseException, CommandProcessorException {
    IDriver driver = createDriver();
    PlanMapper pm0 = getMapperForQuery(driver, "select u from tu where id_uv = 1 group by u");
    PlanMapper pm1 = getMapperForQuery(driver, "select v from tv where id_uv = 1 group by v");
    List<FilterOperator> fos0 = pm0.getAll(FilterOperator.class);
    List<FilterOperator> fos1 = pm1.getAll(FilterOperator.class);
    assertEquals(1, fos0.size());
    assertEquals(1, fos1.size());
    assertFalse("logicalEquals", compareOperators(fos0.get(0), fos1.get(0)));
}
Also used : FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) PlanMapper(org.apache.hadoop.hive.ql.plan.mapper.PlanMapper) IDriver(org.apache.hadoop.hive.ql.IDriver) Test(org.junit.Test)
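For contrast, a hedged sketch of the complementary positive case, written against the same helpers the test above relies on (createDriver, getMapperForQuery, compareOperators); this particular test is illustrative only and is not claimed to appear in TestOperatorCmp. Two runs of an identical query should produce filters that do compare as logically equal.

@Test
public void testIdenticalFiltersAreMatched() throws ParseException, CommandProcessorException {
    IDriver driver = createDriver();
    // same query planned twice; the resulting FilterOperators should match
    PlanMapper pm0 = getMapperForQuery(driver, "select u from tu where id_uv = 1 group by u");
    PlanMapper pm1 = getMapperForQuery(driver, "select u from tu where id_uv = 1 group by u");
    List<FilterOperator> fos0 = pm0.getAll(FilterOperator.class);
    List<FilterOperator> fos1 = pm1.getAll(FilterOperator.class);
    assertEquals(1, fos0.size());
    assertEquals(1, fos1.size());
    assertTrue("logicalEquals", compareOperators(fos0.get(0), fos1.get(0)));
}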

Example 17 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class TestStatEstimations method testFilterIntIn.

@Test
public void testFilterIntIn() throws ParseException, CommandProcessorException {
    IDriver driver = createDriver();
    String query = "explain select a from t2 where a IN (-1,0,1,2,10,20,30,40) order by a";
    PlanMapper pm = getMapperForQuery(driver, query);
    List<FilterOperator> fos = pm.getAll(FilterOperator.class);
    // the same operator may be present more than once in the plan mapper; sort by operator id (descending) so get(0) is deterministic
    fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed());
    assertThat(fos.size(), Matchers.greaterThanOrEqualTo(1));
    FilterOperator fop = fos.get(0);
    // IN-list elements outside the column's value range should be ignored by the stat estimation
    assertEquals(3, fop.getStatistics().getNumRows());
}
Also used : FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) PlanMapper(org.apache.hadoop.hive.ql.plan.mapper.PlanMapper) IDriver(org.apache.hadoop.hive.ql.IDriver) Test(org.junit.Test)
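To make the asserted row count concrete, here is a rough back-of-the-envelope sketch of the estimate; the min/max values and the one-row-per-in-range-value assumption below are illustrative only, not read from t2's actual column statistics.

// Illustrative only: hypothetical column statistics for t2.a (uses java.util.Arrays)
long[] inList = { -1, 0, 1, 2, 10, 20, 30, 40 };
long min = 0, max = 2;
long inRangeValues = Arrays.stream(inList)
        .filter(v -> v >= min && v <= max)  // values outside [min, max] are dropped from the estimate
        .count();                           // 3; with ~one row per in-range value this matches the assertion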

Example 18 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class DotExporter method nodeLabel.

private String nodeLabel(Operator<?> n) {
    List<String> rows = new ArrayList<String>();
    rows.add(nodeName0(n));
    if ((n instanceof TableScanOperator)) {
        TableScanOperator ts = (TableScanOperator) n;
        TableScanDesc conf = ts.getConf();
        rows.add(vBox(conf.getTableName(), conf.getAlias()));
    }
    if ((n instanceof FilterOperator)) {
        FilterOperator fil = (FilterOperator) n;
        FilterDesc conf = fil.getConf();
        rows.add(vBox("filter:", escape(conf.getPredicateString())));
    }
    return vBox(rows);
}
Also used : FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) ArrayList(java.util.ArrayList) TableScanDesc(org.apache.hadoop.hive.ql.plan.TableScanDesc)
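For orientation, a minimal sketch of how such a label could be embedded in Graphviz DOT output; the toDotNode helper and the record-label format below are assumptions for illustration, not DotExporter's actual vBox()/escape() implementation.

// Hypothetical helper: render one operator as a DOT node with a stacked label.
private String toDotNode(String operatorId, String operatorName, String detail) {
    // DOT "record" labels separate stacked fields with '|'
    return String.format("%s [shape=record, label=\"{%s|%s}\"];",
            operatorId, operatorName, detail);
}
// e.g. toDotNode("FIL_3", "FilterOperator", "filter: (id_uv = 1)") yields
//   FIL_3 [shape=record, label="{FilterOperator|filter: (id_uv = 1)}"];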

Example 19 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class HiveOpConverter method visit.

/**
 * TODO: 1) isSamplingPred 2) sampleDesc 3) isSortedFilter
 */
OpAttr visit(HiveFilter filterRel) throws SemanticException {
    OpAttr inputOpAf = dispatch(filterRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + filterRel.getId() + ":" + filterRel.getRelTypeName() + " with row type: [" + filterRel.getRowType() + "]");
    }
    ExprNodeDesc filCondExpr = filterRel.getCondition().accept(new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(), inputOpAf.vcolsInCalcite, filterRel.getCluster().getTypeFactory(), true));
    FilterDesc filDesc = new FilterDesc(filCondExpr, false);
    ArrayList<ColumnInfo> cinfoLst = createColInfos(inputOpAf.inputs.get(0));
    FilterOperator filOp = (FilterOperator) OperatorFactory.getAndMakeChild(filDesc, new RowSchema(cinfoLst), inputOpAf.inputs.get(0));
    if (LOG.isDebugEnabled()) {
        LOG.debug("Generated " + filOp + " with row schema: [" + filOp.getSchema() + "]");
    }
    return inputOpAf.clone(filOp);
}
Also used : FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
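As a self-contained companion to the conversion above, a minimal sketch of building a filter predicate tree by hand and wrapping it in a FilterDesc; the column name "id", alias "t", and literal are made up for illustration, while the constructors and factory methods are the ones from the Hive plan package.

// Sketch: an ExprNodeDesc tree for a predicate like "id > 10" (uses java.util.Arrays)
ExprNodeDesc idCol = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "id", "t", false);
ExprNodeDesc ten = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 10);
ExprNodeDesc predicate = ExprNodeGenericFuncDesc.newInstance(
        FunctionRegistry.getFunctionInfo(">").getGenericUDF(), Arrays.asList(idCol, ten));
// the second argument is isSamplingPred (see the TODO above); false for a plain filter
FilterDesc filterDesc = new FilterDesc(predicate, false);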

Example 20 with FilterOperator

use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

the class DynamicPartitionPruningOptimization method process.

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
    ParseContext parseContext;
    if (procCtx instanceof OptimizeTezProcContext) {
        parseContext = ((OptimizeTezProcContext) procCtx).parseContext;
    } else if (procCtx instanceof OptimizeSparkProcContext) {
        parseContext = ((OptimizeSparkProcContext) procCtx).getParseContext();
    } else {
        throw new IllegalArgumentException("expected parseContext to be either " + "OptimizeTezProcContext or OptimizeSparkProcContext, but found " + procCtx.getClass().getName());
    }
    FilterOperator filter = (FilterOperator) nd;
    FilterDesc desc = filter.getConf();
    if (!parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING) && !parseContext.getConf().isSparkDPPAny()) {
        // nothing to do when the optimization is off
        return null;
    }
    TableScanOperator ts = null;
    if (filter.getParentOperators().size() == 1 && filter.getParentOperators().get(0) instanceof TableScanOperator) {
        ts = (TableScanOperator) filter.getParentOperators().get(0);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Parent: " + filter.getParentOperators().get(0));
        LOG.debug("Filter: " + desc.getPredicateString());
        LOG.debug("TableScan: " + ts);
    }
    DynamicPartitionPrunerContext removerContext = new DynamicPartitionPrunerContext();
    // collect the dynamic pruning conditions
    removerContext.dynLists.clear();
    GenTezUtils.collectDynamicPruningConditions(desc.getPredicate(), removerContext);
    if (ts == null) {
        // Replace the synthetic predicate with true and bail out
        for (DynamicListContext ctx : removerContext) {
            ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
            replaceExprNode(ctx, desc, constNode);
        }
        return false;
    }
    boolean semiJoin = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);
    if (HiveConf.getVar(parseContext.getConf(), HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
        // TODO HIVE-16862: Implement a similar feature like "hive.tez.dynamic.semijoin.reduction" in hive on spark
        semiJoin = false;
    }
    List<ExprNodeDesc> newBetweenNodes = new ArrayList<>();
    List<ExprNodeDesc> newBloomFilterNodes = new ArrayList<>();
    for (DynamicListContext ctx : removerContext) {
        if (ctx.desc.getTypeInfo().getCategory() != ObjectInspector.Category.PRIMITIVE) {
            // https://issues.apache.org/jira/browse/HIVE-24988
            continue;
        }
        String column = ExprNodeDescUtils.extractColName(ctx.parent);
        boolean semiJoinAttempted = false;
        ExprNodeDesc constNode = new ExprNodeConstantDesc(ctx.parent.getTypeInfo(), true);
        if (column != null) {
            // Need unique IDs to refer to each min/max key value in the DynamicValueRegistry
            String keyBaseAlias = "";
            Table table = ts.getConf().getTableMetadata();
            boolean nonEquiJoin = isNonEquiJoin(ctx.parent);
            if (table != null && table.isPartitionKey(column) && !nonEquiJoin) {
                String columnType = table.getPartColByName(column).getType();
                String alias = ts.getConf().getAlias();
                PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("alias: " + alias);
                    LOG.debug("pruned partition list: ");
                    if (plist != null) {
                        for (Partition p : plist.getPartitions()) {
                            LOG.debug(p.getCompleteName());
                        }
                    }
                }
                // generate the pruning plan unless all partitions have already been filtered out statically
                if (plist == null || plist.getPartitions().size() != 0) {
                    LOG.info("Dynamic partitioning: " + table.getCompleteName() + "." + column);
                    generateEventOperatorPlan(ctx, parseContext, ts, column, columnType, null);
                } else {
                    // all partitions have been statically removed
                    LOG.debug("No partition pruning necessary.");
                }
            } else if (table.isNonNative() && table.getStorageHandler().addDynamicSplitPruningEdge(table, ctx.parent)) {
                generateEventOperatorPlan(ctx, parseContext, ts, column, table.getCols().stream().filter(e -> e.getName().equals(column)).map(e -> e.getType()).findFirst().get(), ctx.parent);
            } else {
                // semijoin
                LOG.debug("Column " + column + " is not a partition column");
                if (semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx) && ts.getConf().getFilterExpr() != null && !nonEquiJoin) {
                    LOG.debug("Initiate semijoin reduction for " + column + " (" + ts.getConf().getFilterExpr().getExprString());
                    StringBuilder internalColNameBuilder = new StringBuilder();
                    StringBuilder colNameBuilder = new StringBuilder();
                    // Apply best effort to fetch the correct table alias. If not
                    // found, fall back to the old logic.
                    StringBuilder tabAliasBuilder = new StringBuilder();
                    if (getColumnInfo(ctx, internalColNameBuilder, colNameBuilder, tabAliasBuilder)) {
                        String colName = colNameBuilder.toString();
                        String tableAlias;
                        if (tabAliasBuilder.length() > 0) {
                            tableAlias = tabAliasBuilder.toString();
                        } else {
                            // falling back
                            Operator<?> op = ctx.generator;
                            while (!(op == null || op instanceof TableScanOperator)) {
                                op = op.getParentOperators().get(0);
                            }
                            tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias());
                        }
                        // Use the tableAlias to generate keyBaseAlias
                        keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias + "_" + colName;
                        Map<String, List<SemiJoinHint>> hints = parseContext.getSemiJoinHints();
                        if (hints != null) {
                            // Create semijoin optimizations ONLY for hinted columns
                            semiJoinAttempted = processSemiJoinHints(parseContext, ctx, hints, tableAlias, internalColNameBuilder.toString(), colName, ts, keyBaseAlias);
                        } else {
                            // fallback to regular logic
                            semiJoinAttempted = generateSemiJoinOperatorPlan(ctx, parseContext, ts, keyBaseAlias, internalColNameBuilder.toString(), colName, null);
                        }
                    }
                }
            }
            // we always remove the condition by replacing it with "true"
            if (semiJoinAttempted) {
                List<ExprNodeDesc> betweenArgs = new ArrayList<ExprNodeDesc>();
                // first argument of "between": do not invert the result
                betweenArgs.add(new ExprNodeConstantDesc(Boolean.FALSE));
                // add column expression here
                betweenArgs.add(ctx.parent.getChildren().get(0));
                betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_min", ctx.desc.getTypeInfo())));
                betweenArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_max", ctx.desc.getTypeInfo())));
                ExprNodeDesc betweenNode = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("between").getGenericUDF(), betweenArgs);
                // add column expression for bloom filter
                List<ExprNodeDesc> bloomFilterArgs = new ArrayList<ExprNodeDesc>();
                bloomFilterArgs.add(ctx.parent.getChildren().get(0));
                bloomFilterArgs.add(new ExprNodeDynamicValueDesc(new DynamicValue(keyBaseAlias + "_bloom_filter", TypeInfoFactory.binaryTypeInfo)));
                ExprNodeDesc bloomFilterNode = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in_bloom_filter").getGenericUDF(), bloomFilterArgs);
                newBetweenNodes.add(betweenNode);
                newBloomFilterNodes.add(bloomFilterNode);
            }
        }
        replaceExprNode(ctx, desc, constNode);
    }
    if (!newBetweenNodes.isEmpty()) {
        // We need to add the new nodes: first the between nodes, then the bloom filters
        if (FunctionRegistry.isOpAnd(desc.getPredicate())) {
            // AND
            desc.getPredicate().getChildren().addAll(newBetweenNodes);
            desc.getPredicate().getChildren().addAll(newBloomFilterNodes);
        } else {
            List<ExprNodeDesc> andArgs = new ArrayList<>();
            andArgs.add(desc.getPredicate());
            andArgs.addAll(newBetweenNodes);
            andArgs.addAll(newBloomFilterNodes);
            ExprNodeGenericFuncDesc andExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and").getGenericUDF(), andArgs);
            // Also pass in filter as tableScan filterExpr
            ts.getConf().setFilterExpr(andExpr);
            desc.setPredicate(andExpr);
        }
    }
    // if we pushed the predicate into the table scan we need to remove the
    // synthetic conditions there.
    cleanTableScanFilters(ts);
    return false;
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) SparkPartitionPruningSinkOperator(org.apache.hadoop.hive.ql.parse.spark.SparkPartitionPruningSinkOperator) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) Arrays(java.util.Arrays) ReduceSinkOperator(org.apache.hadoop.hive.ql.exec.ReduceSinkOperator) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) SemanticAnalyzer(org.apache.hadoop.hive.ql.parse.SemanticAnalyzer) CombineEquivalentWorkResolver(org.apache.hadoop.hive.ql.optimizer.spark.CombineEquivalentWorkResolver) ConfVars(org.apache.hadoop.hive.conf.HiveConf.ConfVars) LoggerFactory(org.slf4j.LoggerFactory) GroupByOperator(org.apache.hadoop.hive.ql.exec.GroupByOperator) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) FunctionRegistry(org.apache.hadoop.hive.ql.exec.FunctionRegistry) PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ExprNodeDynamicValueDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc) OptimizeTezProcContext(org.apache.hadoop.hive.ql.parse.OptimizeTezProcContext) RuntimeValuesInfo(org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo) Map(java.util.Map) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) SemiJoinBranchInfo(org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo) NodeProcessorCtx(org.apache.hadoop.hive.ql.lib.NodeProcessorCtx) EnumSet(java.util.EnumSet) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) MetaStoreUtils(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils) DynamicPartitionPrunerContext(org.apache.hadoop.hive.ql.parse.GenTezUtils.DynamicPartitionPrunerContext) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) DynamicPruningEventDesc(org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc) SparkUtilities(org.apache.hadoop.hive.ql.exec.spark.SparkUtilities) List(java.util.List) DynamicValue(org.apache.hadoop.hive.ql.plan.DynamicValue) SelectOperator(org.apache.hadoop.hive.ql.exec.SelectOperator) GenericUDAFBloomFilterEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator) OptimizeSparkProcContext(org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext) Mode(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc) FilterDesc(org.apache.hadoop.hive.ql.plan.FilterDesc) GenericUDFIn(org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn) HashMap(java.util.HashMap) FilterOperator(org.apache.hadoop.hive.ql.exec.FilterOperator) SemanticNodeProcessor(org.apache.hadoop.hive.ql.lib.SemanticNodeProcessor) Stack(java.util.Stack) SemiJoinHint(org.apache.hadoop.hive.ql.parse.SemiJoinHint) ArrayList(java.util.ArrayList) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) Operation(org.apache.hadoop.hive.ql.io.AcidUtils.Operation) PlanUtils(org.apache.hadoop.hive.ql.plan.PlanUtils) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) ParseContext(org.apache.hadoop.hive.ql.parse.ParseContext) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) NullOrdering(org.apache.hadoop.hive.ql.util.NullOrdering) Logger(org.slf4j.Logger) TypeInfoFactory(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Table(org.apache.hadoop.hive.ql.metadata.Table) GenTezUtils(org.apache.hadoop.hive.ql.parse.GenTezUtils) 
SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) Node(org.apache.hadoop.hive.ql.lib.Node) Partition(org.apache.hadoop.hive.ql.metadata.Partition) SparkPartitionPruningSinkDesc(org.apache.hadoop.hive.ql.optimizer.spark.SparkPartitionPruningSinkDesc) DynamicListContext(org.apache.hadoop.hive.ql.parse.GenTezUtils.DynamicListContext) OperatorFactory(org.apache.hadoop.hive.ql.exec.OperatorFactory) Preconditions(com.google.common.base.Preconditions) ExprNodeDescUtils(org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils) Collections(java.util.Collections)
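To summarize the effect of the rewrite above, a hedged illustration of the resulting filter predicate; the table, column, and DynamicValue key names are made up, but the shape follows the between/in_bloom_filter construction in the loop.

// Hypothetical example: a synthetic condition such as
//   fact.k IN (DynamicValue(...))            -- generated for a join against a dimension key
// on a non-partition column is replaced with TRUE; when semijoin reduction is attempted,
// the filter additionally gains the conjuncts
//   fact.k BETWEEN DynamicValue(<keyBaseAlias>_min) AND DynamicValue(<keyBaseAlias>_max)
//   AND in_bloom_filter(fact.k, DynamicValue(<keyBaseAlias>_bloom_filter))
// If the existing predicate is not already an AND, the new conjuncts are wrapped together with it
// in a fresh "and" expression, which is also pushed down as the TableScan's filterExpr.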

Aggregations

FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 34
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 16
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13
IDriver (org.apache.hadoop.hive.ql.IDriver): 12
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 12
FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc): 12
PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper): 12
Test (org.junit.Test): 12
ArrayList (java.util.ArrayList): 10
Operator (org.apache.hadoop.hive.ql.exec.Operator): 10
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 9
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 9
List (java.util.List): 6
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 6
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 6
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 6
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 6
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 6
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 6
HashMap (java.util.HashMap): 5