Example 11 with FilterOperator

Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

From the class SemanticAnalyzer, method genNotNullFilterForJoinSourcePlan.

/*
   * For inner joins, push an 'is not null' predicate to the join sources for
   * every non-null-safe join predicate.
   */
private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, QBJoinTree joinTree, ExprNodeDesc[] joinKeys) throws SemanticException {
    if (qb == null || joinTree == null) {
        return input;
    }
    if (!joinTree.getNoOuterJoin()) {
        return input;
    }
    if (joinKeys == null || joinKeys.length == 0) {
        return input;
    }
    Multimap<Integer, ExprNodeColumnDesc> hashes = ArrayListMultimap.create();
    if (input instanceof FilterOperator) {
        ExprNodeDescUtils.getExprNodeColumnDesc(Arrays.asList(((FilterDesc) input.getConf()).getPredicate()), hashes);
    }
    ExprNodeDesc filterPred = null;
    List<Boolean> nullSafes = joinTree.getNullSafes();
    for (int i = 0; i < joinKeys.length; i++) {
        if (nullSafes.get(i) || (joinKeys[i] instanceof ExprNodeColumnDesc && ((ExprNodeColumnDesc) joinKeys[i]).getIsPartitionColOrVirtualCol())) {
            // skip null-safe (<=>) keys, since filtering nulls would change their
            // semantics, and partition/virtual columns, since those can never be null.
            continue;
        }
        boolean skip = false;
        for (ExprNodeColumnDesc node : hashes.get(joinKeys[i].hashCode())) {
            if (node.isSame(joinKeys[i])) {
                skip = true;
                break;
            }
        }
        if (skip) {
            // there is already a predicate on this src.
            continue;
        }
        List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
        args.add(joinKeys[i]);
        ExprNodeDesc nextExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);
        filterPred = filterPred == null ? nextExpr : ExprNodeDescUtils.mergePredicates(filterPred, nextExpr);
    }
    if (filterPred == null) {
        return input;
    }
    OpParseContext inputCtx = opParseCtx.get(input);
    RowResolver inputRR = inputCtx.getRowResolver();
    if (input instanceof FilterOperator) {
        FilterOperator f = (FilterOperator) input;
        List<ExprNodeDesc> preds = new ArrayList<ExprNodeDesc>();
        preds.add(f.getConf().getPredicate());
        preds.add(filterPred);
        f.getConf().setPredicate(ExprNodeDescUtils.mergePredicates(preds));
        return input;
    }
    FilterDesc filterDesc = new FilterDesc(filterPred, false);
    filterDesc.setGenerated(true);
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(filterDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR);
    LOG.debug("Created Filter Plan for {} row schema: {}", qb.getId(), inputRR);
    return output;
}
Also used: AbstractMapJoinOperator (org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator), SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator), JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator), Operator (org.apache.hadoop.hive.ql.exec.Operator), GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), LimitOperator (org.apache.hadoop.hive.ql.exec.LimitOperator), ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator), TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator), UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator), SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator), RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema), ArrayList (java.util.ArrayList), SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint), SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
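
A minimal sketch of the expression construction at the core of the method above, assuming a single hypothetical join key: an int column id_uv on table alias tu (the column name, type, and alias are illustrative only; TypeInfoFactory is org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory):

// hypothetical join key: a non-partition, non-virtual int column tu.id_uv
ExprNodeColumnDesc key = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "id_uv", "tu", false);
List<ExprNodeDesc> args = new ArrayList<ExprNodeDesc>();
args.add(key);
// the same 'isnotnull' wrapping the loop above performs for each non-null-safe join key
ExprNodeDesc isNotNull = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("isnotnull").getGenericUDF(), args);

For a query such as select * from tu join tv on (tu.id_uv = tv.id_uv), this is the predicate the generated FilterDesc carries on each join source, so rows with a null join key are dropped before the join executes.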

Example 12 with FilterOperator

Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

From the class TestCounterMapping, method testBreakupAnd2.

@Test
public void testBreakupAnd2() throws ParseException, CommandProcessorException {
    String query = "explain select sum(id_uv) from tu where u in (1,2,3) and u=2 and u=2 and 2=u group by u";
    IDriver driver = createDriver();
    PlanMapper pm = getMapperForQuery(driver, query);
    List<FilterOperator> fos = pm.getAll(FilterOperator.class);
    OpTreeSignature filterSig = pm.lookup(OpTreeSignature.class, fos.get(0));
    Object pred = filterSig.getSig().getSigMap().get("getPredicateString");
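    // 'u in (1,2,3)', 'u=2', 'u=2' and '2=u' are redundant; the optimizer is
    // expected to fold them into the single predicate (u = 2)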
    assertEquals("(u = 2) (type: boolean)", pred);
}
Also used: FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), OpTreeSignature (org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature), PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper), IDriver (org.apache.hadoop.hive.ql.IDriver), Test (org.junit.Test)
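
The same signature lookup works outside an assertion. A short sketch, assuming the createDriver() and getMapperForQuery() helpers of the test class above and a made-up query:

IDriver driver = createDriver();
PlanMapper pm = getMapperForQuery(driver, "explain select u from tu where u = 2");
for (FilterOperator fo : pm.getAll(FilterOperator.class)) {
    // each operator's tree signature records, among other properties, its predicate string
    OpTreeSignature sig = pm.lookup(OpTreeSignature.class, fo);
    System.out.println(sig.getSig().getSigMap().get("getPredicateString"));
}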

Example 13 with FilterOperator

Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

From the class TestCounterMapping, method testMappingJoinLookup.

@Test
@Ignore("needs HiveFilter mapping")
public void testMappingJoinLookup() throws ParseException, CommandProcessorException {
    IDriver driver = createDriver();
    PlanMapper pm0 = getMapperForQuery(driver, "select sum(tu.id_uv), sum(u)\n" + "from tu join tv on (tu.id_uv = tv.id_uv)\n" + "where u > 1 and v > 1");
    Iterator<EquivGroup> itG = pm0.iterateGroups();
    int checkedOperators = 0;
    while (itG.hasNext()) {
        EquivGroup g = itG.next();
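        // an EquivGroup ties together the representations of one plan node: the
        // Calcite HiveFilter, the executable FilterOperator, and its runtime OperatorStats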
        List<HiveFilter> hfs = g.getAll(HiveFilter.class);
        List<OperatorStats> oss = g.getAll(OperatorStats.class);
        List<FilterOperator> fos = g.getAll(FilterOperator.class);
        if (fos.size() > 0 && oss.size() > 0) {
            if (hfs.size() == 0) {
                fail("HiveFilter is not connected?");
            }
            OperatorStats os = oss.get(0);
            if (!(os.getOutputRecords() == 3 || os.getOutputRecords() == 6)) {
                fail("nonexpected number of records produced");
            }
            checkedOperators++;
        }
    }
    assertEquals(2, checkedOperators);
}
Also used: FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper), IDriver (org.apache.hadoop.hive.ql.IDriver), EquivGroup (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup), OperatorStats (org.apache.hadoop.hive.ql.stats.OperatorStats), HiveFilter (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter), Ignore (org.junit.Ignore), Test (org.junit.Test)

Example 14 with FilterOperator

Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

From the class TestReOptimization, method testStatsAreSetInReopt.

@Test
public void testStatsAreSetInReopt() throws Exception {
    IDriver driver = createDriver("overlay,reoptimize");
    String query = "select assert_true_oom(${hiveconf:zzz} > sum(u*v))" + " from tu join tv on (tu.id_uv=tv.id_uv)" + " where u<10 and v>1";
    PlanMapper pm = getMapperForQuery(driver, query);
    Iterator<EquivGroup> itG = pm.iterateGroups();
    int checkedOperators = 0;
    while (itG.hasNext()) {
        EquivGroup g = itG.next();
        List<FilterOperator> fos = g.getAll(FilterOperator.class);
        List<OperatorStats> oss = g.getAll(OperatorStats.class);
        if (fos.size() > 0 && oss.size() > 0) {
            fos.sort(TestCounterMapping.OPERATOR_ID_COMPARATOR.reversed());
            FilterOperator fo = fos.get(0);
            OperatorStats os = oss.get(0);
            Statistics stats = fo.getStatistics();
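            // re-optimization should overwrite the compile-time estimate with the
            // row count actually observed at runtime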
            assertEquals(os.getOutputRecords(), stats.getNumRows());
            if (!(os.getOutputRecords() == 3 || os.getOutputRecords() == 6)) {
                fail("nonexpected number of records produced");
            }
            checkedOperators++;
        }
    }
    assertEquals(2, checkedOperators);
}
Also used: FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper), IDriver (org.apache.hadoop.hive.ql.IDriver), EquivGroup (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup), OperatorStats (org.apache.hadoop.hive.ql.stats.OperatorStats), Statistics (org.apache.hadoop.hive.ql.plan.Statistics), Test (org.junit.Test)

Example 15 with FilterOperator

Use of org.apache.hadoop.hive.ql.exec.FilterOperator in project hive by apache.

From the class TestOperatorCmp, method testUnrelatedFiltersAreNotMatched0.

@Test
public void testUnrelatedFiltersAreNotMatched0() throws ParseException, CommandProcessorException {
    IDriver driver = createDriver();
    String query = "select u from tu where id_uv = 1 union all select v from tv where id_uv = 1";
    PlanMapper pm = getMapperForQuery(driver, query);
    List<FilterOperator> fos = pm.getAll(FilterOperator.class);
    // each of the two filters appears twice in the plan, giving 4 operators;
    // only the two duplicate pairs should compare as equal
    assertEquals(4, fos.size());
    int cnt = 0;
    for (int i = 0; i < 3; i++) {
        for (int j = i + 1; j < 4; j++) {
            if (compareOperators(fos.get(i), fos.get(j))) {
                cnt++;
            }
        }
    }
    assertEquals(2, cnt);
}
Also used: FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator), PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper), IDriver (org.apache.hadoop.hive.ql.IDriver), Test (org.junit.Test)

Aggregations

FilterOperator (org.apache.hadoop.hive.ql.exec.FilterOperator): 34 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 16 usages
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 13 usages
IDriver (org.apache.hadoop.hive.ql.IDriver): 12 usages
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 12 usages
FilterDesc (org.apache.hadoop.hive.ql.plan.FilterDesc): 12 usages
PlanMapper (org.apache.hadoop.hive.ql.plan.mapper.PlanMapper): 12 usages
Test (org.junit.Test): 12 usages
ArrayList (java.util.ArrayList): 10 usages
Operator (org.apache.hadoop.hive.ql.exec.Operator): 10 usages
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 9 usages
UnionOperator (org.apache.hadoop.hive.ql.exec.UnionOperator): 9 usages
List (java.util.List): 6 usages
AppMasterEventOperator (org.apache.hadoop.hive.ql.exec.AppMasterEventOperator): 6 usages
DummyStoreOperator (org.apache.hadoop.hive.ql.exec.DummyStoreOperator): 6 usages
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 6 usages
MapJoinOperator (org.apache.hadoop.hive.ql.exec.MapJoinOperator): 6 usages
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 6 usages
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 6 usages
HashMap (java.util.HashMap): 5 usages