Example 56 with InDimFilter

use of org.apache.druid.query.filter.InDimFilter in project druid by druid-io.

the class CachingClusteredClientTest method testTimeSeriesWithFilter.

@Test
public void testTimeSeriesWithFilter() {
    DimFilter filter = new AndDimFilter(
        new OrDimFilter(
            new SelectorDimFilter("dim0", "1", null),
            new BoundDimFilter("dim0", "222", "333", false, false, false, null, StringComparators.LEXICOGRAPHIC)
        ),
        new AndDimFilter(
            new InDimFilter("dim1", Arrays.asList("0", "1", "2", "3", "4"), null),
            new BoundDimFilter("dim1", "0", "3", false, true, false, null, StringComparators.LEXICOGRAPHIC),
            new BoundDimFilter("dim1", "1", "9999", true, false, false, null, StringComparators.LEXICOGRAPHIC)
        )
    );
    final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder()
        .dataSource(DATA_SOURCE)
        .intervals(SEG_SPEC)
        .filters(filter)
        .granularity(GRANULARITY)
        .aggregators(AGGS)
        .postAggregators(POST_AGGS)
        .context(CONTEXT);
    QueryRunner runner = new FinalizeResultsQueryRunner(getDefaultQueryRunner(), new TimeseriesQueryQueryToolChest());
    /*
    For dim0 (2011-01-01/2011-01-05), the combined range is {[1,1], [222,333]}, so segments [-inf,1], [1,2], [2,3], and
    [3,4] are needed.
    For dim1 (2011-01-06/2011-01-10), the combined range of the bound filters is {(1,3)}; combining this with the IN
    filter yields {[2,2]}, so segments [1,2] and [2,3] are needed. (See the range-overlap sketch after this example.)
    */
    List<Iterable<Result<TimeseriesResultValue>>> expectedResult = Arrays.asList(
        makeTimeResults(
            DateTimes.of("2011-01-01"), 50, 5000,
            DateTimes.of("2011-01-02"), 10, 1252,
            DateTimes.of("2011-01-03"), 20, 6213,
            DateTimes.of("2011-01-04"), 30, 743
        ),
        makeTimeResults(
            DateTimes.of("2011-01-07"), 60, 6020,
            DateTimes.of("2011-01-08"), 70, 250
        )
    );
    testQueryCachingWithFilter(
        runner,
        3,
        builder.randomQueryId().build(),
        expectedResult,
        Intervals.of("2011-01-01/2011-01-05"), makeTimeResults(DateTimes.of("2011-01-01"), 50, 5000),
        Intervals.of("2011-01-01/2011-01-05"), makeTimeResults(DateTimes.of("2011-01-02"), 10, 1252),
        Intervals.of("2011-01-01/2011-01-05"), makeTimeResults(DateTimes.of("2011-01-03"), 20, 6213),
        Intervals.of("2011-01-01/2011-01-05"), makeTimeResults(DateTimes.of("2011-01-04"), 30, 743),
        Intervals.of("2011-01-01/2011-01-05"), makeTimeResults(DateTimes.of("2011-01-05"), 40, 6000),
        Intervals.of("2011-01-06/2011-01-10"), makeTimeResults(DateTimes.of("2011-01-06"), 50, 425),
        Intervals.of("2011-01-06/2011-01-10"), makeTimeResults(DateTimes.of("2011-01-07"), 60, 6020),
        Intervals.of("2011-01-06/2011-01-10"), makeTimeResults(DateTimes.of("2011-01-08"), 70, 250),
        Intervals.of("2011-01-06/2011-01-10"), makeTimeResults(DateTimes.of("2011-01-09"), 23, 85312),
        Intervals.of("2011-01-06/2011-01-10"), makeTimeResults(DateTimes.of("2011-01-10"), 100, 512)
    );
}
Also used : TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) MergeIterable(org.apache.druid.java.util.common.guava.MergeIterable) FunctionalIterable(org.apache.druid.java.util.common.guava.FunctionalIterable) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Druids(org.apache.druid.query.Druids) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) Test(org.junit.Test)
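
The pruning reasoning in the test's comment reduces to a lexicographic range-overlap check: a segment only needs to be queried if its value range overlaps one of the value ranges the filter can match. Below is a minimal, self-contained sketch of that check; the Range class and all names in it are hypothetical illustrations, not Druid's actual pruning code.

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch (not Druid API): decides which segments are needed by
// testing lexicographic overlap between each segment's value range and the
// combined ranges derived from the filter, as in the comment above.
public class RangePruningSketch
{
    // A closed lexicographic range [lower, upper]; null means unbounded.
    static final class Range
    {
        final String lower;
        final String upper;

        Range(String lower, String upper)
        {
            this.lower = lower;
            this.upper = upper;
        }

        boolean overlaps(Range other)
        {
            // Two ranges overlap unless one ends strictly before the other begins.
            boolean thisEndsBeforeOther = other.lower != null
                                          && this.upper != null
                                          && this.upper.compareTo(other.lower) < 0;
            boolean otherEndsBeforeThis = this.lower != null
                                          && other.upper != null
                                          && other.upper.compareTo(this.lower) < 0;
            return !thisEndsBeforeOther && !otherEndsBeforeThis;
        }
    }

    public static void main(String[] args)
    {
        // Combined filter ranges for dim0, as stated in the comment: {[1,1], [222,333]}.
        List<Range> filterRanges = Arrays.asList(new Range("1", "1"), new Range("222", "333"));
        // Segment value ranges: [-inf,1], [1,2], [2,3], [3,4], [4,+inf).
        List<Range> segments = Arrays.asList(
            new Range(null, "1"),
            new Range("1", "2"),
            new Range("2", "3"),
            new Range("3", "4"),
            new Range("4", null)
        );
        for (Range segment : segments) {
            boolean needed = filterRanges.stream().anyMatch(segment::overlaps);
            System.out.println("[" + segment.lower + "," + segment.upper + "] needed: " + needed);
        }
    }
}

Running the sketch reports needed = true for exactly the first four segments, matching the comment: the [4,+inf) segment overlaps neither [1,1] nor [222,333] (note that lexicographically "4" sorts after "333").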

Example 57 with InDimFilter

use of org.apache.druid.query.filter.InDimFilter in project druid by druid-io.

the class CachingClusteredClientTest method testHashBasedPruningQueryContextEnabledWithPartitionFunctionAndPartitionDimensionsDoSegmentPruning.

@Test
public void testHashBasedPruningQueryContextEnabledWithPartitionFunctionAndPartitionDimensionsDoSegmentPruning() {
    DimFilter filter = new AndDimFilter(
        new SelectorDimFilter("dim1", "a", null),
        new BoundDimFilter("dim2", "e", "zzz", true, true, false, null, StringComparators.LEXICOGRAPHIC),
        // The dim3 conjunction below is equivalent to InDimFilter("dim3", Arrays.asList("c"), null)
        new AndDimFilter(
            new InDimFilter("dim3", Arrays.asList("a", "c", "e", "g"), null),
            new BoundDimFilter("dim3", "aaa", "ddd", false, false, false, null, StringComparators.LEXICOGRAPHIC)
        )
    );
    final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder()
        .dataSource(DATA_SOURCE)
        .filters(filter)
        .granularity(GRANULARITY)
        .intervals(SEG_SPEC)
        .context(CONTEXT)
        .intervals("2011-01-05/2011-01-10")
        .aggregators(RENAMED_AGGS)
        .postAggregators(RENAMED_POST_AGGS)
        .randomQueryId();
    TimeseriesQuery query = builder.build();
    QueryRunner runner = new FinalizeResultsQueryRunner(getDefaultQueryRunner(), new TimeseriesQueryQueryToolChest());
    final Interval interval1 = Intervals.of("2011-01-06/2011-01-07");
    final Interval interval2 = Intervals.of("2011-01-07/2011-01-08");
    final Interval interval3 = Intervals.of("2011-01-08/2011-01-09");
    final DruidServer lastServer = servers[random.nextInt(servers.length)];
    List<String> partitionDimensions1 = ImmutableList.of("dim1");
    ServerSelector selector1 = makeMockHashBasedSelector(lastServer, partitionDimensions1, HashPartitionFunction.MURMUR3_32_ABS, 0, 6);
    ServerSelector selector2 = makeMockHashBasedSelector(lastServer, partitionDimensions1, HashPartitionFunction.MURMUR3_32_ABS, 1, 6);
    ServerSelector selector3 = makeMockHashBasedSelector(lastServer, partitionDimensions1, HashPartitionFunction.MURMUR3_32_ABS, 2, 6);
    ServerSelector selector4 = makeMockHashBasedSelector(lastServer, partitionDimensions1, HashPartitionFunction.MURMUR3_32_ABS, 3, 6);
    ServerSelector selector5 = makeMockHashBasedSelector(lastServer, partitionDimensions1, HashPartitionFunction.MURMUR3_32_ABS, 4, 6);
    ServerSelector selector6 = makeMockHashBasedSelector(lastServer, partitionDimensions1, HashPartitionFunction.MURMUR3_32_ABS, 5, 6);
    List<String> partitionDimensions2 = ImmutableList.of("dim2");
    ServerSelector selector7 = makeMockHashBasedSelector(lastServer, partitionDimensions2, HashPartitionFunction.MURMUR3_32_ABS, 0, 3);
    ServerSelector selector8 = makeMockHashBasedSelector(lastServer, partitionDimensions2, HashPartitionFunction.MURMUR3_32_ABS, 1, 3);
    ServerSelector selector9 = makeMockHashBasedSelector(lastServer, partitionDimensions2, HashPartitionFunction.MURMUR3_32_ABS, 2, 3);
    List<String> partitionDimensions3 = ImmutableList.of("dim1", "dim3");
    ServerSelector selector10 = makeMockHashBasedSelector(lastServer, partitionDimensions3, HashPartitionFunction.MURMUR3_32_ABS, 0, 4);
    ServerSelector selector11 = makeMockHashBasedSelector(lastServer, partitionDimensions3, HashPartitionFunction.MURMUR3_32_ABS, 1, 4);
    ServerSelector selector12 = makeMockHashBasedSelector(lastServer, partitionDimensions3, HashPartitionFunction.MURMUR3_32_ABS, 2, 4);
    ServerSelector selector13 = makeMockHashBasedSelector(lastServer, partitionDimensions3, HashPartitionFunction.MURMUR3_32_ABS, 3, 4);
    timeline.add(interval1, "v", new NumberedPartitionChunk<>(0, 6, selector1));
    timeline.add(interval1, "v", new NumberedPartitionChunk<>(1, 6, selector2));
    timeline.add(interval1, "v", new NumberedPartitionChunk<>(2, 6, selector3));
    timeline.add(interval1, "v", new NumberedPartitionChunk<>(3, 6, selector4));
    timeline.add(interval1, "v", new NumberedPartitionChunk<>(4, 6, selector5));
    timeline.add(interval1, "v", new NumberedPartitionChunk<>(5, 6, selector6));
    timeline.add(interval2, "v", new NumberedPartitionChunk<>(0, 3, selector7));
    timeline.add(interval2, "v", new NumberedPartitionChunk<>(1, 3, selector8));
    timeline.add(interval2, "v", new NumberedPartitionChunk<>(2, 3, selector9));
    timeline.add(interval3, "v", new NumberedPartitionChunk<>(0, 4, selector10));
    timeline.add(interval3, "v", new NumberedPartitionChunk<>(1, 4, selector11));
    timeline.add(interval3, "v", new NumberedPartitionChunk<>(2, 4, selector12));
    timeline.add(interval3, "v", new NumberedPartitionChunk<>(3, 4, selector13));
    final Capture<QueryPlus> capture = Capture.newInstance();
    final Capture<ResponseContext> contextCap = Capture.newInstance();
    QueryRunner mockRunner = EasyMock.createNiceMock(QueryRunner.class);
    EasyMock.expect(mockRunner.run(EasyMock.capture(capture), EasyMock.capture(contextCap))).andReturn(Sequences.empty()).anyTimes();
    EasyMock.expect(serverView.getQueryRunner(lastServer)).andReturn(mockRunner).anyTimes();
    EasyMock.replay(serverView);
    EasyMock.replay(mockRunner);
    List<SegmentDescriptor> expectedDescriptors = new ArrayList<>();
    // Hash pruning narrows interval1 down to 1 chunk (see the sketch after this example)
    expectedDescriptors.add(new SegmentDescriptor(interval1, "v", 3));
    // Can't filter out any chunks of interval2
    expectedDescriptors.add(new SegmentDescriptor(interval2, "v", 0));
    expectedDescriptors.add(new SegmentDescriptor(interval2, "v", 1));
    expectedDescriptors.add(new SegmentDescriptor(interval2, "v", 2));
    // Hash pruning narrows interval3 down to 1 chunk
    expectedDescriptors.add(new SegmentDescriptor(interval3, "v", 2));
    MultipleSpecificSegmentSpec expected = new MultipleSpecificSegmentSpec(expectedDescriptors);
    runner.run(QueryPlus.wrap(query)).toList();
    Assert.assertEquals(expected, ((TimeseriesQuery) capture.getValue().getQuery()).getQuerySegmentSpec());
}
Also used : MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ArrayList(java.util.ArrayList) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) ServerSelector(org.apache.druid.client.selector.ServerSelector) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Druids(org.apache.druid.query.Druids) ResponseContext(org.apache.druid.query.context.ResponseContext) InDimFilter(org.apache.druid.query.filter.InDimFilter) QueryPlus(org.apache.druid.query.QueryPlus) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) QueryableDruidServer(org.apache.druid.client.selector.QueryableDruidServer) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) QueryRunner(org.apache.druid.query.QueryRunner) DimFilter(org.apache.druid.query.filter.DimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) Interval(org.joda.time.Interval) Test(org.junit.Test)
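
The idea the test exercises: when a filter pins every partition dimension of a hash-partitioned interval to a single value, the broker can recompute the partition bucket from those values and query only the matching chunk. Druid's MURMUR3_32_ABS does (roughly) abs(murmur3_32(serialized partition values)) modulo the bucket count. The sketch below is a simplified model of that decision, not Druid's implementation; it substitutes Objects.hash as a stand-in hash, so its bucket numbers will not match the test's expected descriptors.

import java.util.Arrays;
import java.util.List;
import java.util.Objects;

// Simplified model of hash-based segment pruning (not Druid's implementation).
public class HashPruningSketch
{
    // Stand-in for HashPartitionFunction.MURMUR3_32_ABS, for illustration only.
    static int bucketOf(List<String> partitionValues, int numBuckets)
    {
        return Math.abs(Objects.hash(partitionValues)) % numBuckets;
    }

    public static void main(String[] args)
    {
        // interval1 is partitioned on dim1 into 6 buckets, and the filter pins
        // dim1 = "a", so only one chunk has to be queried.
        int keep1 = bucketOf(Arrays.asList("a"), 6);
        System.out.println("interval1: query only chunk " + keep1 + " of 6");

        // interval3 is partitioned on (dim1, dim3) into 4 buckets; the filter pins
        // dim1 = "a" and, after intersecting the IN and bound filters, dim3 = "c".
        int keep3 = bucketOf(Arrays.asList("a", "c"), 4);
        System.out.println("interval3: query only chunk " + keep3 + " of 4");

        // interval2 is partitioned on dim2, but the filter is only a range on dim2:
        // no single bucket can be computed, so all 3 chunks must be queried.
        System.out.println("interval2: query chunks 0, 1, 2 of 3");
    }
}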

Example 58 with InDimFilter

use of org.apache.druid.query.filter.InDimFilter in project druid by druid-io.

the class ConvertSelectorsToIns method process.

@Override
public DimFilter process(DimFilter filter) {
    if (filter instanceof OrDimFilter) {
        // Copy children list
        final List<DimFilter> children = Lists.newArrayList(((OrDimFilter) filter).getFields());
        // Group filters by dimension and extractionFn.
        final Map<BoundRefKey, List<SelectorDimFilter>> selectors = new HashMap<>();
        for (DimFilter child : children) {
            if (child instanceof SelectorDimFilter) {
                final SelectorDimFilter selector = (SelectorDimFilter) child;
                final BoundRefKey boundRefKey = BoundRefKey.from(
                    selector,
                    RowSignatures.getNaturalStringComparator(
                        sourceRowSignature,
                        SimpleExtraction.of(selector.getDimension(), selector.getExtractionFn())
                    )
                );
                selectors.computeIfAbsent(boundRefKey, k -> new ArrayList<>()).add(selector);
            }
        }
        // Emit IN filters for each group of size > 1.
        for (Map.Entry<BoundRefKey, List<SelectorDimFilter>> entry : selectors.entrySet()) {
            final List<SelectorDimFilter> filterList = entry.getValue();
            if (filterList.size() > 1) {
                // We found a simplification. Remove the old filters and add new ones.
                final Set<String> values = Sets.newHashSetWithExpectedSize(filterList.size());
                for (final SelectorDimFilter selector : filterList) {
                    values.add(selector.getValue());
                    if (!children.remove(selector)) {
                        // Don't expect this to happen, but include it as a sanity check.
                        throw new ISE("Tried to remove selector but couldn't");
                    }
                }
                children.add(new InDimFilter(entry.getKey().getDimension(), values, entry.getKey().getExtractionFn(), null));
            }
        }
        if (!children.equals(((OrDimFilter) filter).getFields())) {
            return children.size() == 1 ? children.get(0) : new OrDimFilter(children);
        } else {
            return filter;
        }
    } else {
        return filter;
    }
}
Also used : HashMap(java.util.HashMap) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) ArrayList(java.util.ArrayList) List(java.util.List) ISE(org.apache.druid.java.util.common.ISE) DimFilter(org.apache.druid.query.filter.DimFilter) Map(java.util.Map)
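
The rewrite that process performs is easy to see on plain strings: inside an OR, group equality filters by column and collapse any group of two or more into a single IN. The following standalone model uses hypothetical stand-in types rather than Druid's filter classes.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Standalone model of the ConvertSelectorsToIns rewrite:
// OR(dim1 = a, dim1 = b, dim2 = x) becomes OR(dim1 IN (a, b), dim2 = x).
public class SelectorsToInsSketch
{
    public static void main(String[] args)
    {
        // Each pair is (dimension, value) for a selector inside an OR.
        List<String[]> selectors = List.of(
            new String[]{"dim1", "a"},
            new String[]{"dim1", "b"},
            new String[]{"dim2", "x"}
        );

        // Group selector values by dimension, mirroring the BoundRefKey grouping
        // (the real code also keys on the extractionFn).
        Map<String, Set<String>> byDimension = new LinkedHashMap<>();
        for (String[] selector : selectors) {
            byDimension.computeIfAbsent(selector[0], k -> new LinkedHashSet<>()).add(selector[1]);
        }

        // Emit IN for groups of size > 1; keep lone selectors as-is.
        List<String> rewritten = new ArrayList<>();
        for (Map.Entry<String, Set<String>> entry : byDimension.entrySet()) {
            if (entry.getValue().size() > 1) {
                rewritten.add(entry.getKey() + " IN " + entry.getValue());
            } else {
                rewritten.add(entry.getKey() + " = " + entry.getValue().iterator().next());
            }
        }
        // Prints: [dim1 IN [a, b], dim2 = x]
        System.out.println(rewritten);
    }
}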

Aggregations

InDimFilter (org.apache.druid.query.filter.InDimFilter): 58 usages
Test (org.junit.Test): 43 usages
BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter): 39 usages
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 24 usages
Filter (org.apache.druid.query.filter.Filter): 18 usages
FalseFilter (org.apache.druid.segment.filter.FalseFilter): 17 usages
OrFilter (org.apache.druid.segment.filter.OrFilter): 17 usages
SelectorFilter (org.apache.druid.segment.filter.SelectorFilter): 17 usages
ArrayList (java.util.ArrayList): 16 usages
ExpressionDimFilter (org.apache.druid.query.filter.ExpressionDimFilter): 16 usages
AndFilter (org.apache.druid.segment.filter.AndFilter): 16 usages
BoundFilter (org.apache.druid.segment.filter.BoundFilter): 16 usages
JoinFilterPreAnalysis (org.apache.druid.segment.join.filter.JoinFilterPreAnalysis): 16 usages
JoinFilterSplit (org.apache.druid.segment.join.filter.JoinFilterSplit): 16 usages
DimFilter (org.apache.druid.query.filter.DimFilter): 14 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 13 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 12 usages
RegexDimFilter (org.apache.druid.query.filter.RegexDimFilter): 12 usages
SearchQueryDimFilter (org.apache.druid.query.filter.SearchQueryDimFilter): 12 usages
ContainsSearchQuerySpec (org.apache.druid.query.search.ContainsSearchQuerySpec): 12 usages