Search in sources :

Example 21 with LookupExtractionFn

use of org.apache.druid.query.lookup.LookupExtractionFn in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithSimpleRenameRetainMissing.

/**
 * Groups by {@code quality} through a map-backed lookup that renames each quality value to the
 * same value with a trailing {@code "0"}, exposes the result as {@code alias}, and checks the
 * per-day row counts and {@code idx} sums for the first two days.
 */
@Test
public void testGroupByWithSimpleRenameRetainMissing() {
    // Every quality value has an entry, so each output row carries a renamed alias.
    Map<String, String> renames = new HashMap<>();
    renames.put("automotive", "automotive0");
    renames.put("business", "business0");
    renames.put("entertainment", "entertainment0");
    renames.put("health", "health0");
    renames.put("mezzanine", "mezzanine0");
    renames.put("news", "news0");
    renames.put("premium", "premium0");
    renames.put("technology", "technology0");
    renames.put("travel", "travel0");

    LookupExtractionFn renameFn = new LookupExtractionFn(
        new MapLookupExtractor(renames, false),
        true,
        null,
        true,
        false
    );
    ExtractionDimensionSpec aliasSpec = new ExtractionDimensionSpec("quality", "alias", renameFn);

    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
        .setDimensions(aliasSpec)
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
        .build();

    // One row per (day, renamed quality), listed day by day.
    List<ResultRow> expectedResults = Arrays.asList(
        makeRow(query, "2011-04-01", "alias", "automotive0", "rows", 1L, "idx", 135L),
        makeRow(query, "2011-04-01", "alias", "business0", "rows", 1L, "idx", 118L),
        makeRow(query, "2011-04-01", "alias", "entertainment0", "rows", 1L, "idx", 158L),
        makeRow(query, "2011-04-01", "alias", "health0", "rows", 1L, "idx", 120L),
        makeRow(query, "2011-04-01", "alias", "mezzanine0", "rows", 3L, "idx", 2870L),
        makeRow(query, "2011-04-01", "alias", "news0", "rows", 1L, "idx", 121L),
        makeRow(query, "2011-04-01", "alias", "premium0", "rows", 3L, "idx", 2900L),
        makeRow(query, "2011-04-01", "alias", "technology0", "rows", 1L, "idx", 78L),
        makeRow(query, "2011-04-01", "alias", "travel0", "rows", 1L, "idx", 119L),
        makeRow(query, "2011-04-02", "alias", "automotive0", "rows", 1L, "idx", 147L),
        makeRow(query, "2011-04-02", "alias", "business0", "rows", 1L, "idx", 112L),
        makeRow(query, "2011-04-02", "alias", "entertainment0", "rows", 1L, "idx", 166L),
        makeRow(query, "2011-04-02", "alias", "health0", "rows", 1L, "idx", 113L),
        makeRow(query, "2011-04-02", "alias", "mezzanine0", "rows", 3L, "idx", 2447L),
        makeRow(query, "2011-04-02", "alias", "news0", "rows", 1L, "idx", 114L),
        makeRow(query, "2011-04-02", "alias", "premium0", "rows", 3L, "idx", 2505L),
        makeRow(query, "2011-04-02", "alias", "technology0", "rows", 1L, "idx", 97L),
        makeRow(query, "2011-04-02", "alias", "travel0", "rows", 1L, "idx", 126L)
    );

    Iterable<ResultRow> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, actualResults, "retain-missing");
}
Also used : LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) HashMap(java.util.HashMap) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 22 with LookupExtractionFn

use of org.apache.druid.query.lookup.LookupExtractionFn in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithAlphaNumericDimensionOrder.

/**
 * Groups by {@code quality} through a lookup that maps each quality to a word-plus-number label,
 * orders the output on {@code alias} with {@code StringComparators.ALPHANUMERIC}, and checks
 * that the rows come back in the expected order for the first two days.
 */
@Test
public void testGroupByWithAlphaNumericDimensionOrder() {
    // Cannot vectorize due to extraction dimension spec.
    cannotVectorize();

    Map<String, String> labels = new HashMap<>();
    labels.put("automotive", "health105");
    labels.put("business", "health20");
    labels.put("entertainment", "travel47");
    labels.put("health", "health55");
    labels.put("mezzanine", "health09");
    labels.put("news", "health0000");
    labels.put("premium", "health999");
    labels.put("technology", "travel123");
    labels.put("travel", "travel555");

    LookupExtractionFn labelFn = new LookupExtractionFn(
        new MapLookupExtractor(labels, false),
        false,
        null,
        false,
        false
    );
    ExtractionDimensionSpec aliasSpec = new ExtractionDimensionSpec("quality", "alias", labelFn);

    // Single ordering column on the alias; the second limit-spec argument is left null as before.
    DefaultLimitSpec alphanumericByAlias = new DefaultLimitSpec(
        Collections.singletonList(new OrderByColumnSpec("alias", null, StringComparators.ALPHANUMERIC)),
        null
    );

    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
        .setDimensions(aliasSpec)
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .setLimitSpec(alphanumericByAlias)
        .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
        .build();

    // The order of these rows is the point of the test: e.g. "health55" precedes "health105".
    List<ResultRow> expectedResults = Arrays.asList(
        makeRow(query, "2011-04-01", "alias", "health0000", "rows", 1L, "idx", 121L),
        makeRow(query, "2011-04-01", "alias", "health09", "rows", 3L, "idx", 2870L),
        makeRow(query, "2011-04-01", "alias", "health20", "rows", 1L, "idx", 118L),
        makeRow(query, "2011-04-01", "alias", "health55", "rows", 1L, "idx", 120L),
        makeRow(query, "2011-04-01", "alias", "health105", "rows", 1L, "idx", 135L),
        makeRow(query, "2011-04-01", "alias", "health999", "rows", 3L, "idx", 2900L),
        makeRow(query, "2011-04-01", "alias", "travel47", "rows", 1L, "idx", 158L),
        makeRow(query, "2011-04-01", "alias", "travel123", "rows", 1L, "idx", 78L),
        makeRow(query, "2011-04-01", "alias", "travel555", "rows", 1L, "idx", 119L),
        makeRow(query, "2011-04-02", "alias", "health0000", "rows", 1L, "idx", 114L),
        makeRow(query, "2011-04-02", "alias", "health09", "rows", 3L, "idx", 2447L),
        makeRow(query, "2011-04-02", "alias", "health20", "rows", 1L, "idx", 112L),
        makeRow(query, "2011-04-02", "alias", "health55", "rows", 1L, "idx", 113L),
        makeRow(query, "2011-04-02", "alias", "health105", "rows", 1L, "idx", 147L),
        makeRow(query, "2011-04-02", "alias", "health999", "rows", 3L, "idx", 2505L),
        makeRow(query, "2011-04-02", "alias", "travel47", "rows", 1L, "idx", 166L),
        makeRow(query, "2011-04-02", "alias", "travel123", "rows", 1L, "idx", 97L),
        makeRow(query, "2011-04-02", "alias", "travel555", "rows", 1L, "idx", 126L)
    );

    Iterable<ResultRow> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, actualResults, "alphanumeric-dimension-order");
}
Also used : DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) HashMap(java.util.HashMap) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 23 with LookupExtractionFn

use of org.apache.druid.query.lookup.LookupExtractionFn in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithExtractionDimFilter.

// Extraction Filters testing
/**
 * Filters on {@code quality} with an OR of one extraction filter and five selector filters.
 * The extraction filter targets the shared value that four qualities are mapped to, and the
 * selectors name the other five, so the expected rows cover all nine quality values.
 */
@Test
public void testGroupByWithExtractionDimFilter() {
    final String mergedValue = "automotiveAndBusinessAndNewsAndMezzanine";

    // Four distinct qualities map to one shared lookup value.
    Map<String, String> mergedQualities = new HashMap<>();
    mergedQualities.put("automotive", mergedValue);
    mergedQualities.put("business", mergedValue);
    mergedQualities.put("mezzanine", mergedValue);
    mergedQualities.put("news", mergedValue);

    LookupExtractionFn mergeFn = new LookupExtractionFn(
        new MapLookupExtractor(mergedQualities, false),
        false,
        null,
        true,
        false
    );

    List<DimFilter> anyOf = Lists.newArrayList(
        new ExtractionDimFilter("quality", mergedValue, mergeFn, null),
        new SelectorDimFilter("quality", "entertainment", null),
        new SelectorDimFilter("quality", "health", null),
        new SelectorDimFilter("quality", "premium", null),
        new SelectorDimFilter("quality", "technology", null),
        new SelectorDimFilter("quality", "travel", null)
    );

    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
        .setDimFilter(new OrDimFilter(anyOf))
        .build();

    // The dimension itself is not extracted, so aliases are the raw quality values.
    List<ResultRow> expectedResults = Arrays.asList(
        makeRow(query, "2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L),
        makeRow(query, "2011-04-01", "alias", "business", "rows", 1L, "idx", 118L),
        makeRow(query, "2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L),
        makeRow(query, "2011-04-01", "alias", "health", "rows", 1L, "idx", 120L),
        makeRow(query, "2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
        makeRow(query, "2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
        makeRow(query, "2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L),
        makeRow(query, "2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L),
        makeRow(query, "2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L),
        makeRow(query, "2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L),
        makeRow(query, "2011-04-02", "alias", "business", "rows", 1L, "idx", 112L),
        makeRow(query, "2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L),
        makeRow(query, "2011-04-02", "alias", "health", "rows", 1L, "idx", 113L),
        makeRow(query, "2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
        makeRow(query, "2011-04-02", "alias", "news", "rows", 1L, "idx", 114L),
        makeRow(query, "2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L),
        makeRow(query, "2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L),
        makeRow(query, "2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L)
    );

    Iterable<ResultRow> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, actualResults, "dim-extraction");
}
Also used : HashMap(java.util.HashMap) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) ExtractionDimFilter(org.apache.druid.query.filter.ExtractionDimFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) NotDimFilter(org.apache.druid.query.filter.NotDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ExtractionDimFilter(org.apache.druid.query.filter.ExtractionDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 24 with LookupExtractionFn

use of org.apache.druid.query.lookup.LookupExtractionFn in project druid by druid-io.

the class BloomDimFilterTest method testSelectorWithLookupExtractionFn.

/**
 * Checks which rows a {@code BloomDimFilter} matches when the column value is first passed
 * through a {@code LookupExtractionFn}. Three lookups are exercised in turn: one constructed
 * with the replacement string {@code "UNKNOWN"}, one constructed with {@code true} as its
 * second argument and no replacement, and one that maps a key to the empty string.
 *
 * @throws IOException declared by the test; presumably raised by {@code bloomKFilter} — confirm
 */
@Test
public void testSelectorWithLookupExtractionFn() throws IOException {
    // Lookup 1: several keys share "HELLO"; "UNKNOWN" is also the configured replacement string.
    Map<String, String> helloMap = ImmutableMap.of("1", "HELLO", "a", "HELLO", "def", "HELLO", "abc", "UNKNOWN");
    LookupExtractor helloExtractor = new MapLookupExtractor(helloMap, false);
    LookupExtractionFn helloFn = new LookupExtractionFn(helloExtractor, false, "UNKNOWN", false, true);

    assertFilterMatches(
        new BloomDimFilter("dim0", bloomKFilter(1000, "HELLO"), helloFn),
        ImmutableList.of("1")
    );
    assertFilterMatches(
        new BloomDimFilter("dim0", bloomKFilter(1000, "UNKNOWN"), helloFn),
        ImmutableList.of("0", "2", "3", "4", "5")
    );
    assertFilterMatches(
        new BloomDimFilter("dim1", bloomKFilter(1000, "HELLO"), helloFn),
        ImmutableList.of("3", "4")
    );
    assertFilterMatches(
        new BloomDimFilter("dim1", bloomKFilter(1000, "UNKNOWN"), helloFn),
        ImmutableList.of("0", "1", "2", "5")
    );
    assertFilterMatches(
        new BloomDimFilter("dim2", bloomKFilter(1000, "HELLO"), helloFn),
        ImmutableList.of("0", "3")
    );
    assertFilterMatches(
        new BloomDimFilter("dim2", bloomKFilter(1000, "UNKNOWN"), helloFn),
        ImmutableList.of("0", "1", "2", "4", "5")
    );
    assertFilterMatches(
        new BloomDimFilter("dim3", bloomKFilter(1000, "HELLO"), helloFn),
        ImmutableList.of()
    );
    assertFilterMatches(
        new BloomDimFilter("dim3", bloomKFilter(1000, "UNKNOWN"), helloFn),
        ImmutableList.of("0", "1", "2", "3", "4", "5")
    );
    assertFilterMatches(
        new BloomDimFilter("dim4", bloomKFilter(1000, "HELLO"), helloFn),
        ImmutableList.of()
    );
    assertFilterMatches(
        new BloomDimFilter("dim4", bloomKFilter(1000, "UNKNOWN"), helloFn),
        ImmutableList.of("0", "1", "2", "3", "4", "5")
    );

    // Lookup 2: only "2" -> "5" is mapped; rows "2" and "5" are both expected to match "5".
    Map<String, String> twoToFive = ImmutableMap.of("2", "5");
    LookupExtractor twoToFiveExtractor = new MapLookupExtractor(twoToFive, false);
    LookupExtractionFn twoToFiveFn = new LookupExtractionFn(twoToFiveExtractor, true, null, false, true);

    assertFilterMatches(
        new BloomDimFilter("dim0", bloomKFilter(1000, "5"), twoToFiveFn),
        ImmutableList.of("2", "5")
    );

    // Lookup 3: "1" maps to the empty string; expectations depend on the null-handling mode.
    Map<String, String> oneToEmpty = ImmutableMap.of("1", "");
    LookupExtractor oneToEmptyExtractor = new MapLookupExtractor(oneToEmpty, false);
    LookupExtractionFn oneToEmptyFn = new LookupExtractionFn(oneToEmptyExtractor, false, null, false, true);

    if (!NullHandling.replaceWithDefault()) {
        // Here the null filter and the empty-string filter are expected to match disjoint rows.
        assertFilterMatches(
            new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), oneToEmptyFn),
            ImmutableList.of("0", "2", "3", "4", "5")
        );
        assertFilterMatches(
            new BloomDimFilter("dim0", bloomKFilter(1000, ""), oneToEmptyFn),
            ImmutableList.of("1")
        );
    } else {
        // Nulls and empty strings are considered equivalent
        assertFilterMatches(
            new BloomDimFilter("dim0", bloomKFilter(1000, (String) null), oneToEmptyFn),
            ImmutableList.of("0", "1", "2", "3", "4", "5")
        );
    }
}
Also used : LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) LookupExtractor(org.apache.druid.query.lookup.LookupExtractor) Test(org.junit.Test) BaseFilterTest(org.apache.druid.segment.filter.BaseFilterTest)

Example 25 with LookupExtractionFn

use of org.apache.druid.query.lookup.LookupExtractionFn in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithExtractionDimFilterOptimazitionManyToOne.

/**
 * Filters on {@code quality} with a single extraction filter whose lookup maps both
 * {@code mezzanine} and {@code news} to one shared value, and checks that only rows for those
 * two qualities are returned for the first two days.
 */
@Test
public void testGroupByWithExtractionDimFilterOptimazitionManyToOne() {
    // Two keys map to the same lookup value (the many-to-one case named by the test).
    Map<String, String> manyToOne = new HashMap<>();
    manyToOne.put("mezzanine", "newsANDmezzanine");
    manyToOne.put("news", "newsANDmezzanine");

    LookupExtractionFn manyToOneFn = new LookupExtractionFn(
        new MapLookupExtractor(manyToOne, false),
        false,
        null,
        true,
        true
    );
    ExtractionDimFilter sharedValueFilter = new ExtractionDimFilter("quality", "newsANDmezzanine", manyToOneFn, null);

    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
        .setDimFilter(sharedValueFilter)
        .build();

    // Aliases are the raw quality values; only the two mapped qualities survive the filter.
    List<ResultRow> expectedResults = Arrays.asList(
        makeRow(query, "2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
        makeRow(query, "2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
        makeRow(query, "2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
        makeRow(query, "2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)
    );

    Iterable<ResultRow> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, actualResults, "extraction-dim-filter");
}
Also used : LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) HashMap(java.util.HashMap) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) ExtractionDimFilter(org.apache.druid.query.filter.ExtractionDimFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

LookupExtractionFn (org.apache.druid.query.lookup.LookupExtractionFn) 41, MapLookupExtractor (org.apache.druid.query.extraction.MapLookupExtractor) 40, Test (org.junit.Test) 39, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) 30, HashMap (java.util.HashMap) 23, ExtractionDimensionSpec (org.apache.druid.query.dimension.ExtractionDimensionSpec) 20, LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory) 18, SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter) 14, Result (org.apache.druid.query.Result) 13, ExtractionDimFilter (org.apache.druid.query.filter.ExtractionDimFilter) 13, LookupExtractor (org.apache.druid.query.lookup.LookupExtractor) 9, DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec) 8, BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter) 8, InDimFilter (org.apache.druid.query.filter.InDimFilter) 7, JavaScriptDimFilter (org.apache.druid.query.filter.JavaScriptDimFilter) 6, RegexDimFilter (org.apache.druid.query.filter.RegexDimFilter) 6, SearchQueryDimFilter (org.apache.druid.query.filter.SearchQueryDimFilter) 6, ArrayList (java.util.ArrayList) 5, AndDimFilter (org.apache.druid.query.filter.AndDimFilter) 5, DimFilter (org.apache.druid.query.filter.DimFilter) 5