use of io.druid.query.lookup.LookupExtractionFn in project druid by druid-io.
the class InDimFilter method optimizeLookup.
private InDimFilter optimizeLookup() {
if (extractionFn instanceof LookupExtractionFn && ((LookupExtractionFn) extractionFn).isOptimize()) {
LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
LookupExtractor lookup = exFn.getLookup();
final List<String> keys = new ArrayList<>();
for (String value : values) {
// We cannot do an unapply()-based optimization if the selector value
// and the replaceMissingValuesWith value are the same, since we have to match on
// all values that are not present in the lookup.
final String convertedValue = Strings.emptyToNull(value);
if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
return this;
}
keys.addAll(lookup.unapply(convertedValue));
// If the selector value is overwritten in the lookup map, don't add selector value to keys.
if (exFn.isRetainMissingValue() && lookup.apply(convertedValue) == null) {
keys.add(convertedValue);
}
}
if (keys.isEmpty()) {
return this;
} else {
return new InDimFilter(dimension, keys, null);
}
}
return this;
}
use of io.druid.query.lookup.LookupExtractionFn in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithSimpleRenameAndMissingString.
@Test
public void testGroupByWithSimpleRenameAndMissingString() {
Map<String, String> map = new HashMap<>();
map.put("automotive", "automotive0");
map.put("business", "business0");
map.put("entertainment", "entertainment0");
map.put("health", "health0");
map.put("mezzanine", "mezzanine0");
map.put("news", "news0");
map.put("premium", "premium0");
map.put("technology", "technology0");
map.put("travel", "travel0");
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new ExtractionDimensionSpec("quality", "alias", new LookupExtractionFn(new MapLookupExtractor(map, false), false, "MISSING", true, false)))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))).setGranularity(QueryRunnerTestHelper.dayGran).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive0", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business0", "rows", 1L, "idx", 118L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment0", "rows", 1L, "idx", 158L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health0", "rows", 1L, "idx", 120L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine0", "rows", 3L, "idx", 2870L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news0", "rows", 1L, "idx", 121L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium0", "rows", 3L, "idx", 2900L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology0", "rows", 1L, "idx", 78L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel0", "rows", 1L, "idx", 119L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive0", "rows", 1L, "idx", 147L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business0", "rows", 1L, "idx", 112L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment0", "rows", 1L, "idx", 166L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health0", "rows", 1L, "idx", 113L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine0", "rows", 3L, "idx", 2447L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news0", "rows", 1L, "idx", 114L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium0", "rows", 3L, "idx", 2505L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology0", "rows", 1L, "idx", 97L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel0", "rows", 1L, "idx", 126L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.lookup.LookupExtractionFn in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithAllFiltersOnNullDimsWithExtractionFns.
@Test
public void testGroupByWithAllFiltersOnNullDimsWithExtractionFns() {
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("", "EMPTY");
extractionMap.put(null, "EMPTY");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
LookupExtractionFn extractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, true);
String jsFn = "function(x) { return(x === 'EMPTY') }";
List<DimFilter> superFilterList = new ArrayList<>();
superFilterList.add(new SelectorDimFilter("null_column", "EMPTY", extractionFn));
superFilterList.add(new InDimFilter("null_column", Arrays.asList("NOT-EMPTY", "FOOBAR", "EMPTY"), extractionFn));
superFilterList.add(new BoundDimFilter("null_column", "EMPTY", "EMPTY", false, false, true, extractionFn, StringComparators.ALPHANUMERIC));
superFilterList.add(new RegexDimFilter("null_column", "EMPTY", extractionFn));
superFilterList.add(new SearchQueryDimFilter("null_column", new ContainsSearchQuerySpec("EMPTY", true), extractionFn));
superFilterList.add(new JavaScriptDimFilter("null_column", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance()));
DimFilter superFilter = new AndDimFilter(superFilterList);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("null_column", "alias"))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))).setGranularity(QueryRunnerTestHelper.dayGran).setDimFilter(superFilter).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", null, "rows", 13L, "idx", 6619L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", null, "rows", 13L, "idx", 5827L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.lookup.LookupExtractionFn in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap.
@Test
public void testGroupByWithExtractionDimFilterWhenSearchValueNotInTheMap() {
Map<String, String> extractionMap = new HashMap<>();
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, false);
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias"))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))).setGranularity(QueryRunnerTestHelper.dayGran).setDimFilter(new ExtractionDimFilter("quality", "NOT_THERE", lookupExtractionFn, null)).build();
List<Row> expectedResults = Arrays.asList();
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.lookup.LookupExtractionFn in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithExtractionDimFilter.
// Extraction Filters testing
@Test
public void testGroupByWithExtractionDimFilter() {
Map<String, String> extractionMap = new HashMap<>();
extractionMap.put("automotive", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("business", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("mezzanine", "automotiveAndBusinessAndNewsAndMezzanine");
extractionMap.put("news", "automotiveAndBusinessAndNewsAndMezzanine");
MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
LookupExtractionFn lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, false);
List<DimFilter> dimFilters = Lists.<DimFilter>newArrayList(new ExtractionDimFilter("quality", "automotiveAndBusinessAndNewsAndMezzanine", lookupExtractionFn, null), new SelectorDimFilter("quality", "entertainment", null), new SelectorDimFilter("quality", "health", null), new SelectorDimFilter("quality", "premium", null), new SelectorDimFilter("quality", "technology", null), new SelectorDimFilter("quality", "travel", null));
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias"))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))).setGranularity(QueryRunnerTestHelper.dayGran).setDimFilter(Druids.newOrDimFilterBuilder().fields(dimFilters).build()).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
Aggregations