use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByTimeExtraction.
@Test
public void testGroupByTimeExtraction() {
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval).setDimensions(Lists.newArrayList(new DefaultDimensionSpec("market", "market"), new ExtractionDimensionSpec(Column.TIME_COLUMN_NAME, "dayOfWeek", new TimeFormatExtractionFn("EEEE", null, null, null, false)))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.indexDoubleSum)).setPostAggregatorSpecs(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).setGranularity(QueryRunnerTestHelper.allGran).setDimFilter(new OrDimFilter(Arrays.<DimFilter>asList(new SelectorDimFilter("market", "spot", null), new SelectorDimFilter("market", "upfront", null)))).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "spot", "index", 13219.574157714844, "rows", 117L, "addRowsIndexConstant", 13337.574157714844), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "spot", "index", 13557.738830566406, "rows", 117L, "addRowsIndexConstant", 13675.738830566406), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "spot", "index", 13493.751281738281, "rows", 117L, "addRowsIndexConstant", 13611.751281738281), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "spot", "index", 13585.541015625, "rows", 117L, "addRowsIndexConstant", 13703.541015625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "spot", "index", 14279.127197265625, "rows", 126L, "addRowsIndexConstant", 14406.127197265625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "spot", "index", 13199.471435546875, "rows", 117L, "addRowsIndexConstant", 13317.471435546875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Wednesday", "market", "spot", "index", 14271.368591308594, "rows", 126L, "addRowsIndexConstant", 14398.368591308594), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "upfront", "index", 27297.8623046875, "rows", 26L, "addRowsIndexConstant", 27324.8623046875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "upfront", "index", 27619.58447265625, "rows", 26L, "addRowsIndexConstant", 27646.58447265625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "upfront", "index", 27820.83154296875, "rows", 26L, "addRowsIndexConstant", 27847.83154296875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "upfront", "index", 24791.223876953125, "rows", 26L, "addRowsIndexConstant", 24818.223876953125), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "upfront", "index", 28562.748901367188, "rows", 28L, "addRowsIndexConstant", 28591.748901367188), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "upfront", "index", 26968.280639648438, "rows", 26L, "addRowsIndexConstant", 26995.280639648438), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Wednesday", "market", "upfront", "index", 28985.5751953125, "rows", 28L, "addRowsIndexConstant", 29014.5751953125));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithSimpleRenameAndMissingString.
@Test
public void testGroupByWithSimpleRenameAndMissingString() {
Map<String, String> map = new HashMap<>();
map.put("automotive", "automotive0");
map.put("business", "business0");
map.put("entertainment", "entertainment0");
map.put("health", "health0");
map.put("mezzanine", "mezzanine0");
map.put("news", "news0");
map.put("premium", "premium0");
map.put("technology", "technology0");
map.put("travel", "travel0");
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new ExtractionDimensionSpec("quality", "alias", new LookupExtractionFn(new MapLookupExtractor(map, false), false, "MISSING", true, false)))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))).setGranularity(QueryRunnerTestHelper.dayGran).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive0", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business0", "rows", 1L, "idx", 118L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment0", "rows", 1L, "idx", 158L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health0", "rows", 1L, "idx", 120L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine0", "rows", 3L, "idx", 2870L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news0", "rows", 1L, "idx", 121L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium0", "rows", 3L, "idx", 2900L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology0", "rows", 1L, "idx", 78L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel0", "rows", 1L, "idx", 119L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive0", "rows", 1L, "idx", 147L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business0", "rows", 1L, "idx", 112L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment0", "rows", 1L, "idx", 166L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health0", "rows", 1L, "idx", 113L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine0", "rows", 3L, "idx", 2447L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news0", "rows", 1L, "idx", 114L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium0", "rows", 3L, "idx", 2505L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology0", "rows", 1L, "idx", 97L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel0", "rows", 1L, "idx", 126L));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByCardinalityAggWithExtractionFn.
@Test
public void testGroupByCardinalityAggWithExtractionFn() {
String helloJsFn = "function(str) { return 'hello' }";
ExtractionFn helloFn = new JavaScriptExtractionFn(helloJsFn, false, JavaScriptConfig.getEnabledInstance());
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("market", "alias"))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new CardinalityAggregatorFactory("numVals", ImmutableList.<DimensionSpec>of(new ExtractionDimensionSpec(QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.qualityDimension, helloFn)), false))).setGranularity(QueryRunnerTestHelper.dayGran).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "spot", "rows", 9L, "numVals", 1.0002442201269182d), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "total_market", "rows", 2L, "numVals", 1.0002442201269182d), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "upfront", "rows", 2L, "numVals", 1.0002442201269182d), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "spot", "rows", 9L, "numVals", 1.0002442201269182d), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "total_market", "rows", 2L, "numVals", 1.0002442201269182d), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "upfront", "rows", 2L, "numVals", 1.0002442201269182d));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByTimeExtractionWithNulls.
@Test
public void testGroupByTimeExtractionWithNulls() {
final DimExtractionFn nullWednesdays = new DimExtractionFn() {
@Override
public String apply(String dimValue) {
if ("Wednesday".equals(dimValue)) {
return null;
} else {
return dimValue;
}
}
@Override
public byte[] getCacheKey() {
throw new UnsupportedOperationException();
}
@Override
public boolean preservesOrdering() {
return false;
}
@Override
public ExtractionType getExtractionType() {
return ExtractionType.MANY_TO_ONE;
}
};
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval).setDimensions(Lists.newArrayList(new DefaultDimensionSpec("market", "market"), new ExtractionDimensionSpec(Column.TIME_COLUMN_NAME, "dayOfWeek", new CascadeExtractionFn(new ExtractionFn[] { new TimeFormatExtractionFn("EEEE", null, null, null, false), nullWednesdays })))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.indexDoubleSum)).setPostAggregatorSpecs(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).setGranularity(QueryRunnerTestHelper.allGran).setDimFilter(new OrDimFilter(Arrays.<DimFilter>asList(new SelectorDimFilter("market", "spot", null), new SelectorDimFilter("market", "upfront", null)))).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", null, "market", "spot", "index", 14271.368591308594, "rows", 126L, "addRowsIndexConstant", 14398.368591308594), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "spot", "index", 13219.574157714844, "rows", 117L, "addRowsIndexConstant", 13337.574157714844), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "spot", "index", 13557.738830566406, "rows", 117L, "addRowsIndexConstant", 13675.738830566406), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "spot", "index", 13493.751281738281, "rows", 117L, "addRowsIndexConstant", 13611.751281738281), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "spot", "index", 13585.541015625, "rows", 117L, "addRowsIndexConstant", 13703.541015625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "spot", "index", 14279.127197265625, "rows", 126L, "addRowsIndexConstant", 14406.127197265625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "spot", "index", 13199.471435546875, "rows", 117L, "addRowsIndexConstant", 13317.471435546875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", null, "market", "upfront", "index", 28985.5751953125, "rows", 28L, "addRowsIndexConstant", 29014.5751953125), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "upfront", "index", 27297.8623046875, "rows", 26L, "addRowsIndexConstant", 27324.8623046875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "upfront", "index", 27619.58447265625, "rows", 26L, "addRowsIndexConstant", 27646.58447265625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "upfront", "index", 27820.83154296875, "rows", 26L, "addRowsIndexConstant", 27847.83154296875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "upfront", "index", 24791.223876953125, "rows", 26L, "addRowsIndexConstant", 24818.223876953125), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "upfront", "index", 28562.748901367188, "rows", 28L, "addRowsIndexConstant", 28591.748901367188), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "upfront", "index", 26968.280639648438, "rows", 26L, "addRowsIndexConstant", 26995.280639648438));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testSubqueryWithExtractionFnInOuterQuery.
@Test
public void testSubqueryWithExtractionFnInOuterQuery() {
//https://github.com/druid-io/druid/issues/2556
GroupByQuery subquery = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias"))).setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }", null, JavaScriptConfig.getEnabledInstance())).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"), new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen"))).setGranularity(QueryRunnerTestHelper.dayGran).build();
GroupByQuery query = GroupByQuery.builder().setDataSource(subquery).setQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(new Interval("2011-04-01T00:00:00.000Z/2011-04-03T00:00:00.000Z")))).setDimensions(Lists.<DimensionSpec>newArrayList(new ExtractionDimensionSpec("alias", "alias", new RegexDimExtractionFn("(a).*", true, "a")))).setAggregatorSpecs(Arrays.<AggregatorFactory>asList(new LongSumAggregatorFactory("rows", "rows"), new LongSumAggregatorFactory("idx", "idx"))).setGranularity(QueryRunnerTestHelper.dayGran).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "a", "rows", 13L, "idx", 6619L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "a", "rows", 13L, "idx", 5827L));
// Subqueries are handled by the ToolChest
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
Aggregations