use of io.druid.query.extraction.DimExtractionFn in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByTimeExtractionWithNulls.
@Test
public void testGroupByTimeExtractionWithNulls() {
final DimExtractionFn nullWednesdays = new DimExtractionFn() {
@Override
public String apply(String dimValue) {
if ("Wednesday".equals(dimValue)) {
return null;
} else {
return dimValue;
}
}
@Override
public byte[] getCacheKey() {
throw new UnsupportedOperationException();
}
@Override
public boolean preservesOrdering() {
return false;
}
@Override
public ExtractionType getExtractionType() {
return ExtractionType.MANY_TO_ONE;
}
};
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval).setDimensions(Lists.newArrayList(new DefaultDimensionSpec("market", "market"), new ExtractionDimensionSpec(Column.TIME_COLUMN_NAME, "dayOfWeek", new CascadeExtractionFn(new ExtractionFn[] { new TimeFormatExtractionFn("EEEE", null, null, null, false), nullWednesdays })))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.indexDoubleSum)).setPostAggregatorSpecs(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).setGranularity(QueryRunnerTestHelper.allGran).setDimFilter(new OrDimFilter(Arrays.<DimFilter>asList(new SelectorDimFilter("market", "spot", null), new SelectorDimFilter("market", "upfront", null)))).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", null, "market", "spot", "index", 14271.368591308594, "rows", 126L, "addRowsIndexConstant", 14398.368591308594), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "spot", "index", 13219.574157714844, "rows", 117L, "addRowsIndexConstant", 13337.574157714844), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "spot", "index", 13557.738830566406, "rows", 117L, "addRowsIndexConstant", 13675.738830566406), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "spot", "index", 13493.751281738281, "rows", 117L, "addRowsIndexConstant", 13611.751281738281), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "spot", "index", 13585.541015625, "rows", 117L, "addRowsIndexConstant", 13703.541015625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "spot", "index", 14279.127197265625, "rows", 126L, "addRowsIndexConstant", 14406.127197265625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "spot", "index", 13199.471435546875, "rows", 117L, "addRowsIndexConstant", 13317.471435546875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", null, "market", "upfront", "index", 28985.5751953125, "rows", 28L, "addRowsIndexConstant", 29014.5751953125), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Friday", "market", "upfront", "index", 27297.8623046875, "rows", 26L, "addRowsIndexConstant", 27324.8623046875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Monday", "market", "upfront", "index", 27619.58447265625, "rows", 26L, "addRowsIndexConstant", 27646.58447265625), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Saturday", "market", "upfront", "index", 27820.83154296875, "rows", 26L, "addRowsIndexConstant", 27847.83154296875), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Sunday", "market", "upfront", "index", 24791.223876953125, "rows", 26L, "addRowsIndexConstant", 24818.223876953125), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Thursday", "market", "upfront", "index", 28562.748901367188, "rows", 28L, "addRowsIndexConstant", 28591.748901367188), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01", "dayOfWeek", "Tuesday", "market", "upfront", "index", 26968.280639648438, "rows", 26L, "addRowsIndexConstant", 26995.280639648438));
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
use of io.druid.query.extraction.DimExtractionFn in project druid by druid-io.
the class TopNQueryRunnerTest method testTopNLexicographicDimExtractionWithSortingPreservedAndPreviousStop.
@Test
public void testTopNLexicographicDimExtractionWithSortingPreservedAndPreviousStop() {
TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).dimension(new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new DimExtractionFn() {
@Override
public byte[] getCacheKey() {
return new byte[0];
}
@Override
public String apply(String value) {
return value.substring(0, 1);
}
@Override
public boolean preservesOrdering() {
return true;
}
@Override
public ExtractionType getExtractionType() {
return ExtractionType.MANY_TO_ONE;
}
})).metric(new DimensionTopNMetricSpec("s", StringComparators.LEXICOGRAPHIC)).threshold(4).intervals(QueryRunnerTestHelper.firstToThird).aggregators(QueryRunnerTestHelper.commonAggregators).postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<TopNResultValue>(new DateTime("2011-04-01T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "t", "rows", 4L, "index", 5351.814697265625D, "addRowsIndexConstant", 5356.814697265625D, "uniques", QueryRunnerTestHelper.UNIQUES_2), ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "u", "rows", 4L, "index", 4875.669677734375D, "addRowsIndexConstant", 4880.669677734375D, "uniques", QueryRunnerTestHelper.UNIQUES_2)))));
assertExpectedResults(expectedResults, query);
}
use of io.druid.query.extraction.DimExtractionFn in project druid by druid-io.
the class TopNQueryRunnerTest method testTopNWithEmptyStringProducingDimExtractionFn.
@Test
public /**
* This test exists only to show what the current behavior is and not necessarily to define that this is
* correct behavior. In fact, the behavior when returning the empty string from a DimExtractionFn is, by
* contract, undefined, so this can do anything.
*/
void testTopNWithEmptyStringProducingDimExtractionFn() {
final ExtractionFn emptyStringDimExtraction = new DimExtractionFn() {
@Override
public byte[] getCacheKey() {
return new byte[] { (byte) 0xFF };
}
@Override
public String apply(String dimValue) {
return dimValue.equals("total_market") ? "" : dimValue;
}
@Override
public boolean preservesOrdering() {
return false;
}
@Override
public ExtractionType getExtractionType() {
return ExtractionType.MANY_TO_ONE;
}
};
final TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).metric("rows").threshold(4).intervals(QueryRunnerTestHelper.firstToThird).aggregators(QueryRunnerTestHelper.commonAggregators).postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).dimension(new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, emptyStringDimExtraction)).build();
List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<>(new DateTime("2011-04-01T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "spot", "rows", 18L, "index", 2231.8768157958984D, "addRowsIndexConstant", 2250.8768157958984D, "uniques", QueryRunnerTestHelper.UNIQUES_9), new LinkedHashMap<String, Object>() {
{
put(QueryRunnerTestHelper.marketDimension, "");
put("rows", 4L);
put("index", 5351.814697265625D);
put("addRowsIndexConstant", 5356.814697265625D);
put("uniques", QueryRunnerTestHelper.UNIQUES_2);
}
}, ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "upfront", "rows", 4L, "index", 4875.669677734375D, "addRowsIndexConstant", 4880.669677734375D, "uniques", QueryRunnerTestHelper.UNIQUES_2)))));
assertExpectedResults(expectedResults, query);
}
use of io.druid.query.extraction.DimExtractionFn in project druid by druid-io.
the class SearchBenchmark method basicC.
private static SearchQueryBuilder basicC(final BenchmarkSchemaInfo basicSchema) {
final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
final List<String> dimUniformFilterVals = Lists.newArrayList();
final int resultNum = (int) (100000 * 0.1);
final int step = 100000 / resultNum;
for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) {
dimUniformFilterVals.add(String.valueOf(i));
}
final String dimName = "dimUniform";
final List<DimFilter> dimFilters = Lists.newArrayList();
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, IdentityExtractionFn.getInstance()));
dimFilters.add(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance()));
dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, new DimExtractionFn() {
@Override
public byte[] getCacheKey() {
return new byte[] { 0xF };
}
@Override
public String apply(String value) {
return String.valueOf(Long.parseLong(value) + 1);
}
@Override
public boolean preservesOrdering() {
return false;
}
@Override
public ExtractionType getExtractionType() {
return ExtractionType.ONE_TO_ONE;
}
}, null));
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new LowerExtractionFn(null)));
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new UpperExtractionFn(null)));
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new SubstringDimExtractionFn(1, 3)));
return Druids.newSearchQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).query("").dimensions(Lists.newArrayList("dimUniform")).filters(new AndDimFilter(dimFilters));
}
use of io.druid.query.extraction.DimExtractionFn in project druid by druid-io.
the class GroupByQueryRunnerTest method testGroupByWithEmptyStringProducingDimExtractionFn.
@Test
@Ignore
public /**
* This test exists only to show what the current behavior is and not necessarily to define that this is
* correct behavior. In fact, the behavior when returning the empty string from a DimExtractionFn is, by
* contract, undefined, so this can do anything.
*/
void testGroupByWithEmptyStringProducingDimExtractionFn() {
final ExtractionFn emptyStringExtractionFn = new RegexDimExtractionFn("(\\w{1})", false, null) {
@Override
public byte[] getCacheKey() {
return new byte[] { (byte) 0xFF };
}
@Override
public String apply(String dimValue) {
return dimValue.equals("mezzanine") ? "" : super.apply(dimValue);
}
};
GroupByQuery query = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))).setGranularity(QueryRunnerTestHelper.dayGran).setDimensions(Lists.<DimensionSpec>newArrayList(new ExtractionDimensionSpec("quality", "alias", emptyStringExtractionFn))).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "", "rows", 3L, "idx", 2870L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "a", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "b", "rows", 1L, "idx", 118L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "e", "rows", 1L, "idx", 158L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "h", "rows", 1L, "idx", 120L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "n", "rows", 1L, "idx", 121L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "t", "rows", 2L, "idx", 197L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "", "rows", 3L, "idx", 2447L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "a", "rows", 1L, "idx", 147L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "b", "rows", 1L, "idx", 112L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "e", "rows", 1L, "idx", 166L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "h", "rows", 1L, "idx", 113L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "n", "rows", 1L, "idx", 114L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "t", "rows", 2L, "idx", 223L));
TestHelper.assertExpectedObjects(expectedResults, GroupByQueryRunnerTestHelper.runQuery(factory, runner, query), "");
}
Aggregations