use of io.druid.query.filter.BoundDimFilter in project druid by druid-io.
the class TimeseriesBenchmark method setupQueries.
/**
 * Populates {@code SCHEMA_QUERY_MAP} with the timeseries queries benchmarked against the "basic" schema.
 * <p>
 * The queries are registered, in this order, under the keys "A", "timeFilterNumeric",
 * "timeFilterAlphanumeric" and "timeFilterByInterval".
 */
private void setupQueries() {
  final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic");
  // Insertion-ordered map of query name -> query for the basic schema.
  final Map<String, TimeseriesQuery> queriesByName = new LinkedHashMap<>();

  // basic.A: five unfiltered aggregators over the schema's data interval.
  {
    final QuerySegmentSpec schemaInterval =
        new MultipleIntervalSegmentSpec(Arrays.asList(schemaInfo.getDataInterval()));

    final List<AggregatorFactory> aggregators = new ArrayList<>();
    aggregators.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    aggregators.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
    aggregators.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
    aggregators.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    aggregators.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));

    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .intervals(schemaInterval)
        .aggregators(aggregators)
        .descending(false)
        .build();
    queriesByName.put("A", query);
  }

  // timeFilterNumeric: long sum wrapped in a bound filter on the time column, compared with NUMERIC ordering.
  {
    final QuerySegmentSpec schemaInterval =
        new MultipleIntervalSegmentSpec(Arrays.asList(schemaInfo.getDataInterval()));

    final BoundDimFilter numericTimeBound = new BoundDimFilter(
        Column.TIME_COLUMN_NAME,
        "200000",
        "300000",
        false,
        false,
        null,
        null,
        StringComparators.NUMERIC
    );
    final LongSumAggregatorFactory longSum =
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");

    final List<AggregatorFactory> aggregators = new ArrayList<>();
    aggregators.add(new FilteredAggregatorFactory(longSum, numericTimeBound));

    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .intervals(schemaInterval)
        .aggregators(aggregators)
        .descending(false)
        .build();
    queriesByName.put("timeFilterNumeric", query);
  }

  // timeFilterAlphanumeric: same as above, but the bound is compared with ALPHANUMERIC ordering.
  {
    final QuerySegmentSpec schemaInterval =
        new MultipleIntervalSegmentSpec(Arrays.asList(schemaInfo.getDataInterval()));

    final BoundDimFilter alphanumericTimeBound = new BoundDimFilter(
        Column.TIME_COLUMN_NAME,
        "200000",
        "300000",
        false,
        false,
        null,
        null,
        StringComparators.ALPHANUMERIC
    );
    final LongSumAggregatorFactory longSum =
        new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential");

    final List<AggregatorFactory> aggregators = new ArrayList<>();
    aggregators.add(new FilteredAggregatorFactory(longSum, alphanumericTimeBound));

    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .intervals(schemaInterval)
        .aggregators(aggregators)
        .descending(false)
        .build();
    queriesByName.put("timeFilterAlphanumeric", query);
  }

  // timeFilterByInterval: plain long sum; the 200000..300000 range is expressed as the query interval instead.
  {
    final QuerySegmentSpec narrowInterval =
        new MultipleIntervalSegmentSpec(Arrays.asList(new Interval(200000, 300000)));

    final List<AggregatorFactory> aggregators = new ArrayList<>();
    aggregators.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));

    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .intervals(narrowInterval)
        .aggregators(aggregators)
        .descending(false)
        .build();
    queriesByName.put("timeFilterByInterval", query);
  }

  SCHEMA_QUERY_MAP.put("basic", queriesByName);
}
use of io.druid.query.filter.BoundDimFilter in project druid by druid-io.
the class SearchBenchmark method basicC.
/**
 * Builds a search query builder over {@code basicSchema}'s data interval whose filter is an AND of
 * six filters on the "dimUniform" dimension, each carrying an extraction function.
 *
 * @param basicSchema schema whose data interval the query covers
 * @return a search query builder with data source, granularity, interval, query string, dimension and filter set
 */
private static SearchQueryBuilder basicC(final BenchmarkSchemaInfo basicSchema) {
  final String dimName = "dimUniform";

  // Collect every step-th number starting at 1, as strings, until resultNum values have been gathered.
  final int resultNum = (int) (100000 * 0.1);
  final int step = 100000 / resultNum;
  final List<String> inValues = Lists.newArrayList();
  int candidate = 1;
  while (candidate < 100001 && inValues.size() < resultNum) {
    inValues.add(String.valueOf(candidate));
    candidate += step;
  }

  final DimFilter identityIn = new InDimFilter(dimName, inValues, IdentityExtractionFn.getInstance());
  final DimFilter strlenSelector = new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance());

  // Ad-hoc extraction fn that parses the value as a long and adds one.
  final DimExtractionFn plusOneFn = new DimExtractionFn() {
    @Override
    public byte[] getCacheKey() {
      return new byte[] { 0xF };
    }

    @Override
    public String apply(String value) {
      final long parsed = Long.parseLong(value);
      return String.valueOf(parsed + 1);
    }

    @Override
    public boolean preservesOrdering() {
      return false;
    }

    @Override
    public ExtractionType getExtractionType() {
      return ExtractionType.ONE_TO_ONE;
    }
  };
  final DimFilter plusOneBound = new BoundDimFilter(dimName, "100", "10000", true, true, true, plusOneFn, null);

  final DimFilter lowerIn = new InDimFilter(dimName, inValues, new LowerExtractionFn(null));
  final DimFilter upperIn = new InDimFilter(dimName, inValues, new UpperExtractionFn(null));
  final DimFilter substringIn = new InDimFilter(dimName, inValues, new SubstringDimExtractionFn(1, 3));

  final List<DimFilter> allFilters = Lists.<DimFilter>newArrayList(
      identityIn,
      strlenSelector,
      plusOneBound,
      lowerIn,
      upperIn,
      substringIn
  );

  final QuerySegmentSpec schemaInterval =
      new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));

  return Druids.newSearchQueryBuilder()
      .dataSource("blah")
      .granularity(Granularities.ALL)
      .intervals(schemaInterval)
      .query("")
      .dimensions(Lists.newArrayList(dimName))
      .filters(new AndDimFilter(allFilters));
}
use of io.druid.query.filter.BoundDimFilter in project druid by druid-io.
the class TimeseriesQueryRunnerTest method testTimeseriesWithBoundFilter1.
/**
 * Runs a day-granularity timeseries query filtered by the AND of three lexicographic bound filters on
 * the market dimension and checks the two resulting daily rows.
 */
@Test
public void testTimeseriesWithBoundFilter1() {
  // Three bounds on the same dimension; NOTE(review): presumably only "spot" satisfies all of them - confirm.
  final DimFilter spaToSpot = new BoundDimFilter(
      QueryRunnerTestHelper.marketDimension,
      "spa",
      "spot",
      true,
      null,
      null,
      null,
      StringComparators.LEXICOGRAPHIC
  );
  final DimFilter spotToSpotify = new BoundDimFilter(
      QueryRunnerTestHelper.marketDimension,
      "spot",
      "spotify",
      null,
      true,
      null,
      null,
      StringComparators.LEXICOGRAPHIC
  );
  final DimFilter upperCaseSpotToSpot = new BoundDimFilter(
      QueryRunnerTestHelper.marketDimension,
      "SPOT",
      "spot",
      null,
      null,
      null,
      null,
      StringComparators.LEXICOGRAPHIC
  );
  final DimFilter combinedBounds =
      new AndDimFilter(Arrays.<DimFilter>asList(spaToSpot, spotToSpotify, upperCaseSpotToSpot));

  final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.dataSource)
      .granularity(QueryRunnerTestHelper.dayGran)
      .filters(combinedBounds)
      .intervals(QueryRunnerTestHelper.firstToThird)
      .aggregators(
          Arrays.<AggregatorFactory>asList(
              QueryRunnerTestHelper.rowsCount,
              QueryRunnerTestHelper.indexLongSum,
              QueryRunnerTestHelper.qualityUniques
          )
      )
      .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
      .build();

  final TimeseriesResultValue firstDayValue = new TimeseriesResultValue(
      ImmutableMap.<String, Object>of(
          "rows", 9L,
          "index", 1102L,
          "addRowsIndexConstant", 1112.0,
          "uniques", QueryRunnerTestHelper.UNIQUES_9
      )
  );
  final TimeseriesResultValue secondDayValue = new TimeseriesResultValue(
      ImmutableMap.<String, Object>of(
          "rows", 9L,
          "index", 1120L,
          "addRowsIndexConstant", 1130.0,
          "uniques", QueryRunnerTestHelper.UNIQUES_9
      )
  );
  final Result<TimeseriesResultValue> firstDay = new Result<>(new DateTime("2011-04-01"), firstDayValue);
  final Result<TimeseriesResultValue> secondDay = new Result<>(new DateTime("2011-04-02"), secondDayValue);
  final List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(firstDay, secondDay);

  final Iterable<Result<TimeseriesResultValue>> actualResults = Sequences.toList(
      runner.run(query, CONTEXT),
      Lists.<Result<TimeseriesResultValue>>newArrayList()
  );

  TestHelper.assertExpectedResults(expectedResults, actualResults);
}
use of io.druid.query.filter.BoundDimFilter in project druid by druid-io.
the class DruidSemiJoin method getLeftRelWithFilter.
/**
 * Returns a copy of the left rel with the filter applied from the right-hand side. This is an expensive operation
 * since it actually executes the right-hand side query.
 * <p>
 * Every distinct combination of right-hand key values becomes an AND of singleton bound filters (lower == upper,
 * both non-strict) on the corresponding left-hand columns; those ANDs are ORed together and, if the left side
 * already has a filter, ANDed with it.
 *
 * @return the left rel with the semi-join filter added, or {@code null} if the right-hand side produced no rows
 * @throws ResourceLimitExceededException if the right-hand side yields more than {@code maxSemiJoinRowsInMemory}
 *                                        distinct key combinations
 */
private DruidRel<?> getLeftRelWithFilter() {
  // Build list of acceptable values from right side.
  // "valuess" holds the distinct key combinations seen so far and is used to de-duplicate right-hand rows.
  final Set<List<String>> valuess = Sets.newHashSet();
  final List<DimFilter> filters = Lists.newArrayList();
  right.runQuery().accumulate(null, new Accumulator<Object, Object[]>() {
    @Override
    public Object accumulate(final Object dummyValue, final Object[] row) {
      final List<String> values = Lists.newArrayListWithCapacity(rightKeys.size());
      for (int i : rightKeys) {
        final Object value = row[i];
        // A null key value is represented as the empty string.
        final String stringValue = value != null ? String.valueOf(value) : "";
        values.add(stringValue);
      }
      if (valuess.add(values)) {
        // Enforce the limit on the number of distinct rows retained, not on the number of keys within one row
        // (which is always rightKeys.size() and would practically never trip the limit).
        if (valuess.size() > maxSemiJoinRowsInMemory) {
          throw new ResourceLimitExceededException(String.format("maxSemiJoinRowsInMemory[%,d] exceeded", maxSemiJoinRowsInMemory));
        }
        final List<DimFilter> bounds = Lists.newArrayList();
        for (int i = 0; i < values.size(); i++) {
          bounds.add(
              new BoundDimFilter(
                  leftRowExtractions.get(i).getColumn(),
                  values.get(i),
                  values.get(i),
                  false,
                  false,
                  null,
                  leftRowExtractions.get(i).getExtractionFn(),
                  getSourceRowSignature().naturalStringComparator(leftRowExtractions.get(i))
              )
          );
        }
        filters.add(new AndDimFilter(bounds));
      }
      // The accumulated value is unused; all state is collected in "valuess" and "filters".
      return null;
    }
  });
  // The de-duplication set is no longer needed once all filters are built.
  valuess.clear();
  if (filters.isEmpty()) {
    // The right-hand side returned nothing, so there is no filter to apply.
    return null;
  }
  // Add a filter to the left side. Use OR of singleton Bound filters so they can be simplified later.
  final DimFilter semiJoinFilter = new OrDimFilter(filters);
  final DimFilter existingFilter = left.getQueryBuilder().getFilter();
  final DimFilter newFilter = existingFilter == null
                              ? semiJoinFilter
                              : new AndDimFilter(ImmutableList.of(semiJoinFilter, existingFilter));
  return left.withQueryBuilder(left.getQueryBuilder().withFilter(newFilter));
}
use of io.druid.query.filter.BoundDimFilter in project druid by druid-io.
the class ConvertBoundsToSelectors method process.
/**
 * Converts a bound filter whose lower and upper bounds are the same non-strict value into a selector
 * filter on that value, provided the bound's ordering equals the natural string comparator that
 * {@code sourceRowSignature} reports for the bound's dimension and extraction function.
 *
 * @param filter the filter to inspect
 * @return a {@link SelectorDimFilter} when the conditions above hold; otherwise {@code filter} itself
 */
@Override
public DimFilter process(DimFilter filter) {
  if (!(filter instanceof BoundDimFilter)) {
    return filter;
  }

  final BoundDimFilter candidate = (BoundDimFilter) filter;
  final StringComparator naturalOrdering = sourceRowSignature.naturalStringComparator(
      RowExtraction.of(candidate.getDimension(), candidate.getExtractionFn())
  );

  // True only for a closed range [x, x] compared with the column's natural ordering.
  final boolean isSingleValueBound =
      candidate.hasUpperBound()
      && candidate.hasLowerBound()
      && candidate.getUpper().equals(candidate.getLower())
      && !(candidate.isUpperStrict() || candidate.isLowerStrict())
      && candidate.getOrdering().equals(naturalOrdering);

  if (isSingleValueBound) {
    return new SelectorDimFilter(candidate.getDimension(), candidate.getUpper(), candidate.getExtractionFn());
  }
  return filter;
}
Aggregations