Use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.
From the class SearchQueryQueryToolChest, method getCacheStrategy:
@Override
public CacheStrategy<Result<SearchResultValue>, Object, SearchQuery> getCacheStrategy(final SearchQuery query) {
  return new CacheStrategy<Result<SearchResultValue>, Object, SearchQuery>() {
    private final List<DimensionSpec> dimensionSpecs =
        query.getDimensions() != null ? query.getDimensions() : Collections.emptyList();

    private final List<String> dimOutputNames =
        dimensionSpecs.size() > 0 ? Lists.transform(dimensionSpecs, DimensionSpec::getOutputName) : Collections.emptyList();

    @Override
    public boolean isCacheable(SearchQuery query, boolean willMergeRunners) {
      return true;
    }

    @Override
    public byte[] computeCacheKey(SearchQuery query) {
      final DimFilter dimFilter = query.getDimensionsFilter();
      final byte[] filterBytes = dimFilter == null ? new byte[]{} : dimFilter.getCacheKey();
      final byte[] querySpecBytes = query.getQuery().getCacheKey();
      final byte[] granularityBytes = query.getGranularity().getCacheKey();

      final List<DimensionSpec> dimensionSpecs =
          query.getDimensions() != null ? query.getDimensions() : Collections.emptyList();
      final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
      int dimensionsBytesSize = 0;
      int index = 0;
      for (DimensionSpec dimensionSpec : dimensionSpecs) {
        dimensionsBytes[index] = dimensionSpec.getCacheKey();
        dimensionsBytesSize += dimensionsBytes[index].length;
        ++index;
      }

      final byte[] sortSpecBytes = query.getSort().getCacheKey();

      final ByteBuffer queryCacheKey = ByteBuffer
          .allocate(
              1 + 4 + granularityBytes.length + filterBytes.length
              + querySpecBytes.length + dimensionsBytesSize + sortSpecBytes.length
          )
          .put(SEARCH_QUERY)
          .put(Ints.toByteArray(query.getLimit()))
          .put(granularityBytes)
          .put(filterBytes)
          .put(querySpecBytes)
          .put(sortSpecBytes);

      for (byte[] bytes : dimensionsBytes) {
        queryCacheKey.put(bytes);
      }

      return queryCacheKey.array();
    }

    @Override
    public byte[] computeResultLevelCacheKey(SearchQuery query) {
      return computeCacheKey(query);
    }

    @Override
    public TypeReference<Object> getCacheObjectClazz() {
      return OBJECT_TYPE_REFERENCE;
    }

    @Override
    public Function<Result<SearchResultValue>, Object> prepareForCache(boolean isResultLevelCache) {
      return new Function<Result<SearchResultValue>, Object>() {
        @Override
        public Object apply(Result<SearchResultValue> input) {
          return dimensionSpecs.size() > 0
                 ? Lists.newArrayList(input.getTimestamp().getMillis(), input.getValue(), dimOutputNames)
                 : Lists.newArrayList(input.getTimestamp().getMillis(), input.getValue());
        }
      };
    }

    @Override
    public Function<Object, Result<SearchResultValue>> pullFromCache(boolean isResultLevelCache) {
      return new Function<Object, Result<SearchResultValue>>() {
        @Override
        @SuppressWarnings("unchecked")
        public Result<SearchResultValue> apply(Object input) {
          List<Object> result = (List<Object>) input;
          boolean needsRename = false;
          final Map<String, String> outputNameMap = new HashMap<>();
          if (hasOutputName(result)) {
            List<String> cachedOutputNames = (List) result.get(2);
            Preconditions.checkArgument(
                cachedOutputNames.size() == dimOutputNames.size(),
                "cache hit, but number of dimensions mismatch"
            );
            for (int idx = 0; idx < cachedOutputNames.size(); idx++) {
              String cachedOutputName = cachedOutputNames.get(idx);
              String outputName = dimOutputNames.get(idx);
              if (!cachedOutputName.equals(outputName)) {
                needsRename = true;
              }
              outputNameMap.put(cachedOutputName, outputName);
            }
          }

          return !needsRename
                 ? new Result<>(
                     DateTimes.utc(((Number) result.get(0)).longValue()),
                     new SearchResultValue(Lists.transform(
                         (List) result.get(1),
                         new Function<Object, SearchHit>() {
                           @Override
                           public SearchHit apply(@Nullable Object input) {
                             if (input instanceof Map) {
                               return new SearchHit(
                                   (String) ((Map) input).get("dimension"),
                                   (String) ((Map) input).get("value"),
                                   (Integer) ((Map) input).get("count")
                               );
                             } else if (input instanceof SearchHit) {
                               return (SearchHit) input;
                             } else {
                               throw new IAE("Unknown format [%s]", input.getClass());
                             }
                           }
                         }
                     ))
                 )
                 : new Result<>(
                     DateTimes.utc(((Number) result.get(0)).longValue()),
                     new SearchResultValue(Lists.transform(
                         (List) result.get(1),
                         new Function<Object, SearchHit>() {
                           @Override
                           public SearchHit apply(@Nullable Object input) {
                             String dim;
                             String val;
                             Integer count;
                             if (input instanceof Map) {
                               dim = outputNameMap.get((String) ((Map) input).get("dimension"));
                               val = (String) ((Map) input).get("value");
                               count = (Integer) ((Map) input).get("count");
                             } else if (input instanceof SearchHit) {
                               SearchHit cached = (SearchHit) input;
                               dim = outputNameMap.get(cached.getDimension());
                               val = cached.getValue();
                               count = cached.getCount();
                             } else {
                               throw new IAE("Unknown format [%s]", input.getClass());
                             }
                             return new SearchHit(dim, val, count);
                           }
                         }
                     ))
                 );
        }
      };
    }

    private boolean hasOutputName(List<Object> cachedEntry) {
      /*
       * A cached entry is a list of two or three objects:
       *   1. timestamp
       *   2. SearchResultValue
       *   3. outputName of each dimension (optional)
       *
       * If a cached entry has three objects, the dimension names in the
       * SearchResultValue should be checked to see whether a rename is needed.
       */
      return cachedEntry.size() == 3;
    }
  };
}
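The cache-key layout in computeCacheKey is easiest to see in isolation: a one-byte query-type prefix, the 4-byte limit, then each component's own cache key, concatenated in a fixed order so equal queries always produce identical keys. Below is a minimal sketch of that pattern, assuming Guava on the classpath; the class name and the SEARCH_QUERY byte value here are hypothetical, only the composition mirrors the method above.

import java.nio.ByteBuffer;
import com.google.common.primitives.Ints;

public class CacheKeySketch {
  // Hypothetical type prefix; the real constant is defined next to the toolchest.
  private static final byte SEARCH_QUERY = 0x15;

  // Concatenates per-component cache keys in a fixed order, mirroring computeCacheKey.
  static byte[] key(int limit, byte[] granularity, byte[] filter, byte[] querySpec, byte[] sort, byte[][] dims) {
    int dimsSize = 0;
    for (byte[] d : dims) {
      dimsSize += d.length;
    }
    ByteBuffer buf = ByteBuffer
        .allocate(1 + Ints.BYTES + granularity.length + filter.length + querySpec.length + sort.length + dimsSize)
        .put(SEARCH_QUERY)
        .put(Ints.toByteArray(limit)) // big-endian 4-byte limit
        .put(granularity)
        .put(filter)
        .put(querySpec)
        .put(sort);
    for (byte[] d : dims) {
      buf.put(d); // dimension keys go last, in dimension order
    }
    return buf.array();
  }
}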
Use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.
From the class FilterPartitionTest, method testDistributeOrCNFExtractionFn:
@Test
public void testDistributeOrCNFExtractionFn() {
  DimFilter dimFilter1 = new OrDimFilter(Arrays.asList(
      new SelectorDimFilter("dim0", "super-6", JS_EXTRACTION_FN),
      new AndDimFilter(Arrays.asList(
          new NoBitmapSelectorDimFilter("dim1", "super-abdef", JS_EXTRACTION_FN),
          new SelectorDimFilter("dim2", "super-c", JS_EXTRACTION_FN)
      ))
  ));
  Filter filter1 = dimFilter1.toFilter();
  Filter filter1CNF = Filters.toCnf(filter1);

  Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
  Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());

  assertFilterMatches(dimFilter1, ImmutableList.of("4", "6"));

  DimFilter dimFilter2 = new OrDimFilter(Arrays.asList(
      new SelectorDimFilter("dim0", "super-2", JS_EXTRACTION_FN),
      new SelectorDimFilter("dim0", "super-3", JS_EXTRACTION_FN),
      new AndDimFilter(Arrays.asList(
          new NoBitmapSelectorDimFilter("dim1", "super-HELLO", JS_EXTRACTION_FN),
          new SelectorDimFilter("dim2", "super-foo", JS_EXTRACTION_FN)
      ))
  ));
  assertFilterMatches(dimFilter2, ImmutableList.of("2", "3", "7"));

  DimFilter dimFilter3 = new OrDimFilter(
      dimFilter1,
      dimFilter2,
      new AndDimFilter(
          new NoBitmapSelectorDimFilter("dim1", "super-1", JS_EXTRACTION_FN),
          new SelectorDimFilter("dim2", "super-foo", JS_EXTRACTION_FN)
      )
  );
  assertFilterMatches(dimFilter3, ImmutableList.of("2", "3", "4", "6", "7", "9"));
}
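As a quick illustration of the distribution the test asserts, here is a sketch of how Filters.toCnf pushes an OR over an AND, stripped down to plain SelectorDimFilters with no extraction function. It uses only the classes and calls already shown above (imports: org.apache.druid.query.filter.*, org.apache.druid.segment.filter.Filters); the dimension names and values are illustrative.

// (dim0 = a) OR ((dim1 = b) AND (dim2 = c))
DimFilter or = new OrDimFilter(Arrays.asList(
    new SelectorDimFilter("dim0", "a", null),
    new AndDimFilter(Arrays.asList(
        new SelectorDimFilter("dim1", "b", null),
        new SelectorDimFilter("dim2", "c", null)
    ))
));

// Distributing the OR over the AND yields conjunctive normal form:
// ((dim0 = a) OR (dim1 = b)) AND ((dim0 = a) OR (dim2 = c))
Filter cnf = Filters.toCnf(or.toFilter());
// cnf is an AndFilter with two OR children, which is exactly what the
// two assertEquals calls above verify for filter1CNF.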
Use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.
From the class CalciteQueryTest, method testAnyAggregatorsSkipNullsWithFilter:
@Test
public void testAnyAggregatorsSkipNullsWithFilter() throws Exception {
  final DimFilter filter;
  if (useDefault) {
    filter = not(selector("dim1", null, null));
  } else {
    filter = and(
        not(selector("dim1", null, null)),
        not(selector("l2", null, null)),
        not(selector("d2", null, null)),
        not(selector("f2", null, null))
    );
  }
  testQuery(
      "SELECT ANY_VALUE(dim1, 32), ANY_VALUE(l2), ANY_VALUE(d2), ANY_VALUE(f2) "
      + "FROM druid.numfoo "
      + "WHERE dim1 IS NOT NULL AND l2 IS NOT NULL AND d2 IS NOT NULL AND f2 is NOT NULL",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .filters(filter)
                .aggregators(aggregators(
                    new StringAnyAggregatorFactory("a0", "dim1", 32),
                    new LongAnyAggregatorFactory("a1", "l2"),
                    new DoubleAnyAggregatorFactory("a2", "d2"),
                    new FloatAnyAggregatorFactory("a3", "f2")
                ))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      // first row of dim1 is empty string, which is null in default mode
      ImmutableList.of(new Object[]{"10.1", 325323L, 1.7, 0.1f})
  );
}
Use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.
From the class CalciteQueryTest, method testFirstLatestAggregatorsSkipNulls:
@Test
public void testFirstLatestAggregatorsSkipNulls() throws Exception {
  // Cannot vectorize LATEST aggregator.
  skipVectorize();
  final DimFilter filter;
  if (useDefault) {
    filter = not(selector("dim1", null, null));
  } else {
    filter = and(
        not(selector("dim1", null, null)),
        not(selector("l1", null, null)),
        not(selector("d1", null, null)),
        not(selector("f1", null, null))
    );
  }
  testQuery(
      "SELECT EARLIEST(dim1, 32), LATEST(l1), LATEST(d1), LATEST(f1) "
      + "FROM druid.numfoo "
      + "WHERE dim1 IS NOT NULL AND l1 IS NOT NULL AND d1 IS NOT NULL AND f1 is NOT NULL",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .filters(filter)
                .aggregators(aggregators(
                    new StringFirstAggregatorFactory("a0", "dim1", null, 32),
                    new LongLastAggregatorFactory("a1", "l1", null),
                    new DoubleLastAggregatorFactory("a2", "d1", null),
                    new FloatLastAggregatorFactory("a3", "f1", null)
                ))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      // first row of dim1 is empty string, which is null in default mode;
      // the last non-null numeric rows are zeros
      ImmutableList.of(new Object[]{useDefault ? "10.1" : "", 0L, 0.0, 0.0f})
  );
}
Use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.
From the class TopNQueryRunnerTest, method testTopNWithExtractionFilterNoExistingValue:
@Test
public void testTopNWithExtractionFilterNoExistingValue() {
  Map<String, String> extractionMap = new HashMap<>();
  MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
  LookupExtractionFn lookupExtractionFn;
  if (NullHandling.replaceWithDefault()) {
    lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, true);
    extractionMap.put("", "NULL");
  } else {
    extractionMap.put("", "NOT_USED");
    lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "NULL", true, true);
  }
  DimFilter extractionFilter = new ExtractionDimFilter("null_column", "NULL", lookupExtractionFn, null);

  TopNQueryBuilder topNQueryBuilder = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension("null_column")
      .metric(QueryRunnerTestHelper.INDEX_METRIC)
      .threshold(4)
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(Lists.newArrayList(Iterables.concat(
          commonAggregators,
          Lists.newArrayList(
              new FilteredAggregatorFactory(new DoubleMaxAggregatorFactory("maxIndex", "index"), extractionFilter),
              new DoubleMinAggregatorFactory("minIndex", "index")
          )
      )))
      .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT);
  TopNQuery topNQueryWithNULLValueExtraction = topNQueryBuilder.filters(extractionFilter).build();

  Map<String, Object> map = new HashMap<>();
  map.put("null_column", null);
  map.put("rows", 1209L);
  map.put("index", 503332.5071372986D);
  map.put("addRowsIndexConstant", 504542.5071372986D);
  map.put("uniques", QueryRunnerTestHelper.UNIQUES_9);
  map.put("maxIndex", 1870.061029D);
  map.put("minIndex", 59.02102279663086D);
  List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
      new Result<>(
          DateTimes.of("2011-01-12T00:00:00.000Z"),
          new TopNResultValue(Collections.singletonList(map))
      )
  );
  assertExpectedResults(expectedResults, topNQueryWithNULLValueExtraction);

  // Assert the optimization path as well
  final Sequence<Result<TopNResultValue>> retval = runWithPreMergeAndMerge(topNQueryWithNULLValueExtraction);
  TestHelper.assertExpectedResults(expectedResults, retval);
}
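The core of this test is that an effectively empty lookup plus replaceMissingValueWith turns every unmapped value, including the missing null_column, into the literal "NULL" that the filter matches. A minimal sketch of that extraction behavior in the SQL-compatible null-handling branch, using the same constructor arguments as above (the exact return for a null input depends on the null-handling mode, so treat this as illustrative):

Map<String, String> map = new HashMap<>(); // empty lookup: nothing is mapped
MapLookupExtractor extractor = new MapLookupExtractor(map, false);
// retainMissingValue=false, replaceMissingValueWith="NULL", injective=true, optimize=true
LookupExtractionFn fn = new LookupExtractionFn(extractor, false, "NULL", true, true);

fn.apply((String) null); // -> "NULL": the missing column value is replaced
fn.apply("anything");    // -> "NULL" as well, since the map has no entries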