Use of io.druid.query.filter.DimFilter in project druid by druid-io.
The class SearchBenchmark, method basicB:
private static SearchQueryBuilder basicB(final BenchmarkSchemaInfo basicSchema) {
final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
final List<String> dimUniformFilterVals = Lists.newArrayList();
int resultNum = (int) (100000 * 0.1);
int step = 100000 / resultNum;
for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) {
dimUniformFilterVals.add(String.valueOf(i));
}
List<String> dimHyperUniqueFilterVals = Lists.newArrayList();
resultNum = (int) (100000 * 0.1);
step = 100000 / resultNum;
for (int i = 0; i < 100001 && dimHyperUniqueFilterVals.size() < resultNum; i += step) {
dimHyperUniqueFilterVals.add(String.valueOf(i));
}
final List<DimFilter> dimFilters = Lists.newArrayList();
dimFilters.add(new InDimFilter("dimUniform", dimUniformFilterVals, null));
dimFilters.add(new InDimFilter("dimHyperUnique", dimHyperUniqueFilterVals, null));
return Druids.newSearchQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .query("")
    .dimensions(Lists.newArrayList("dimUniform", "dimHyperUnique"))
    .filters(new AndDimFilter(dimFilters));
}
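For reference, below is a minimal, self-contained sketch of the filter shape basicB builds: two IN filters combined with an AND. The dimension names match the benchmark schema, but the value lists are illustrative placeholders rather than the roughly 10,000 generated values per dimension.

import java.util.Arrays;
import java.util.List;
import com.google.common.collect.Lists;
import io.druid.query.filter.AndDimFilter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.InDimFilter;

public class AndOfInFiltersSketch {
  // Build an AND of two IN filters, mirroring the structure used by basicB.
  public static DimFilter build() {
    final List<DimFilter> dimFilters = Lists.newArrayList();
    // Placeholder value lists; basicB generates about 10,000 values per dimension.
    dimFilters.add(new InDimFilter("dimUniform", Arrays.asList("1", "11", "21"), null));
    dimFilters.add(new InDimFilter("dimHyperUnique", Arrays.asList("0", "10", "20"), null));
    return new AndDimFilter(dimFilters);
  }
}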
Use of io.druid.query.filter.DimFilter in project druid by druid-io.
The class SearchBenchmark, method basicC:
private static SearchQueryBuilder basicC(final BenchmarkSchemaInfo basicSchema) {
final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
final List<String> dimUniformFilterVals = Lists.newArrayList();
final int resultNum = (int) (100000 * 0.1);
final int step = 100000 / resultNum;
for (int i = 1; i < 100001 && dimUniformFilterVals.size() < resultNum; i += step) {
dimUniformFilterVals.add(String.valueOf(i));
}
final String dimName = "dimUniform";
final List<DimFilter> dimFilters = Lists.newArrayList();
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, IdentityExtractionFn.getInstance()));
dimFilters.add(new SelectorDimFilter(dimName, "3", StrlenExtractionFn.instance()));
dimFilters.add(new BoundDimFilter(dimName, "100", "10000", true, true, true, new DimExtractionFn() {
@Override
public byte[] getCacheKey() {
return new byte[] { 0xF };
}
@Override
public String apply(String value) {
return String.valueOf(Long.parseLong(value) + 1);
}
@Override
public boolean preservesOrdering() {
return false;
}
@Override
public ExtractionType getExtractionType() {
return ExtractionType.ONE_TO_ONE;
}
}, null));
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new LowerExtractionFn(null)));
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new UpperExtractionFn(null)));
dimFilters.add(new InDimFilter(dimName, dimUniformFilterVals, new SubstringDimExtractionFn(1, 3)));
return Druids.newSearchQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .intervals(intervalSpec)
    .query("")
    .dimensions(Lists.newArrayList("dimUniform"))
    .filters(new AndDimFilter(dimFilters));
}
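The anonymous DimExtractionFn in basicC can also be written as a named class, which makes the transformation easier to read: it parses the dimension value as a long and returns the value plus one. This is a sketch only, and it assumes the io.druid.query.extraction package layout used elsewhere in the project.

import io.druid.query.extraction.DimExtractionFn;

public class PlusOneExtractionFn extends DimExtractionFn {
  @Override
  public byte[] getCacheKey() {
    // Arbitrary single-byte cache key, as in the benchmark's anonymous class.
    return new byte[] { 0xF };
  }

  @Override
  public String apply(String value) {
    // Shift every value up by one, e.g. "100" becomes "101".
    return String.valueOf(Long.parseLong(value) + 1);
  }

  @Override
  public boolean preservesOrdering() {
    return false;
  }

  @Override
  public ExtractionType getExtractionType() {
    return ExtractionType.ONE_TO_ONE;
  }
}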
Use of io.druid.query.filter.DimFilter in project druid by druid-io.
The class SearchQueryQueryToolChest, method getCacheStrategy:
@Override
public CacheStrategy<Result<SearchResultValue>, Object, SearchQuery> getCacheStrategy(final SearchQuery query) {
return new CacheStrategy<Result<SearchResultValue>, Object, SearchQuery>() {
private final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
private final List<String> dimOutputNames = dimensionSpecs.size() > 0 ? Lists.transform(dimensionSpecs, new Function<DimensionSpec, String>() {
@Override
public String apply(DimensionSpec input) {
return input.getOutputName();
}
}) : Collections.<String>emptyList();
@Override
public boolean isCacheable(SearchQuery query, boolean willMergeRunners) {
return true;
}
@Override
public byte[] computeCacheKey(SearchQuery query) {
final DimFilter dimFilter = query.getDimensionsFilter();
final byte[] filterBytes = dimFilter == null ? new byte[] {} : dimFilter.getCacheKey();
final byte[] querySpecBytes = query.getQuery().getCacheKey();
final byte[] granularityBytes = query.getGranularity().getCacheKey();
final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
int dimensionsBytesSize = 0;
int index = 0;
for (DimensionSpec dimensionSpec : dimensionSpecs) {
dimensionsBytes[index] = dimensionSpec.getCacheKey();
dimensionsBytesSize += dimensionsBytes[index].length;
++index;
}
final byte[] sortSpecBytes = query.getSort().getCacheKey();
final ByteBuffer queryCacheKey = ByteBuffer
    .allocate(1 + 4 + granularityBytes.length + filterBytes.length + querySpecBytes.length + dimensionsBytesSize + sortSpecBytes.length)
    .put(SEARCH_QUERY)
    .put(Ints.toByteArray(query.getLimit()))
    .put(granularityBytes)
    .put(filterBytes)
    .put(querySpecBytes)
    .put(sortSpecBytes);
for (byte[] bytes : dimensionsBytes) {
queryCacheKey.put(bytes);
}
return queryCacheKey.array();
}
@Override
public TypeReference<Object> getCacheObjectClazz() {
return OBJECT_TYPE_REFERENCE;
}
@Override
public Function<Result<SearchResultValue>, Object> prepareForCache() {
return new Function<Result<SearchResultValue>, Object>() {
@Override
public Object apply(Result<SearchResultValue> input) {
return dimensionSpecs.size() > 0
    ? Lists.newArrayList(input.getTimestamp().getMillis(), input.getValue(), dimOutputNames)
    : Lists.newArrayList(input.getTimestamp().getMillis(), input.getValue());
}
};
}
@Override
public Function<Object, Result<SearchResultValue>> pullFromCache() {
return new Function<Object, Result<SearchResultValue>>() {
@Override
@SuppressWarnings("unchecked")
public Result<SearchResultValue> apply(Object input) {
List<Object> result = (List<Object>) input;
boolean needsRename = false;
final Map<String, String> outputNameMap = Maps.newHashMap();
if (hasOutputName(result)) {
List<String> cachedOutputNames = (List) result.get(2);
Preconditions.checkArgument(cachedOutputNames.size() == dimOutputNames.size(), "cache hit, but number of dimensions mismatch");
needsRename = false;
for (int idx = 0; idx < cachedOutputNames.size(); idx++) {
String cachedOutputName = cachedOutputNames.get(idx);
String outputName = dimOutputNames.get(idx);
if (!cachedOutputName.equals(outputName)) {
needsRename = true;
}
outputNameMap.put(cachedOutputName, outputName);
}
}
return !needsRename
    ? new Result<>(
        new DateTime(((Number) result.get(0)).longValue()),
        new SearchResultValue(Lists.transform((List) result.get(1), new Function<Object, SearchHit>() {
@Override
public SearchHit apply(@Nullable Object input) {
if (input instanceof Map) {
return new SearchHit((String) ((Map) input).get("dimension"), (String) ((Map) input).get("value"), (Integer) ((Map) input).get("count"));
} else if (input instanceof SearchHit) {
return (SearchHit) input;
} else {
throw new IAE("Unknown format [%s]", input.getClass());
}
}
})))
    : new Result<>(
        new DateTime(((Number) result.get(0)).longValue()),
        new SearchResultValue(Lists.transform((List) result.get(1), new Function<Object, SearchHit>() {
@Override
public SearchHit apply(@Nullable Object input) {
String dim = null;
String val = null;
Integer cnt = null;
if (input instanceof Map) {
dim = outputNameMap.get((String) ((Map) input).get("dimension"));
val = (String) ((Map) input).get("value");
cnt = (Integer) ((Map) input).get("count");
} else if (input instanceof SearchHit) {
SearchHit cached = (SearchHit) input;
dim = outputNameMap.get(cached.getDimension());
val = cached.getValue();
cnt = cached.getCount();
} else {
throw new IAE("Unknown format [%s]", input.getClass());
}
return new SearchHit(dim, val, cnt);
}
})));
}
};
}
private boolean hasOutputName(List<Object> cachedEntry) {
/*
 * A cached entry is a list of two or three objects:
 * 1. timestamp
 * 2. SearchResultValue
 * 3. outputName of each dimension (optional)
 *
 * If a cached entry has three objects, the dimension names of the SearchResultValue
 * must be checked to decide whether a rename is needed.
 */
return cachedEntry.size() == 3;
}
};
}
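computeCacheKey above follows a common pattern: allocate a ByteBuffer sized for a one-byte query-type prefix, the four-byte limit, and the variable-length cache keys of the granularity, filter, query spec, sort spec, and dimensions, then concatenate them in a fixed order. A standalone sketch of that pattern, using placeholder component bytes rather than real Druid cache keys and a made-up prefix constant, might look like this:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class CacheKeySketch {
  // Illustrative prefix byte; not the real SEARCH_QUERY constant.
  private static final byte SEARCH_QUERY_TYPE = 0x2;

  public static byte[] compose(int limit, byte[]... componentKeys) {
    int size = 1 + Integer.BYTES;
    for (byte[] key : componentKeys) {
      size += key.length;
    }
    // putInt writes 4 bytes big-endian, matching Guava's Ints.toByteArray used above.
    ByteBuffer buffer = ByteBuffer.allocate(size)
        .put(SEARCH_QUERY_TYPE)
        .putInt(limit);
    for (byte[] key : componentKeys) {
      buffer.put(key);
    }
    return buffer.array();
  }

  public static void main(String[] args) {
    byte[] key = compose(1000,
        "all".getBytes(StandardCharsets.UTF_8),     // stand-in for the granularity key
        new byte[] {},                              // empty filter key
        "query".getBytes(StandardCharsets.UTF_8));  // stand-in for the query-spec key
    System.out.println(key.length);
  }
}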
Use of io.druid.query.filter.DimFilter in project druid by druid-io.
The class SelectQueryQueryToolChest, method getCacheStrategy:
@Override
public CacheStrategy<Result<SelectResultValue>, Object, SelectQuery> getCacheStrategy(final SelectQuery query) {
return new CacheStrategy<Result<SelectResultValue>, Object, SelectQuery>() {
private final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
private final List<String> dimOutputNames = dimensionSpecs.size() > 0 ? Lists.transform(dimensionSpecs, new Function<DimensionSpec, String>() {
@Override
public String apply(DimensionSpec input) {
return input.getOutputName();
}
}) : Collections.<String>emptyList();
@Override
public boolean isCacheable(SelectQuery query, boolean willMergeRunners) {
return true;
}
@Override
public byte[] computeCacheKey(SelectQuery query) {
final DimFilter dimFilter = query.getDimensionsFilter();
final byte[] filterBytes = dimFilter == null ? new byte[] {} : dimFilter.getCacheKey();
final byte[] granularityBytes = query.getGranularity().getCacheKey();
final List<DimensionSpec> dimensionSpecs = query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
int dimensionsBytesSize = 0;
int index = 0;
for (DimensionSpec dimension : dimensionSpecs) {
dimensionsBytes[index] = dimension.getCacheKey();
dimensionsBytesSize += dimensionsBytes[index].length;
++index;
}
final Set<String> metrics = Sets.newTreeSet();
if (query.getMetrics() != null) {
metrics.addAll(query.getMetrics());
}
final byte[][] metricBytes = new byte[metrics.size()][];
int metricBytesSize = 0;
index = 0;
for (String metric : metrics) {
metricBytes[index] = StringUtils.toUtf8(metric);
metricBytesSize += metricBytes[index].length;
++index;
}
final byte[] virtualColumnsCacheKey = query.getVirtualColumns().getCacheKey();
final ByteBuffer queryCacheKey = ByteBuffer
    .allocate(1 + granularityBytes.length + filterBytes.length + query.getPagingSpec().getCacheKey().length + dimensionsBytesSize + metricBytesSize + virtualColumnsCacheKey.length)
    .put(SELECT_QUERY)
    .put(granularityBytes)
    .put(filterBytes)
    .put(query.getPagingSpec().getCacheKey());
for (byte[] dimensionsByte : dimensionsBytes) {
queryCacheKey.put(dimensionsByte);
}
for (byte[] metricByte : metricBytes) {
queryCacheKey.put(metricByte);
}
queryCacheKey.put(virtualColumnsCacheKey);
return queryCacheKey.array();
}
@Override
public TypeReference<Object> getCacheObjectClazz() {
return OBJECT_TYPE_REFERENCE;
}
@Override
public Function<Result<SelectResultValue>, Object> prepareForCache() {
return new Function<Result<SelectResultValue>, Object>() {
@Override
public Object apply(final Result<SelectResultValue> input) {
if (!dimOutputNames.isEmpty()) {
  return Arrays.asList(
      input.getTimestamp().getMillis(),
      input.getValue().getPagingIdentifiers(),
      input.getValue().getDimensions(),
      input.getValue().getMetrics(),
      input.getValue().getEvents(),
      dimOutputNames);
}
return Arrays.asList(
    input.getTimestamp().getMillis(),
    input.getValue().getPagingIdentifiers(),
    input.getValue().getDimensions(),
    input.getValue().getMetrics(),
    input.getValue().getEvents());
}
};
}
@Override
public Function<Object, Result<SelectResultValue>> pullFromCache() {
return new Function<Object, Result<SelectResultValue>>() {
private final Granularity granularity = query.getGranularity();
@Override
public Result<SelectResultValue> apply(Object input) {
List<Object> results = (List<Object>) input;
Iterator<Object> resultIter = results.iterator();
DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue());
Map<String, Integer> pageIdentifier = jsonMapper.convertValue(resultIter.next(), new TypeReference<Map<String, Integer>>() {
});
Set<String> dimensionSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
});
Set<String> metricSet = jsonMapper.convertValue(resultIter.next(), new TypeReference<Set<String>>() {
});
List<EventHolder> eventHolders = jsonMapper.convertValue(resultIter.next(), new TypeReference<List<EventHolder>>() {
});
// check whether the outputNames of the cached result need to be renamed to the current query's output names
if (resultIter.hasNext()) {
List<String> cachedOutputNames = (List<String>) resultIter.next();
Preconditions.checkArgument(cachedOutputNames.size() == dimOutputNames.size(), "Cache hit but different number of dimensions??");
for (int idx = 0; idx < dimOutputNames.size(); idx++) {
if (!cachedOutputNames.get(idx).equals(dimOutputNames.get(idx))) {
// rename outputName in the EventHolder
for (EventHolder eventHolder : eventHolders) {
Object obj = eventHolder.getEvent().remove(cachedOutputNames.get(idx));
if (obj != null) {
eventHolder.getEvent().put(dimOutputNames.get(idx), obj);
}
}
}
}
}
return new Result<>(timestamp, new SelectResultValue(pageIdentifier, dimensionSet, metricSet, eventHolders));
}
};
}
};
}
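The rename step in pullFromCache above is easy to miss inside the anonymous Function: when the cached dimension output names differ from the current query's, each cached event has its keys rewritten in place. A small, self-contained sketch of just that step, with plain maps standing in for EventHolder.getEvent() and hypothetical class and method names:

import java.util.List;
import java.util.Map;

public class OutputNameRename {
  // For every dimension whose cached output name differs from the current one,
  // move the corresponding entry in each cached event to the new key.
  public static void rename(List<Map<String, Object>> events,
                            List<String> cachedNames,
                            List<String> currentNames) {
    for (int idx = 0; idx < currentNames.size(); idx++) {
      if (!cachedNames.get(idx).equals(currentNames.get(idx))) {
        for (Map<String, Object> event : events) {
          Object value = event.remove(cachedNames.get(idx));
          if (value != null) {
            event.put(currentNames.get(idx), value);
          }
        }
      }
    }
  }
}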
Use of io.druid.query.filter.DimFilter in project druid by druid-io.
The class CachingClusteredClientTest, method testSingleDimensionPruning:
@Test
public void testSingleDimensionPruning() throws Exception {
DimFilter filter = Druids.newAndDimFilterBuilder().fields(Arrays.asList(
    Druids.newOrDimFilterBuilder().fields(Arrays.asList(
        new SelectorDimFilter("dim1", "a", null),
        new BoundDimFilter("dim1", "from", "to", false, false, false, null, StringComparators.LEXICOGRAPHIC)
    )).build(),
    Druids.newAndDimFilterBuilder().fields(Arrays.asList(
        new InDimFilter("dim2", Arrays.asList("a", "c", "e", "g"), null),
        new BoundDimFilter("dim2", "aaa", "hi", false, false, false, null, StringComparators.LEXICOGRAPHIC),
        new BoundDimFilter("dim2", "e", "zzz", true, true, false, null, StringComparators.LEXICOGRAPHIC)
    )).build()
)).build();
final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder()
    .dataSource(DATA_SOURCE)
    .filters(filter)
    .granularity(GRANULARITY)
    .intervals(SEG_SPEC)
    .context(CONTEXT)
    .intervals("2011-01-05/2011-01-10")
    .aggregators(RENAMED_AGGS)
    .postAggregators(RENAMED_POST_AGGS);
TimeseriesQuery query = builder.build();
Map<String, List> context = new HashMap<>();
final Interval interval1 = new Interval("2011-01-06/2011-01-07");
final Interval interval2 = new Interval("2011-01-07/2011-01-08");
final Interval interval3 = new Interval("2011-01-08/2011-01-09");
QueryRunner runner = new FinalizeResultsQueryRunner(client, new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()));
final DruidServer lastServer = servers[random.nextInt(servers.length)];
ServerSelector selector1 = makeMockSingleDimensionSelector(lastServer, "dim1", null, "b", 1);
ServerSelector selector2 = makeMockSingleDimensionSelector(lastServer, "dim1", "e", "f", 2);
ServerSelector selector3 = makeMockSingleDimensionSelector(lastServer, "dim1", "hi", "zzz", 3);
ServerSelector selector4 = makeMockSingleDimensionSelector(lastServer, "dim2", "a", "e", 4);
ServerSelector selector5 = makeMockSingleDimensionSelector(lastServer, "dim2", null, null, 5);
ServerSelector selector6 = makeMockSingleDimensionSelector(lastServer, "other", "b", null, 6);
timeline.add(interval1, "v", new StringPartitionChunk<>(null, "a", 1, selector1));
timeline.add(interval1, "v", new StringPartitionChunk<>("a", "b", 2, selector2));
timeline.add(interval1, "v", new StringPartitionChunk<>("b", null, 3, selector3));
timeline.add(interval2, "v", new StringPartitionChunk<>(null, "d", 4, selector4));
timeline.add(interval2, "v", new StringPartitionChunk<>("d", null, 5, selector5));
timeline.add(interval3, "v", new StringPartitionChunk<>(null, null, 6, selector6));
final Capture<TimeseriesQuery> capture = Capture.newInstance();
final Capture<Map<String, List>> contextCap = Capture.newInstance();
QueryRunner mockRunner = EasyMock.createNiceMock(QueryRunner.class);
EasyMock.expect(mockRunner.run(EasyMock.capture(capture), EasyMock.capture(contextCap))).andReturn(Sequences.empty()).anyTimes();
EasyMock.expect(serverView.getQueryRunner(lastServer)).andReturn(mockRunner).anyTimes();
EasyMock.replay(serverView);
EasyMock.replay(mockRunner);
List<SegmentDescriptor> descriptors = new ArrayList<>();
descriptors.add(new SegmentDescriptor(interval1, "v", 1));
descriptors.add(new SegmentDescriptor(interval1, "v", 3));
descriptors.add(new SegmentDescriptor(interval2, "v", 5));
descriptors.add(new SegmentDescriptor(interval3, "v", 6));
MultipleSpecificSegmentSpec expected = new MultipleSpecificSegmentSpec(descriptors);
Sequences.toList(runner.run(query, context), Lists.newArrayList());
Assert.assertEquals(expected, capture.getValue().getQuerySegmentSpec());
}
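The assertions in this test hinge on a range-overlap argument: a segment sharded on a single dimension can be skipped when the filter's admissible value range for that dimension cannot intersect the shard's range. The following is a simplified lexicographic overlap check that mirrors that intuition; it is not the actual CachingClusteredClient pruning code, and the class and method names are hypothetical.

public class RangeOverlapSketch {
  /**
   * @param shardStart inclusive shard start, null = unbounded below
   * @param shardEnd   exclusive shard end, null = unbounded above
   * @param filterLow  inclusive filter lower bound, null = unbounded
   * @param filterHigh inclusive filter upper bound, null = unbounded
   */
  public static boolean mayOverlap(String shardStart, String shardEnd,
                                   String filterLow, String filterHigh) {
    // The filter range lies entirely below the shard's start.
    boolean belowShard = filterHigh != null && shardStart != null
        && filterHigh.compareTo(shardStart) < 0;
    // The filter range lies entirely at or above the shard's end.
    boolean aboveShard = filterLow != null && shardEnd != null
        && filterLow.compareTo(shardEnd) >= 0;
    return !belowShard && !aboveShard;
  }

  public static void main(String[] args) {
    // Mirrors the test's expectation that the dim1 shard covering "e" to "f" is pruned:
    // neither dim1 = "a" nor dim1 in ["from", "to"] can fall inside it.
    System.out.println(mayOverlap("e", "f", "from", "to")); // false, pruned
    System.out.println(mayOverlap("e", "f", null, "b"));    // false, pruned
    // The shard covering values below "b" is kept, since "a" falls inside it.
    System.out.println(mayOverlap(null, "b", "a", "a"));    // true, kept
  }
}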