Use of io.druid.query.filter.Filter in project druid by druid-io.
In class QueryableIndexStorageAdapter, method makeCursors:
@Override
public Sequence<Cursor> makeCursors(Filter filter, Interval interval, VirtualColumns virtualColumns, Granularity gran, boolean descending) {
Interval actualInterval = interval;
long minDataTimestamp = getMinTime().getMillis();
long maxDataTimestamp = getMaxTime().getMillis();
final Interval dataInterval = new Interval(minDataTimestamp, gran.bucketEnd(getMaxTime()).getMillis());
if (!actualInterval.overlaps(dataInterval)) {
return Sequences.empty();
}
if (actualInterval.getStart().isBefore(dataInterval.getStart())) {
actualInterval = actualInterval.withStart(dataInterval.getStart());
}
if (actualInterval.getEnd().isAfter(dataInterval.getEnd())) {
actualInterval = actualInterval.withEnd(dataInterval.getEnd());
}
final ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(index.getBitmapFactoryForDimensions(), virtualColumns, index);
/**
* Filters can be applied in two stages:
* pre-filtering: Use bitmap indexes to prune the set of rows to be scanned.
* post-filtering: Iterate through rows and apply the filter to the row values
*
* The pre-filter and post-filter step have an implicit AND relationship. (i.e., final rows are those that
* were not pruned AND those that matched the filter during row scanning)
*
* An AND filter can have its subfilters partitioned across the two steps. The subfilters that can be
* processed entirely with bitmap indexes (subfilter returns true for supportsBitmapIndex())
* will be moved to the pre-filtering stage.
*
* Any subfilters that cannot be processed entirely with bitmap indexes will be moved to the post-filtering stage.
*/
final Offset offset;
final List<Filter> postFilters = new ArrayList<>();
if (filter == null) {
offset = new NoFilterOffset(0, index.getNumRows(), descending);
} else {
final List<Filter> preFilters = new ArrayList<>();
if (filter instanceof AndFilter) {
// If we get an AndFilter, we can split the subfilters across both filtering stages
for (Filter subfilter : ((AndFilter) filter).getFilters()) {
if (subfilter.supportsBitmapIndex(selector)) {
preFilters.add(subfilter);
} else {
postFilters.add(subfilter);
}
}
} else {
// If we get an OrFilter or a single filter, handle the filter in one stage
if (filter.supportsBitmapIndex(selector)) {
preFilters.add(filter);
} else {
postFilters.add(filter);
}
}
if (preFilters.size() == 0) {
offset = new NoFilterOffset(0, index.getNumRows(), descending);
} else {
// Use AndFilter.getBitmapIndex to intersect the preFilters to get its short-circuiting behavior.
offset = BitmapOffset.of(AndFilter.getBitmapIndex(selector, preFilters), descending, (long) getNumRows());
}
}
final Filter postFilter;
if (postFilters.size() == 0) {
postFilter = null;
} else if (postFilters.size() == 1) {
postFilter = postFilters.get(0);
} else {
postFilter = new AndFilter(postFilters);
}
return Sequences.filter(new CursorSequenceBuilder(this, actualInterval, virtualColumns, gran, offset, minDataTimestamp, maxDataTimestamp, descending, postFilter, selector).build(), Predicates.<Cursor>notNull());
}
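For reference, a minimal usage sketch of the two-stage split described in the comment inside makeCursors. Everything in it is assumed rather than taken from the snippet: index is an already-loaded QueryableIndex, SelectorFilter is io.druid.segment.filter.SelectorFilter, and Granularities.ALL / VirtualColumns.EMPTY are the usual defaults (exact packages vary between Druid versions).
// Hypothetical helper, not part of Druid. Both selector filters support bitmap
// indexes, so makeCursors() above puts them in the pre-filter stage and the
// post-filter stays null.
private static Sequence<Cursor> scanUsPhones(QueryableIndex index) {
    final StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    final Filter filter = new AndFilter(ImmutableList.<Filter>of(
        new SelectorFilter("country", "US"),
        new SelectorFilter("device", "phone")));
    return adapter.makeCursors(
        filter,
        Interval.parse("2017-01-01/2017-01-02"),
        VirtualColumns.EMPTY,
        Granularities.ALL,
        false /* descending = false, i.e. ascending scan */);
}
If one of the subfilters did not support a bitmap index (for example a JavaScript filter), it would instead be carried as a post-filter and evaluated row by row against the cursor.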
Use of io.druid.query.filter.Filter in project druid by druid-io.
In class TopNQueryEngine, method query:
public Sequence<Result<TopNResultValue>> query(final TopNQuery query, final StorageAdapter adapter) {
if (adapter == null) {
throw new SegmentMissingException("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
}
final List<Interval> queryIntervals = query.getQuerySegmentSpec().getIntervals();
final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
final Granularity granularity = query.getGranularity();
final Function<Cursor, Result<TopNResultValue>> mapFn = getMapFn(query, adapter);
Preconditions.checkArgument(queryIntervals.size() == 1, "Can only handle a single interval, got[%s]", queryIntervals);
return Sequences.filter(Sequences.map(adapter.makeCursors(filter, queryIntervals.get(0), query.getVirtualColumns(), granularity, query.isDescending()), new Function<Cursor, Result<TopNResultValue>>() {
@Override
public Result<TopNResultValue> apply(Cursor input) {
log.debug("Running over cursor[%s]", adapter.getInterval(), input.getTime());
return mapFn.apply(input);
}
}), Predicates.<Result<TopNResultValue>>notNull());
}
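For context, a hedged sketch of the kind of query this engine receives and of the DimFilter-to-Filter conversion it performs; the TopNQueryBuilder calls and the aggregator are illustrative assumptions, not part of the snippet.
// Illustrative only; builder defaults and method signatures may differ by Druid version.
TopNQuery query = new TopNQueryBuilder()
    .dataSource("wikipedia")
    .granularity(Granularities.ALL)
    .dimension("page")
    .metric("edits")
    .threshold(10)
    .intervals("2017-01-01/2017-01-02")
    .filters("country", "US")
    .aggregators(ImmutableList.<AggregatorFactory>of(new LongSumAggregatorFactory("edits", "edits")))
    .build();
// Inside query(), the DimFilter carried by the query becomes a Filter, optionally in CNF:
Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));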
Use of io.druid.query.filter.Filter in project druid by druid-io.
In class SelectQueryEngine, method process:
public Sequence<Result<SelectResultValue>> process(final SelectQuery query, final Segment segment) {
final StorageAdapter adapter = segment.asStorageAdapter();
if (adapter == null) {
throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
}
// at the point where this code is called, only one datasource should exist.
String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames());
final Iterable<DimensionSpec> dims;
if (query.getDimensions() == null || query.getDimensions().isEmpty()) {
dims = DefaultDimensionSpec.toSpec(adapter.getAvailableDimensions());
} else {
dims = query.getDimensions();
}
final Iterable<String> metrics;
if (query.getMetrics() == null || query.getMetrics().isEmpty()) {
metrics = adapter.getAvailableMetrics();
} else {
metrics = query.getMetrics();
}
List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
// should be rewritten with given interval
final String segmentId = DataSegmentUtils.withInterval(dataSource, segment.getIdentifier(), intervals.get(0));
final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
return QueryRunnerHelper.makeCursorBasedQuery(adapter, query.getQuerySegmentSpec().getIntervals(), filter, query.getVirtualColumns(), query.isDescending(), query.getGranularity(), new Function<Cursor, Result<SelectResultValue>>() {
@Override
public Result<SelectResultValue> apply(Cursor cursor) {
final SelectResultValueBuilder builder = new SelectResultValueBuilder(cursor.getTime(), query.getPagingSpec(), query.isDescending());
final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME);
final List<ColumnSelectorPlus<SelectColumnSelectorStrategy>> selectorPlusList = Arrays.asList(DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, Lists.newArrayList(dims), cursor));
for (DimensionSpec dimSpec : dims) {
builder.addDimension(dimSpec.getOutputName());
}
final Map<String, ObjectColumnSelector> metSelectors = Maps.newHashMap();
for (String metric : metrics) {
final ObjectColumnSelector metricSelector = cursor.makeObjectColumnSelector(metric);
metSelectors.put(metric, metricSelector);
builder.addMetric(metric);
}
final PagingOffset offset = query.getPagingOffset(segmentId);
cursor.advanceTo(offset.startDelta());
int lastOffset = offset.startOffset();
for (; !cursor.isDone() && offset.hasNext(); cursor.advance(), offset.next()) {
final Map<String, Object> theEvent = singleEvent(EventHolder.timestampKey, timestampColumnSelector, selectorPlusList, metSelectors);
builder.addEntry(new EventHolder(segmentId, lastOffset = offset.current(), theEvent));
}
builder.finished(segmentId, lastOffset);
return builder.build();
}
});
}
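The loop above resumes each segment's scan from a per-segment offset supplied by the query's PagingSpec. A small hedged sketch of that handshake, assuming PagingSpec(pagingIdentifiers, threshold) from io.druid.query.select; advancing the returned offset by one for the next ascending page is the usual convention, not something shown in the snippet.
// First page: no prior offsets, up to 3 rows per segment (threshold is illustrative).
PagingSpec firstPage = new PagingSpec(null, 3);

// The result's paging identifiers map segmentId -> last offset returned; feed them
// back (advanced by one for an ascending scan) to continue where the page ended.
Map<String, Integer> resume = ImmutableMap.of(segmentId, lastReturnedOffset + 1);
PagingSpec nextPage = new PagingSpec(resume, 3);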
Use of io.druid.query.filter.Filter in project druid by druid-io.
In class Filters, method flatten:
// CNF conversion functions were adapted from Apache Hive, see:
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
private static Filter flatten(Filter root) {
if (root instanceof BooleanFilter) {
List<Filter> children = new ArrayList<>();
children.addAll(((BooleanFilter) root).getFilters());
// iterate through the index so that, if we add more children later, they don't get re-visited
for (int i = 0; i < children.size(); ++i) {
Filter child = flatten(children.get(i));
// do we need to flatten?
if (child.getClass() == root.getClass() && !(child instanceof NotFilter)) {
boolean first = true;
List<Filter> grandKids = ((BooleanFilter) child).getFilters();
for (Filter grandkid : grandKids) {
// for the first grandkid replace the original parent
if (first) {
first = false;
children.set(i, grandkid);
} else {
children.add(++i, grandkid);
}
}
} else {
children.set(i, child);
}
}
// if we have a singleton AND or OR, just return the child
if (children.size() == 1 && (root instanceof AndFilter || root instanceof OrFilter)) {
return children.get(0);
}
if (root instanceof AndFilter) {
return new AndFilter(children);
} else if (root instanceof OrFilter) {
return new OrFilter(children);
}
}
return root;
}
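A small illustration of what flatten() does to a nested tree. SelectorFilter is used here purely as a convenient leaf and is an assumption; flatten() itself is private and is normally reached through the CNF conversion used by the query engines above.
Filter a = new SelectorFilter("dim", "a");
Filter b = new SelectorFilter("dim", "b");
Filter c = new SelectorFilter("dim", "c");

// AND(AND(a, b), c) -> AND(a, b, c): the nested AND is spliced into its parent.
Filter nested = new AndFilter(ImmutableList.<Filter>of(
    new AndFilter(ImmutableList.of(a, b)), c));

// AND(OR(a, b)) -> OR(a, b): a singleton AND or OR is unwrapped to its only child.
Filter singleton = new AndFilter(ImmutableList.<Filter>of(
    new OrFilter(ImmutableList.of(a, b))));

// NOT is left alone: because of the !(child instanceof NotFilter) guard,
// NOT(NOT(a)) keeps its nesting.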
Use of io.druid.query.filter.Filter in project druid by druid-io.
In class Filters, method generateAllCombinations:
// CNF conversion functions were adapted from Apache Hive, see:
// https://github.com/apache/hive/blob/branch-2.0/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
private static void generateAllCombinations(List<Filter> result, List<Filter> andList, List<Filter> nonAndList) {
List<Filter> children = ((AndFilter) andList.get(0)).getFilters();
if (result.isEmpty()) {
for (Filter child : children) {
List<Filter> a = Lists.newArrayList(nonAndList);
a.add(child);
result.add(new OrFilter(a));
}
} else {
List<Filter> work = new ArrayList<>(result);
result.clear();
for (Filter child : children) {
for (Filter or : work) {
List<Filter> a = Lists.newArrayList((((OrFilter) or).getFilters()));
a.add(child);
result.add(new OrFilter(a));
}
}
}
if (andList.size() > 1) {
generateAllCombinations(result, andList.subList(1, andList.size()), nonAndList);
}
}
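A worked illustration of the distribution step this method performs during CNF conversion (the inputs are written informally; the method itself is private and is driven by the surrounding CNF code):
// Input OR:  OR( x, AND(a, b) )        nonAndList = [x], andList = [AND(a, b)]
// Each grandchild of the AND is combined with the non-AND children:
//   result = [ OR(x, a), OR(x, b) ]
// which the caller then ANDs together:  AND( OR(x, a), OR(x, b) )
//
// With two AND children, OR( AND(a, b), AND(c, d) ), the recursion over
// andList.subList(1, ...) produces the full cross product:
//   AND( OR(a, c), OR(b, c), OR(a, d), OR(b, d) )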