Search in sources :

Example 1 with Filter

use of io.druid.query.filter.Filter in project druid by druid-io.

the class ScanQueryEngine method process.

public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final Map<String, Object> responseContext) {
    if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) != null) {
        int count = (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT);
        if (count >= query.getLimit()) {
            return Sequences.empty();
        }
    }
    final Long timeoutAt = (long) responseContext.get(ScanQueryRunnerFactory.CTX_TIMEOUT_AT);
    final long start = System.currentTimeMillis();
    final StorageAdapter adapter = segment.asStorageAdapter();
    if (adapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    List<String> allDims = Lists.newLinkedList(adapter.getAvailableDimensions());
    List<String> allMetrics = Lists.newLinkedList(adapter.getAvailableMetrics());
    final List<String> allColumns = Lists.newLinkedList();
    if (query.getColumns() != null && !query.getColumns().isEmpty()) {
        if (!query.getColumns().contains(ScanResultValue.timestampKey)) {
            allColumns.add(ScanResultValue.timestampKey);
        }
        allColumns.addAll(query.getColumns());
        allDims.retainAll(query.getColumns());
        allMetrics.retainAll(query.getColumns());
    } else {
        if (!allDims.contains(ScanResultValue.timestampKey)) {
            allColumns.add(ScanResultValue.timestampKey);
        }
        allColumns.addAll(allDims);
        allColumns.addAll(allMetrics);
    }
    final List<DimensionSpec> dims = DefaultDimensionSpec.toSpec(allDims);
    final List<String> metrics = allMetrics;
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
    final String segmentId = segment.getIdentifier();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
    if (responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) == null) {
        responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, 0);
    }
    final int limit = query.getLimit() - (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT);
    return Sequences.concat(Sequences.map(adapter.makeCursors(filter, intervals.get(0), VirtualColumns.EMPTY, Granularities.ALL, query.isDescending()), new Function<Cursor, Sequence<ScanResultValue>>() {

        @Override
        public Sequence<ScanResultValue> apply(final Cursor cursor) {
            return new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {

                @Override
                public Iterator<ScanResultValue> make() {
                    final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME);
                    final List<ColumnSelectorPlus<SelectQueryEngine.SelectColumnSelectorStrategy>> selectorPlusList = Arrays.asList(DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, Lists.newArrayList(dims), cursor));
                    final Map<String, ObjectColumnSelector> metSelectors = Maps.newHashMap();
                    for (String metric : metrics) {
                        final ObjectColumnSelector metricSelector = cursor.makeObjectColumnSelector(metric);
                        metSelectors.put(metric, metricSelector);
                    }
                    final int batchSize = query.getBatchSize();
                    return new Iterator<ScanResultValue>() {

                        private int offset = 0;

                        @Override
                        public boolean hasNext() {
                            return !cursor.isDone() && offset < limit;
                        }

                        @Override
                        public ScanResultValue next() {
                            if (System.currentTimeMillis() >= timeoutAt) {
                                throw new QueryInterruptedException(new TimeoutException());
                            }
                            int lastOffset = offset;
                            Object events = null;
                            String resultFormat = query.getResultFormat();
                            if (ScanQuery.RESULT_FORMAT_VALUE_VECTOR.equals(resultFormat)) {
                                throw new UnsupportedOperationException("valueVector is not supported now");
                            } else if (ScanQuery.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                                events = rowsToCompactedList();
                            } else {
                                events = rowsToList();
                            }
                            responseContext.put(ScanQueryRunnerFactory.CTX_COUNT, (int) responseContext.get(ScanQueryRunnerFactory.CTX_COUNT) + (offset - lastOffset));
                            responseContext.put(ScanQueryRunnerFactory.CTX_TIMEOUT_AT, timeoutAt - (System.currentTimeMillis() - start));
                            return new ScanResultValue(segmentId, allColumns, events);
                        }

                        @Override
                        public void remove() {
                            throw new UnsupportedOperationException();
                        }

                        private Object rowsToCompactedList() {
                            return Lists.transform((List<Map<String, Object>>) rowsToList(), new Function<Map<String, Object>, Object>() {

                                @Override
                                public Object apply(Map<String, Object> input) {
                                    List eventValues = Lists.newArrayListWithExpectedSize(allColumns.size());
                                    for (String expectedColumn : allColumns) {
                                        eventValues.add(input.get(expectedColumn));
                                    }
                                    return eventValues;
                                }
                            });
                        }

                        private Object rowsToList() {
                            List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                            for (int i = 0; !cursor.isDone() && i < batchSize && offset < limit; cursor.advance(), i++, offset++) {
                                final Map<String, Object> theEvent = SelectQueryEngine.singleEvent(ScanResultValue.timestampKey, timestampColumnSelector, selectorPlusList, metSelectors);
                                events.add(theEvent);
                            }
                            return events;
                        }

                        private Object rowsToValueVector() {
                            // only support list now, we can support ValueVector or Arrow in future
                            return rowsToList();
                        }
                    };
                }

                @Override
                public void cleanup(Iterator<ScanResultValue> iterFromMake) {
                }
            });
        }
    }));
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) StorageAdapter(io.druid.segment.StorageAdapter) Cursor(io.druid.segment.Cursor) Function(com.google.common.base.Function) Iterator(java.util.Iterator) ISE(io.druid.java.util.common.ISE) LongColumnSelector(io.druid.segment.LongColumnSelector) List(java.util.List) ObjectColumnSelector(io.druid.segment.ObjectColumnSelector) QueryInterruptedException(io.druid.query.QueryInterruptedException) TimeoutException(java.util.concurrent.TimeoutException) BaseSequence(io.druid.java.util.common.guava.BaseSequence) Filter(io.druid.query.filter.Filter) Map(java.util.Map) Interval(org.joda.time.Interval)

Example 2 with Filter

use of io.druid.query.filter.Filter in project druid by druid-io.

the class FilterPartitionTest method testDistributeOrCNF.

@Test
public void testDistributeOrCNF() {
    DimFilter dimFilter1 = new OrDimFilter(Arrays.<DimFilter>asList(new SelectorDimFilter("dim0", "6", null), new AndDimFilter(Arrays.<DimFilter>asList(new NoBitmapSelectorDimFilter("dim1", "def", null), new SelectorDimFilter("dim2", "c", null)))));
    Filter filter1 = dimFilter1.toFilter();
    Filter filter1CNF = Filters.convertToCNF(filter1);
    Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
    Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());
    assertFilterMatches(dimFilter1, ImmutableList.of("4", "6"));
    DimFilter dimFilter2 = new OrDimFilter(Arrays.<DimFilter>asList(new SelectorDimFilter("dim0", "2", null), new SelectorDimFilter("dim0", "3", null), new AndDimFilter(Arrays.<DimFilter>asList(new NoBitmapSelectorDimFilter("dim1", "HELLO", null), new SelectorDimFilter("dim2", "foo", null)))));
    assertFilterMatches(dimFilter2, ImmutableList.of("2", "3", "7"));
    DimFilter dimFilter3 = new OrDimFilter(Arrays.<DimFilter>asList(dimFilter1, dimFilter2, new AndDimFilter(Arrays.<DimFilter>asList(new NoBitmapSelectorDimFilter("dim1", "1", null), new SelectorDimFilter("dim2", "foo", null)))));
    assertFilterMatches(dimFilter3, ImmutableList.of("2", "3", "4", "6", "7", "9"));
}
Also used : AndDimFilter(io.druid.query.filter.AndDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) DimFilter(io.druid.query.filter.DimFilter) Filter(io.druid.query.filter.Filter) AndDimFilter(io.druid.query.filter.AndDimFilter) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) DimFilter(io.druid.query.filter.DimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) Test(org.junit.Test)

Example 3 with Filter

use of io.druid.query.filter.Filter in project druid by druid-io.

the class ExtractionDimFilterTest method testNormal.

@Test
public void testNormal() {
    Filter extractionFilter = new SelectorDimFilter("foo", "extractDimVal", DIM_EXTRACTION_FN).toFilter();
    ImmutableBitmap immutableBitmap = extractionFilter.getBitmapIndex(BITMAP_INDEX_SELECTOR);
    Assert.assertEquals(1, immutableBitmap.size());
}
Also used : SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) ExtractionDimFilter(io.druid.query.filter.ExtractionDimFilter) Filter(io.druid.query.filter.Filter) ImmutableBitmap(io.druid.collections.bitmap.ImmutableBitmap) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) Test(org.junit.Test)

Example 4 with Filter

use of io.druid.query.filter.Filter in project druid by druid-io.

the class FilterPartitionBenchmark method readWithExFnPostFilter.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithExFnPostFilter(Blackhole blackhole) throws Exception {
    Filter filter = new NoBitmapSelectorDimFilter("dimSequential", "super-199", JS_EXTRACTION_FN).toFilter();
    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
    Sequence<Cursor> cursors = makeCursors(sa, filter);
    Sequence<List<String>> stringListSeq = readCursors(cursors, blackhole);
    List<String> strings = Sequences.toList(Sequences.limit(stringListSeq, 1), Lists.<List<String>>newArrayList()).get(0);
    for (String st : strings) {
        blackhole.consume(st);
    }
}
Also used : AndFilter(io.druid.segment.filter.AndFilter) BoundFilter(io.druid.segment.filter.BoundFilter) DimensionPredicateFilter(io.druid.segment.filter.DimensionPredicateFilter) BoundDimFilter(io.druid.query.filter.BoundDimFilter) SelectorFilter(io.druid.segment.filter.SelectorFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) DimFilter(io.druid.query.filter.DimFilter) Filter(io.druid.query.filter.Filter) OrFilter(io.druid.segment.filter.OrFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) StorageAdapter(io.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(io.druid.segment.QueryableIndexStorageAdapter) List(java.util.List) ArrayList(java.util.ArrayList) Cursor(io.druid.segment.Cursor) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 5 with Filter

use of io.druid.query.filter.Filter in project druid by druid-io.

the class GroupByRowProcessor method process.

public static Sequence<Row> process(final Query queryParam, final Sequence<Row> rows, final Map<String, ValueType> rowSignature, final GroupByQueryConfig config, final GroupByQueryResource resource, final ObjectMapper spillMapper, final String processingTmpDir) {
    final GroupByQuery query = (GroupByQuery) queryParam;
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final AggregatorFactory[] aggregatorFactories = new AggregatorFactory[query.getAggregatorSpecs().size()];
    for (int i = 0; i < query.getAggregatorSpecs().size(); i++) {
        aggregatorFactories[i] = query.getAggregatorSpecs().get(i);
    }
    final File temporaryStorageDirectory = new File(processingTmpDir, String.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
    final List<Interval> queryIntervals = query.getIntervals();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
    final SettableSupplier<Row> rowSupplier = new SettableSupplier<>();
    final RowBasedColumnSelectorFactory columnSelectorFactory = RowBasedColumnSelectorFactory.create(rowSupplier, rowSignature);
    final ValueMatcher filterMatcher = filter == null ? BooleanValueMatcher.of(true) : filter.makeMatcher(columnSelectorFactory);
    final FilteredSequence<Row> filteredSequence = new FilteredSequence<>(rows, new Predicate<Row>() {

        @Override
        public boolean apply(Row input) {
            boolean inInterval = false;
            DateTime rowTime = input.getTimestamp();
            for (Interval queryInterval : queryIntervals) {
                if (queryInterval.contains(rowTime)) {
                    inInterval = true;
                    break;
                }
            }
            if (!inInterval) {
                return false;
            }
            rowSupplier.set(input);
            return filterMatcher.matches();
        }
    });
    return new BaseSequence<>(new BaseSequence.IteratorMaker<Row, CloseableGrouperIterator<RowBasedKey, Row>>() {

        @Override
        public CloseableGrouperIterator<RowBasedKey, Row> make() {
            // This contains all closeable objects which are closed when the returned iterator iterates all the elements,
            // or an exceptions is thrown. The objects are closed in their reverse order.
            final List<Closeable> closeOnExit = Lists.newArrayList();
            try {
                final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
                closeOnExit.add(temporaryStorage);
                Pair<Grouper<RowBasedKey>, Accumulator<Grouper<RowBasedKey>, Row>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(query, true, rowSignature, querySpecificConfig, new Supplier<ByteBuffer>() {

                    @Override
                    public ByteBuffer get() {
                        final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
                        closeOnExit.add(mergeBufferHolder);
                        return mergeBufferHolder.get();
                    }
                }, -1, temporaryStorage, spillMapper, aggregatorFactories);
                final Grouper<RowBasedKey> grouper = pair.lhs;
                final Accumulator<Grouper<RowBasedKey>, Row> accumulator = pair.rhs;
                closeOnExit.add(grouper);
                final Grouper<RowBasedKey> retVal = filteredSequence.accumulate(grouper, accumulator);
                if (retVal != grouper) {
                    throw GroupByQueryHelper.throwAccumulationResourceLimitExceededException();
                }
                return RowBasedGrouperHelper.makeGrouperIterator(grouper, query, new Closeable() {

                    @Override
                    public void close() throws IOException {
                        for (Closeable closeable : Lists.reverse(closeOnExit)) {
                            CloseQuietly.close(closeable);
                        }
                    }
                });
            } catch (Throwable e) {
                // Exception caught while setting up the iterator; release resources.
                for (Closeable closeable : Lists.reverse(closeOnExit)) {
                    CloseQuietly.close(closeable);
                }
                throw e;
            }
        }

        @Override
        public void cleanup(CloseableGrouperIterator<RowBasedKey, Row> iterFromMake) {
            iterFromMake.close();
        }
    });
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) Closeable(java.io.Closeable) RowBasedColumnSelectorFactory(io.druid.query.groupby.RowBasedColumnSelectorFactory) DateTime(org.joda.time.DateTime) GroupByQuery(io.druid.query.groupby.GroupByQuery) List(java.util.List) Supplier(com.google.common.base.Supplier) SettableSupplier(io.druid.common.guava.SettableSupplier) Pair(io.druid.java.util.common.Pair) BooleanValueMatcher(io.druid.segment.filter.BooleanValueMatcher) ValueMatcher(io.druid.query.filter.ValueMatcher) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) FilteredSequence(io.druid.java.util.common.guava.FilteredSequence) RowBasedKey(io.druid.query.groupby.epinephelinae.RowBasedGrouperHelper.RowBasedKey) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) ByteBuffer(java.nio.ByteBuffer) BaseSequence(io.druid.java.util.common.guava.BaseSequence) SettableSupplier(io.druid.common.guava.SettableSupplier) Filter(io.druid.query.filter.Filter) Row(io.druid.data.input.Row) File(java.io.File) Interval(org.joda.time.Interval)

Aggregations

Filter (io.druid.query.filter.Filter)25 ArrayList (java.util.ArrayList)14 DimFilter (io.druid.query.filter.DimFilter)13 SelectorDimFilter (io.druid.query.filter.SelectorDimFilter)11 Cursor (io.druid.segment.Cursor)11 List (java.util.List)10 AndDimFilter (io.druid.query.filter.AndDimFilter)8 OrDimFilter (io.druid.query.filter.OrDimFilter)8 StorageAdapter (io.druid.segment.StorageAdapter)8 BooleanFilter (io.druid.query.filter.BooleanFilter)7 AndFilter (io.druid.segment.filter.AndFilter)7 ImmutableBitmap (io.druid.collections.bitmap.ImmutableBitmap)6 BoundDimFilter (io.druid.query.filter.BoundDimFilter)6 QueryableIndexStorageAdapter (io.druid.segment.QueryableIndexStorageAdapter)6 Interval (org.joda.time.Interval)6 BoundFilter (io.druid.segment.filter.BoundFilter)5 DimensionPredicateFilter (io.druid.segment.filter.DimensionPredicateFilter)5 OrFilter (io.druid.segment.filter.OrFilter)5 SelectorFilter (io.druid.segment.filter.SelectorFilter)5 Test (org.junit.Test)5