Search in sources :

Example 41 with DimFilter

use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.

the class DruidInputSource method fixedFormatReader.

@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();
    final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable.from(timeline).transformAndConcat(holder -> {
        // noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        // noinspection ConstantConditions
        return FluentIterable.from(partitionHolder).transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
    }).iterator();
    final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
    final InputRowSchema inputRowSchemaToUse;
    if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
        // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
        LOG.warn("Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with " + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false.");
        inputRowSchemaToUse = new InputRowSchema(new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null), inputRowSchema.getDimensionsSpec(), inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME));
    } else {
        inputRowSchemaToUse = inputRowSchema;
    }
    if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn()) && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
        // Slight chance the user did this intentionally, but not likely. Log a warning.
        LOG.warn("The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the " + "column as-is, switch formats.", inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(), STANDARD_TIME_COLUMN_FORMATS);
    }
    return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
Also used : SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) Comparators(org.apache.druid.java.util.common.guava.Comparators) AbstractInputSource(org.apache.druid.data.input.AbstractInputSource) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) FluentIterable(com.google.common.collect.FluentIterable) Map(java.util.Map) InputSourceReader(org.apache.druid.data.input.InputSourceReader) IAE(org.apache.druid.java.util.common.IAE) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) SplittableInputSource(org.apache.druid.data.input.impl.SplittableInputSource) ISE(org.apache.druid.java.util.common.ISE) Objects(java.util.Objects) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) Stream(java.util.stream.Stream) DimFilter(org.apache.druid.query.filter.DimFilter) DataSegment(org.apache.druid.timeline.DataSegment) SortedMap(java.util.SortedMap) Logger(org.apache.druid.java.util.common.logger.Logger) Streams(org.apache.druid.utils.Streams) InputSplit(org.apache.druid.data.input.InputSplit) Duration(org.joda.time.Duration) SegmentsSplitHintSpec(org.apache.druid.data.input.SegmentsSplitHintSpec) HashMap(java.util.HashMap) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) InputFileAttribute(org.apache.druid.data.input.InputFileAttribute) Nullable(javax.annotation.Nullable) RetryPolicy(org.apache.druid.indexing.common.RetryPolicy) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Iterator(java.util.Iterator) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) File(java.io.File) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader) TreeMap(java.util.TreeMap) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) Preconditions(com.google.common.base.Preconditions) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) Comparator(java.util.Comparator) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader)

Example 42 with DimFilter

use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNWithExtractionFilterAndFilteredAggregatorCaseNoExistingValue.

@Test
public void testTopNWithExtractionFilterAndFilteredAggregatorCaseNoExistingValue() {
    Map<String, String> extractionMap = new HashMap<>();
    MapLookupExtractor mapLookupExtractor = new MapLookupExtractor(extractionMap, false);
    LookupExtractionFn lookupExtractionFn;
    if (NullHandling.replaceWithDefault()) {
        lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, null, true, false);
        extractionMap.put("", "NULL");
    } else {
        lookupExtractionFn = new LookupExtractionFn(mapLookupExtractor, false, "NULL", true, false);
    }
    DimFilter extractionFilter = new ExtractionDimFilter("null_column", "NULL", lookupExtractionFn, null);
    TopNQueryBuilder topNQueryBuilder = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.DATA_SOURCE).granularity(QueryRunnerTestHelper.ALL_GRAN).dimension("null_column").metric(QueryRunnerTestHelper.INDEX_METRIC).threshold(4).intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).aggregators(Lists.newArrayList(Iterables.concat(commonAggregators, Lists.newArrayList(new FilteredAggregatorFactory(new DoubleMaxAggregatorFactory("maxIndex", "index"), extractionFilter), new DoubleMinAggregatorFactory("minIndex", "index"))))).postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT);
    TopNQuery topNQueryWithNULLValueExtraction = topNQueryBuilder.filters(extractionFilter).build();
    Map<String, Object> map = new HashMap<>();
    map.put("null_column", null);
    map.put("rows", 1209L);
    map.put("index", 503332.5071372986D);
    map.put("addRowsIndexConstant", 504542.5071372986D);
    map.put("uniques", QueryRunnerTestHelper.UNIQUES_9);
    map.put("maxIndex", 1870.061029D);
    map.put("minIndex", 59.02102279663086D);
    List<Result<TopNResultValue>> expectedResults = Collections.singletonList(new Result<>(DateTimes.of("2011-01-12T00:00:00.000Z"), new TopNResultValue(Collections.singletonList(map))));
    assertExpectedResults(expectedResults, topNQueryWithNULLValueExtraction);
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) HashMap(java.util.HashMap) ExtractionDimFilter(org.apache.druid.query.filter.ExtractionDimFilter) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) Result(org.apache.druid.query.Result) LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ExtractionDimFilter(org.apache.druid.query.filter.ExtractionDimFilter) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 43 with DimFilter

use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.

the class SearchQueryRunnerTest method testSearchWithFilterEmptyResults.

@Test
public void testSearchWithFilterEmptyResults() {
    List<SearchHit> expectedHits = new ArrayList<>();
    DimFilter filter = new AndDimFilter(new SelectorDimFilter(QueryRunnerTestHelper.MARKET_DIMENSION, "total_market", null), new SelectorDimFilter(QueryRunnerTestHelper.QUALITY_DIMENSION, "automotive", null));
    checkSearchQuery(Druids.newSearchQueryBuilder().dataSource(QueryRunnerTestHelper.DATA_SOURCE).granularity(QueryRunnerTestHelper.ALL_GRAN).filters(filter).intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).query("a").build(), expectedHits);
}
Also used : AndDimFilter(org.apache.druid.query.filter.AndDimFilter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) ArrayList(java.util.ArrayList) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) ExtractionDimFilter(org.apache.druid.query.filter.ExtractionDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 44 with DimFilter

use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.

the class SearchQueryRunnerTest method testSearchWithMultiOrFilter.

@Test
public void testSearchWithMultiOrFilter() {
    List<SearchHit> expectedHits = new ArrayList<>();
    expectedHits.add(new SearchHit(QueryRunnerTestHelper.QUALITY_DIMENSION, "automotive", 93));
    DimFilter filter = new OrDimFilter(new SelectorDimFilter(QueryRunnerTestHelper.QUALITY_DIMENSION, "total_market", null), new SelectorDimFilter(QueryRunnerTestHelper.QUALITY_DIMENSION, "automotive", null));
    checkSearchQuery(Druids.newSearchQueryBuilder().dataSource(QueryRunnerTestHelper.DATA_SOURCE).granularity(QueryRunnerTestHelper.ALL_GRAN).dimensions(QueryRunnerTestHelper.QUALITY_DIMENSION).filters(filter).intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC).query("a").build(), expectedHits);
}
Also used : SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) ArrayList(java.util.ArrayList) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) ExtractionDimFilter(org.apache.druid.query.filter.ExtractionDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 45 with DimFilter

use of org.apache.druid.query.filter.DimFilter in project druid by druid-io.

the class FilterPartitionTest method testDistributeOrCNF.

@Test
public void testDistributeOrCNF() {
    DimFilter dimFilter1 = new OrDimFilter(Arrays.asList(new SelectorDimFilter("dim0", "6", null), new AndDimFilter(Arrays.asList(new NoBitmapSelectorDimFilter("dim1", "abdef", null), new SelectorDimFilter("dim2", "c", null)))));
    Filter filter1 = dimFilter1.toFilter();
    Filter filter1CNF = Filters.toCnf(filter1);
    Assert.assertEquals(AndFilter.class, filter1CNF.getClass());
    Assert.assertEquals(2, ((AndFilter) filter1CNF).getFilters().size());
    assertFilterMatches(dimFilter1, ImmutableList.of("4", "6"));
    DimFilter dimFilter2 = new OrDimFilter(Arrays.asList(new SelectorDimFilter("dim0", "2", null), new SelectorDimFilter("dim0", "3", null), new AndDimFilter(Arrays.asList(new NoBitmapSelectorDimFilter("dim1", "HELLO", null), new SelectorDimFilter("dim2", "foo", null)))));
    assertFilterMatches(dimFilter2, ImmutableList.of("2", "3", "7"));
    DimFilter dimFilter3 = new OrDimFilter(Arrays.asList(dimFilter1, dimFilter2, new AndDimFilter(Arrays.asList(new NoBitmapSelectorDimFilter("dim1", "1", null), new SelectorDimFilter("dim2", "foo", null)))));
    assertFilterMatches(dimFilter3, ImmutableList.of("2", "3", "4", "6", "7", "9"));
}
Also used : AndDimFilter(org.apache.druid.query.filter.AndDimFilter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) Filter(org.apache.druid.query.filter.Filter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) Test(org.junit.Test)

Aggregations

DimFilter (org.apache.druid.query.filter.DimFilter)57 AndDimFilter (org.apache.druid.query.filter.AndDimFilter)31 SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter)30 ArrayList (java.util.ArrayList)29 BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter)29 OrDimFilter (org.apache.druid.query.filter.OrDimFilter)27 Test (org.junit.Test)21 InDimFilter (org.apache.druid.query.filter.InDimFilter)18 List (java.util.List)14 HashMap (java.util.HashMap)12 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)12 ExtractionDimFilter (org.apache.druid.query.filter.ExtractionDimFilter)11 NotDimFilter (org.apache.druid.query.filter.NotDimFilter)11 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)9 RegexDimFilter (org.apache.druid.query.filter.RegexDimFilter)9 ISE (org.apache.druid.java.util.common.ISE)7 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)7 JavaScriptDimFilter (org.apache.druid.query.filter.JavaScriptDimFilter)7 SearchQueryDimFilter (org.apache.druid.query.filter.SearchQueryDimFilter)7 Interval (org.joda.time.Interval)7