Search in sources :

Example 11 with RegexDimFilter

use of org.apache.druid.query.filter.RegexDimFilter in project druid by druid-io.

the class FilteredAggregatorBenchmark method setup.

/**
 * Setup everything common for benchmarking both the incremental-index and the queriable-index.
 */
@Setup
public void setup() {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schema);
    generator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
    filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null)));
    filteredMetric = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
    factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = Collections.singletonList(filteredMetric);
    query = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DataGenerator(org.apache.druid.segment.generator.DataGenerator) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) Setup(org.openjdk.jmh.annotations.Setup)

Example 12 with RegexDimFilter

use of org.apache.druid.query.filter.RegexDimFilter in project druid by druid-io.

the class IncrementalIndexReadBenchmark method readWithFilters.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithFilters(Blackhole blackhole) {
    DimFilter filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getEnabledInstance()), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null)));
    IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
    Sequence<Cursor> cursors = makeCursors(sa, filter);
    Cursor cursor = cursors.limit(1).toList().get(0);
    List<DimensionSelector> selectors = new ArrayList<>();
    selectors.add(makeDimensionSelector(cursor, "dimSequential"));
    selectors.add(makeDimensionSelector(cursor, "dimZipf"));
    selectors.add(makeDimensionSelector(cursor, "dimUniform"));
    selectors.add(makeDimensionSelector(cursor, "dimSequentialHalfNull"));
    cursor.reset();
    while (!cursor.isDone()) {
        for (DimensionSelector selector : selectors) {
            IndexedInts row = selector.getRow();
            blackhole.consume(selector.lookupName(row.get(0)));
        }
        cursor.advance();
    }
}
Also used : RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) DimensionSelector(org.apache.druid.segment.DimensionSelector) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) ArrayList(java.util.ArrayList) Cursor(org.apache.druid.segment.Cursor) IndexedInts(org.apache.druid.segment.data.IndexedInts) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 13 with RegexDimFilter

use of org.apache.druid.query.filter.RegexDimFilter in project druid by druid-io.

the class TimeseriesQueryRunnerTest method testTimeseriesWithRegexFilter.

@Test
public void testTimeseriesWithRegexFilter() {
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource(QueryRunnerTestHelper.DATA_SOURCE).granularity(QueryRunnerTestHelper.DAY_GRAN).filters(new RegexDimFilter(QueryRunnerTestHelper.MARKET_DIMENSION, "^.p.*$", null)).intervals(QueryRunnerTestHelper.FIRST_TO_THIRD).aggregators(QueryRunnerTestHelper.ROWS_COUNT, QueryRunnerTestHelper.INDEX_LONG_SUM, QueryRunnerTestHelper.QUALITY_UNIQUES).postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT).descending(descending).context(makeContext()).build();
    List<Result<TimeseriesResultValue>> expectedResults = Arrays.asList(new Result<>(DateTimes.of("2011-04-01"), new TimeseriesResultValue(ImmutableMap.of("rows", 11L, "index", 3783L, "addRowsIndexConstant", 3795.0, "uniques", QueryRunnerTestHelper.UNIQUES_9))), new Result<>(DateTimes.of("2011-04-02"), new TimeseriesResultValue(ImmutableMap.of("rows", 11L, "index", 3313L, "addRowsIndexConstant", 3325.0, "uniques", QueryRunnerTestHelper.UNIQUES_9))));
    Iterable<Result<TimeseriesResultValue>> results = runner.run(QueryPlus.wrap(query)).toList();
    assertExpectedResults(expectedResults, results);
}
Also used : RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) Result(org.apache.druid.query.Result) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 14 with RegexDimFilter

use of org.apache.druid.query.filter.RegexDimFilter in project druid by druid-io.

the class TimeFilteringTest method testTimeFilterWithExtractionFn.

@Test
public void testTimeFilterWithExtractionFn() {
    final Map<String, String> stringMap = new HashMap<>();
    stringMap.put("0", "Monday");
    stringMap.put("1", "Tuesday");
    stringMap.put("2", "Wednesday");
    stringMap.put("3", "Thursday");
    stringMap.put("4", "Friday");
    stringMap.put("5", "Saturday");
    LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false);
    LookupExtractionFn exfn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true);
    assertFilterMatches(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "Monday", exfn), ImmutableList.of("0"));
    assertFilterMatches(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "Notaday", exfn), ImmutableList.of());
    assertFilterMatches(new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "Fridax", "Fridaz", false, false, null, exfn, StringComparators.ALPHANUMERIC), ImmutableList.of("4"));
    assertFilterMatches(new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "Friday", "Friday", true, true, null, exfn, StringComparators.ALPHANUMERIC), ImmutableList.of());
    assertFilterMatches(new InDimFilter(ColumnHolder.TIME_COLUMN_NAME, Arrays.asList("Caturday", "Saturday", "Tuesday"), exfn), ImmutableList.of("1", "5"));
    // test InFilter HashSet implementation
    List<String> bigList = Arrays.asList("Saturday", "Tuesday", "Caturday", "Xanaday", "Vojuday", "Gribaday", "Kipoday", "Dheferday", "Fakeday", "Qeearaday", "Hello", "World", "1", "2", "3", "4", "5", "6", "7");
    assertFilterMatches(new InDimFilter(ColumnHolder.TIME_COLUMN_NAME, bigList, exfn), ImmutableList.of("1", "5"));
    String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }";
    assertFilterMatchesSkipVectorize(new JavaScriptDimFilter(ColumnHolder.TIME_COLUMN_NAME, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("2", "3"));
    assertFilterMatches(new RegexDimFilter(ColumnHolder.TIME_COLUMN_NAME, ".*day", exfn), ImmutableList.of("0", "1", "2", "3", "4", "5"));
    assertFilterMatches(new SearchQueryDimFilter(ColumnHolder.TIME_COLUMN_NAME, new ContainsSearchQuerySpec("s", true), exfn), ImmutableList.of("1", "2", "3"));
}
Also used : RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) HashMap(java.util.HashMap) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) LookupExtractor(org.apache.druid.query.lookup.LookupExtractor) LookupExtractionFn(org.apache.druid.query.lookup.LookupExtractionFn) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) MapLookupExtractor(org.apache.druid.query.extraction.MapLookupExtractor) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) Test(org.junit.Test)

Example 15 with RegexDimFilter

use of org.apache.druid.query.filter.RegexDimFilter in project druid by druid-io.

the class TimeFilteringTest method testTimeFilterAsLong.

@Test
public void testTimeFilterAsLong() {
    assertFilterMatches(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "0", null), ImmutableList.of("0"));
    assertFilterMatches(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "9000", null), ImmutableList.of());
    assertFilterMatches(new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "0", "4", false, false, null, null, StringComparators.NUMERIC), ImmutableList.of("0", "1", "2", "3", "4"));
    assertFilterMatches(new BoundDimFilter(ColumnHolder.TIME_COLUMN_NAME, "0", "4", true, true, null, null, StringComparators.NUMERIC), ImmutableList.of("1", "2", "3"));
    assertFilterMatches(new InDimFilter(ColumnHolder.TIME_COLUMN_NAME, Arrays.asList("2", "4", "8"), null), ImmutableList.of("2", "4"));
    // cross the hashing threshold to test hashset implementation, filter on even values
    List<String> infilterValues = new ArrayList<>(NUM_FILTER_VALUES);
    for (int i = 0; i < NUM_FILTER_VALUES; i++) {
        infilterValues.add(String.valueOf(i * 2));
    }
    assertFilterMatches(new InDimFilter(ColumnHolder.TIME_COLUMN_NAME, infilterValues, null), ImmutableList.of("0", "2", "4"));
    String jsFn = "function(x) { return(x === 3 || x === 5) }";
    assertFilterMatchesSkipVectorize(new JavaScriptDimFilter(ColumnHolder.TIME_COLUMN_NAME, jsFn, null, JavaScriptConfig.getEnabledInstance()), ImmutableList.of("3", "5"));
    assertFilterMatches(new RegexDimFilter(ColumnHolder.TIME_COLUMN_NAME, "4", null), ImmutableList.of("4"));
    assertFilterMatches(new SearchQueryDimFilter(ColumnHolder.TIME_COLUMN_NAME, new ContainsSearchQuerySpec("2", true), null), ImmutableList.of("2"));
}
Also used : RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) ArrayList(java.util.ArrayList) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) Test(org.junit.Test)

Aggregations

RegexDimFilter (org.apache.druid.query.filter.RegexDimFilter)24 Test (org.junit.Test)18 BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter)12 SearchQueryDimFilter (org.apache.druid.query.filter.SearchQueryDimFilter)12 ContainsSearchQuerySpec (org.apache.druid.query.search.ContainsSearchQuerySpec)12 InDimFilter (org.apache.druid.query.filter.InDimFilter)11 JavaScriptDimFilter (org.apache.druid.query.filter.JavaScriptDimFilter)11 SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter)9 ArrayList (java.util.ArrayList)6 LookupExtractionFn (org.apache.druid.query.lookup.LookupExtractionFn)5 HashMap (java.util.HashMap)4 MapLookupExtractor (org.apache.druid.query.extraction.MapLookupExtractor)4 OrDimFilter (org.apache.druid.query.filter.OrDimFilter)4 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)4 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)3 ExtractionFn (org.apache.druid.query.extraction.ExtractionFn)3 JavaScriptExtractionFn (org.apache.druid.query.extraction.JavaScriptExtractionFn)3 DimFilter (org.apache.druid.query.filter.DimFilter)3 LookupExtractor (org.apache.druid.query.lookup.LookupExtractor)3 PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity)2