use of io.druid.query.filter.RegexDimFilter in project druid by druid-io.
the class FilteredAggregatorTest method testAggregateWithPredicateFilters.
@Test
public void testAggregateWithPredicateFilters() {
final float[] values = { 0.15f, 0.27f };
TestFloatColumnSelector selector;
FilteredAggregatorFactory factory;
factory = new FilteredAggregatorFactory(new DoubleSumAggregatorFactory("billy", "value"), new BoundDimFilter("dim", "a", "a", false, false, true, null, StringComparators.ALPHANUMERIC));
selector = new TestFloatColumnSelector(values);
validateFilteredAggs(factory, values, selector);
factory = new FilteredAggregatorFactory(new DoubleSumAggregatorFactory("billy", "value"), new RegexDimFilter("dim", "a", null));
selector = new TestFloatColumnSelector(values);
validateFilteredAggs(factory, values, selector);
factory = new FilteredAggregatorFactory(new DoubleSumAggregatorFactory("billy", "value"), new SearchQueryDimFilter("dim", new ContainsSearchQuerySpec("a", true), null));
selector = new TestFloatColumnSelector(values);
validateFilteredAggs(factory, values, selector);
String jsFn = "function(x) { return(x === 'a') }";
factory = new FilteredAggregatorFactory(new DoubleSumAggregatorFactory("billy", "value"), new JavaScriptDimFilter("dim", jsFn, null, JavaScriptConfig.getEnabledInstance()));
selector = new TestFloatColumnSelector(values);
validateFilteredAggs(factory, values, selector);
}
use of io.druid.query.filter.RegexDimFilter in project druid by druid-io.
the class FilteredAggregatorBenchmark method setup.
@Setup
public void setup() throws IOException {
log.info("SETUP CALLED AT " + System.currentTimeMillis());
if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
}
schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schema);
BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
incIndex = makeIncIndex(schemaInfo.getAggsArray());
filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getEnabledInstance()), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Arrays.asList("X"), null)));
filteredMetrics = new AggregatorFactory[1];
filteredMetrics[0] = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
incIndexFilteredAgg = makeIncIndex(filteredMetrics);
inputRows = new ArrayList<>();
for (int j = 0; j < rowsPerSegment; j++) {
InputRow row = gen.nextRow();
if (j % 10000 == 0) {
log.info(j + " rows generated.");
}
incIndex.add(row);
inputRows.add(row);
}
tmpDir = Files.createTempDir();
log.info("Using temp dir: " + tmpDir.getAbsolutePath());
indexFile = INDEX_MERGER_V9.persist(incIndex, tmpDir, new IndexSpec());
qIndex = INDEX_IO.loadIndex(indexFile);
factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(filteredMetrics[0]);
query = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(false).build();
}
use of io.druid.query.filter.RegexDimFilter in project druid by druid-io.
the class GroupByQueryRunnerTest method testBySegmentResultsWithAllFiltersWithExtractionFns.
@Test
public void testBySegmentResultsWithAllFiltersWithExtractionFns() {
int segmentCount = 32;
Result<BySegmentResultValue> singleSegmentResult = new Result<BySegmentResultValue>(new DateTime("2011-01-12T00:00:00.000Z"), new BySegmentResultValueClass(Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L)), "testSegment", new Interval("2011-04-02T00:00:00.000Z/2011-04-04T00:00:00.000Z")));
List<Result> bySegmentResults = Lists.newArrayList();
for (int i = 0; i < segmentCount; i++) {
bySegmentResults.add(singleSegmentResult);
}
String extractionJsFn = "function(str) { return 'super-' + str; }";
String jsFn = "function(x) { return(x === 'super-mezzanine') }";
ExtractionFn extractionFn = new JavaScriptExtractionFn(extractionJsFn, false, JavaScriptConfig.getEnabledInstance());
List<DimFilter> superFilterList = new ArrayList<>();
superFilterList.add(new SelectorDimFilter("quality", "super-mezzanine", extractionFn));
superFilterList.add(new InDimFilter("quality", Arrays.asList("not-super-mezzanine", "FOOBAR", "super-mezzanine"), extractionFn));
superFilterList.add(new BoundDimFilter("quality", "super-mezzanine", "super-mezzanine", false, false, true, extractionFn, StringComparators.ALPHANUMERIC));
superFilterList.add(new RegexDimFilter("quality", "super-mezzanine", extractionFn));
superFilterList.add(new SearchQueryDimFilter("quality", new ContainsSearchQuerySpec("super-mezzanine", true), extractionFn));
superFilterList.add(new JavaScriptDimFilter("quality", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance()));
DimFilter superFilter = new AndDimFilter(superFilterList);
GroupByQuery.Builder builder = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setInterval("2011-04-02/2011-04-04").setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias"))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"))).setGranularity(new PeriodGranularity(new Period("P1M"), null, null)).setDimFilter(superFilter).setContext(ImmutableMap.<String, Object>of("bySegment", true));
final GroupByQuery fullQuery = builder.build();
QueryToolChest toolChest = factory.getToolchest();
List<QueryRunner<Row>> singleSegmentRunners = Lists.newArrayList();
for (int i = 0; i < segmentCount; i++) {
singleSegmentRunners.add(toolChest.preMergeQueryDecoration(runner));
}
ExecutorService exec = Executors.newCachedThreadPool();
QueryRunner theRunner = toolChest.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChest.mergeResults(factory.mergeRunners(Executors.newCachedThreadPool(), singleSegmentRunners)), toolChest));
TestHelper.assertExpectedObjects(bySegmentResults, theRunner.run(fullQuery, Maps.newHashMap()), "");
exec.shutdownNow();
}
use of io.druid.query.filter.RegexDimFilter in project druid by druid-io.
the class IncrementalIndexReadBenchmark method readWithFilters.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readWithFilters(Blackhole blackhole) throws Exception {
DimFilter filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getEnabledInstance()), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Arrays.asList("X"), null)));
IncrementalIndexStorageAdapter sa = new IncrementalIndexStorageAdapter(incIndex);
Sequence<Cursor> cursors = makeCursors(sa, filter);
Cursor cursor = Sequences.toList(Sequences.limit(cursors, 1), Lists.<Cursor>newArrayList()).get(0);
List<DimensionSelector> selectors = new ArrayList<>();
selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimSequential", null)));
selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimZipf", null)));
selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimUniform", null)));
selectors.add(cursor.makeDimensionSelector(new DefaultDimensionSpec("dimSequentialHalfNull", null)));
cursor.reset();
while (!cursor.isDone()) {
for (DimensionSelector selector : selectors) {
IndexedInts row = selector.getRow();
blackhole.consume(selector.lookupName(row.get(0)));
}
cursor.advance();
}
}
use of io.druid.query.filter.RegexDimFilter in project druid by druid-io.
the class TimeFilteringTest method testTimeFilterWithExtractionFn.
@Test
public void testTimeFilterWithExtractionFn() {
final Map<String, String> stringMap = new HashMap<>();
stringMap.put("0", "Monday");
stringMap.put("1", "Tuesday");
stringMap.put("2", "Wednesday");
stringMap.put("3", "Thursday");
stringMap.put("4", "Friday");
stringMap.put("5", "Saturday");
LookupExtractor mapExtractor = new MapLookupExtractor(stringMap, false);
LookupExtractionFn exfn = new LookupExtractionFn(mapExtractor, false, "UNKNOWN", false, true);
assertFilterMatches(new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Monday", exfn), ImmutableList.<String>of("0"));
assertFilterMatches(new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Notaday", exfn), ImmutableList.<String>of());
assertFilterMatches(new BoundDimFilter(Column.TIME_COLUMN_NAME, "Fridax", "Fridaz", false, false, null, exfn, StringComparators.ALPHANUMERIC), ImmutableList.<String>of("4"));
assertFilterMatches(new BoundDimFilter(Column.TIME_COLUMN_NAME, "Friday", "Friday", true, true, null, exfn, StringComparators.ALPHANUMERIC), ImmutableList.<String>of());
assertFilterMatches(new InDimFilter(Column.TIME_COLUMN_NAME, Arrays.asList("Caturday", "Saturday", "Tuesday"), exfn), ImmutableList.<String>of("1", "5"));
// test InFilter HashSet implementation
List<String> bigList = Arrays.asList("Saturday", "Tuesday", "Caturday", "Xanaday", "Vojuday", "Gribaday", "Kipoday", "Dheferday", "Fakeday", "Qeearaday", "Hello", "World", "1", "2", "3", "4", "5", "6", "7");
assertFilterMatches(new InDimFilter(Column.TIME_COLUMN_NAME, bigList, exfn), ImmutableList.<String>of("1", "5"));
String jsFn = "function(x) { return(x === 'Wednesday' || x === 'Thursday') }";
assertFilterMatches(new JavaScriptDimFilter(Column.TIME_COLUMN_NAME, jsFn, exfn, JavaScriptConfig.getEnabledInstance()), ImmutableList.<String>of("2", "3"));
assertFilterMatches(new RegexDimFilter(Column.TIME_COLUMN_NAME, ".*day", exfn), ImmutableList.<String>of("0", "1", "2", "3", "4", "5"));
assertFilterMatches(new SearchQueryDimFilter(Column.TIME_COLUMN_NAME, new ContainsSearchQuerySpec("s", true), exfn), ImmutableList.<String>of("1", "2", "3"));
}
Aggregations