Search in sources :

Example 1 with OrDimFilter

use of org.apache.druid.query.filter.OrDimFilter in project druid by druid-io.

the class FilteredAggregatorBenchmark method setup.

/**
 * Setup everything common for benchmarking both the incremental-index and the queriable-index.
 */
@Setup
public void setup() {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schema);
    generator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
    filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Collections.singletonList("X"), null)));
    filteredMetric = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
    factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = Collections.singletonList(filteredMetric);
    query = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(descending).build();
}
Also used : FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) RegexDimFilter(org.apache.druid.query.filter.RegexDimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) ContainsSearchQuerySpec(org.apache.druid.query.search.ContainsSearchQuerySpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) FilteredAggregatorFactory(org.apache.druid.query.aggregation.FilteredAggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimeseriesQueryEngine(org.apache.druid.query.timeseries.TimeseriesQueryEngine) TimeseriesQueryRunnerFactory(org.apache.druid.query.timeseries.TimeseriesQueryRunnerFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DataGenerator(org.apache.druid.segment.generator.DataGenerator) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) InDimFilter(org.apache.druid.query.filter.InDimFilter) SearchQueryDimFilter(org.apache.druid.query.filter.SearchQueryDimFilter) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) Setup(org.openjdk.jmh.annotations.Setup)

Example 2 with OrDimFilter

use of org.apache.druid.query.filter.OrDimFilter in project druid by druid-io.

the class FilterPartitionBenchmark method readComplexOrFilterCNF.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void readComplexOrFilterCNF(Blackhole blackhole) {
    DimFilter dimFilter1 = new OrDimFilter(Arrays.asList(new SelectorDimFilter("dimSequential", "199", null), new AndDimFilter(Arrays.asList(new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Corundum", null), new SelectorDimFilter("dimMultivalEnumerated", "Bar", null)))));
    DimFilter dimFilter2 = new OrDimFilter(Arrays.asList(new SelectorDimFilter("dimSequential", "299", null), new SelectorDimFilter("dimSequential", "399", null), new AndDimFilter(Arrays.asList(new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Xylophone", null), new SelectorDimFilter("dimMultivalEnumerated", "Foo", null)))));
    DimFilter dimFilter3 = new OrDimFilter(Arrays.asList(dimFilter1, dimFilter2, new AndDimFilter(Arrays.asList(new NoBitmapSelectorDimFilter("dimMultivalEnumerated2", "Orange", null), new SelectorDimFilter("dimMultivalEnumerated", "World", null)))));
    StorageAdapter sa = new QueryableIndexStorageAdapter(qIndex);
    Sequence<Cursor> cursors = makeCursors(sa, Filters.toCnf(dimFilter3.toFilter()));
    readCursors(cursors, blackhole);
}
Also used : AndDimFilter(org.apache.druid.query.filter.AndDimFilter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) StorageAdapter(org.apache.druid.segment.StorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) Cursor(org.apache.druid.segment.Cursor) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 3 with OrDimFilter

use of org.apache.druid.query.filter.OrDimFilter in project druid by druid-io.

the class BottomUpTransform method apply0.

private DimFilter apply0(final DimFilter filter) {
    if (filter instanceof AndDimFilter) {
        final List<DimFilter> oldFilters = ((AndDimFilter) filter).getFields();
        final List<DimFilter> newFilters = new ArrayList<>();
        for (DimFilter oldFilter : oldFilters) {
            final DimFilter newFilter = apply0(oldFilter);
            if (newFilter != null) {
                newFilters.add(newFilter);
            }
        }
        if (!newFilters.equals(oldFilters)) {
            return checkedProcess(new AndDimFilter(newFilters));
        } else {
            return checkedProcess(filter);
        }
    } else if (filter instanceof OrDimFilter) {
        final List<DimFilter> oldFilters = ((OrDimFilter) filter).getFields();
        final List<DimFilter> newFilters = new ArrayList<>();
        for (DimFilter oldFilter : oldFilters) {
            final DimFilter newFilter = apply0(oldFilter);
            if (newFilter != null) {
                newFilters.add(newFilter);
            }
        }
        if (!newFilters.equals(oldFilters)) {
            return checkedProcess(new OrDimFilter(newFilters));
        } else {
            return checkedProcess(filter);
        }
    } else if (filter instanceof NotDimFilter) {
        final DimFilter oldFilter = ((NotDimFilter) filter).getField();
        final DimFilter newFilter = apply0(oldFilter);
        if (!oldFilter.equals(newFilter)) {
            return checkedProcess(new NotDimFilter(newFilter));
        } else {
            return checkedProcess(filter);
        }
    } else {
        return checkedProcess(filter);
    }
}
Also used : NotDimFilter(org.apache.druid.query.filter.NotDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) ArrayList(java.util.ArrayList) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) List(java.util.List) ArrayList(java.util.ArrayList) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) NotDimFilter(org.apache.druid.query.filter.NotDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter)

Example 4 with OrDimFilter

use of org.apache.druid.query.filter.OrDimFilter in project druid by druid-io.

the class NativeQueryMaker method runQuery.

@Override
public Sequence<Object[]> runQuery(final DruidQuery druidQuery) {
    final Query<?> query = druidQuery.getQuery();
    if (plannerContext.getPlannerConfig().isRequireTimeCondition() && !(druidQuery.getDataSource() instanceof InlineDataSource)) {
        if (Intervals.ONLY_ETERNITY.equals(findBaseDataSourceIntervals(query))) {
            throw new CannotBuildQueryException("requireTimeCondition is enabled, all queries must include a filter condition on the __time column");
        }
    }
    int numFilters = plannerContext.getPlannerConfig().getMaxNumericInFilters();
    // Instead of IN(v1,v2,v3) user should specify IN('v1','v2','v3')
    if (numFilters != PlannerConfig.NUM_FILTER_NOT_USED) {
        if (query.getFilter() instanceof OrDimFilter) {
            OrDimFilter orDimFilter = (OrDimFilter) query.getFilter();
            int numBoundFilters = 0;
            for (DimFilter filter : orDimFilter.getFields()) {
                numBoundFilters += filter instanceof BoundDimFilter ? 1 : 0;
            }
            if (numBoundFilters > numFilters) {
                String dimension = ((BoundDimFilter) (orDimFilter.getFields().get(0))).getDimension();
                throw new UOE(StringUtils.format("The number of values in the IN clause for [%s] in query exceeds configured maxNumericFilter limit of [%s] for INs. Cast [%s] values of IN clause to String", dimension, numFilters, orDimFilter.getFields().size()));
            }
        }
    }
    final List<String> rowOrder;
    if (query instanceof TimeseriesQuery && !druidQuery.getGrouping().getDimensions().isEmpty()) {
        // Hack for timeseries queries: when generating them, DruidQuery.toTimeseriesQuery translates a dimension
        // based on a timestamp_floor expression into a 'granularity'. This is not reflected in the druidQuery's
        // output row signature, so we have to account for it here.
        // TODO: We can remove this once https://github.com/apache/druid/issues/9974 is done.
        final String timeDimension = Iterables.getOnlyElement(druidQuery.getGrouping().getDimensions()).getOutputName();
        rowOrder = druidQuery.getOutputRowSignature().getColumnNames().stream().map(f -> timeDimension.equals(f) ? ColumnHolder.TIME_COLUMN_NAME : f).collect(Collectors.toList());
    } else {
        rowOrder = druidQuery.getOutputRowSignature().getColumnNames();
    }
    final List<SqlTypeName> columnTypes = druidQuery.getOutputRowType().getFieldList().stream().map(f -> f.getType().getSqlTypeName()).collect(Collectors.toList());
    return execute(query, mapColumnList(rowOrder, fieldMapping), mapColumnList(columnTypes, fieldMapping));
}
Also used : Arrays(java.util.Arrays) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) Object2IntOpenHashMap(it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap) IAE(org.apache.druid.java.util.common.IAE) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Collection(java.util.Collection) StringUtils(org.apache.druid.java.util.common.StringUtils) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) ISE(org.apache.druid.java.util.common.ISE) UUID(java.util.UUID) PlannerConfig(org.apache.druid.sql.calcite.planner.PlannerConfig) Collectors(java.util.stream.Collectors) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) DimFilter(org.apache.druid.query.filter.DimFilter) Hook(org.apache.calcite.runtime.Hook) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) QueryLifecycleFactory(org.apache.druid.server.QueryLifecycleFactory) Iterables(com.google.common.collect.Iterables) ComparableList(org.apache.druid.segment.data.ComparableList) Intervals(org.apache.druid.java.util.common.Intervals) CannotBuildQueryException(org.apache.druid.sql.calcite.rel.CannotBuildQueryException) QueryLifecycle(org.apache.druid.server.QueryLifecycle) ArrayList(java.util.ArrayList) AuthenticationResult(org.apache.druid.server.security.AuthenticationResult) Interval(org.joda.time.Interval) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) Query(org.apache.druid.query.Query) Pair(org.apache.calcite.util.Pair) PlannerContext(org.apache.druid.sql.calcite.planner.PlannerContext) ComparableStringArray(org.apache.druid.segment.data.ComparableStringArray) UOE(org.apache.druid.java.util.common.UOE) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) Sequences(org.apache.druid.java.util.common.guava.Sequences) DruidQuery(org.apache.druid.sql.calcite.rel.DruidQuery) RelDataType(org.apache.calcite.rel.type.RelDataType) Access(org.apache.druid.server.security.Access) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) NlsString(org.apache.calcite.util.NlsString) InlineDataSource(org.apache.druid.query.InlineDataSource) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) QueryToolChest(org.apache.druid.query.QueryToolChest) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) Ints(com.google.common.primitives.Ints) Object2IntMap(it.unimi.dsi.fastutil.objects.Object2IntMap) NullHandling(org.apache.druid.common.config.NullHandling) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) Calcites(org.apache.druid.sql.calcite.planner.Calcites) Evals(org.apache.druid.math.expr.Evals) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) TimeseriesQuery(org.apache.druid.query.timeseries.TimeseriesQuery) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) UOE(org.apache.druid.java.util.common.UOE) NlsString(org.apache.calcite.util.NlsString) CannotBuildQueryException(org.apache.druid.sql.calcite.rel.CannotBuildQueryException) InlineDataSource(org.apache.druid.query.InlineDataSource) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter)

Example 5 with OrDimFilter

use of org.apache.druid.query.filter.OrDimFilter in project druid by druid-io.

the class CombineAndSimplifyBounds method doSimplify.

/**
 * Simplify BoundDimFilters that are children of an OR or an AND.
 *
 * @param children    the filters
 * @param disjunction true for OR, false for AND
 *
 * @return simplified filters
 */
private static DimFilter doSimplify(final List<DimFilter> children, boolean disjunction) {
    // Copy the list of child filters. We'll modify the copy and eventually return it.
    final List<DimFilter> newChildren = Lists.newArrayList(children);
    // Group Bound filters by dimension, extractionFn, and comparator and compute a RangeSet for each one.
    final Map<BoundRefKey, List<BoundDimFilter>> bounds = new HashMap<>();
    // all and/or filters have at least 1 child
    boolean allFalse = true;
    for (final DimFilter child : newChildren) {
        if (child instanceof BoundDimFilter) {
            final BoundDimFilter bound = (BoundDimFilter) child;
            final BoundRefKey boundRefKey = BoundRefKey.from(bound);
            final List<BoundDimFilter> filterList = bounds.computeIfAbsent(boundRefKey, k -> new ArrayList<>());
            filterList.add(bound);
            allFalse = false;
        } else {
            allFalse &= child instanceof FalseDimFilter;
        }
    }
    // short circuit if can never be true
    if (allFalse) {
        return Filtration.matchNothing();
    }
    // Try to simplify filters within each group.
    for (Map.Entry<BoundRefKey, List<BoundDimFilter>> entry : bounds.entrySet()) {
        final BoundRefKey boundRefKey = entry.getKey();
        final List<BoundDimFilter> filterList = entry.getValue();
        // Create a RangeSet for this group.
        final RangeSet<BoundValue> rangeSet = disjunction ? RangeSets.unionRanges(Bounds.toRanges(filterList)) : RangeSets.intersectRanges(Bounds.toRanges(filterList));
        if (rangeSet.asRanges().size() < filterList.size()) {
            // We found a simplification. Remove the old filters and add new ones.
            for (final BoundDimFilter bound : filterList) {
                if (!newChildren.remove(bound)) {
                    // Don't expect this to happen, but include it as a sanity check.
                    throw new ISE("Tried to remove bound, but couldn't");
                }
            }
            if (rangeSet.asRanges().isEmpty()) {
                // range set matches nothing, equivalent to FALSE
                newChildren.add(Filtration.matchNothing());
            }
            for (final Range<BoundValue> range : rangeSet.asRanges()) {
                if (!range.hasLowerBound() && !range.hasUpperBound()) {
                    // range matches all, equivalent to TRUE
                    newChildren.add(Filtration.matchEverything());
                } else {
                    newChildren.add(Bounds.toFilter(boundRefKey, range));
                }
            }
        }
    }
    // Finally: Go through newChildren, removing or potentially exiting early based on TRUE / FALSE marker filters.
    Preconditions.checkState(newChildren.size() > 0, "newChildren.size > 0");
    final Iterator<DimFilter> iterator = newChildren.iterator();
    while (iterator.hasNext()) {
        final DimFilter newChild = iterator.next();
        if (Filtration.matchNothing().equals(newChild)) {
            // AND with FALSE => always false, short circuit
            if (disjunction) {
                iterator.remove();
            } else {
                return Filtration.matchNothing();
            }
        } else if (Filtration.matchEverything().equals(newChild)) {
            // AND with TRUE => ignore
            if (disjunction) {
                return Filtration.matchEverything();
            } else {
                iterator.remove();
            }
        }
    }
    if (newChildren.isEmpty()) {
        // If "newChildren" is empty at this point, it must have consisted entirely of TRUE / FALSE marker filters.
        if (disjunction) {
            // Must have been all FALSE filters (the only kind we would have removed above).
            return Filtration.matchNothing();
        } else {
            // Must have been all TRUE filters (the only kind we would have removed above).
            return Filtration.matchEverything();
        }
    } else if (newChildren.size() == 1) {
        return newChildren.get(0);
    } else {
        return disjunction ? new OrDimFilter(newChildren) : new AndDimFilter(newChildren);
    }
}
Also used : BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) HashMap(java.util.HashMap) FalseDimFilter(org.apache.druid.query.filter.FalseDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) ArrayList(java.util.ArrayList) List(java.util.List) ISE(org.apache.druid.java.util.common.ISE) AndDimFilter(org.apache.druid.query.filter.AndDimFilter) NotDimFilter(org.apache.druid.query.filter.NotDimFilter) FalseDimFilter(org.apache.druid.query.filter.FalseDimFilter) DimFilter(org.apache.druid.query.filter.DimFilter) BoundDimFilter(org.apache.druid.query.filter.BoundDimFilter) OrDimFilter(org.apache.druid.query.filter.OrDimFilter) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

OrDimFilter (org.apache.druid.query.filter.OrDimFilter)33 SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter)25 Test (org.junit.Test)23 AndDimFilter (org.apache.druid.query.filter.AndDimFilter)17 DimFilter (org.apache.druid.query.filter.DimFilter)15 BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter)14 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)12 ArrayList (java.util.ArrayList)9 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)8 List (java.util.List)6 InDimFilter (org.apache.druid.query.filter.InDimFilter)6 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)5 NotDimFilter (org.apache.druid.query.filter.NotDimFilter)5 TimeFormatExtractionFn (org.apache.druid.query.extraction.TimeFormatExtractionFn)4 Filter (org.apache.druid.query.filter.Filter)4 HashMap (java.util.HashMap)3 ISE (org.apache.druid.java.util.common.ISE)3 Result (org.apache.druid.query.Result)3 ExtractionDimensionSpec (org.apache.druid.query.dimension.ExtractionDimensionSpec)3 RegexDimFilter (org.apache.druid.query.filter.RegexDimFilter)3