Search in sources :

Example 11 with OrDimFilter

use of io.druid.query.filter.OrDimFilter in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByTimeExtractionWithNulls.

/**
 * Groups rows by "market" and by a "dayOfWeek" dimension derived from the time column, where the
 * extraction chain turns "Wednesday" into null, and checks the result against the expected rows for
 * the "spot" and "upfront" markets.
 */
@Test
public void testGroupByTimeExtractionWithNulls() {
    // Passes every value through unchanged except "Wednesday", which is replaced by null.
    final DimExtractionFn wednesdayToNull = new DimExtractionFn() {

        @Override
        public String apply(String dimValue) {
            return "Wednesday".equals(dimValue) ? null : dimValue;
        }

        @Override
        public byte[] getCacheKey() {
            throw new UnsupportedOperationException();
        }

        @Override
        public boolean preservesOrdering() {
            return false;
        }

        @Override
        public ExtractionType getExtractionType() {
            return ExtractionType.MANY_TO_ONE;
        }
    };

    // Format the timestamp with the "EEEE" pattern first, then null out Wednesdays.
    final ExtractionFn dayOfWeekFn = new CascadeExtractionFn(
        new ExtractionFn[] {
            new TimeFormatExtractionFn("EEEE", null, null, null, false),
            wednesdayToNull
        }
    );

    // Restrict the query to the "spot" and "upfront" markets.
    final DimFilter marketFilter = new OrDimFilter(
        Arrays.<DimFilter>asList(
            new SelectorDimFilter("market", "spot", null),
            new SelectorDimFilter("market", "upfront", null)
        )
    );

    final GroupByQuery groupByQuery = GroupByQuery
        .builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setQuerySegmentSpec(QueryRunnerTestHelper.fullOnInterval)
        .setDimensions(
            Lists.newArrayList(
                new DefaultDimensionSpec("market", "market"),
                new ExtractionDimensionSpec(Column.TIME_COLUMN_NAME, "dayOfWeek", dayOfWeekFn)
            )
        )
        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.indexDoubleSum))
        .setPostAggregatorSpecs(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
        .setGranularity(QueryRunnerTestHelper.allGran)
        .setDimFilter(marketFilter)
        .build();

    // One row per (market, dayOfWeek) pair; the null dayOfWeek rows are the former Wednesdays.
    final List<Row> expected = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", null, "market", "spot",
            "index", 14271.368591308594, "rows", 126L, "addRowsIndexConstant", 14398.368591308594
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Friday", "market", "spot",
            "index", 13219.574157714844, "rows", 117L, "addRowsIndexConstant", 13337.574157714844
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Monday", "market", "spot",
            "index", 13557.738830566406, "rows", 117L, "addRowsIndexConstant", 13675.738830566406
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Saturday", "market", "spot",
            "index", 13493.751281738281, "rows", 117L, "addRowsIndexConstant", 13611.751281738281
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Sunday", "market", "spot",
            "index", 13585.541015625, "rows", 117L, "addRowsIndexConstant", 13703.541015625
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Thursday", "market", "spot",
            "index", 14279.127197265625, "rows", 126L, "addRowsIndexConstant", 14406.127197265625
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Tuesday", "market", "spot",
            "index", 13199.471435546875, "rows", 117L, "addRowsIndexConstant", 13317.471435546875
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", null, "market", "upfront",
            "index", 28985.5751953125, "rows", 28L, "addRowsIndexConstant", 29014.5751953125
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Friday", "market", "upfront",
            "index", 27297.8623046875, "rows", 26L, "addRowsIndexConstant", 27324.8623046875
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Monday", "market", "upfront",
            "index", 27619.58447265625, "rows", 26L, "addRowsIndexConstant", 27646.58447265625
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Saturday", "market", "upfront",
            "index", 27820.83154296875, "rows", 26L, "addRowsIndexConstant", 27847.83154296875
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Sunday", "market", "upfront",
            "index", 24791.223876953125, "rows", 26L, "addRowsIndexConstant", 24818.223876953125
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Thursday", "market", "upfront",
            "index", 28562.748901367188, "rows", 28L, "addRowsIndexConstant", 28591.748901367188
        ),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01", "dayOfWeek", "Tuesday", "market", "upfront",
            "index", 26968.280639648438, "rows", 26L, "addRowsIndexConstant", 26995.280639648438
        )
    );

    final Iterable<Row> actual = GroupByQueryRunnerTestHelper.runQuery(factory, runner, groupByQuery);
    TestHelper.assertExpectedObjects(expected, actual, "");
}
Also used : TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ExpressionPostAggregator(io.druid.query.aggregation.post.ExpressionPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) CascadeExtractionFn(io.druid.query.extraction.CascadeExtractionFn) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) Row(io.druid.data.input.Row) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 12 with OrDimFilter

use of io.druid.query.filter.OrDimFilter in project druid by druid-io.

the class DruidSemiJoin method getLeftRelWithFilter.

/**
 * Returns a copy of the left rel with the filter applied from the right-hand side. This is an expensive operation
 * since it actually executes the right-hand side query.
 *
 * <p>Every distinct combination of right-hand key values becomes an AND of equality-style bound filters
 * (lower bound == upper bound, both inclusive); the combinations are then ORed together and, if the left
 * side already has a filter, ANDed with it. Null key values from the right side are treated as the empty
 * string.
 *
 * @return the left rel with the semi-join filter added, or null if the right-hand query produced no rows
 * @throws ResourceLimitExceededException if the right-hand side yields more than
 *                                        {@code maxSemiJoinRowsInMemory} distinct key combinations
 */
private DruidRel<?> getLeftRelWithFilter() {
    // Build list of acceptable values from right side.
    final Set<List<String>> valuess = Sets.newHashSet();
    final List<DimFilter> filters = Lists.newArrayList();
    right.runQuery().accumulate(null, new Accumulator<Object, Object[]>() {

        @Override
        public Object accumulate(final Object dummyValue, final Object[] row) {
            // Collect this row's join-key values, in rightKeys order.
            final List<String> values = Lists.newArrayListWithCapacity(rightKeys.size());
            for (int i : rightKeys) {
                final Object value = row[i];
                final String stringValue = value != null ? String.valueOf(value) : "";
                values.add(stringValue);
            }
            // Only the first occurrence of a key combination contributes a filter.
            if (valuess.add(values)) {
                // The limit bounds how many distinct rows are held in memory, so it must be checked against
                // the set of accumulated rows rather than the number of keys within a single row.
                if (valuess.size() > maxSemiJoinRowsInMemory) {
                    throw new ResourceLimitExceededException(String.format("maxSemiJoinRowsInMemory[%,d] exceeded", maxSemiJoinRowsInMemory));
                }
                final List<DimFilter> bounds = Lists.newArrayList();
                for (int i = 0; i < values.size(); i++) {
                    bounds.add(new BoundDimFilter(leftRowExtractions.get(i).getColumn(), values.get(i), values.get(i), false, false, null, leftRowExtractions.get(i).getExtractionFn(), getSourceRowSignature().naturalStringComparator(leftRowExtractions.get(i))));
                }
                filters.add(new AndDimFilter(bounds));
            }
            return null;
        }
    });
    // The de-duplication set is no longer needed once all filters are built.
    valuess.clear();
    if (!filters.isEmpty()) {
        // Add a filter to the left side. Use OR of singleton Bound filters so they can be simplified later.
        final DimFilter semiJoinFilter = new OrDimFilter(filters);
        final DimFilter newFilter = left.getQueryBuilder().getFilter() == null ? semiJoinFilter : new AndDimFilter(ImmutableList.of(semiJoinFilter, left.getQueryBuilder().getFilter()));
        return left.withQueryBuilder(left.getQueryBuilder().withFilter(newFilter));
    } else {
        return null;
    }
}
Also used : BoundDimFilter(io.druid.query.filter.BoundDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) ResourceLimitExceededException(io.druid.query.ResourceLimitExceededException) OrDimFilter(io.druid.query.filter.OrDimFilter) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) OrDimFilter(io.druid.query.filter.OrDimFilter) DimFilter(io.druid.query.filter.DimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) BoundDimFilter(io.druid.query.filter.BoundDimFilter)

Example 13 with OrDimFilter

use of io.druid.query.filter.OrDimFilter in project druid by druid-io.

the class BottomUpTransform method apply0.

/**
 * Applies the transform bottom-up: children of AND, OR and NOT filters are transformed first, the parent
 * is rebuilt only if its children changed, and the (possibly rebuilt) filter is then passed to
 * {@code checkedProcess}. Any other filter type goes straight to {@code checkedProcess}.
 *
 * @param filter the filter to transform
 * @return the result of {@code checkedProcess} on the transformed filter
 */
private DimFilter apply0(final DimFilter filter) {
    final boolean isAnd = filter instanceof AndDimFilter;
    if (isAnd || filter instanceof OrDimFilter) {
        final List<DimFilter> before;
        if (isAnd) {
            before = ((AndDimFilter) filter).getFields();
        } else {
            before = ((OrDimFilter) filter).getFields();
        }

        // Transform each child, dropping any child that transforms to null.
        final List<DimFilter> after = Lists.newArrayList();
        for (DimFilter child : before) {
            final DimFilter transformed = apply0(child);
            if (transformed != null) {
                after.add(transformed);
            }
        }

        // Reuse the original instance when no child changed.
        if (after.equals(before)) {
            return checkedProcess(filter);
        }
        final DimFilter rebuilt;
        if (isAnd) {
            rebuilt = new AndDimFilter(after);
        } else {
            rebuilt = new OrDimFilter(after);
        }
        return checkedProcess(rebuilt);
    }

    if (filter instanceof NotDimFilter) {
        final DimFilter before = ((NotDimFilter) filter).getField();
        final DimFilter after = apply0(before);
        if (before.equals(after)) {
            return checkedProcess(filter);
        }
        return checkedProcess(new NotDimFilter(after));
    }

    return checkedProcess(filter);
}
Also used : NotDimFilter(io.druid.query.filter.NotDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) List(java.util.List) NotDimFilter(io.druid.query.filter.NotDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) DimFilter(io.druid.query.filter.DimFilter)

Example 14 with OrDimFilter

use of io.druid.query.filter.OrDimFilter in project druid by druid-io.

the class ConvertSelectorsToIns method process.

/**
 * Rewrites an OR filter so that groups of two or more selector children sharing the same
 * {@code BoundRefKey} are replaced by a single IN filter. Non-OR filters, and OR filters with nothing
 * to merge, are returned unchanged.
 *
 * @param filter the filter to simplify
 * @return the original filter, a new OR filter, or the sole remaining child if only one is left
 */
@Override
public DimFilter process(DimFilter filter) {
    if (!(filter instanceof OrDimFilter)) {
        return filter;
    }

    final List<DimFilter> originalFields = ((OrDimFilter) filter).getFields();
    // Mutable working copy of the children.
    final List<DimFilter> remaining = Lists.newArrayList(originalFields);

    // Bucket the selector children by their BoundRefKey.
    final Map<BoundRefKey, List<SelectorDimFilter>> selectorsByKey = Maps.newHashMap();
    for (DimFilter candidate : remaining) {
        if (!(candidate instanceof SelectorDimFilter)) {
            continue;
        }
        final SelectorDimFilter selectorFilter = (SelectorDimFilter) candidate;
        final BoundRefKey key = BoundRefKey.from(selectorFilter, sourceRowSignature.naturalStringComparator(RowExtraction.of(selectorFilter.getDimension(), selectorFilter.getExtractionFn())));
        List<SelectorDimFilter> bucket = selectorsByKey.get(key);
        if (bucket == null) {
            bucket = Lists.newArrayList();
            selectorsByKey.put(key, bucket);
        }
        bucket.add(selectorFilter);
    }

    // Every bucket holding more than one selector collapses into a single IN filter.
    for (Map.Entry<BoundRefKey, List<SelectorDimFilter>> group : selectorsByKey.entrySet()) {
        final List<SelectorDimFilter> bucket = group.getValue();
        if (bucket.size() <= 1) {
            continue;
        }
        final List<String> inValues = Lists.newArrayList();
        for (final SelectorDimFilter selectorFilter : bucket) {
            inValues.add(selectorFilter.getValue());
            final boolean removed = remaining.remove(selectorFilter);
            if (!removed) {
                throw new ISE("WTF?! Tried to remove selector but couldn't?");
            }
        }
        final BoundRefKey key = group.getKey();
        remaining.add(new InDimFilter(key.getDimension(), inValues, key.getExtractionFn()));
    }

    if (remaining.equals(originalFields)) {
        // Nothing was merged; hand back the original instance.
        return filter;
    }
    return remaining.size() == 1 ? remaining.get(0) : new OrDimFilter(remaining);
}
Also used : SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) InDimFilter(io.druid.query.filter.InDimFilter) List(java.util.List) ISE(io.druid.java.util.common.ISE) InDimFilter(io.druid.query.filter.InDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) DimFilter(io.druid.query.filter.DimFilter) Map(java.util.Map)

Example 15 with OrDimFilter

use of io.druid.query.filter.OrDimFilter in project druid by druid-io.

the class MoveTimeFiltersToIntervals method extractConvertibleTimeBounds.

/**
 * Extract bound filters on __time that can be converted to query-level "intervals".
 *
 * <p>AND filters are split child by child, with the extracted ranges intersected. OR and NOT filters are
 * only replaced when every part of them converts completely; otherwise the original filter is kept. A
 * bound filter converts when its {@code BoundRefKey} equals {@code TIME_BOUND_REF_KEY}. Every other
 * filter is returned untouched with no range.
 *
 * @param filter the filter to inspect
 * @return pair of new dimFilter + RangeSet of __time that should be ANDed together. Either can be null but not both.
 */
private static Pair<DimFilter, RangeSet<Long>> extractConvertibleTimeBounds(final DimFilter filter) {
    if (filter instanceof AndDimFilter) {
        final List<DimFilter> keptChildren = Lists.newArrayList();
        final List<RangeSet<Long>> extractedRanges = Lists.newArrayList();
        for (DimFilter child : ((AndDimFilter) filter).getFields()) {
            final Pair<DimFilter, RangeSet<Long>> childResult = extractConvertibleTimeBounds(child);
            if (childResult.lhs != null) {
                keptChildren.add(childResult.lhs);
            }
            if (childResult.rhs != null) {
                extractedRanges.add(childResult.rhs);
            }
        }

        // Collapse the remaining children: none -> null, one -> that child, several -> a new AND.
        final DimFilter remainder;
        switch (keptChildren.size()) {
            case 0:
                remainder = null;
                break;
            case 1:
                remainder = keptChildren.get(0);
                break;
            default:
                remainder = new AndDimFilter(keptChildren);
                break;
        }

        final RangeSet<Long> intersection;
        if (extractedRanges.isEmpty()) {
            intersection = null;
        } else {
            intersection = RangeSets.intersectRangeSets(extractedRanges);
        }
        return Pair.of(remainder, intersection);
    }

    if (filter instanceof OrDimFilter) {
        final List<RangeSet<Long>> extractedRanges = Lists.newArrayList();
        boolean everyChildFullyConverted = true;
        boolean everyChildHadRange = true;
        for (DimFilter child : ((OrDimFilter) filter).getFields()) {
            final Pair<DimFilter, RangeSet<Long>> childResult = extractConvertibleTimeBounds(child);
            if (childResult.lhs != null) {
                everyChildFullyConverted = false;
            }
            if (childResult.rhs == null) {
                everyChildHadRange = false;
            } else {
                extractedRanges.add(childResult.rhs);
            }
        }

        if (everyChildFullyConverted) {
            // The whole OR is expressible as a union of time ranges.
            return Pair.of(null, RangeSets.unionRangeSets(extractedRanges));
        }
        // Keep the original filter; a union of ranges is returned alongside it only when every child had one.
        if (everyChildHadRange) {
            return Pair.of(filter, RangeSets.unionRangeSets(extractedRanges));
        }
        return Pair.of(filter, null);
    }

    if (filter instanceof NotDimFilter) {
        final Pair<DimFilter, RangeSet<Long>> childResult =
            extractConvertibleTimeBounds(((NotDimFilter) filter).getField());
        // Only a child that converted completely can be negated as a range.
        final boolean fullyConverted = childResult.rhs != null && childResult.lhs == null;
        if (fullyConverted) {
            return Pair.of(null, childResult.rhs.complement());
        }
        return Pair.of(filter, null);
    }

    if (filter instanceof BoundDimFilter) {
        final BoundDimFilter boundFilter = (BoundDimFilter) filter;
        if (BoundRefKey.from(boundFilter).equals(TIME_BOUND_REF_KEY)) {
            return Pair.of(null, RangeSets.of(toLongRange(Bounds.toRange(boundFilter))));
        }
        return Pair.of(filter, null);
    }

    return Pair.of(filter, null);
}
Also used : NotDimFilter(io.druid.query.filter.NotDimFilter) BoundDimFilter(io.druid.query.filter.BoundDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) RangeSet(com.google.common.collect.RangeSet) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) OrDimFilter(io.druid.query.filter.OrDimFilter) DimFilter(io.druid.query.filter.DimFilter) NotDimFilter(io.druid.query.filter.NotDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) BoundDimFilter(io.druid.query.filter.BoundDimFilter) Pair(io.druid.java.util.common.Pair)

Aggregations

OrDimFilter (io.druid.query.filter.OrDimFilter): 23; DimFilter (io.druid.query.filter.DimFilter): 18; AndDimFilter (io.druid.query.filter.AndDimFilter): 17; SelectorDimFilter (io.druid.query.filter.SelectorDimFilter): 15; BoundDimFilter (io.druid.query.filter.BoundDimFilter): 13; Test (org.junit.Test): 12; List (java.util.List): 8; DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 7; ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 6; InDimFilter (io.druid.query.filter.InDimFilter): 6; NotDimFilter (io.druid.query.filter.NotDimFilter): 6; ArrayList (java.util.ArrayList): 6; Row (io.druid.data.input.Row): 5; JavaScriptDimFilter (io.druid.query.filter.JavaScriptDimFilter): 5; RegexDimFilter (io.druid.query.filter.RegexDimFilter): 5; SearchQueryDimFilter (io.druid.query.filter.SearchQueryDimFilter): 5; TimeFormatExtractionFn (io.druid.query.extraction.TimeFormatExtractionFn): 4; LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 3; DimensionSpec (io.druid.query.dimension.DimensionSpec): 3; ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec): 3