Search in sources :

Example 21 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class Expressions method toLeafFilter.

/**
   * Translates "condition" to a Druid filter, assuming it does not contain any boolean expressions. Returns null
   * if we cannot translate the condition.
   *
   * Handled shapes are: always-true / always-false expressions, LIKE, and the binary comparisons
   * {@code =, <>, >, >=, <, <=} whose right-hand side (after an optional operand swap) is a literal and whose
   * left-hand side can be turned into a filterable RowExtraction. Comparisons against a granularity floor of the
   * time column are rewritten into plain numeric bounds on the time column itself.
   *
   * @param operatorTable  operator table, passed through to toRowExtraction
   * @param plannerContext planner context
   * @param rowSignature   row signature of the dataSource to be filtered
   * @param expression     Calcite row expression
   *
   * @return the translated filter, or null if the expression cannot be translated
   */
private static DimFilter toLeafFilter(final DruidOperatorTable operatorTable, final PlannerContext plannerContext, final RowSignature rowSignature, final RexNode expression) {
    if (expression.isAlwaysTrue()) {
        return Filtration.matchEverything();
    }
    if (expression.isAlwaysFalse()) {
        return Filtration.matchNothing();
    }

    final SqlKind kind = expression.getKind();

    if (kind == SqlKind.LIKE) {
        final List<RexNode> likeOperands = ((RexCall) expression).getOperands();
        final RowExtraction likeRex = toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), likeOperands.get(0));
        if (likeRex == null || !likeRex.isFilterable(rowSignature)) {
            return null;
        }
        final String pattern = RexLiteral.stringValue(likeOperands.get(1));
        // The third operand is optional; null is passed along when it is absent.
        final String escape = likeOperands.size() > 2 ? RexLiteral.stringValue(likeOperands.get(2)) : null;
        return new LikeDimFilter(likeRex.getColumn(), pattern, escape, likeRex.getExtractionFn());
    }

    final boolean isComparison = kind == SqlKind.EQUALS
        || kind == SqlKind.NOT_EQUALS
        || kind == SqlKind.GREATER_THAN
        || kind == SqlKind.GREATER_THAN_OR_EQUAL
        || kind == SqlKind.LESS_THAN
        || kind == SqlKind.LESS_THAN_OR_EQUAL;
    if (!isComparison) {
        return null;
    }

    final List<RexNode> operands = ((RexCall) expression).getOperands();
    // Preconditions templates only understand %s; other specifiers are left in the message verbatim.
    Preconditions.checkState(operands.size() == 2, "WTF?! Expected 2 operands, got[%s]", operands.size());

    // Normalize "literal OP expr" into "expr OP' literal" so that everything below can assume the literal is on the right.
    final boolean flip = operands.get(0).getKind() == SqlKind.LITERAL && operands.get(1).getKind() != SqlKind.LITERAL;
    final RexNode lhs = flip ? operands.get(1) : operands.get(0);
    final RexNode rhs = flip ? operands.get(0) : operands.get(1);

    // Swapping the operands mirrors the inequality; EQUALS and NOT_EQUALS are symmetric and stay as they are.
    final SqlKind op;
    if (!flip) {
        op = kind;
    } else {
        switch (kind) {
            case GREATER_THAN:
                op = SqlKind.LESS_THAN;
                break;
            case GREATER_THAN_OR_EQUAL:
                op = SqlKind.LESS_THAN_OR_EQUAL;
                break;
            case LESS_THAN:
                op = SqlKind.GREATER_THAN;
                break;
            case LESS_THAN_OR_EQUAL:
                op = SqlKind.GREATER_THAN_OR_EQUAL;
                break;
            default:
                op = kind;
                break;
        }
    }

    // rhs must be a literal
    if (rhs.getKind() != SqlKind.LITERAL) {
        return null;
    }

    // lhs must be translatable to a RowExtraction to be filterable
    final RowExtraction rex = toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), lhs);
    if (rex == null || !rex.isFilterable(rowSignature)) {
        return null;
    }

    final String column = rex.getColumn();
    final ExtractionFn extractionFn = rex.getExtractionFn();

    if (column.equals(Column.TIME_COLUMN_NAME) && extractionFn instanceof TimeFormatExtractionFn) {
        // Check if we can strip the extractionFn and convert the filter to a direct filter on __time.
        // This allows potential conversion to query-level "intervals" later on, which is ideal for Druid queries.
        final Granularity granularity = ExtractionFns.toQueryGranularity(extractionFn);
        if (granularity != null) {
            // lhs is FLOOR(__time TO granularity); rhs must be a timestamp
            final long rhsMillis = toMillisLiteral(rhs, plannerContext.getTimeZone());
            final Interval rhsInterval = granularity.bucket(new DateTime(rhsMillis));
            // Is rhs aligned on granularity boundaries?
            final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
            // This key strips the extractionFn and compares __time as a number.
            final BoundRefKey timeRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
            final String bucketStart = String.valueOf(rhsInterval.getStartMillis());
            final String bucketEnd = String.valueOf(rhsInterval.getEndMillis());

            switch (op) {
                case EQUALS:
                    // A floored value can only equal rhs when rhs itself sits on a bucket boundary.
                    return rhsAligned ? Bounds.interval(timeRefKey, rhsInterval) : Filtration.matchNothing();
                case NOT_EQUALS:
                    return rhsAligned ? new NotDimFilter(Bounds.interval(timeRefKey, rhsInterval)) : Filtration.matchEverything();
                case GREATER_THAN:
                    // floor(t) > rhs holds exactly for t in buckets after the one containing rhs.
                    return Bounds.greaterThanOrEqualTo(timeRefKey, bucketEnd);
                case GREATER_THAN_OR_EQUAL:
                    return Bounds.greaterThanOrEqualTo(timeRefKey, rhsAligned ? bucketStart : bucketEnd);
                case LESS_THAN:
                    return Bounds.lessThan(timeRefKey, rhsAligned ? bucketStart : bucketEnd);
                case LESS_THAN_OR_EQUAL:
                    // floor(t) <= rhs holds exactly for t up to the end of the bucket containing rhs.
                    return Bounds.lessThan(timeRefKey, bucketEnd);
                default:
                    throw new IllegalStateException("WTF?! Shouldn't have got here...");
            }
        }
    }

    // General case: render the literal as a string and compare through a bound filter.
    final RexLiteral rhsLiteral = (RexLiteral) rhs;
    final SqlTypeName rhsType = rhsLiteral.getTypeName();
    final String val;
    if (SqlTypeName.NUMERIC_TYPES.contains(rhsType)) {
        val = String.valueOf(RexLiteral.value(rhsLiteral));
    } else if (SqlTypeName.CHAR_TYPES.contains(rhsType)) {
        val = String.valueOf(RexLiteral.stringValue(rhsLiteral));
    } else if (SqlTypeName.TIMESTAMP == rhsType || SqlTypeName.DATE == rhsType) {
        val = String.valueOf(toMillisLiteral(rhsLiteral, plannerContext.getTimeZone()));
    } else {
        // Don't know how to filter on this kind of literal.
        return null;
    }

    // Numeric lhs needs a numeric comparison.
    final SqlTypeName lhsType = lhs.getType().getSqlTypeName();
    final boolean lhsIsNumeric = SqlTypeName.NUMERIC_TYPES.contains(lhsType)
        || SqlTypeName.TIMESTAMP == lhsType
        || SqlTypeName.DATE == lhsType;
    final StringComparator comparator = lhsIsNumeric ? StringComparators.NUMERIC : StringComparators.LEXICOGRAPHIC;
    final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator);

    // Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator).
    switch (op) {
        case EQUALS:
            return Bounds.equalTo(boundRefKey, val);
        case NOT_EQUALS:
            return new NotDimFilter(Bounds.equalTo(boundRefKey, val));
        case GREATER_THAN:
            return Bounds.greaterThan(boundRefKey, val);
        case GREATER_THAN_OR_EQUAL:
            return Bounds.greaterThanOrEqualTo(boundRefKey, val);
        case LESS_THAN:
            return Bounds.lessThan(boundRefKey, val);
        case LESS_THAN_OR_EQUAL:
            return Bounds.lessThanOrEqualTo(boundRefKey, val);
        default:
            throw new IllegalStateException("WTF?! Shouldn't have got here...");
    }
}
Also used : TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) RexLiteral(org.apache.calcite.rex.RexLiteral) NotDimFilter(io.druid.query.filter.NotDimFilter) SqlKind(org.apache.calcite.sql.SqlKind) Granularity(io.druid.java.util.common.granularity.Granularity) StringComparator(io.druid.query.ordering.StringComparator) DateTime(org.joda.time.DateTime) RexCall(org.apache.calcite.rex.RexCall) ExtractionFn(io.druid.query.extraction.ExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) LikeDimFilter(io.druid.query.filter.LikeDimFilter) List(java.util.List) BoundRefKey(io.druid.sql.calcite.filtration.BoundRefKey) LikeDimFilter(io.druid.query.filter.LikeDimFilter) OrDimFilter(io.druid.query.filter.OrDimFilter) DimFilter(io.druid.query.filter.DimFilter) NotDimFilter(io.druid.query.filter.NotDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) RexNode(org.apache.calcite.rex.RexNode) Interval(org.joda.time.Interval)

Example 22 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class ExtractExtractionOperator method convert.

/**
 * Converts an {@code EXTRACT(timeUnit FROM expr)} call into a RowExtraction that formats the
 * underlying value with the date-time format matching the requested time unit.
 *
 * Returns null when the inner expression cannot be converted, when the time unit has no
 * date-time format, or when the operator is named "EXTRACT_DATE".
 */
@Override
public RowExtraction convert(final DruidOperatorTable operatorTable, final PlannerContext plannerContext, final List<String> rowOrder, final RexNode expression) {
    // EXTRACT(timeUnit FROM expr)
    final RexCall extractCall = (RexCall) expression;
    final RexLiteral unitLiteral = (RexLiteral) extractCall.getOperands().get(0);
    final TimeUnitRange unit = (TimeUnitRange) unitLiteral.getValue();
    final RexNode source = extractCall.getOperands().get(1);

    final RowExtraction sourceExtraction = Expressions.toRowExtraction(operatorTable, plannerContext, rowOrder, source);
    if (sourceExtraction == null) {
        return null;
    }

    final String format = TimeUnits.toDateTimeFormat(unit);
    if (format == null) {
        return null;
    }

    if (extractCall.getOperator().getName().equals("EXTRACT_DATE")) {
        // Expr will be in number of days since the epoch. Can't translate.
        return null;
    }

    // Expr will be in millis since the epoch
    final ExtractionFn innerFn = sourceExtraction.getExtractionFn();

    // Non-null only when the inner function is a time-format function that maps to a query granularity.
    Granularity floorGranularity = null;
    if (innerFn instanceof TimeFormatExtractionFn) {
        final TimeFormatExtractionFn innerTimeFormatFn = (TimeFormatExtractionFn) innerFn;
        floorGranularity = ExtractionFns.toQueryGranularity(innerTimeFormatFn);
    }

    if (floorGranularity != null) {
        // Combine EXTRACT(X FROM FLOOR(Y TO Z)) into a single extractionFn.
        final TimeFormatExtractionFn combinedFn = new TimeFormatExtractionFn(format, plannerContext.getTimeZone(), null, floorGranularity, true);
        return RowExtraction.of(sourceExtraction.getColumn(), combinedFn);
    }

    // Otherwise apply the formatting function on top of whatever the inner expression already does.
    final TimeFormatExtractionFn formatFn = new TimeFormatExtractionFn(format, plannerContext.getTimeZone(), null, null, true);
    return RowExtraction.of(sourceExtraction.getColumn(), ExtractionFns.compose(formatFn, innerFn));
}
Also used : RexCall(org.apache.calcite.rex.RexCall) RexLiteral(org.apache.calcite.rex.RexLiteral) ExtractionFn(io.druid.query.extraction.ExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) TimeUnitRange(org.apache.calcite.avatica.util.TimeUnitRange) Granularity(io.druid.java.util.common.granularity.Granularity) RexNode(org.apache.calcite.rex.RexNode)

Example 23 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class RowBasedColumnSelectorFactory method makeDimensionSelectorUndecorated.

/**
 * Builds a DimensionSelector over the current row for the given dimension spec.
 *
 * The time column is served from the row timestamp and requires an extraction function; any
 * other dimension is served from the row's value list for that dimension, with empty strings
 * treated as null and the extraction function (if any) applied to each value.
 */
private DimensionSelector makeDimensionSelectorUndecorated(DimensionSpec dimensionSpec) {
    final String dimension = dimensionSpec.getDimension();
    final ExtractionFn extractionFn = dimensionSpec.getExtractionFn();

    if (!Column.TIME_COLUMN_NAME.equals(dimensionSpec.getDimension())) {
        // Regular dimension: values come from the row's list for this dimension.
        return new DimensionSelector() {

            @Override
            public IndexedInts getRow() {
                final List<String> values = row.get().getDimension(dimension);
                final int count = (values == null) ? 0 : values.size();
                return RangeIndexedInts.create(count);
            }

            @Override
            public ValueMatcher makeValueMatcher(final String value) {
                // The matcher is specialised up front on whether an extraction function is present.
                if (extractionFn != null) {
                    return new ValueMatcher() {

                        @Override
                        public boolean matches() {
                            final List<String> values = row.get().getDimension(dimension);
                            if (values == null || values.isEmpty()) {
                                // A missing dimension only matches a null target.
                                return value == null;
                            }
                            for (String raw : values) {
                                final String extracted = extractionFn.apply(Strings.emptyToNull(raw));
                                if (Objects.equals(extracted, value)) {
                                    return true;
                                }
                            }
                            return false;
                        }
                    };
                }
                return new ValueMatcher() {

                    @Override
                    public boolean matches() {
                        final List<String> values = row.get().getDimension(dimension);
                        if (values == null || values.isEmpty()) {
                            // A missing dimension only matches a null target.
                            return value == null;
                        }
                        for (String raw : values) {
                            final String normalized = Strings.emptyToNull(raw);
                            if (Objects.equals(normalized, value)) {
                                return true;
                            }
                        }
                        return false;
                    }
                };
            }

            @Override
            public ValueMatcher makeValueMatcher(final Predicate<String> predicate) {
                // Evaluated once here and reused whenever the row has no values for the dimension.
                final boolean matchNull = predicate.apply(null);
                if (extractionFn != null) {
                    return new ValueMatcher() {

                        @Override
                        public boolean matches() {
                            final List<String> values = row.get().getDimension(dimension);
                            if (values == null || values.isEmpty()) {
                                return matchNull;
                            }
                            for (String raw : values) {
                                final String extracted = extractionFn.apply(Strings.emptyToNull(raw));
                                if (predicate.apply(extracted)) {
                                    return true;
                                }
                            }
                            return false;
                        }
                    };
                }
                return new ValueMatcher() {

                    @Override
                    public boolean matches() {
                        final List<String> values = row.get().getDimension(dimension);
                        if (values == null || values.isEmpty()) {
                            return matchNull;
                        }
                        for (String raw : values) {
                            final String normalized = Strings.emptyToNull(raw);
                            if (predicate.apply(normalized)) {
                                return true;
                            }
                        }
                        return false;
                    }
                };
            }

            @Override
            public int getValueCardinality() {
                return DimensionSelector.CARDINALITY_UNKNOWN;
            }

            @Override
            public String lookupName(int id) {
                // Ids index into the current row's value list for this dimension.
                final String normalized = Strings.emptyToNull(row.get().getDimension(dimension).get(id));
                if (extractionFn == null) {
                    return normalized;
                }
                return extractionFn.apply(normalized);
            }

            @Override
            public boolean nameLookupPossibleInAdvance() {
                return false;
            }

            @Nullable
            @Override
            public IdLookup idLookup() {
                return null;
            }

            @Override
            public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                inspector.visit("row", row);
                inspector.visit("extractionFn", extractionFn);
            }
        };
    }

    // Time column: values are derived from the row timestamp through the extraction function.
    if (extractionFn == null) {
        throw new UnsupportedOperationException("time dimension must provide an extraction function");
    }
    return new DimensionSelector() {

        @Override
        public IndexedInts getRow() {
            return ZeroIndexedInts.instance();
        }

        @Override
        public ValueMatcher makeValueMatcher(final String value) {
            return new ValueMatcher() {

                @Override
                public boolean matches() {
                    final String extracted = extractionFn.apply(row.get().getTimestampFromEpoch());
                    return Objects.equals(extracted, value);
                }
            };
        }

        @Override
        public ValueMatcher makeValueMatcher(final Predicate<String> predicate) {
            return new ValueMatcher() {

                @Override
                public boolean matches() {
                    final String extracted = extractionFn.apply(row.get().getTimestampFromEpoch());
                    return predicate.apply(extracted);
                }
            };
        }

        @Override
        public int getValueCardinality() {
            return DimensionSelector.CARDINALITY_UNKNOWN;
        }

        @Override
        public String lookupName(int id) {
            // The id is ignored; the name is always derived from the current row's timestamp.
            return extractionFn.apply(row.get().getTimestampFromEpoch());
        }

        @Override
        public boolean nameLookupPossibleInAdvance() {
            return false;
        }

        @Nullable
        @Override
        public IdLookup idLookup() {
            return null;
        }

        @Override
        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
            inspector.visit("row", row);
            inspector.visit("extractionFn", extractionFn);
        }
    };
}
Also used : ExtractionFn(io.druid.query.extraction.ExtractionFn) DimensionSelector(io.druid.segment.DimensionSelector) ValueMatcher(io.druid.query.filter.ValueMatcher) RuntimeShapeInspector(io.druid.query.monomorphicprocessing.RuntimeShapeInspector) Predicate(com.google.common.base.Predicate)

Example 24 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class BoundFilterTest method testMatchWithExtractionFn.

/**
 * Exercises BoundDimFilter combined with JavaScript extraction functions: one that prefixes
 * every value with "super-" and one that maps every value to null.
 */
@Test
public void testMatchWithExtractionFn() {
    final String prefixScript = "function(str) { return 'super-' + str; }";
    final ExtractionFn prefixFn = new JavaScriptExtractionFn(prefixScript, false, JavaScriptConfig.getEnabledInstance());
    final String nullScript = "function(str) { return null; }";
    final ExtractionFn alwaysNullFn = new JavaScriptExtractionFn(nullScript, false, JavaScriptConfig.getEnabledInstance());

    // The ids "0" through "7", which several of the assertions below expect in full.
    final ImmutableList<String> zeroThroughSeven = ImmutableList.of("0", "1", "2", "3", "4", "5", "6", "7");

    // Extraction function that nulls out every value.
    assertFilterMatches(
        new BoundDimFilter("dim0", "", "", false, false, false, alwaysNullFn, StringComparators.LEXICOGRAPHIC),
        zeroThroughSeven
    );

    // Prefixing extraction function with lexicographic and alphanumeric bounds.
    assertFilterMatches(
        new BoundDimFilter("dim1", "super-ab", "super-abd", true, true, false, prefixFn, StringComparators.LEXICOGRAPHIC),
        ImmutableList.of("5")
    );
    assertFilterMatches(
        new BoundDimFilter("dim1", "super-0", "super-10", false, false, true, prefixFn, StringComparators.ALPHANUMERIC),
        ImmutableList.of("1", "2", "3")
    );
    assertFilterMatches(
        new BoundDimFilter("dim2", "super-", "super-zzzzzz", false, false, false, prefixFn, StringComparators.LEXICOGRAPHIC),
        zeroThroughSeven
    );

    // Bounds pinned to "super-null", the string the script produces for a null input.
    assertFilterMatches(
        new BoundDimFilter("dim2", "super-null", "super-null", false, false, false, prefixFn, StringComparators.LEXICOGRAPHIC),
        ImmutableList.of("1", "2", "5")
    );
    assertFilterMatches(
        new BoundDimFilter("dim3", "super-null", "super-null", false, false, false, prefixFn, StringComparators.LEXICOGRAPHIC),
        zeroThroughSeven
    );
    assertFilterMatches(
        new BoundDimFilter("dim4", "super-null", "super-null", false, false, false, prefixFn, StringComparators.LEXICOGRAPHIC),
        zeroThroughSeven
    );

    // Same "super-null" bounds under the numeric comparator.
    assertFilterMatches(
        new BoundDimFilter("dim2", "super-null", "super-null", false, false, false, prefixFn, StringComparators.NUMERIC),
        ImmutableList.of("1", "2", "5")
    );
    assertFilterMatches(
        new BoundDimFilter("dim4", "super-null", "super-null", false, false, false, prefixFn, StringComparators.NUMERIC),
        zeroThroughSeven
    );
}
Also used : JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) BoundDimFilter(io.druid.query.filter.BoundDimFilter) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) Test(org.junit.Test)

Example 25 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class TimeFilteringTest method testIntervalFilter.

/**
 * Exercises IntervalDimFilter on the time column with a single interval, several disjoint
 * intervals, overlapping intervals, and an extraction function that shifts the timestamp.
 */
@Test
public void testIntervalFilter() {
    // One interval.
    final List<Interval> single = Arrays.asList(
        Interval.parse("1970-01-01T00:00:00.001Z/1970-01-01T00:00:00.005Z")
    );
    assertFilterMatches(
        new IntervalDimFilter(Column.TIME_COLUMN_NAME, single, null),
        ImmutableList.<String>of("1", "2", "3", "4")
    );

    // Two intervals with a gap between them.
    final List<Interval> disjoint = Arrays.asList(
        Interval.parse("1970-01-01T00:00:00.000Z/1970-01-01T00:00:00.003Z"),
        Interval.parse("1970-01-01T00:00:00.004Z/1970-01-01T00:00:00.006Z")
    );
    assertFilterMatches(
        new IntervalDimFilter(Column.TIME_COLUMN_NAME, disjoint, null),
        ImmutableList.<String>of("0", "1", "2", "4", "5")
    );

    // Three intervals, the last two overlapping, listed out of order.
    final List<Interval> overlapping = Arrays.asList(
        Interval.parse("1970-01-01T00:00:00.000Z/1970-01-01T00:00:00.001Z"),
        Interval.parse("1970-01-01T00:00:00.003Z/1970-01-01T00:00:00.006Z"),
        Interval.parse("1970-01-01T00:00:00.002Z/1970-01-01T00:00:00.005Z")
    );
    assertFilterMatches(
        new IntervalDimFilter(Column.TIME_COLUMN_NAME, overlapping, null),
        ImmutableList.<String>of("0", "2", "3", "4", "5")
    );

    // increment timestamp by 2 hours
    final String shiftScript = "function(x) { return(x + 7200000) }";
    final ExtractionFn shiftFn = new JavaScriptExtractionFn(shiftScript, true, JavaScriptConfig.getEnabledInstance());
    final List<Interval> shifted = Arrays.asList(
        Interval.parse("1970-01-01T02:00:00.001Z/1970-01-01T02:00:00.005Z")
    );
    assertFilterMatches(
        new IntervalDimFilter(Column.TIME_COLUMN_NAME, shifted, shiftFn),
        ImmutableList.<String>of("1", "2", "3", "4")
    );
}
Also used : LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) IntervalDimFilter(io.druid.query.filter.IntervalDimFilter) Test(org.junit.Test)

Aggregations

ExtractionFn (io.druid.query.extraction.ExtractionFn)40 Test (org.junit.Test)32 JavaScriptExtractionFn (io.druid.query.extraction.JavaScriptExtractionFn)29 TimeFormatExtractionFn (io.druid.query.extraction.TimeFormatExtractionFn)24 LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn)24 RegexDimExtractionFn (io.druid.query.extraction.RegexDimExtractionFn)21 ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec)19 DimExtractionFn (io.druid.query.extraction.DimExtractionFn)17 StrlenExtractionFn (io.druid.query.extraction.StrlenExtractionFn)17 DateTime (org.joda.time.DateTime)12 Result (io.druid.query.Result)10 Row (io.druid.data.input.Row)9 DimensionSpec (io.druid.query.dimension.DimensionSpec)9 CascadeExtractionFn (io.druid.query.extraction.CascadeExtractionFn)9 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)8 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)8 SelectorDimFilter (io.druid.query.filter.SelectorDimFilter)8 ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec)5 DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory)4 DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory)4