use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
the class TopNQueryRunnerTest method testFullOnTopNStringOutputAsLong.
/**
 * Runs a topN over the full interval on the quality dimension, passed through {@link StrlenExtractionFn} with a
 * LONG output type and exposed as "alias". Results are ordered by the "maxIndex" metric with a threshold of 4 and
 * compared against four hard-coded rows.
 */
@Test
public void testFullOnTopNStringOutputAsLong() {
  final ExtractionFn lengthFn = StrlenExtractionFn.instance();
  final ExtractionDimensionSpec lengthDimension = new ExtractionDimensionSpec(
      QueryRunnerTestHelper.QUALITY_DIMENSION,
      "alias",
      ColumnType.LONG,
      lengthFn
  );

  final TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension(lengthDimension)
      .metric("maxIndex")
      .threshold(4)
      .intervals(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .aggregators(
          Lists.newArrayList(
              Iterables.concat(
                  commonAggregators,
                  Lists.newArrayList(
                      new DoubleMaxAggregatorFactory("maxIndex", "index"),
                      new DoubleMinAggregatorFactory("minIndex", "index")
                  )
              )
          )
      )
      .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT)
      .build();

  // Expected rows, listed in the order the query is expected to return them.
  final Map<String, Object> aliasNineRow = ImmutableMap.<String, Object>builder()
      .put("alias", 9L)
      .put(QueryRunnerTestHelper.INDEX_METRIC, 217725.41940800005D)
      .put("rows", 279L)
      .put("addRowsIndexConstant", 218005.41940800005D)
      .put("uniques", QueryRunnerTestHelper.UNIQUES_1)
      .put("maxIndex", 1870.061029D)
      .put("minIndex", 91.270553D)
      .build();
  final Map<String, Object> aliasSevenRow = ImmutableMap.<String, Object>builder()
      .put("alias", 7L)
      .put(QueryRunnerTestHelper.INDEX_METRIC, 210865.67977600006D)
      .put("rows", 279L)
      .put("addRowsIndexConstant", 211145.67977600006D)
      .put("uniques", QueryRunnerTestHelper.UNIQUES_1)
      .put("maxIndex", 1862.737933D)
      .put("minIndex", 99.284525D)
      .build();
  final Map<String, Object> aliasTenRow = ImmutableMap.<String, Object>builder()
      .put("alias", 10L)
      .put(QueryRunnerTestHelper.INDEX_METRIC, 20479.497562408447D)
      .put("rows", 186L)
      .put("addRowsIndexConstant", 20666.497562408447D)
      .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
      .put("maxIndex", 277.273533D)
      .put("minIndex", 59.02102279663086D)
      .build();
  final Map<String, Object> aliasThirteenRow = ImmutableMap.<String, Object>builder()
      .put("alias", 13L)
      .put(QueryRunnerTestHelper.INDEX_METRIC, 12086.472791D)
      .put("rows", 93L)
      .put("addRowsIndexConstant", 12180.472791D)
      .put("uniques", QueryRunnerTestHelper.UNIQUES_1)
      .put("maxIndex", 193.787574D)
      .put("minIndex", 84.710523D)
      .build();

  final TopNResultValue expectedValue = new TopNResultValue(
      Arrays.<Map<String, Object>>asList(aliasNineRow, aliasSevenRow, aliasTenRow, aliasThirteenRow)
  );
  final List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
      new Result<>(DateTimes.of("2011-01-12T00:00:00.000Z"), expectedValue)
  );

  assertExpectedResults(expectedResults, query);
}
use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
the class TopNQueryRunnerTest method testTopNWithNullProducingDimExtractionFn.
/**
 * Runs a topN on the market dimension through an extraction function that maps "total_market" to null and leaves
 * every other value unchanged, then checks that the result contains a row keyed by null between the "spot" and
 * "upfront" rows.
 */
@Test
public void testTopNWithNullProducingDimExtractionFn() {
  final ExtractionFn nullStringDimExtraction = new DimExtractionFn() {
    @Override
    public byte[] getCacheKey() {
      return new byte[] { (byte) 0xFF };
    }

    @Override
    public String apply(String dimValue) {
      // Only "total_market" is turned into null; everything else passes through.
      if ("total_market".equals(dimValue)) {
        return null;
      }
      return dimValue;
    }

    @Override
    public boolean preservesOrdering() {
      return false;
    }

    @Override
    public ExtractionType getExtractionType() {
      return ExtractionType.MANY_TO_ONE;
    }
  };

  final ExtractionDimensionSpec marketWithNulls = new ExtractionDimensionSpec(
      QueryRunnerTestHelper.MARKET_DIMENSION,
      QueryRunnerTestHelper.MARKET_DIMENSION,
      nullStringDimExtraction
  );

  final TopNQuery query = new TopNQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .metric("rows")
      .threshold(4)
      .intervals(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .aggregators(commonAggregators)
      .postAggregators(QueryRunnerTestHelper.ADD_ROWS_INDEX_CONSTANT)
      .dimension(marketWithNulls)
      .build();

  // The null-keyed row is built with orderedMap(...) rather than ImmutableMap.of(...).
  final TopNResultValue expectedValue = new TopNResultValue(
      Arrays.asList(
          ImmutableMap.of(
              QueryRunnerTestHelper.MARKET_DIMENSION, "spot",
              "rows", 18L,
              "index", 2231.876812D,
              "addRowsIndexConstant", 2250.876812D,
              "uniques", QueryRunnerTestHelper.UNIQUES_9
          ),
          QueryRunnerTestHelper.orderedMap(
              QueryRunnerTestHelper.MARKET_DIMENSION, null,
              "rows", 4L,
              "index", 5351.814783D,
              "addRowsIndexConstant", 5356.814783D,
              "uniques", QueryRunnerTestHelper.UNIQUES_2
          ),
          ImmutableMap.of(
              QueryRunnerTestHelper.MARKET_DIMENSION, "upfront",
              "rows", 4L,
              "index", 4875.669692D,
              "addRowsIndexConstant", 4880.669692D,
              "uniques", QueryRunnerTestHelper.UNIQUES_2
          )
      )
  );
  final List<Result<TopNResultValue>> expectedResults = Collections.singletonList(
      new Result<>(DateTimes.of("2011-04-01T00:00:00.000Z"), expectedValue)
  );

  assertExpectedResults(expectedResults, query);
}
use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
the class FilteredAggregatorTest method testAggregateWithExtractionFns.
/**
 * Validates a filtered double-sum aggregator against several filter types (selector, in, bound, regex, search,
 * JavaScript), each of which is given the same JavaScript extraction function that appends "AARDVARK" to the
 * dimension value.
 */
@Test
public void testAggregateWithExtractionFns() {
  final float[] values = { 0.15f, 0.27f };

  final String extractionJsFn = "function(str) { return str + 'AARDVARK'; }";
  final ExtractionFn extractionFn = new JavaScriptExtractionFn(
      extractionJsFn,
      false,
      JavaScriptConfig.getEnabledInstance()
  );

  // Selector filter.
  final FilteredAggregatorFactory selectorFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new SelectorDimFilter("dim", "aAARDVARK", extractionFn)
  );
  validateFilteredAggs(selectorFiltered, values, new TestFloatColumnSelector(values));

  // In filter.
  final FilteredAggregatorFactory inFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new InDimFilter("dim", Arrays.asList("NOT-aAARDVARK", "FOOBAR", "aAARDVARK"), extractionFn)
  );
  validateFilteredAggs(inFiltered, values, new TestFloatColumnSelector(values));

  // Bound filter with identical lower and upper bounds.
  final FilteredAggregatorFactory boundFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new BoundDimFilter(
          "dim",
          "aAARDVARK",
          "aAARDVARK",
          false,
          false,
          true,
          extractionFn,
          StringComparators.ALPHANUMERIC
      )
  );
  validateFilteredAggs(boundFiltered, values, new TestFloatColumnSelector(values));

  // Regex filter.
  final FilteredAggregatorFactory regexFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new RegexDimFilter("dim", "aAARDVARK", extractionFn)
  );
  validateFilteredAggs(regexFiltered, values, new TestFloatColumnSelector(values));

  // Search-query filter.
  final FilteredAggregatorFactory searchFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new SearchQueryDimFilter("dim", new ContainsSearchQuerySpec("aAARDVARK", true), extractionFn)
  );
  validateFilteredAggs(searchFiltered, values, new TestFloatColumnSelector(values));

  // JavaScript filter.
  final String jsFn = "function(x) { return(x === 'aAARDVARK') }";
  final FilteredAggregatorFactory javaScriptFiltered = new FilteredAggregatorFactory(
      new DoubleSumAggregatorFactory("billy", "value"),
      new JavaScriptDimFilter("dim", jsFn, extractionFn, JavaScriptConfig.getEnabledInstance())
  );
  validateFilteredAggs(javaScriptFiltered, values, new TestFloatColumnSelector(values));
}
use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
the class DimensionSelectorHavingSpecTest method testEquals.
/**
 * Checks equality of {@link DimensionSelectorHavingSpec} instances: specs built from the same dimension, value and
 * an equivalent extraction function compare equal, while a difference in dimension, value or extraction function
 * makes them unequal.
 */
@Test
public void testEquals() {
  final ExtractionFn beforeComma = new RegexDimExtractionFn("^([^,]*),", false, "");
  final ExtractionFn afterComma = new RegexDimExtractionFn(",(.*)", false, "");
  final ExtractionFn beforeCommaAgain = new RegexDimExtractionFn("^([^,]*),", false, "");

  // Same dimension and value, extraction functions built from the same arguments.
  final HavingSpec withFn = new DimensionSelectorHavingSpec("dim", "v", beforeComma);
  final HavingSpec withEquivalentFn = new DimensionSelectorHavingSpec("dim", "v", beforeCommaAgain);
  Assert.assertEquals(withFn, withEquivalentFn);

  // Different dimensions, same value.
  final HavingSpec dim1WithValue = new DimensionSelectorHavingSpec("dim1", "v", null);
  final HavingSpec dim2WithValue = new DimensionSelectorHavingSpec("dim2", "v", null);
  Assert.assertNotEquals(dim1WithValue, dim2WithValue);

  // Same dimension, different values.
  final HavingSpec valueV1 = new DimensionSelectorHavingSpec("dim", "v1", null);
  final HavingSpec valueV2 = new DimensionSelectorHavingSpec("dim", "v2", null);
  Assert.assertNotEquals(valueV1, valueV2);

  // Same dimension, both values null.
  final HavingSpec nullValue = new DimensionSelectorHavingSpec("dim", null, null);
  final HavingSpec nullValueAgain = new DimensionSelectorHavingSpec("dim", null, null);
  Assert.assertEquals(nullValue, nullValueAgain);

  // Different dimensions, both values null.
  final HavingSpec dim1NullValue = new DimensionSelectorHavingSpec("dim1", null, null);
  final HavingSpec dim2NullValue = new DimensionSelectorHavingSpec("dim2", null, null);
  Assert.assertNotEquals(dim1NullValue, dim2NullValue);

  // Different dimensions, one value set and the other null.
  final HavingSpec dim1Valued = new DimensionSelectorHavingSpec("dim1", "v", null);
  final HavingSpec dim2Unvalued = new DimensionSelectorHavingSpec("dim2", null, null);
  Assert.assertNotEquals(dim1Valued, dim2Unvalued);

  // Same dimension and value, different extraction functions.
  final HavingSpec valueBeforeComma = new DimensionSelectorHavingSpec("dim", "value", beforeComma);
  final HavingSpec valueAfterComma = new DimensionSelectorHavingSpec("dim", "value", afterComma);
  Assert.assertNotEquals(valueBeforeComma, valueAfterComma);
}
use of org.apache.druid.query.extraction.ExtractionFn in project druid by druid-io.
the class Expressions method toSimpleLeafFilter.
/**
 * Translates to a simple leaf filter, i.e. not an "expression" type filter. Note that the filter may still
 * reference expression virtual columns, if and only if "virtualColumnRegistry" is defined.
 *
 * Handles IS [NOT] TRUE / IS [NOT] FALSE (by recursing on the single operand), IS [NOT] NULL, and binary
 * comparisons where one side is a literal. Any other {@link RexCall} is delegated to the operator conversion
 * registered in the planner's operator table.
 *
 * @param plannerContext        planner context
 * @param rowSignature          input row signature
 * @param virtualColumnRegistry re-usable virtual column references, may be null if virtual columns aren't allowed
 * @param rexNode               Calcite row expression
 *
 * @return the translated filter, or null if the expression cannot be translated to a simple leaf filter
 */
@Nullable
private static DimFilter toSimpleLeafFilter(
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    @Nullable final VirtualColumnRegistry virtualColumnRegistry,
    final RexNode rexNode
)
{
  final SqlKind kind = rexNode.getKind();
  if (kind == SqlKind.IS_TRUE || kind == SqlKind.IS_NOT_FALSE) {
    return toSimpleLeafFilter(
        plannerContext,
        rowSignature,
        virtualColumnRegistry,
        Iterables.getOnlyElement(((RexCall) rexNode).getOperands())
    );
  } else if (kind == SqlKind.IS_FALSE || kind == SqlKind.IS_NOT_TRUE) {
    final DimFilter operandFilter = toSimpleLeafFilter(
        plannerContext,
        rowSignature,
        virtualColumnRegistry,
        Iterables.getOnlyElement(((RexCall) rexNode).getOperands())
    );
    // The operand may be untranslatable (null). Propagate that as "cannot translate" instead of wrapping
    // null in a NOT filter.
    return operandFilter == null ? null : new NotDimFilter(operandFilter);
  } else if (kind == SqlKind.IS_NULL || kind == SqlKind.IS_NOT_NULL) {
    final RexNode operand = Iterables.getOnlyElement(((RexCall) rexNode).getOperands());
    final DruidExpression druidExpression = toDruidExpression(plannerContext, rowSignature, operand);
    if (druidExpression == null) {
      return null;
    }
    // IS NULL is expressed as a selector on the default string value; IS NOT NULL negates it below.
    final DimFilter equalFilter;
    if (druidExpression.isSimpleExtraction()) {
      equalFilter = new SelectorDimFilter(
          druidExpression.getSimpleExtraction().getColumn(),
          NullHandling.defaultStringValue(),
          druidExpression.getSimpleExtraction().getExtractionFn()
      );
    } else if (virtualColumnRegistry != null) {
      final String virtualColumn = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(
          druidExpression,
          operand.getType()
      );
      equalFilter = new SelectorDimFilter(virtualColumn, NullHandling.defaultStringValue(), null);
    } else {
      return null;
    }
    return kind == SqlKind.IS_NOT_NULL ? new NotDimFilter(equalFilter) : equalFilter;
  } else if (kind == SqlKind.EQUALS
             || kind == SqlKind.NOT_EQUALS
             || kind == SqlKind.GREATER_THAN
             || kind == SqlKind.GREATER_THAN_OR_EQUAL
             || kind == SqlKind.LESS_THAN
             || kind == SqlKind.LESS_THAN_OR_EQUAL) {
    final List<RexNode> operands = ((RexCall) rexNode).getOperands();
    Preconditions.checkState(operands.size() == 2, "Expected 2 operands, got[%,d]", operands.size());
    boolean flip = false;
    RexNode lhs = operands.get(0);
    RexNode rhs = operands.get(1);
    if (lhs.getKind() == SqlKind.LITERAL && rhs.getKind() != SqlKind.LITERAL) {
      // swap lhs, rhs
      RexNode x = lhs;
      lhs = rhs;
      rhs = x;
      flip = true;
    }
    // Flip operator, maybe.
    final SqlKind flippedKind;
    if (flip) {
      switch (kind) {
        case EQUALS:
        case NOT_EQUALS:
          flippedKind = kind;
          break;
        case GREATER_THAN:
          flippedKind = SqlKind.LESS_THAN;
          break;
        case GREATER_THAN_OR_EQUAL:
          flippedKind = SqlKind.LESS_THAN_OR_EQUAL;
          break;
        case LESS_THAN:
          flippedKind = SqlKind.GREATER_THAN;
          break;
        case LESS_THAN_OR_EQUAL:
          flippedKind = SqlKind.GREATER_THAN_OR_EQUAL;
          break;
        default:
          throw new ISE("Kind[%s] not expected here", kind);
      }
    } else {
      flippedKind = kind;
    }
    // rhs must be a literal
    if (rhs.getKind() != SqlKind.LITERAL) {
      return null;
    }
    // Translate lhs to a DruidExpression.
    final DruidExpression lhsExpression = toDruidExpression(plannerContext, rowSignature, lhs);
    if (lhsExpression == null) {
      return null;
    }
    // Special handling for filters on FLOOR(__time TO granularity).
    final Granularity queryGranularity = toQueryGranularity(lhsExpression, plannerContext.getExprMacroTable());
    if (queryGranularity != null) {
      // lhs is FLOOR(__time TO granularity); rhs must be a timestamp
      final long rhsMillis = Calcites.calciteDateTimeLiteralToJoda(rhs, plannerContext.getTimeZone()).getMillis();
      return buildTimeFloorFilter(ColumnHolder.TIME_COLUMN_NAME, queryGranularity, flippedKind, rhsMillis);
    }
    final String column;
    final ExtractionFn extractionFn;
    if (lhsExpression.isSimpleExtraction()) {
      column = lhsExpression.getSimpleExtraction().getColumn();
      extractionFn = lhsExpression.getSimpleExtraction().getExtractionFn();
    } else if (virtualColumnRegistry != null) {
      column = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(lhsExpression, lhs.getType());
      extractionFn = null;
    } else {
      return null;
    }
    if (column.equals(ColumnHolder.TIME_COLUMN_NAME) && extractionFn instanceof TimeFormatExtractionFn) {
      // Check if we can strip the extractionFn and convert the filter to a direct filter on __time.
      // This allows potential conversion to query-level "intervals" later on, which is ideal for Druid queries.
      final Granularity granularity = ExtractionFns.toQueryGranularity(extractionFn);
      if (granularity != null) {
        // lhs is FLOOR(__time TO granularity); rhs must be a timestamp
        final long rhsMillis = Calcites.calciteDateTimeLiteralToJoda(rhs, plannerContext.getTimeZone()).getMillis();
        final Interval rhsInterval = granularity.bucket(DateTimes.utc(rhsMillis));
        // Is rhs aligned on granularity boundaries?
        final boolean rhsAligned = rhsInterval.getStartMillis() == rhsMillis;
        // Create a BoundRefKey that strips the extractionFn and compares __time as a number.
        final BoundRefKey boundRefKey = new BoundRefKey(column, null, StringComparators.NUMERIC);
        return getBoundTimeDimFilter(flippedKind, boundRefKey, rhsInterval, rhsAligned);
      }
    }
    // Render the literal as the string value the bound filter compares against.
    final String val;
    final RexLiteral rhsLiteral = (RexLiteral) rhs;
    if (SqlTypeName.NUMERIC_TYPES.contains(rhsLiteral.getTypeName())) {
      val = String.valueOf(RexLiteral.value(rhsLiteral));
    } else if (SqlTypeName.CHAR_TYPES.contains(rhsLiteral.getTypeName())) {
      val = String.valueOf(RexLiteral.stringValue(rhsLiteral));
    } else if (SqlTypeName.TIMESTAMP == rhsLiteral.getTypeName() || SqlTypeName.DATE == rhsLiteral.getTypeName()) {
      val = String.valueOf(
          Calcites.calciteDateTimeLiteralToJoda(rhsLiteral, plannerContext.getTimeZone()).getMillis()
      );
    } else {
      // Don't know how to filter on this kind of literal.
      return null;
    }
    // Numeric lhs needs a numeric comparison.
    final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType());
    final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator);
    final DimFilter filter;
    // Always use BoundDimFilters, to simplify filter optimization later (it helps to remember the comparator).
    switch (flippedKind) {
      case EQUALS:
        filter = Bounds.equalTo(boundRefKey, val);
        break;
      case NOT_EQUALS:
        filter = new NotDimFilter(Bounds.equalTo(boundRefKey, val));
        break;
      case GREATER_THAN:
        filter = Bounds.greaterThan(boundRefKey, val);
        break;
      case GREATER_THAN_OR_EQUAL:
        filter = Bounds.greaterThanOrEqualTo(boundRefKey, val);
        break;
      case LESS_THAN:
        filter = Bounds.lessThan(boundRefKey, val);
        break;
      case LESS_THAN_OR_EQUAL:
        filter = Bounds.lessThanOrEqualTo(boundRefKey, val);
        break;
      default:
        throw new IllegalStateException("Shouldn't have got here");
    }
    return filter;
  } else if (rexNode instanceof RexCall) {
    // Any other call: defer to the operator conversion registered for it, if there is one.
    final SqlOperator operator = ((RexCall) rexNode).getOperator();
    final SqlOperatorConversion conversion = plannerContext.getOperatorTable().lookupOperatorConversion(operator);
    if (conversion == null) {
      return null;
    } else {
      return conversion.toDruidFilter(plannerContext, rowSignature, virtualColumnRegistry, rexNode);
    }
  } else {
    return null;
  }
}
Aggregations