Example usage of org.apache.druid.query.extraction.RegexDimExtractionFn in the druid project (druid-io): class DimensionSelectorHavingSpecTest, method testDimensionFilterSpec.
@Test
public void testDimensionFilterSpec() {
    // Case 1: filter value "v" — row matches when any value of "dimension" equals "v".
    DimensionSelectorHavingSpec matchV = new DimensionSelectorHavingSpec("dimension", "v", null);
    Assert.assertTrue(matchV.eval(getTestRow("v")));
    Assert.assertTrue(matchV.eval(getTestRow(ImmutableList.of("v", "v1"))));
    Assert.assertFalse(matchV.eval(getTestRow(ImmutableList.of())));
    Assert.assertFalse(matchV.eval(getTestRow("v1")));

    // Case 2: null filter value — matches rows where the dimension is absent or empty.
    DimensionSelectorHavingSpec matchNull = new DimensionSelectorHavingSpec("dimension", null, null);
    Assert.assertTrue(matchNull.eval(getTestRow(ImmutableList.of())));
    Assert.assertTrue(matchNull.eval(getTestRow(ImmutableList.of(""))));
    Assert.assertFalse(matchNull.eval(getTestRow(ImmutableList.of("v"))));
    Assert.assertFalse(matchNull.eval(getTestRow(ImmutableList.of("v", "v1"))));

    // Case 3: empty-string filter value — behaves like null for absent/empty rows,
    // and also matches multi-value rows that contain "" explicitly.
    DimensionSelectorHavingSpec matchEmpty = new DimensionSelectorHavingSpec("dimension", "", null);
    Assert.assertTrue(matchEmpty.eval(getTestRow(ImmutableList.of())));
    Assert.assertTrue(matchEmpty.eval(getTestRow(ImmutableList.of(""))));
    Assert.assertTrue(matchEmpty.eval(getTestRow(ImmutableList.of("v", "v1", ""))));
    Assert.assertFalse(matchEmpty.eval(getTestRow(ImmutableList.of("v"))));
    Assert.assertFalse(matchEmpty.eval(getTestRow(ImmutableList.of("v", "v1"))));

    // Case 4: extraction function applied before comparison. The regex captures the
    // text before the first comma; non-matching values fall back to "default".
    ExtractionFn extractionFn = new RegexDimExtractionFn("^([^,]*),", true, "default");
    DimensionSelectorHavingSpec matchExtractedV = new DimensionSelectorHavingSpec("dimension", "v", extractionFn);
    Assert.assertTrue(matchExtractedV.eval(getTestRow(ImmutableList.of("v,v1", "v2,v3"))));
    Assert.assertFalse(matchExtractedV.eval(getTestRow(ImmutableList.of("v1,v4"))));
    Assert.assertFalse(matchExtractedV.eval(getTestRow(ImmutableList.of("v"))));
    Assert.assertFalse(matchExtractedV.eval(getTestRow(ImmutableList.of("v1", "default"))));
    Assert.assertTrue(matchExtractedV.eval(getTestRow(ImmutableList.of("v,default", "none"))));

    // Case 5: filtering on the extraction function's replacement value itself.
    DimensionSelectorHavingSpec matchDefault = new DimensionSelectorHavingSpec("dimension", "default", extractionFn);
    Assert.assertTrue(matchDefault.eval(getTestRow(ImmutableList.of("v1,v2", "none"))));
}
Example usage of org.apache.druid.query.extraction.RegexDimExtractionFn in the druid project (druid-io): class ExtractionDimensionSpecTest, method testSerdeBackwardsCompatibility.
@Test
public void testSerdeBackwardsCompatibility() throws Exception {
    final ObjectMapper mapper = new DefaultObjectMapper();

    // Legacy payloads used the field name "dimExtractionFn"; verify it still deserializes.
    final String oldJson =
        "{\n"
        + " \"type\": \"extraction\",\n"
        + " \"outputName\": \"first3Letters\",\n"
        + " \"dimension\": \"myDim\","
        + " \"dimExtractionFn\": {\n"
        + " \"type\": \"regex\",\n"
        + " \"expr\": \"(...).*\"\n"
        + " }\n"
        + "}";
    final ExtractionDimensionSpec spec = (ExtractionDimensionSpec) mapper.readValue(oldJson, DimensionSpec.class);

    Assert.assertEquals("first3Letters", spec.getOutputName());
    Assert.assertEquals("myDim", spec.getDimension());
    Assert.assertNotNull(spec.getExtractionFn());
    Assert.assertTrue(spec.getExtractionFn() instanceof RegexDimExtractionFn);

    // Round-trip: serialize and deserialize again, expecting an equal spec.
    Assert.assertEquals(spec, mapper.readValue(mapper.writeValueAsBytes(spec), DimensionSpec.class));

    // When both the new "extractionFn" and legacy "dimExtractionFn" are present,
    // the new field wins — here the "partial" (MatchingDimExtractionFn) entry.
    final String oldAndNewJson =
        "{\n"
        + " \"type\": \"extraction\",\n"
        + " \"outputName\": \"first3Letters\",\n"
        + " \"dimension\": \"myDim\","
        + " \"extractionFn\": {\n"
        + " \"type\": \"partial\",\n"
        + " \"expr\": \"(...).*\"\n"
        + " },\n"
        + " \"dimExtractionFn\": {\n"
        + " \"type\": \"regex\",\n"
        + " \"expr\": \"(...).*\"\n"
        + " }\n"
        + "}";
    Assert.assertTrue(mapper.readValue(oldAndNewJson, DimensionSpec.class).getExtractionFn() instanceof MatchingDimExtractionFn);
}
Example usage of org.apache.druid.query.extraction.RegexDimExtractionFn in the druid project (druid-io): class RegexpExtractOperatorConversion, method toDruidExpression.
@Override
public DruidExpression toDruidExpression(final PlannerContext plannerContext, final RowSignature rowSignature, final RexNode rexNode) {
    return OperatorConversions.convertDirectCallWithExtraction(
        plannerContext,
        rowSignature,
        rexNode,
        StringUtils.toLowerCase(calciteOperator().getName()),
        inputExpressions -> {
            final DruidExpression inputArg = inputExpressions.get(0);
            final Expr regexExpr = inputExpressions.get(1).parse(plannerContext.getExprMacroTable());
            // The group-index argument is optional; absent means "use the default index".
            final Expr groupIndexExpr =
                inputExpressions.size() > 2
                    ? inputExpressions.get(2).parse(plannerContext.getExprMacroTable())
                    : null;

            // When the input is a simple column extraction and both the pattern and the
            // index are literals, the call can be planned as a cascaded RegexDimExtractionFn.
            if (inputArg.isSimpleExtraction() && regexExpr.isLiteral() && (groupIndexExpr == null || groupIndexExpr.isLiteral())) {
                final String pattern = (String) regexExpr.getLiteralValue();
                return inputArg.getSimpleExtraction().cascade(
                    new RegexDimExtractionFn(
                        StringUtils.nullToEmptyNonDruidDataString(pattern),
                        groupIndexExpr == null ? DEFAULT_INDEX : ((Number) groupIndexExpr.getLiteralValue()).intValue(),
                        true,
                        // replaceMissingValueWith stays null; per the original in-source note this
                        // relates to the non-SQL-compliant null handling mode — verify against
                        // RegexDimExtractionFn's constructor docs.
                        null
                    )
                );
            } else {
                // Fall back: signal that no direct extraction-based conversion is possible.
                return null;
            }
        }
    );
}
Example usage of org.apache.druid.query.extraction.RegexDimExtractionFn in the druid project (druid-io): class GroupByQueryRunnerTest, method testGroupByWithNullProducingDimExtractionFn.
@Test
public void testGroupByWithNullProducingDimExtractionFn() {
    // Cannot vectorize due to extraction dimension spec.
    cannotVectorize();

    // Extraction function that maps "mezzanine" to null and otherwise takes the
    // first character of the value (regex "(\w{1})").
    final ExtractionFn nullExtractionFn = new RegexDimExtractionFn("(\\w{1})", false, null) {
        @Override
        public byte[] getCacheKey() {
            // Distinct cache key so this anonymous override is never confused
            // with the plain RegexDimExtractionFn in caches.
            return new byte[] { (byte) 0xFF };
        }

        @Override
        public String apply(String dimValue) {
            return "mezzanine".equals(dimValue) ? null : super.apply(dimValue);
        }
    };

    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
        .setDimensions(new ExtractionDimensionSpec("quality", "alias", nullExtractionFn))
        .build();

    // The "mezzanine" rows group under a null alias; all other qualities group
    // under their first letter.
    List<ResultRow> expectedResults = Arrays.asList(
        makeRow(query, "2011-04-01", "alias", null, "rows", 3L, "idx", 2870L),
        makeRow(query, "2011-04-01", "alias", "a", "rows", 1L, "idx", 135L),
        makeRow(query, "2011-04-01", "alias", "b", "rows", 1L, "idx", 118L),
        makeRow(query, "2011-04-01", "alias", "e", "rows", 1L, "idx", 158L),
        makeRow(query, "2011-04-01", "alias", "h", "rows", 1L, "idx", 120L),
        makeRow(query, "2011-04-01", "alias", "n", "rows", 1L, "idx", 121L),
        makeRow(query, "2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L),
        makeRow(query, "2011-04-01", "alias", "t", "rows", 2L, "idx", 197L),
        makeRow(query, "2011-04-02", "alias", null, "rows", 3L, "idx", 2447L),
        makeRow(query, "2011-04-02", "alias", "a", "rows", 1L, "idx", 147L),
        makeRow(query, "2011-04-02", "alias", "b", "rows", 1L, "idx", 112L),
        makeRow(query, "2011-04-02", "alias", "e", "rows", 1L, "idx", 166L),
        makeRow(query, "2011-04-02", "alias", "h", "rows", 1L, "idx", 113L),
        makeRow(query, "2011-04-02", "alias", "n", "rows", 1L, "idx", 114L),
        makeRow(query, "2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L),
        makeRow(query, "2011-04-02", "alias", "t", "rows", 2L, "idx", 223L)
    );

    TestHelper.assertExpectedObjects(expectedResults, GroupByQueryRunnerTestHelper.runQuery(factory, runner, query), "null-dimextraction");
}
Example usage of org.apache.druid.query.extraction.RegexDimExtractionFn in the druid project (druid-io): class GroupByQueryRunnerTest, method testGroupByWithEmptyStringProducingDimExtractionFn.
/**
 * This test exists only to show what the current behavior is and not necessarily to define that this is
 * correct behavior. In fact, the behavior when returning the empty string from a DimExtractionFn is, by
 * contract, undefined, so this can do anything.
 */
@Test
@Ignore
public void testGroupByWithEmptyStringProducingDimExtractionFn() {
    // Extraction function that maps "mezzanine" to the empty string and otherwise
    // takes the first character of the value (regex "(\w{1})").
    final ExtractionFn emptyStringExtractionFn = new RegexDimExtractionFn("(\\w{1})", false, null) {
        @Override
        public byte[] getCacheKey() {
            // Distinct cache key so this anonymous override is never confused
            // with the plain RegexDimExtractionFn in caches.
            return new byte[] { (byte) 0xFF };
        }

        @Override
        public String apply(String dimValue) {
            return "mezzanine".equals(dimValue) ? "" : super.apply(dimValue);
        }
    };

    GroupByQuery query = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
        .setDimensions(new ExtractionDimensionSpec("quality", "alias", emptyStringExtractionFn))
        .build();

    // The "mezzanine" rows group under the empty-string alias; all other qualities
    // group under their first letter.
    List<ResultRow> expectedResults = Arrays.asList(
        makeRow(query, "2011-04-01", "alias", "", "rows", 3L, "idx", 2870L),
        makeRow(query, "2011-04-01", "alias", "a", "rows", 1L, "idx", 135L),
        makeRow(query, "2011-04-01", "alias", "b", "rows", 1L, "idx", 118L),
        makeRow(query, "2011-04-01", "alias", "e", "rows", 1L, "idx", 158L),
        makeRow(query, "2011-04-01", "alias", "h", "rows", 1L, "idx", 120L),
        makeRow(query, "2011-04-01", "alias", "n", "rows", 1L, "idx", 121L),
        makeRow(query, "2011-04-01", "alias", "p", "rows", 3L, "idx", 2900L),
        makeRow(query, "2011-04-01", "alias", "t", "rows", 2L, "idx", 197L),
        makeRow(query, "2011-04-02", "alias", "", "rows", 3L, "idx", 2447L),
        makeRow(query, "2011-04-02", "alias", "a", "rows", 1L, "idx", 147L),
        makeRow(query, "2011-04-02", "alias", "b", "rows", 1L, "idx", 112L),
        makeRow(query, "2011-04-02", "alias", "e", "rows", 1L, "idx", 166L),
        makeRow(query, "2011-04-02", "alias", "h", "rows", 1L, "idx", 113L),
        makeRow(query, "2011-04-02", "alias", "n", "rows", 1L, "idx", 114L),
        makeRow(query, "2011-04-02", "alias", "p", "rows", 3L, "idx", 2505L),
        makeRow(query, "2011-04-02", "alias", "t", "rows", 2L, "idx", 223L)
    );

    TestHelper.assertExpectedObjects(expectedResults, GroupByQueryRunnerTestHelper.runQuery(factory, runner, query), "empty-string-dimextraction");
}
Aggregations