Search in sources :

Example 16 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class TopNQueryRunnerTest method testFullOnTopNFloatColumnWithExFn.

/**
 * Runs a topN over {@code QueryRunnerTestHelper.fullOnInterval} whose dimension is
 * {@code QueryRunnerTestHelper.indexMetric} passed through a JavaScript extraction function that
 * prepends {@code "super-"} to each value, and checks the four expected rows.
 */
@Test
public void testFullOnTopNFloatColumnWithExFn() {
    final ExtractionFn superPrefixFn = new JavaScriptExtractionFn(
        "function(str) { return 'super-' + str; }",
        false,
        JavaScriptConfig.getEnabledInstance());

    final TopNQuery topNQuery = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(QueryRunnerTestHelper.allGran)
        .dimension(new ExtractionDimensionSpec(QueryRunnerTestHelper.indexMetric, "index_alias", superPrefixFn))
        .metric(QueryRunnerTestHelper.indexMetric)
        .threshold(4)
        .intervals(QueryRunnerTestHelper.fullOnInterval)
        .aggregators(
            Lists.<AggregatorFactory>newArrayList(
                Iterables.concat(
                    QueryRunnerTestHelper.commonAggregators,
                    Lists.newArrayList(
                        new DoubleMaxAggregatorFactory("maxIndex", "index"),
                        new DoubleMinAggregatorFactory("minIndex", "index")))))
        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
        .build();

    // Expected rows, in the order the query is expected to return them.
    final Map<String, Object> firstRow = ImmutableMap.<String, Object>builder()
        .put("index_alias", "super-1000")
        .put(QueryRunnerTestHelper.indexMetric, 2000.0D)
        .put("rows", 2L)
        .put("addRowsIndexConstant", 2003.0D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_2)
        .put("maxIndex", 1000.0D)
        .put("minIndex", 1000.0D)
        .build();
    final Map<String, Object> secondRow = ImmutableMap.<String, Object>builder()
        .put("index_alias", "super-1870.06103515625")
        .put(QueryRunnerTestHelper.indexMetric, 1870.06103515625D)
        .put("rows", 1L)
        .put("addRowsIndexConstant", 1872.06103515625D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_1)
        .put("maxIndex", 1870.06103515625D)
        .put("minIndex", 1870.06103515625D)
        .build();
    final Map<String, Object> thirdRow = ImmutableMap.<String, Object>builder()
        .put("index_alias", "super-1862.7379150390625")
        .put(QueryRunnerTestHelper.indexMetric, 1862.7379150390625D)
        .put("rows", 1L)
        .put("addRowsIndexConstant", 1864.7379150390625D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_1)
        .put("maxIndex", 1862.7379150390625D)
        .put("minIndex", 1862.7379150390625D)
        .build();
    final Map<String, Object> fourthRow = ImmutableMap.<String, Object>builder()
        .put("index_alias", "super-1743.9217529296875")
        .put(QueryRunnerTestHelper.indexMetric, 1743.9217529296875D)
        .put("rows", 1L)
        .put("addRowsIndexConstant", 1745.9217529296875D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_1)
        .put("maxIndex", 1743.9217529296875D)
        .put("minIndex", 1743.9217529296875D)
        .build();

    final List<Result<TopNResultValue>> expected = Arrays.asList(
        new Result<TopNResultValue>(
            new DateTime("2011-01-12T00:00:00.000Z"),
            new TopNResultValue(
                Arrays.<Map<String, Object>>asList(firstRow, secondRow, thirdRow, fourthRow))));

    assertExpectedResults(expected, topNQuery);
}
Also used : DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) StrlenExtractionFn(io.druid.query.extraction.StrlenExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 17 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNWithEmptyStringProducingDimExtractionFn.

/**
 * This test exists only to show what the current behavior is and not necessarily to define that this is
 * correct behavior.  In fact, the behavior when returning the empty string from a DimExtractionFn is, by
 * contract, undefined, so this can do anything.
 *
 * <p>The extraction function below maps the market value {@code "total_market"} to the empty string and
 * leaves every other value untouched; the expected result therefore contains a row keyed by {@code ""}.
 */
@Test
public void testTopNWithEmptyStringProducingDimExtractionFn() {
    final ExtractionFn emptyStringDimExtraction = new DimExtractionFn() {

        @Override
        public byte[] getCacheKey() {
            return new byte[] { (byte) 0xFF };
        }

        @Override
        public String apply(String dimValue) {
            // Constant-first comparison so a null dimension value is passed through instead of throwing.
            return "total_market".equals(dimValue) ? "" : dimValue;
        }

        @Override
        public boolean preservesOrdering() {
            return false;
        }

        @Override
        public ExtractionType getExtractionType() {
            return ExtractionType.MANY_TO_ONE;
        }
    };

    final TopNQuery query = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(QueryRunnerTestHelper.allGran)
        .metric("rows")
        .threshold(4)
        .intervals(QueryRunnerTestHelper.firstToThird)
        .aggregators(QueryRunnerTestHelper.commonAggregators)
        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
        .dimension(
            new ExtractionDimensionSpec(
                QueryRunnerTestHelper.marketDimension,
                QueryRunnerTestHelper.marketDimension,
                emptyStringDimExtraction))
        .build();

    final Map<String, Object> spotRow = ImmutableMap.<String, Object>of(
        QueryRunnerTestHelper.marketDimension, "spot",
        "rows", 18L,
        "index", 2231.8768157958984D,
        "addRowsIndexConstant", 2250.8768157958984D,
        "uniques", QueryRunnerTestHelper.UNIQUES_9);

    // Plain LinkedHashMap populated with put() instead of double-brace initialization, which would
    // create an anonymous subclass capturing the enclosing test instance.
    final Map<String, Object> emptyMarketRow = new LinkedHashMap<String, Object>();
    emptyMarketRow.put(QueryRunnerTestHelper.marketDimension, "");
    emptyMarketRow.put("rows", 4L);
    emptyMarketRow.put("index", 5351.814697265625D);
    emptyMarketRow.put("addRowsIndexConstant", 5356.814697265625D);
    emptyMarketRow.put("uniques", QueryRunnerTestHelper.UNIQUES_2);

    final Map<String, Object> upfrontRow = ImmutableMap.<String, Object>of(
        QueryRunnerTestHelper.marketDimension, "upfront",
        "rows", 4L,
        "index", 4875.669677734375D,
        "addRowsIndexConstant", 4880.669677734375D,
        "uniques", QueryRunnerTestHelper.UNIQUES_2);

    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
        new Result<>(
            new DateTime("2011-04-01T00:00:00.000Z"),
            new TopNResultValue(Arrays.<Map<String, Object>>asList(spotRow, emptyMarketRow, upfrontRow))));

    assertExpectedResults(expectedResults, query);
}
Also used : DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) StrlenExtractionFn(io.druid.query.extraction.StrlenExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 18 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNWithNullProducingDimExtractionFn.

/**
 * Runs a topN on the market dimension through an extraction function that maps
 * {@code "total_market"} to {@code null} and leaves every other value untouched, then checks that the
 * result contains a row whose market value is {@code null} alongside the "spot" and "upfront" rows.
 */
@Test
public void testTopNWithNullProducingDimExtractionFn() {
    final ExtractionFn nullStringDimExtraction = new DimExtractionFn() {

        @Override
        public byte[] getCacheKey() {
            return new byte[] { (byte) 0xFF };
        }

        @Override
        public String apply(String dimValue) {
            // Constant-first comparison so a null dimension value is passed through instead of throwing.
            return "total_market".equals(dimValue) ? null : dimValue;
        }

        @Override
        public boolean preservesOrdering() {
            return false;
        }

        @Override
        public ExtractionType getExtractionType() {
            return ExtractionType.MANY_TO_ONE;
        }
    };

    final TopNQuery query = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(QueryRunnerTestHelper.allGran)
        .metric("rows")
        .threshold(4)
        .intervals(QueryRunnerTestHelper.firstToThird)
        .aggregators(QueryRunnerTestHelper.commonAggregators)
        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
        .dimension(
            new ExtractionDimensionSpec(
                QueryRunnerTestHelper.marketDimension,
                QueryRunnerTestHelper.marketDimension,
                nullStringDimExtraction))
        .build();

    final Map<String, Object> spotRow = ImmutableMap.<String, Object>of(
        QueryRunnerTestHelper.marketDimension, "spot",
        "rows", 18L,
        "index", 2231.8768157958984D,
        "addRowsIndexConstant", 2250.8768157958984D,
        "uniques", QueryRunnerTestHelper.UNIQUES_9);

    // A mutable LinkedHashMap is needed here because the market value is null, which ImmutableMap
    // rejects. It is populated with put() rather than double-brace initialization to avoid creating an
    // anonymous subclass that captures the enclosing test instance.
    final Map<String, Object> nullMarketRow = new LinkedHashMap<String, Object>();
    nullMarketRow.put(QueryRunnerTestHelper.marketDimension, null);
    nullMarketRow.put("rows", 4L);
    nullMarketRow.put("index", 5351.814697265625D);
    nullMarketRow.put("addRowsIndexConstant", 5356.814697265625D);
    nullMarketRow.put("uniques", QueryRunnerTestHelper.UNIQUES_2);

    final Map<String, Object> upfrontRow = ImmutableMap.<String, Object>of(
        QueryRunnerTestHelper.marketDimension, "upfront",
        "rows", 4L,
        "index", 4875.669677734375D,
        "addRowsIndexConstant", 4880.669677734375D,
        "uniques", QueryRunnerTestHelper.UNIQUES_2);

    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
        new Result<>(
            new DateTime("2011-04-01T00:00:00.000Z"),
            new TopNResultValue(Arrays.<Map<String, Object>>asList(spotRow, nullMarketRow, upfrontRow))));

    assertExpectedResults(expectedResults, query);
}
Also used : DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) StrlenExtractionFn(io.druid.query.extraction.StrlenExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 19 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class TopNQueryRunnerTest method testFullOnTopNLongTimeColumnWithExFn.

/**
 * Runs a topN over {@code QueryRunnerTestHelper.fullOnInterval} whose dimension is
 * {@code Column.TIME_COLUMN_NAME} passed through a JavaScript extraction function that prepends
 * {@code "super-"}, ranked by the {@code "maxIndex"} aggregator, and checks the four expected rows.
 */
@Test
public void testFullOnTopNLongTimeColumnWithExFn() {
    final ExtractionFn superPrefixFn = new JavaScriptExtractionFn(
        "function(str) { return 'super-' + str; }",
        false,
        JavaScriptConfig.getEnabledInstance());

    final TopNQuery topNQuery = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(QueryRunnerTestHelper.allGran)
        .dimension(new ExtractionDimensionSpec(Column.TIME_COLUMN_NAME, "time_alias", superPrefixFn))
        .metric("maxIndex")
        .threshold(4)
        .intervals(QueryRunnerTestHelper.fullOnInterval)
        .aggregators(
            Lists.<AggregatorFactory>newArrayList(
                Iterables.concat(
                    QueryRunnerTestHelper.commonAggregators,
                    Lists.newArrayList(
                        new DoubleMaxAggregatorFactory("maxIndex", "index"),
                        new DoubleMinAggregatorFactory("minIndex", "index")))))
        .postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant))
        .build();

    // Expected rows, in the order the query is expected to return them.
    final Map<String, Object> firstRow = ImmutableMap.<String, Object>builder()
        .put("time_alias", "super-1296345600000")
        .put(QueryRunnerTestHelper.indexMetric, 5497.331253051758D)
        .put("rows", 13L)
        .put("addRowsIndexConstant", 5511.331253051758D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_9)
        .put("maxIndex", 1870.06103515625D)
        .put("minIndex", 97.02391052246094D)
        .build();
    final Map<String, Object> secondRow = ImmutableMap.<String, Object>builder()
        .put("time_alias", "super-1298678400000")
        .put(QueryRunnerTestHelper.indexMetric, 6541.463027954102D)
        .put("rows", 13L)
        .put("addRowsIndexConstant", 6555.463027954102D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_9)
        .put("maxIndex", 1862.7379150390625D)
        .put("minIndex", 83.099365234375D)
        .build();
    final Map<String, Object> thirdRow = ImmutableMap.<String, Object>builder()
        .put("time_alias", "super-1301529600000")
        .put(QueryRunnerTestHelper.indexMetric, 6814.467971801758D)
        .put("rows", 13L)
        .put("addRowsIndexConstant", 6828.467971801758D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_9)
        .put("maxIndex", 1734.27490234375D)
        .put("minIndex", 93.39083862304688D)
        .build();
    final Map<String, Object> fourthRow = ImmutableMap.<String, Object>builder()
        .put("time_alias", "super-1294876800000")
        .put(QueryRunnerTestHelper.indexMetric, 6077.949111938477D)
        .put("rows", 13L)
        .put("addRowsIndexConstant", 6091.949111938477D)
        .put("uniques", QueryRunnerTestHelper.UNIQUES_9)
        .put("maxIndex", 1689.0128173828125D)
        .put("minIndex", 94.87471008300781D)
        .build();

    final List<Result<TopNResultValue>> expected = Arrays.asList(
        new Result<TopNResultValue>(
            new DateTime("2011-01-12T00:00:00.000Z"),
            new TopNResultValue(
                Arrays.<Map<String, Object>>asList(firstRow, secondRow, thirdRow, fourthRow))));

    assertExpectedResults(expected, topNQuery);
}
Also used : DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) StrlenExtractionFn(io.druid.query.extraction.StrlenExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 20 with ExtractionFn

use of io.druid.query.extraction.ExtractionFn in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNQueryCardinalityAggregatorWithExtractionFn.

/**
 * Runs a topN on the market dimension ranked by a cardinality aggregator over the quality dimension,
 * where both dimensions go through a JavaScript extraction function that always returns
 * {@code 'hello'}. The single expected row has market {@code "hello"}.
 */
@Test
public void testTopNQueryCardinalityAggregatorWithExtractionFn() {
    final ExtractionFn constantHelloFn = new JavaScriptExtractionFn(
        "function(str) { return 'hello' }",
        false,
        JavaScriptConfig.getEnabledInstance());

    final DimensionSpec marketSpec = new ExtractionDimensionSpec(
        QueryRunnerTestHelper.marketDimension,
        QueryRunnerTestHelper.marketDimension,
        constantHelloFn);
    final ImmutableList<DimensionSpec> qualitySpecs = ImmutableList.<DimensionSpec>of(
        new ExtractionDimensionSpec(
            QueryRunnerTestHelper.qualityDimension,
            QueryRunnerTestHelper.qualityDimension,
            constantHelloFn));

    final TopNQuery topNQuery = new TopNQueryBuilder()
        .dataSource(QueryRunnerTestHelper.dataSource)
        .granularity(QueryRunnerTestHelper.allGran)
        .dimension(marketSpec)
        .metric(new NumericTopNMetricSpec("numVals"))
        .threshold(10)
        .intervals(QueryRunnerTestHelper.firstToThird)
        .aggregators(
            duplicateAggregators(
                new CardinalityAggregatorFactory("numVals", qualitySpecs, false),
                new CardinalityAggregatorFactory("numVals1", qualitySpecs, false)))
        .build();

    final Result<TopNResultValue> expectedResult = new Result<>(
        new DateTime("2011-04-01T00:00:00.000Z"),
        new TopNResultValue(
            withDuplicateResults(
                Collections.singletonList(
                    ImmutableMap.<String, Object>of("market", "hello", "numVals", 1.0002442201269182d)),
                "numVals",
                "numVals1")));
    final List<Result<TopNResultValue>> expected = Collections.singletonList(expectedResult);

    assertExpectedResults(expected, topNQuery);
}
Also used : ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) ListFilteredDimensionSpec(io.druid.query.dimension.ListFilteredDimensionSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) StrlenExtractionFn(io.druid.query.extraction.StrlenExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) CardinalityAggregatorFactory(io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Aggregations

ExtractionFn (io.druid.query.extraction.ExtractionFn): 40
Test (org.junit.Test): 32
JavaScriptExtractionFn (io.druid.query.extraction.JavaScriptExtractionFn): 29
TimeFormatExtractionFn (io.druid.query.extraction.TimeFormatExtractionFn): 24
LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn): 24
RegexDimExtractionFn (io.druid.query.extraction.RegexDimExtractionFn): 21
ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 19
DimExtractionFn (io.druid.query.extraction.DimExtractionFn): 17
StrlenExtractionFn (io.druid.query.extraction.StrlenExtractionFn): 17
DateTime (org.joda.time.DateTime): 12
Result (io.druid.query.Result): 10
Row (io.druid.data.input.Row): 9
DimensionSpec (io.druid.query.dimension.DimensionSpec): 9
CascadeExtractionFn (io.druid.query.extraction.CascadeExtractionFn): 9
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 8
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 8
SelectorDimFilter (io.druid.query.filter.SelectorDimFilter): 8
ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec): 5
DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory): 4
DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory): 4