Search in sources :

Example 46 with ExtractionDimensionSpec

use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNLexicographicDimExtractionOptimalNamespace.

@Test
public void testTopNLexicographicDimExtractionOptimalNamespace() {
    TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).dimension(new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new LookupExtractionFn(new MapLookupExtractor(ImmutableMap.of("spot", "2spot", "total_market", "3total_market", "upfront", "1upfront"), false), true, null, true, false))).metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)).threshold(4).intervals(QueryRunnerTestHelper.firstToThird).aggregators(QueryRunnerTestHelper.commonAggregators).postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).build();
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<TopNResultValue>(new DateTime("2011-04-01T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "1upfront", "rows", 4L, "index", 4875.669677734375D, "addRowsIndexConstant", 4880.669677734375D, "uniques", QueryRunnerTestHelper.UNIQUES_2), ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "2spot", "rows", 18L, "index", 2231.8768157958984D, "addRowsIndexConstant", 2250.8768157958984D, "uniques", QueryRunnerTestHelper.UNIQUES_9), ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "3total_market", "rows", 4L, "index", 5351.814697265625D, "addRowsIndexConstant", 5356.814697265625D, "uniques", QueryRunnerTestHelper.UNIQUES_2)))));
    assertExpectedResults(expectedResults, query);
}
Also used : HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) MapLookupExtractor(io.druid.query.extraction.MapLookupExtractor) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 47 with ExtractionDimensionSpec

use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNWithNullProducingDimExtractionFn.

@Test
public void testTopNWithNullProducingDimExtractionFn() {
    final ExtractionFn nullStringDimExtraction = new DimExtractionFn() {

        @Override
        public byte[] getCacheKey() {
            return new byte[] { (byte) 0xFF };
        }

        @Override
        public String apply(String dimValue) {
            return dimValue.equals("total_market") ? null : dimValue;
        }

        @Override
        public boolean preservesOrdering() {
            return false;
        }

        @Override
        public ExtractionType getExtractionType() {
            return ExtractionType.MANY_TO_ONE;
        }
    };
    final TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).metric("rows").threshold(4).intervals(QueryRunnerTestHelper.firstToThird).aggregators(QueryRunnerTestHelper.commonAggregators).postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).dimension(new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, nullStringDimExtraction)).build();
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<>(new DateTime("2011-04-01T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "spot", "rows", 18L, "index", 2231.8768157958984D, "addRowsIndexConstant", 2250.8768157958984D, "uniques", QueryRunnerTestHelper.UNIQUES_9), new LinkedHashMap<String, Object>() {

        {
            put(QueryRunnerTestHelper.marketDimension, null);
            put("rows", 4L);
            put("index", 5351.814697265625D);
            put("addRowsIndexConstant", 5356.814697265625D);
            put("uniques", QueryRunnerTestHelper.UNIQUES_2);
        }
    }, ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "upfront", "rows", 4L, "index", 4875.669677734375D, "addRowsIndexConstant", 4880.669677734375D, "uniques", QueryRunnerTestHelper.UNIQUES_2)))));
    assertExpectedResults(expectedResults, query);
}
Also used : DateTime(org.joda.time.DateTime) LinkedHashMap(java.util.LinkedHashMap) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) StrlenExtractionFn(io.druid.query.extraction.StrlenExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 48 with ExtractionDimensionSpec

use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class TopNQueryRunnerTest method testFullOnTopNLongTimeColumnWithExFn.

@Test
public void testFullOnTopNLongTimeColumnWithExFn() {
    String jsFn = "function(str) { return 'super-' + str; }";
    ExtractionFn jsExtractionFn = new JavaScriptExtractionFn(jsFn, false, JavaScriptConfig.getEnabledInstance());
    TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).dimension(new ExtractionDimensionSpec(Column.TIME_COLUMN_NAME, "time_alias", jsExtractionFn)).metric("maxIndex").threshold(4).intervals(QueryRunnerTestHelper.fullOnInterval).aggregators(Lists.<AggregatorFactory>newArrayList(Iterables.concat(QueryRunnerTestHelper.commonAggregators, Lists.newArrayList(new DoubleMaxAggregatorFactory("maxIndex", "index"), new DoubleMinAggregatorFactory("minIndex", "index"))))).postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).build();
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<TopNResultValue>(new DateTime("2011-01-12T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>builder().put("time_alias", "super-1296345600000").put(QueryRunnerTestHelper.indexMetric, 5497.331253051758D).put("rows", 13L).put("addRowsIndexConstant", 5511.331253051758D).put("uniques", QueryRunnerTestHelper.UNIQUES_9).put("maxIndex", 1870.06103515625D).put("minIndex", 97.02391052246094D).build(), ImmutableMap.<String, Object>builder().put("time_alias", "super-1298678400000").put(QueryRunnerTestHelper.indexMetric, 6541.463027954102D).put("rows", 13L).put("addRowsIndexConstant", 6555.463027954102D).put("uniques", QueryRunnerTestHelper.UNIQUES_9).put("maxIndex", 1862.7379150390625D).put("minIndex", 83.099365234375D).build(), ImmutableMap.<String, Object>builder().put("time_alias", "super-1301529600000").put(QueryRunnerTestHelper.indexMetric, 6814.467971801758D).put("rows", 13L).put("addRowsIndexConstant", 6828.467971801758D).put("uniques", QueryRunnerTestHelper.UNIQUES_9).put("maxIndex", 1734.27490234375D).put("minIndex", 93.39083862304688D).build(), ImmutableMap.<String, Object>builder().put("time_alias", "super-1294876800000").put(QueryRunnerTestHelper.indexMetric, 6077.949111938477D).put("rows", 13L).put("addRowsIndexConstant", 6091.949111938477D).put("uniques", QueryRunnerTestHelper.UNIQUES_9).put("maxIndex", 1689.0128173828125D).put("minIndex", 94.87471008300781D).build()))));
    assertExpectedResults(expectedResults, query);
}
Also used : DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) ExtractionFn(io.druid.query.extraction.ExtractionFn) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) DimExtractionFn(io.druid.query.extraction.DimExtractionFn) StrlenExtractionFn(io.druid.query.extraction.StrlenExtractionFn) JavaScriptExtractionFn(io.druid.query.extraction.JavaScriptExtractionFn) TimeFormatExtractionFn(io.druid.query.extraction.TimeFormatExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 49 with ExtractionDimensionSpec

use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNLexicographicDimExtractionOptimalNamespaceWithRunner.

@Test
public void testTopNLexicographicDimExtractionOptimalNamespaceWithRunner() {
    TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).dimension(new ExtractionDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension, new LookupExtractionFn(new MapLookupExtractor(ImmutableMap.of("spot", "2spot", "total_market", "3total_market", "upfront", "1upfront"), false), true, null, true, false))).metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)).threshold(4).intervals(QueryRunnerTestHelper.firstToThird).aggregators(QueryRunnerTestHelper.commonAggregators).postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).build();
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<TopNResultValue>(new DateTime("2011-04-01T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "1upfront", "rows", 4L, "index", 4875.669677734375D, "addRowsIndexConstant", 4880.669677734375D, "uniques", QueryRunnerTestHelper.UNIQUES_2), ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "2spot", "rows", 18L, "index", 2231.8768157958984D, "addRowsIndexConstant", 2250.8768157958984D, "uniques", QueryRunnerTestHelper.UNIQUES_9), ImmutableMap.<String, Object>of(QueryRunnerTestHelper.marketDimension, "3total_market", "rows", 4L, "index", 5351.814697265625D, "addRowsIndexConstant", 5356.814697265625D, "uniques", QueryRunnerTestHelper.UNIQUES_2)))));
    assertExpectedResults(expectedResults, query);
}
Also used : HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) MapLookupExtractor(io.druid.query.extraction.MapLookupExtractor) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Example 50 with ExtractionDimensionSpec

use of io.druid.query.dimension.ExtractionDimensionSpec in project druid by druid-io.

the class TopNQueryRunnerTest method testTopNCollapsingDimExtraction.

@Test
public void testTopNCollapsingDimExtraction() {
    TopNQuery query = new TopNQueryBuilder().dataSource(QueryRunnerTestHelper.dataSource).granularity(QueryRunnerTestHelper.allGran).dimension(new ExtractionDimensionSpec(QueryRunnerTestHelper.qualityDimension, QueryRunnerTestHelper.qualityDimension, new RegexDimExtractionFn(".(.)", false, null))).metric("index").threshold(2).intervals(QueryRunnerTestHelper.fullOnInterval).aggregators(Arrays.asList(QueryRunnerTestHelper.rowsCount, QueryRunnerTestHelper.indexDoubleSum)).postAggregators(Arrays.<PostAggregator>asList(QueryRunnerTestHelper.addRowsIndexConstant)).build();
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(new Result<>(new DateTime("2011-01-12T00:00:00.000Z"), new TopNResultValue(Arrays.<Map<String, Object>>asList(ImmutableMap.<String, Object>of(QueryRunnerTestHelper.qualityDimension, "e", "rows", 558L, "index", 246645.1204032898, "addRowsIndexConstant", 247204.1204032898), ImmutableMap.<String, Object>of(QueryRunnerTestHelper.qualityDimension, "r", "rows", 372L, "index", 222051.08961486816, "addRowsIndexConstant", 222424.08961486816)))));
    assertExpectedResults(expectedResults, query);
    query = query.withAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new DoubleSumAggregatorFactory("index", null, "-index + 100")));
    expectedResults = Arrays.asList(TopNQueryRunnerTestHelper.createExpectedRows("2011-01-12T00:00:00.000Z", new String[] { QueryRunnerTestHelper.qualityDimension, "rows", "index", "addRowsIndexConstant" }, Arrays.asList(new Object[] { "n", 93L, -2786.472755432129, -2692.472755432129 }, new Object[] { "u", 186L, -3949.824363708496, -3762.824363708496 })));
    assertExpectedResults(expectedResults, query);
}
Also used : HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) DateTime(org.joda.time.DateTime) Result(io.druid.query.Result) RegexDimExtractionFn(io.druid.query.extraction.RegexDimExtractionFn) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)

Aggregations

ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec)59 Test (org.junit.Test)56 LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn)36 Result (io.druid.query.Result)29 DateTime (org.joda.time.DateTime)29 RegexDimExtractionFn (io.druid.query.extraction.RegexDimExtractionFn)26 PostAggregator (io.druid.query.aggregation.PostAggregator)24 HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator)24 TimeFormatExtractionFn (io.druid.query.extraction.TimeFormatExtractionFn)23 JavaScriptExtractionFn (io.druid.query.extraction.JavaScriptExtractionFn)19 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)18 ExtractionFn (io.druid.query.extraction.ExtractionFn)18 MapLookupExtractor (io.druid.query.extraction.MapLookupExtractor)18 Row (io.druid.data.input.Row)17 DimExtractionFn (io.druid.query.extraction.DimExtractionFn)17 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)16 StrlenExtractionFn (io.druid.query.extraction.StrlenExtractionFn)15 DimensionSpec (io.druid.query.dimension.DimensionSpec)13 ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec)10 RegexFilteredDimensionSpec (io.druid.query.dimension.RegexFilteredDimensionSpec)10