Search in sources :

Example 66 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

From the class GroupByQueryRunnerTest, method testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty.

/**
 * Runs a daily groupBy on "quality" (aliased to "alias") filtered by an
 * {@code ExtractionDimFilter} whose match value is the empty string. The lookup
 * maps "mezzanine" to {@code null} and "news" to {@code ""}; the expected rows
 * contain exactly those two qualities for each of the two days.
 */
@Test
public void testGroupByWithExtractionDimFilterCaseMappingValueIsNullOrEmpty() {
    // A plain HashMap is used because one entry deliberately carries a null value.
    final Map<String, String> qualityLookup = new HashMap<>();
    qualityLookup.put("automotive", "automotive0");
    qualityLookup.put("business", "business0");
    qualityLookup.put("entertainment", "entertainment0");
    qualityLookup.put("health", "health0");
    qualityLookup.put("mezzanine", null);
    qualityLookup.put("news", "");
    qualityLookup.put("premium", "premium0");
    qualityLookup.put("technology", "technology0");
    qualityLookup.put("travel", "travel0");

    final MapLookupExtractor extractor = new MapLookupExtractor(qualityLookup, false);
    final LookupExtractionFn extractionFn = new LookupExtractionFn(extractor, false, null, true, false);

    // Match rows whose extracted "quality" equals the empty string.
    final ExtractionDimFilter emptyValueFilter = new ExtractionDimFilter("quality", "", extractionFn, null);

    final GroupByQuery query = GroupByQuery.builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
        .setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias")))
        .setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index")))
        .setGranularity(QueryRunnerTestHelper.dayGran)
        .setDimFilter(emptyValueFilter)
        .build();

    // Both the null-mapped and the empty-mapped quality are expected on each day.
    final List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L),
        GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L)
    );

    final Iterable<Row> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, actualResults, "");
}
Also used : LookupExtractionFn(io.druid.query.lookup.LookupExtractionFn) HashMap(java.util.HashMap) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) MapLookupExtractor(io.druid.query.extraction.MapLookupExtractor) Row(io.druid.data.input.Row) ExtractionDimFilter(io.druid.query.filter.ExtractionDimFilter) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)

Example 67 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

From the class DefaultLimitSpecTest, method testBuildWithExplicitOrder.

/**
 * Builds a limit function from a {@code DefaultLimitSpec} that orders by "k1"
 * ascending with a limit of 2, and checks which of the test rows come out when
 * "k1" names only a dimension, also an aggregator, or also a post-aggregator.
 */
@Test
public void testBuildWithExplicitOrder() {
    final DefaultLimitSpec limitSpec = new DefaultLimitSpec(
        ImmutableList.of(new OrderByColumnSpec("k1", OrderByColumnSpec.Direction.ASCENDING)),
        2
    );

    // Arguments shared by several build() calls below; all are immutable lists.
    final ImmutableList<DimensionSpec> k1Dimension =
        ImmutableList.<DimensionSpec>of(new DefaultDimensionSpec("k1", "k1"));
    final ImmutableList<AggregatorFactory> k2Aggregator =
        ImmutableList.<AggregatorFactory>of(new LongSumAggregatorFactory("k2", "k2"));
    final ImmutableList<PostAggregator> k3PostAggregator =
        ImmutableList.<PostAggregator>of(new ConstantPostAggregator("k3", 1L));

    // only the dimension is named k1
    Function<Sequence<Row>, Sequence<Row>> limitFn = limitSpec.build(k1Dimension, k2Aggregator, k3PostAggregator);
    Assert.assertEquals(
        ImmutableList.of(testRowsList.get(0), testRowsList.get(1)),
        Sequences.toList(limitFn.apply(testRowsSequence), new ArrayList<Row>())
    );

    // if there is an aggregator with same name then that is used to build ordering
    limitFn = limitSpec.build(
        k1Dimension,
        ImmutableList.<AggregatorFactory>of(new LongSumAggregatorFactory("k1", "k1")),
        k3PostAggregator
    );
    Assert.assertEquals(
        ImmutableList.of(testRowsList.get(2), testRowsList.get(0)),
        Sequences.toList(limitFn.apply(testRowsSequence), new ArrayList<Row>())
    );

    // if there is a post-aggregator with same name then that is used to build ordering
    limitFn = limitSpec.build(
        k1Dimension,
        k2Aggregator,
        ImmutableList.<PostAggregator>of(
            new ArithmeticPostAggregator(
                "k1",
                "+",
                ImmutableList.<PostAggregator>of(new ConstantPostAggregator("x", 1), new ConstantPostAggregator("y", 1))
            )
        )
    );
    // No raw (List) casts: assertEquals(Object, Object) accepts both lists as-is,
    // exactly as in the two assertions above.
    Assert.assertEquals(
        ImmutableList.of(testRowsList.get(2), testRowsList.get(0)),
        Sequences.toList(limitFn.apply(testRowsSequence), new ArrayList<Row>())
    );

    // makes same result
    limitFn = limitSpec.build(
        k1Dimension,
        k2Aggregator,
        ImmutableList.<PostAggregator>of(new ExpressionPostAggregator("k1", "1 + 1"))
    );
    Assert.assertEquals(
        ImmutableList.of(testRowsList.get(2), testRowsList.get(0)),
        Sequences.toList(limitFn.apply(testRowsSequence), new ArrayList<Row>())
    );
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) ExpressionPostAggregator(io.druid.query.aggregation.post.ExpressionPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) ArrayList(java.util.ArrayList) Sequence(io.druid.java.util.common.guava.Sequence) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) ExpressionPostAggregator(io.druid.query.aggregation.post.ExpressionPostAggregator) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) Test(org.junit.Test)

Example 68 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

From the class SketchAggregationTest, method testThetaCardinalityOnSimpleColumn.

/**
 * Passes simple_test_data.tsv, the record parser from
 * simple_test_data_record_parser2.json, a single "count" metric and the groupBy
 * query from simple_test_data_group_by_query.json to
 * {@code helper.createIndexAndRunQueryOnSegment}, then asserts that five rows come
 * back, one per product, with the expected sketch estimates.
 *
 * @throws Exception propagated from the helper or from reading classpath resources
 */
@Test
public void testThetaCardinalityOnSimpleColumn() throws Exception {
    final File dataFile = new File(
        SketchAggregationTest.class.getClassLoader().getResource("simple_test_data.tsv").getFile()
    );
    final String countMetricJson = "[" + "  {" + "    \"type\": \"count\"," + "    \"name\": \"count\"" + "  }" + "]";

    Sequence<Row> seq = helper.createIndexAndRunQueryOnSegment(
        dataFile,
        readFileFromClasspathAsString("simple_test_data_record_parser2.json"),
        countMetricJson,
        0,
        Granularities.NONE,
        5,
        readFileFromClasspathAsString("simple_test_data_group_by_query.json")
    );
    List<Row> results = Sequences.toList(seq, Lists.<Row>newArrayList());

    Assert.assertEquals(5, results.size());
    // Expected order is significant: product_3 (estimate 38.0) comes first.
    Assert.assertEquals(
        ImmutableList.of(
            expectedThetaRow("product_3", 38.0),
            expectedThetaRow("product_1", 42.0),
            expectedThetaRow("product_2", 42.0),
            expectedThetaRow("product_4", 42.0),
            expectedThetaRow("product_5", 42.0)
        ),
        results
    );
}

/**
 * Builds one expected result row timestamped 2014-10-19T00:00:00.000Z.
 *
 * @param product  value of the "product" dimension
 * @param estimate value expected for the sketch count and for the estimate, union
 *                 and intersection post-aggregations; the A-not-B estimate and the
 *                 non-existing-column validation are always 0.0
 * @return the expected row
 */
private static MapBasedRow expectedThetaRow(String product, double estimate) {
    return new MapBasedRow(
        DateTime.parse("2014-10-19T00:00:00.000Z"),
        ImmutableMap.<String, Object>builder()
            .put("product", product)
            .put("sketch_count", estimate)
            .put("sketchEstimatePostAgg", estimate)
            .put("sketchUnionPostAggEstimate", estimate)
            .put("sketchIntersectionPostAggEstimate", estimate)
            .put("sketchAnotBPostAggEstimate", 0.0)
            .put("non_existing_col_validation", 0.0)
            .build()
    );
}
Also used : MapBasedRow(io.druid.data.input.MapBasedRow) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) File(java.io.File) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 69 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

From the class SketchAggregationTestWithSimpleData, method testSimpleDataIngestAndGpByQuery.

/**
 * Runs the groupBy query from simple_test_data_group_by_query.json against the
 * segments {@code s1} and {@code s2} and asserts that five rows come back, one
 * per product, with the expected sketch estimates.
 *
 * @throws Exception propagated from the aggregation helper or from reading the query file
 */
@Test
public void testSimpleDataIngestAndGpByQuery() throws Exception {
    AggregationTestHelper gpByQueryAggregationTestHelper = AggregationTestHelper.createGroupByQueryAggregationTestHelper(
        sm.getJacksonModules(),
        config,
        tempFolder
    );

    // Typed as Sequence<Row> rather than a raw Sequence so the toList call below is checked.
    Sequence<Row> seq = gpByQueryAggregationTestHelper.runQueryOnSegments(
        ImmutableList.of(s1, s2),
        readFileFromClasspathAsString("simple_test_data_group_by_query.json")
    );
    List<Row> results = Sequences.toList(seq, Lists.<Row>newArrayList());

    Assert.assertEquals(5, results.size());
    // Expected order is significant: product_3 (estimate 38.0) comes first.
    Assert.assertEquals(
        ImmutableList.of(
            expectedSimpleDataRow("product_3", 38.0),
            expectedSimpleDataRow("product_1", 42.0),
            expectedSimpleDataRow("product_2", 42.0),
            expectedSimpleDataRow("product_4", 42.0),
            expectedSimpleDataRow("product_5", 42.0)
        ),
        results
    );
}

/**
 * Builds one expected result row timestamped 2014-10-19T00:00:00.000Z.
 *
 * @param product  value of the "product" dimension
 * @param estimate value expected for the sketch count and for the estimate, union
 *                 and intersection post-aggregations; the A-not-B estimate and the
 *                 non-existing-column validation are always 0.0
 * @return the expected row
 */
private static MapBasedRow expectedSimpleDataRow(String product, double estimate) {
    return new MapBasedRow(
        DateTime.parse("2014-10-19T00:00:00.000Z"),
        ImmutableMap.<String, Object>builder()
            .put("product", product)
            .put("sketch_count", estimate)
            .put("sketchEstimatePostAgg", estimate)
            .put("sketchUnionPostAggEstimate", estimate)
            .put("sketchIntersectionPostAggEstimate", estimate)
            .put("sketchAnotBPostAggEstimate", 0.0)
            .put("non_existing_col_validation", 0.0)
            .build()
    );
}
Also used : MapBasedRow(io.druid.data.input.MapBasedRow) AggregationTestHelper(io.druid.query.aggregation.AggregationTestHelper) Sequence(io.druid.java.util.common.guava.Sequence) Row(io.druid.data.input.Row) MapBasedRow(io.druid.data.input.MapBasedRow) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Example 70 with Row

Use of io.druid.data.input.Row in project druid by druid-io.

From the class ApproximateHistogramGroupByQueryTest, method testGroupByWithApproximateHistogramAgg.

/**
 * Runs a groupBy on the market dimension (aliased to "marketalias") with a row
 * count, an approximate-histogram aggregator over "index" and a 0.5 quantile
 * post-aggregator, keeping one row ordered by "marketalias" descending, and
 * compares the result with the single expected "upfront" row.
 */
@Test
public void testGroupByWithApproximateHistogramAgg() {
    final ApproximateHistogramAggregatorFactory histogramFactory = new ApproximateHistogramAggregatorFactory(
        "apphisto",
        "index",
        10,
        5,
        Float.NEGATIVE_INFINITY,
        Float.POSITIVE_INFINITY
    );

    // Order by the aliased market descending; the second argument (1) is the limit.
    final DefaultLimitSpec firstMarketDescending = new DefaultLimitSpec(
        Lists.newArrayList(new OrderByColumnSpec("marketalias", OrderByColumnSpec.Direction.DESCENDING)),
        1
    );

    final GroupByQuery query = new GroupByQuery.Builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setGranularity(QueryRunnerTestHelper.allGran)
        .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(QueryRunnerTestHelper.marketDimension, "marketalias")))
        .setInterval(QueryRunnerTestHelper.fullOnInterval)
        .setLimitSpec(firstMarketDescending)
        .setAggregatorSpecs(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, histogramFactory))
        .setPostAggregatorSpecs(Arrays.<PostAggregator>asList(new QuantilePostAggregator("quantile", "apphisto", 0.5f)))
        .build();

    // Histogram expected for the "upfront" market: six break points, five bucket counts.
    final Histogram upfrontHistogram = new Histogram(
        new float[] { 214.97299194335938f, 545.9906005859375f, 877.0081787109375f, 1208.0257568359375f, 1539.0433349609375f, 1870.06103515625f },
        new double[] { 0.0, 67.53287506103516, 72.22068786621094, 31.984678268432617, 14.261756896972656 }
    );
    final List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow(
            "1970-01-01T00:00:00.000Z",
            "marketalias", "upfront",
            "rows", 186L,
            "quantile", 880.9881f,
            "apphisto", upfrontHistogram
        )
    );

    final Iterable<Row> actualResults = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, actualResults, "approx-histo");
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) Row(io.druid.data.input.Row) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)

Aggregations

Row (io.druid.data.input.Row): 167; Test (org.junit.Test): 123; DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 105; LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 91; DimensionSpec (io.druid.query.dimension.DimensionSpec): 64; ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 59; ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec): 56; RegexFilteredDimensionSpec (io.druid.query.dimension.RegexFilteredDimensionSpec): 56; InputRow (io.druid.data.input.InputRow): 28; AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 24; CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 24; SelectorDimFilter (io.druid.query.filter.SelectorDimFilter): 22; LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn): 22; Benchmark (org.openjdk.jmh.annotations.Benchmark): 21; BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode): 21; OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit): 21; GroupByQuery (io.druid.query.groupby.GroupByQuery): 20; MapBasedRow (io.druid.data.input.MapBasedRow): 19; OrderByColumnSpec (io.druid.query.groupby.orderby.OrderByColumnSpec): 19; QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 19