Example use of org.apache.druid.query.groupby.ResultRow in the druid project by druid-io.
From the class DoublesSketchAggregatorTest, method testFailureWhenMaxStreamLengthHit.
/**
 * Verifies that a {@code quantilesDoublesSketch} aggregator configured with
 * {@code "maxStreamLength": 10} makes the groupBy query fail with an
 * {@link IllegalStateException} (matched recursively through the cause chain).
 *
 * <p>The ingestion inputs and the query are identical for every groupBy strategy, so they
 * are built once. Only the point at which the failure is expected differs:
 * <ul>
 *   <li>strategy V1: the expectation is registered before the helper call, and the
 *       returned sequence is never consumed;</li>
 *   <li>any other strategy: the expectation is registered after the helper call and
 *       before the sequence is materialized with {@code toList()}.</li>
 * </ul>
 *
 * @throws Exception propagated from the test helper
 */
@Test
public void testFailureWhenMaxStreamLengthHit() throws Exception {
  final File inputFile = new File(
      this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()
  );
  final String parserJson = String.join(
      "\n",
      "{",
      " \"type\": \"string\",",
      " \"parseSpec\": {",
      " \"format\": \"tsv\",",
      " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
      " \"dimensionsSpec\": {",
      " \"dimensions\": [\"sequenceNumber\", \"product\"],",
      " \"dimensionExclusions\": [],",
      " \"spatialDimensions\": []",
      " },",
      " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]",
      " }",
      "}"
  );
  final String metricsJson = "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]";
  final String queryJson = String.join(
      "\n",
      "{",
      " \"queryType\": \"groupBy\",",
      " \"dataSource\": \"test_datasource\",",
      " \"granularity\": \"ALL\",",
      " \"dimensions\": [],",
      " \"aggregations\": [",
      " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128, \"maxStreamLength\": 10}",
      " ],",
      " \"postAggregations\": [",
      " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
      " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
      " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}",
      " ],",
      " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
      "}"
  );

  if (GroupByStrategySelector.STRATEGY_V1.equals(config.getDefaultStrategy())) {
    // Under V1 the expectation must already be in place when the helper is invoked.
    expectedException.expect(new RecursiveExceptionMatcher(IllegalStateException.class));
    expectedException.expectMessage("NullPointerException was thrown while updating Doubles sketch");
    helper.createIndexAndRunQueryOnSegment(
        inputFile,
        parserJson,
        metricsJson,
        0, // minTimestamp
        Granularities.NONE,
        10, // maxRowCount
        queryJson
    );
  } else {
    Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(
        inputFile,
        parserJson,
        metricsJson,
        0, // minTimestamp
        Granularities.NONE,
        10, // maxRowCount
        queryJson
    );
    // Here the failure is expected only once the sequence is consumed.
    expectedException.expect(new RecursiveExceptionMatcher(IllegalStateException.class));
    expectedException.expectMessage("NullPointerException was thrown while updating Doubles sketch");
    seq.toList();
  }
}
Example use of org.apache.druid.query.groupby.ResultRow in the druid project by druid-io.
From the class DoublesSketchAggregatorTest, method buildingSketchesAtQueryTime.
/**
 * Ingests {@code quantiles/doubles_build_data.tsv} with plain {@code doubleSum} metrics and
 * builds quantiles sketches at query time, once over {@code value} and once over
 * {@code valueWithNulls}.
 *
 * <p>The single result row is checked position by position: the two sketch aggregators
 * (positions 0-1), the three post-aggregators over {@code sketch} (positions 2-4) and the
 * three post-aggregators over {@code sketchWithNulls} (positions 5-7). Expectations for the
 * null-bearing column depend on {@code NullHandling.replaceWithDefault()}.
 *
 * @throws Exception propagated from the test helper
 */
@Test
public void buildingSketchesAtQueryTime() throws Exception {
  final File dataFile = new File(
      this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()
  );
  final String parserSpec = String.join(
      "\n",
      "{",
      " \"type\": \"string\",",
      " \"parseSpec\": {",
      " \"format\": \"tsv\",",
      " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
      " \"dimensionsSpec\": {",
      " \"dimensions\": [\"sequenceNumber\", \"product\"],",
      " \"dimensionExclusions\": [],",
      " \"spatialDimensions\": []",
      " },",
      " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\", \"valueWithNulls\"]",
      " }",
      "}"
  );
  final String ingestMetrics = "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"},"
                               + "{\"type\": \"doubleSum\", \"name\": \"valueWithNulls\", \"fieldName\": \"valueWithNulls\"}]";
  final String groupByJson = String.join(
      "\n",
      "{",
      " \"queryType\": \"groupBy\",",
      " \"dataSource\": \"test_datasource\",",
      " \"granularity\": \"ALL\",",
      " \"dimensions\": [],",
      " \"aggregations\": [",
      " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128},",
      " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 128}",
      " ],",
      " \"postAggregations\": [",
      " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
      " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
      " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
      " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantileWithNulls\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},",
      " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},",
      " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}",
      " ],",
      " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
      "}"
  );

  final Sequence<ResultRow> sequence = helper.createIndexAndRunQueryOnSegment(
      dataFile,
      parserSpec,
      ingestMetrics,
      0, // minTimestamp
      Granularities.NONE,
      10, // maxRowCount
      groupByJson
  );
  final List<ResultRow> rows = sequence.toList();
  Assert.assertEquals(1, rows.size());
  final ResultRow only = rows.get(0);

  // Position 0: the "sketch" aggregator, surfaced as a Long.
  final Object rawCount = only.get(0);
  Assert.assertTrue(rawCount instanceof Long);
  final long count = (Long) rawCount;
  Assert.assertEquals(400, count);

  // Position 1: the "sketchWithNulls" aggregator; the expected value depends on null handling.
  final Object rawCountWithNulls = only.get(1);
  Assert.assertTrue(rawCountWithNulls instanceof Long);
  final long countWithNulls = (Long) rawCountWithNulls;
  final boolean defaultMode = NullHandling.replaceWithDefault();
  final long expectedCountWithNulls = defaultMode ? 400 : 377;
  Assert.assertEquals(expectedCountWithNulls, countWithNulls);

  // Position 2: post agg "quantile" - the median value.
  final Object rawMedian = only.get(2);
  Assert.assertTrue(rawMedian instanceof Double);
  final double median = (Double) rawMedian;
  Assert.assertEquals(0.5, median, 0.05);

  // Position 3: post agg "quantiles" - min, median and max values.
  final Object rawQuantiles = only.get(3);
  Assert.assertTrue(rawQuantiles instanceof double[]);
  final double[] minMedianMax = (double[]) rawQuantiles;
  Assert.assertEquals(0, minMedianMax[0], 0.05);
  Assert.assertEquals(0.5, minMedianMax[1], 0.05);
  Assert.assertEquals(1, minMedianMax[2], 0.05);

  // Position 4: post agg "histogram" - 400 items uniformly distributed into 4 bins.
  final Object rawHistogram = only.get(4);
  Assert.assertTrue(rawHistogram instanceof double[]);
  final double[] bins = (double[]) rawHistogram;
  for (int i = 0; i < bins.length; i++) {
    Assert.assertEquals(100, bins[i], 100 * 0.2);
  }

  // Position 5: post agg "quantileWithNulls" - the median value.
  final double expectedMedianWithNulls = defaultMode ? 7.4 : 7.5;
  final Object rawMedianWithNulls = only.get(5);
  Assert.assertTrue(rawMedianWithNulls instanceof Double);
  final double medianWithNulls = (Double) rawMedianWithNulls;
  Assert.assertEquals(expectedMedianWithNulls, medianWithNulls, 0.1);

  // Position 6: post agg "quantilesWithNulls" - min, median and max values.
  final Object rawQuantilesWithNulls = only.get(6);
  Assert.assertTrue(rawQuantilesWithNulls instanceof double[]);
  final double[] minMedianMaxWithNulls = (double[]) rawQuantilesWithNulls;
  final double expectedMinWithNulls = defaultMode ? 0.0 : 5.0;
  Assert.assertEquals(expectedMinWithNulls, minMedianMaxWithNulls[0], 0.05);
  Assert.assertEquals(expectedMedianWithNulls, minMedianMaxWithNulls[1], 0.1);
  Assert.assertEquals(10.0, minMedianMaxWithNulls[2], 0.05);

  // Position 7: post agg "histogramWithNulls" - distribution is skewed due to nulls/0s,
  // hence the wide tolerance per bin.
  final Object rawHistogramWithNulls = only.get(7);
  Assert.assertTrue(rawHistogramWithNulls instanceof double[]);
  final double[] binsWithNulls = (double[]) rawHistogramWithNulls;
  for (int i = 0; i < binsWithNulls.length; i++) {
    Assert.assertEquals(100, binsWithNulls[i], 80);
  }
}
Example use of org.apache.druid.query.groupby.ResultRow in the druid project by druid-io.
From the class SketchAggregationTest, method testRetentionDataIngestAndGpByQuery.
/**
 * Ingests {@code retention_test_data.tsv}, runs the groupBy query loaded from
 * {@code retention_test_data_group_by_query.json}, and compares the single result row
 * with a hand-built legacy row converted through {@code ResultRow.fromLegacyRow}.
 *
 * @throws Exception propagated from resource loading or the test helper
 */
@Test
public void testRetentionDataIngestAndGpByQuery() throws Exception {
  final GroupByQuery groupByQuery = readQueryFromClasspath(
      "retention_test_data_group_by_query.json",
      helper.getObjectMapper(),
      vectorize
  );

  final File dataFile = new File(this.getClass().getClassLoader().getResource("retention_test_data.tsv").getFile());
  final String parserJson = readFileFromClasspathAsString("simple_test_data_record_parser.json");
  final String aggregatorsJson = readFileFromClasspathAsString("simple_test_data_aggregators.json");

  final Sequence<ResultRow> sequence = helper.createIndexAndRunQueryOnSegment(
      dataFile,
      parserJson,
      aggregatorsJson,
      0,
      Granularities.NONE,
      5,
      groupByQuery
  );
  final List<ResultRow> actual = sequence.toList();
  Assert.assertEquals(1, actual.size());

  // Expected event, expressed as a legacy map-based row and converted to the query's row layout.
  final ImmutableMap<String, Object> expectedEvent = ImmutableMap.<String, Object>builder()
      .put("product", "product_1")
      .put("p1_unique_country_day_1", 20.0)
      .put("p1_unique_country_day_2", 20.0)
      .put("p1_unique_country_day_3", 10.0)
      .put("sketchEstimatePostAgg", 20.0)
      .put("sketchIntersectionPostAggEstimate1", 10.0)
      .put("sketchIntersectionPostAggEstimate2", 5.0)
      .put("non_existing_col_validation", 0.0)
      .build();
  final MapBasedRow legacyRow = new MapBasedRow(DateTimes.of("2014-10-19T00:00:00.000Z"), expectedEvent);
  final List<ResultRow> expected = ImmutableList.of(legacyRow)
      .stream()
      .map(legacy -> ResultRow.fromLegacyRow(legacy, groupByQuery))
      .collect(Collectors.toList());

  Assert.assertEquals(expected, actual);
}
Example use of org.apache.druid.query.groupby.ResultRow in the druid project by druid-io.
From the class SketchAggregationTest, method testEmptySketchAggregateCombine.
/**
 * Ingests {@code empty_sketch_data.tsv}, runs the groupBy query loaded from
 * {@code empty_sketch_group_by_query.json}, and expects exactly one row for
 * {@code product_b} whose {@code sketch_count} is {@code 0.0}.
 *
 * @throws Exception propagated from resource loading or the test helper
 */
@Test
public void testEmptySketchAggregateCombine() throws Exception {
  final GroupByQuery groupByQuery = readQueryFromClasspath(
      "empty_sketch_group_by_query.json",
      helper.getObjectMapper(),
      vectorize
  );

  final File dataFile = new File(SketchAggregationTest.class.getClassLoader().getResource("empty_sketch_data.tsv").getFile());
  final String parserJson = readFileFromClasspathAsString("empty_sketch_data_record_parser.json");
  final String aggregatorsJson = readFileFromClasspathAsString("empty_sketch_test_data_aggregators.json");

  final Sequence<ResultRow> sequence = helper.createIndexAndRunQueryOnSegment(
      dataFile,
      parserJson,
      aggregatorsJson,
      0,
      Granularities.NONE,
      5,
      groupByQuery
  );
  final List<ResultRow> actual = sequence.toList();
  Assert.assertEquals(1, actual.size());

  // Expected row, built in the legacy map form and converted to the query's row layout.
  final ImmutableMap<String, Object> expectedEvent = ImmutableMap.<String, Object>builder()
      .put("product", "product_b")
      .put("sketch_count", 0.0)
      .build();
  final MapBasedRow legacyRow = new MapBasedRow(DateTimes.of("2019-07-14T00:00:00.000Z"), expectedEvent);
  final ResultRow expected = ResultRow.fromLegacyRow(legacyRow, groupByQuery);

  Assert.assertEquals(expected, actual.get(0));
}
Example use of org.apache.druid.query.groupby.ResultRow in the druid project by druid-io.
From the class HllSketchAggregatorTest, method buildSketchesAtQueryTime.
/**
 * Ingests {@code hll/hll_raw.tsv} with no ingestion-time aggregators ({@code "[]"}) and
 * runs an {@code HLLSketchBuild} groupBy over the {@code id} column, expecting one row
 * whose first value is 200 within a tolerance of 0.1.
 *
 * @throws Exception propagated from the test helper
 */
@Test
public void buildSketchesAtQueryTime() throws Exception {
  final File rawData = new File(this.getClass().getClassLoader().getResource("hll/hll_raw.tsv").getFile());
  final String parserJson = buildParserJson(
      Arrays.asList("dim", "multiDim", "id"),
      Arrays.asList("timestamp", "dim", "multiDim", "id")
  );
  final String queryJson = buildGroupByQueryJson("HLLSketchBuild", "id", !ROUND);

  final Sequence<ResultRow> sequence = helper.createIndexAndRunQueryOnSegment(
      rawData,
      parserJson,
      "[]",
      0, // minTimestamp
      Granularities.NONE,
      200, // maxRowCount
      queryJson
  );

  final List<ResultRow> rows = sequence.toList();
  Assert.assertEquals(1, rows.size());

  // The first value of the row is compared as a double against 200.
  final double estimate = (Double) rows.get(0).get(0);
  Assert.assertEquals(200, estimate, 0.1);
}
Aggregations