Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
Source: class TDigestSketchAggregatorTest, method testIngestingSketches.
@Test
public void testIngestingSketches() throws Exception {
  Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(
      new File(this.getClass().getClassLoader().getResource("doubles_sketch_data.tsv").getFile()),
      String.join(
          "\n",
          "{",
          " \"type\": \"string\",",
          " \"parseSpec\": {",
          " \"format\": \"tsv\",",
          " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
          " \"dimensionsSpec\": {",
          " \"dimensions\": [\"product\"],",
          " \"dimensionExclusions\": [],",
          " \"spatialDimensions\": []",
          " },",
          " \"columns\": [\"timestamp\", \"product\", \"sketch\"]",
          " }",
          "}"
      ),
      String.join(
          "\n",
          "[",
          " {\"type\": \"tDigestSketch\", \"name\": \"first_level_merge_sketch\", \"fieldName\": \"sketch\", \"compression\": 200}",
          "]"
      ),
      // minTimestamp
      0,
      Granularities.NONE,
      // maxRowCount
      10,
      String.join(
          "\n",
          "{",
          " \"queryType\": \"groupBy\",",
          " \"dataSource\": \"test_datasource\",",
          " \"granularity\": \"ALL\",",
          " \"dimensions\": [],",
          " \"aggregations\": [",
          " {\"type\": \"tDigestSketch\", \"name\": \"second_level_merge_sketch\", \"fieldName\": \"first_level_merge_sketch\", \"compression\": 200}",
          " ],",
          " \"postAggregations\": [",
          " {\"type\": \"quantilesFromTDigestSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"second_level_merge_sketch\"}}",
          " ],",
          " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
          "}"
      )
  );

  List<ResultRow> results = seq.toList();
  Assert.assertEquals(1, results.size());
  ResultRow row = results.get(0);

  // post agg: "quantiles"
  Object quantilesObject = row.get(1);
  Assert.assertTrue(quantilesObject instanceof double[]);
  double[] quantiles = (double[]) quantilesObject;

  // min value
  Assert.assertEquals(0.001, quantiles[0], 0.0006);
  // median value
  Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.47 : 0.5, quantiles[1], 0.05);
  // max value
  Assert.assertEquals(1, quantiles[2], 0.05);
}
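For orientation, here is a minimal sketch (not part of the original test) of how the positional ResultRow layout is read for a query like the one above. With granularity ALL and no dimensions, the aggregators are assumed to occupy the leading positions in declaration order and the post-aggregators follow, so index 0 holds the merged sketch and index 1 the "quantiles" post-aggregator. The helper name below is hypothetical.

// Hypothetical helper: reads the "quantiles" post-aggregator from a ResultRow produced
// by the groupBy query above (assumed layout: aggregators first, then post-aggregators).
private static double[] readQuantiles(ResultRow row)
{
  // index 1 = "quantiles" post-aggregator in this query; index 0 holds the merged sketch
  Object quantilesObject = row.get(1);
  Assert.assertTrue(quantilesObject instanceof double[]);
  return (double[]) quantilesObject;
}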
Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
Source: class TDigestSketchAggregatorTest, method buildingSketchesAtIngestionTime.
@Test
public void buildingSketchesAtIngestionTime() throws Exception {
  Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(
      new File(this.getClass().getClassLoader().getResource("doubles_build_data.tsv").getFile()),
      String.join(
          "\n",
          "{",
          " \"type\": \"string\",",
          " \"parseSpec\": {",
          " \"format\": \"tsv\",",
          " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
          " \"dimensionsSpec\": {",
          " \"dimensions\": [\"product\"],",
          " \"dimensionExclusions\": [ \"sequenceNumber\"],",
          " \"spatialDimensions\": []",
          " },",
          " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]",
          " }",
          "}"
      ),
      "[{\"type\": \"tDigestSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"compression\": 200}]",
      // minTimestamp
      0,
      Granularities.NONE,
      // maxRowCount
      10,
      String.join(
          "\n",
          "{",
          " \"queryType\": \"groupBy\",",
          " \"dataSource\": \"test_datasource\",",
          " \"granularity\": \"ALL\",",
          " \"dimensions\": [],",
          " \"aggregations\": [",
          " {\"type\": \"tDigestSketch\", \"name\": \"merged_sketch\", \"fieldName\": \"sketch\", \"compression\": 200}",
          " ],",
          " \"postAggregations\": [",
          " {\"type\": \"quantilesFromTDigestSketch\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"merged_sketch\"}}",
          " ],",
          " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
          "}"
      )
  );

  List<ResultRow> results = seq.toList();
  Assert.assertEquals(1, results.size());
  ResultRow row = results.get(0);

  // post agg: "quantiles"
  Object quantilesObject = row.get(1);
  Assert.assertTrue(quantilesObject instanceof double[]);
  double[] quantiles = (double[]) quantilesObject;

  // min value
  Assert.assertEquals(0.001, quantiles[0], 0.0006);
  // median value
  Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.47 : 0.5, quantiles[1], 0.05);
  // max value
  Assert.assertEquals(1, quantiles[2], 0.05);
}
Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
Source: class MapVirtualColumnGroupByTest, method testWithSubColumn.
@Test
public void testWithSubColumn() {
  final GroupByQuery query = new GroupByQuery(
      new TableDataSource(QueryRunnerTestHelper.DATA_SOURCE),
      new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))),
      VirtualColumns.create(ImmutableList.of(new MapVirtualColumn("keys", "values", "params"))),
      null,
      Granularities.ALL,
      ImmutableList.of(new DefaultDimensionSpec("params.key3", "params.key3")),
      ImmutableList.of(new CountAggregatorFactory("count")),
      null,
      null,
      null,
      null,
      null
  );

  final List<ResultRow> result = runner.run(QueryPlus.wrap(query)).toList();

  final List<ResultRow> expected = ImmutableList
      .of(
          new MapBasedRow(
              DateTimes.of("2011-01-12T00:00:00.000Z"),
              MapVirtualColumnTestBase.mapOf("count", 1L, "params.key3", "value3")
          ),
          new MapBasedRow(
              DateTimes.of("2011-01-12T00:00:00.000Z"),
              MapVirtualColumnTestBase.mapOf("count", 2L)
          )
      )
      .stream()
      .map(row -> ResultRow.fromLegacyRow(row, query))
      .collect(Collectors.toList());

  Assert.assertEquals(expected, result);
}
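A brief note on the conversion used above: ResultRow.fromLegacyRow turns a map-based Row into the positional form returned by the groupBy engine, with positions assumed to follow the query's output column order. A minimal sketch of building the first expected row for the query in the test above:

// Minimal sketch: convert one legacy MapBasedRow into a positional ResultRow for `query`.
// Positions are assumed to follow the query's output order ("params.key3", then "count").
MapBasedRow legacyRow = new MapBasedRow(
    DateTimes.of("2011-01-12T00:00:00.000Z"),
    MapVirtualColumnTestBase.mapOf("count", 1L, "params.key3", "value3")
);
ResultRow expectedRow = ResultRow.fromLegacyRow(legacyRow, query);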
Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
Source: class DoublesSketchAggregatorTest, method buildingSketchesAtIngestionTime.
@Test
public void buildingSketchesAtIngestionTime() throws Exception {
  Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(
      new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()),
      String.join(
          "\n",
          "{",
          " \"type\": \"string\",",
          " \"parseSpec\": {",
          " \"format\": \"tsv\",",
          " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
          " \"dimensionsSpec\": {",
          " \"dimensions\": [\"product\"],",
          " \"dimensionExclusions\": [ \"sequenceNumber\"],",
          " \"spatialDimensions\": []",
          " },",
          " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\", \"valueWithNulls\"]",
          " }",
          "}"
      ),
      "[{\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128},"
          + "{\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"valueWithNulls\", \"k\": 128}]",
      // minTimestamp
      0,
      Granularities.NONE,
      // maxRowCount
      10,
      String.join(
          "\n",
          "{",
          " \"queryType\": \"groupBy\",",
          " \"dataSource\": \"test_datasource\",",
          " \"granularity\": \"ALL\",",
          " \"dimensions\": [],",
          " \"aggregations\": [",
          " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"k\": 128},",
          " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketchWithNulls\", \"fieldName\": \"sketchWithNulls\", \"k\": 128},",
          " {\"type\": \"quantilesDoublesSketch\", \"name\": \"non_existent_sketch\", \"fieldName\": \"non_existent_sketch\", \"k\": 128}",
          " ],",
          " \"postAggregations\": [",
          " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
          " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
          " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantilesWithNulls\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}},",
          " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogramWithNulls\", \"splitPoints\": [6.25, 7.5, 8.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchWithNulls\"}}",
          " ],",
          " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
          "}"
      )
  );

  List<ResultRow> results = seq.toList();
  Assert.assertEquals(1, results.size());
  ResultRow row = results.get(0);

  Object sketchObject = row.get(0);
  Assert.assertTrue(sketchObject instanceof Long);
  long sketchValue = (long) sketchObject;
  Assert.assertEquals(400, sketchValue);

  Object sketchObjectWithNulls = row.get(1);
  Assert.assertTrue(sketchObjectWithNulls instanceof Long);
  long sketchValueWithNulls = (long) sketchObjectWithNulls;
  Assert.assertEquals(377, sketchValueWithNulls);

  // post agg
  Object quantilesObject = row.get(3);
  Assert.assertTrue(quantilesObject instanceof double[]);
  double[] quantiles = (double[]) quantilesObject;
  // min value
  Assert.assertEquals(0, quantiles[0], 0.05);
  // median value
  Assert.assertEquals(0.5, quantiles[1], 0.05);
  // max value
  Assert.assertEquals(1, quantiles[2], 0.05);

  // post agg
  Object histogramObject = row.get(4);
  Assert.assertTrue(histogramObject instanceof double[]);
  double[] histogram = (double[]) histogramObject;
  Assert.assertEquals(4, histogram.length);
  for (final double bin : histogram) {
    // 400 items uniformly distributed into 4 bins
    Assert.assertEquals(100, bin, 100 * 0.2);
  }

  // post agg with nulls
  Object quantilesObjectWithNulls = row.get(5);
  Assert.assertTrue(quantilesObjectWithNulls instanceof double[]);
  double[] quantilesWithNulls = (double[]) quantilesObjectWithNulls;
  // min value
  Assert.assertEquals(5.0, quantilesWithNulls[0], 0.05);
  // median value
  Assert.assertEquals(7.55, quantilesWithNulls[1], 0.05);
  // max value
  Assert.assertEquals(10.0, quantilesWithNulls[2], 0.05);

  // post agg with nulls
  Object histogramObjectWithNulls = row.get(6);
  Assert.assertTrue(histogramObjectWithNulls instanceof double[]);
  double[] histogramWithNulls = (double[]) histogramObjectWithNulls;
  Assert.assertEquals(4, histogramWithNulls.length);
  for (final double bin : histogramWithNulls) {
    // distribution is skewed due to nulls
    Assert.assertEquals(100, bin, 50);
  }
}
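As the assertions above show, each finalized quantilesDoublesSketch aggregator column surfaces as a Long equal to the number of values retained by the sketch in this fixture (400 for "sketch", 377 for "sketchWithNulls"). A small sketch, assuming that layout, of a helper the repeated count checks could be folded into; the method name is hypothetical.

// Hypothetical helper: asserts that the finalized sketch column at `position` reports
// the expected stream length, mirroring the count checks in the test above.
private static void assertSketchCount(ResultRow row, int position, long expectedCount)
{
  Object sketchObject = row.get(position);
  Assert.assertTrue(sketchObject instanceof Long);
  Assert.assertEquals(expectedCount, (long) sketchObject);
}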
Use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
Source: class DoublesSketchAggregatorTest, method queryingDataWithFieldNameValueAsFloatInsteadOfSketch.
@Test
public void queryingDataWithFieldNameValueAsFloatInsteadOfSketch() throws Exception {
  Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(
      new File(this.getClass().getClassLoader().getResource("quantiles/doubles_build_data.tsv").getFile()),
      String.join(
          "\n",
          "{",
          " \"type\": \"string\",",
          " \"parseSpec\": {",
          " \"format\": \"tsv\",",
          " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},",
          " \"dimensionsSpec\": {",
          " \"dimensions\": [\"sequenceNumber\", \"product\"],",
          " \"dimensionExclusions\": [],",
          " \"spatialDimensions\": []",
          " },",
          " \"columns\": [\"timestamp\", \"sequenceNumber\", \"product\", \"value\"]",
          " }",
          "}"
      ),
      "[{\"type\": \"doubleSum\", \"name\": \"value\", \"fieldName\": \"value\"}]",
      // minTimestamp
      0,
      Granularities.NONE,
      // maxRowCount
      10,
      String.join(
          "\n",
          "{",
          " \"queryType\": \"groupBy\",",
          " \"dataSource\": \"test_datasource\",",
          " \"granularity\": \"ALL\",",
          " \"dimensions\": [],",
          " \"aggregations\": [",
          " {\"type\": \"quantilesDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"value\", \"k\": 128}",
          " ],",
          " \"postAggregations\": [",
          " {\"type\": \"quantilesDoublesSketchToQuantile\", \"name\": \"quantile\", \"fraction\": 0.5, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
          " {\"type\": \"quantilesDoublesSketchToQuantiles\", \"name\": \"quantiles\", \"fractions\": [0, 0.5, 1], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},",
          " {\"type\": \"quantilesDoublesSketchToHistogram\", \"name\": \"histogram\", \"splitPoints\": [0.25, 0.5, 0.75], \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}}",
          " ],",
          " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]",
          "}"
      )
  );

  List<ResultRow> results = seq.toList();
  Assert.assertEquals(1, results.size());
  ResultRow row = results.get(0);

  Object sketchObject = row.get(0);
  Assert.assertTrue(sketchObject instanceof Long);
  long sketchValue = (long) sketchObject;
  Assert.assertEquals(400, sketchValue);

  // post agg
  Object quantileObject = row.get(1);
  Assert.assertTrue(quantileObject instanceof Double);
  // median value
  Assert.assertEquals(0.5, (double) quantileObject, 0.05);

  // post agg
  Object quantilesObject = row.get(2);
  Assert.assertTrue(quantilesObject instanceof double[]);
  double[] quantiles = (double[]) quantilesObject;
  // min value
  Assert.assertEquals(0, quantiles[0], 0.05);
  // median value
  Assert.assertEquals(0.5, quantiles[1], 0.05);
  // max value
  Assert.assertEquals(1, quantiles[2], 0.05);

  // post agg
  Object histogramObject = row.get(3);
  Assert.assertTrue(histogramObject instanceof double[]);
  double[] histogram = (double[]) histogramObject;
  for (final double bin : histogram) {
    // 400 items uniformly distributed into 4 bins
    Assert.assertEquals(100, bin, 100 * 0.2);
  }
}