use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
the class ArrayOfDoublesSketchAggregationTest method buildingSketchesAtQueryTimeWithNullsTest.
/**
 * Three buckets with null values.
 *
 * <p>Indexes the TSV resource with {@code value1}, {@code value2} and {@code value3} as doubleSum
 * metrics and then runs a groupBy query that builds two arrayOfDoublesSketch aggregations at query
 * time over the {@code key} dimension:
 * <ul>
 *   <li>{@code sketch} uses the metric columns value1, value2 and value3;</li>
 *   <li>{@code sketchNoNulls} uses value1, value2 and the virtual column {@code nonulls3}, which is
 *       defined by the expression {@code nvl(value3, 0.0)}.</li>
 * </ul>
 * Several expected values depend on {@code NullHandling.replaceWithDefault()}: results derived
 * from {@code sketch} are expected to be 40 when it returns true and 30 otherwise, while results
 * derived from {@code sketchNoNulls} are expected to be 40 in both modes.
 */
@Test
public void buildingSketchesAtQueryTimeWithNullsTest() throws Exception {
// Arguments, in order: input data file, parser spec JSON, ingestion aggregators JSON,
// minTimestamp, granularity, maxRowCount, query JSON.
Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data_three_values_and_nulls.tsv").getFile()), String.join("\n", "{", " \"type\": \"string\",", " \"parseSpec\": {", " \"format\": \"tsv\",", " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", " \"dimensionsSpec\": {", " \"dimensions\": [\"product\", \"key\"],", " \"dimensionExclusions\": [],", " \"spatialDimensions\": []", " },", " \"columns\": [\"timestamp\", \"product\", \"key\", \"value1\", \"value2\", \"value3\"]", " }", "}"), String.join("\n", "[", " {\"type\": \"doubleSum\", \"name\": \"value1\", \"fieldName\": \"value1\"},", " {\"type\": \"doubleSum\", \"name\": \"value2\", \"fieldName\": \"value2\"},", " {\"type\": \"doubleSum\", \"name\": \"value3\", \"fieldName\": \"value3\"}", "]"), // minTimestamp
0, Granularities.NONE, // maxRowCount
10, String.join("\n", "{", " \"queryType\": \"groupBy\",", " \"dataSource\": \"test_datasource\",", " \"granularity\": \"ALL\",", " \"dimensions\": [],", " \"virtualColumns\": [{\"type\": \"expression\",\"name\": \"nonulls3\",\"expression\": \"nvl(value3, 0.0)\",\"outputType\": \"DOUBLE\"}],", " \"aggregations\": [", " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"value3\" ], \"nominalEntries\": 1024},", " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketchNoNulls\", \"fieldName\": \"key\", \"metricColumns\": [ \"value1\", \"value2\", \"nonulls3\" ], \"nominalEntries\": 1024}", " ],", " \"postAggregations\": [", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimateNoNulls\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchNoNulls\"}},", " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", " \"type\": \"arrayOfDoublesSketchSetOp\",", " \"name\": \"union\",", " \"operation\": \"UNION\",", " \"nominalEntries\": 1024,", " \"numberOfValues\": 3,", " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", " }},", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", " \"type\": \"arrayOfDoublesSketchSetOp\",", " \"name\": \"intersection\",", " \"operation\": \"INTERSECT\",", " \"nominalEntries\": 1024,", " \"numberOfValues\": 3,", " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", " }},", " {\"type\": 
\"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", " \"type\": \"arrayOfDoublesSketchSetOp\",", " \"name\": \"anotb\",", " \"operation\": \"NOT\",", " \"nominalEntries\": 1024,", " \"numberOfValues\": 3,", " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", " }},", " {", " \"type\": \"arrayOfDoublesSketchToMeans\",", " \"name\": \"means\",", " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", " },", " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch-with-no-nulls\", \"column\": 3, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketchNoNulls\"}}", " ],", " \"intervals\": [\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
List<ResultRow> results = seq.toList();
// Granularity ALL with no dimensions: the query is expected to yield exactly one row.
Assert.assertEquals(1, results.size());
ResultRow row = results.get(0);
// Row positions as labelled by the assertion messages below: 0-1 are the two aggregations,
// 2-10 are the post-aggregations in the order the query declares them.
// NOTE(review): the 40 vs. 30 split presumably reflects rows with a null value3 being left out
// of the sketch when nulls are not replaced with defaults - confirm against the aggregator.
Assert.assertEquals("sketch", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(0), 0);
Assert.assertEquals("sketchNoNulls", 40.0, (double) row.get(1), 0);
Assert.assertEquals("estimate", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(2), 0);
Assert.assertEquals("estimateNoNulls", 40.0, (double) row.get(3), 0);
// Position 4 (quantiles-sketch) is checked further down.
// Union and intersection of the sketch with itself are expected to match its own estimate,
// and "sketch NOT sketch" is expected to be empty.
Assert.assertEquals("union", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(5), 0);
Assert.assertEquals("intersection", NullHandling.replaceWithDefault() ? 40.0 : 30.0, (double) row.get(6), 0);
Assert.assertEquals("anotb", 0, (double) row.get(7), 0);
// means: one mean per metric column of "sketch"
Object meansObj = row.get(8);
Assert.assertTrue(meansObj instanceof double[]);
double[] means = (double[]) meansObj;
Assert.assertEquals(3, means.length);
Assert.assertEquals(1.0, means[0], 0);
Assert.assertEquals(2.0, means[1], 0);
// Unlike the exact checks above, the third mean is compared with a tolerance of 0.1.
Assert.assertEquals(NullHandling.replaceWithDefault() ? 2.25 : 3.0, means[2], 0.1);
// quantiles-sketch: built from "column": 2 of "sketch"; min and max are both expected to be 2.0
Object obj = row.get(4);
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds.getN());
Assert.assertEquals(2.0, ds.getMinValue(), 0);
Assert.assertEquals(2.0, ds.getMaxValue(), 0);
// quantiles-sketch-with-nulls: built from "column": 3 of "sketch"; the expected minimum is 0.0
// only when nulls are replaced with defaults
Object objSketch2 = row.get(9);
Assert.assertTrue(objSketch2 instanceof DoublesSketch);
DoublesSketch ds2 = (DoublesSketch) objSketch2;
Assert.assertEquals(NullHandling.replaceWithDefault() ? 40 : 30, ds2.getN());
Assert.assertEquals(NullHandling.replaceWithDefault() ? 0.0 : 3.0, ds2.getMinValue(), 0);
Assert.assertEquals(3.0, ds2.getMaxValue(), 0);
// quantiles-sketch-no-nulls: built from "column": 3 of "sketchNoNulls"; expectations do not
// depend on the null-handling mode
Object objSketch3 = row.get(10);
Assert.assertTrue(objSketch3 instanceof DoublesSketch);
DoublesSketch ds3 = (DoublesSketch) objSketch3;
Assert.assertEquals(40, ds3.getN());
Assert.assertEquals(0.0, ds3.getMinValue(), 0);
Assert.assertEquals(3.0, ds3.getMaxValue(), 0);
}
use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
the class ArrayOfDoublesSketchAggregationTest method buildingSketchesAtQueryTimeTwoBucketsTest.
/**
 * Two buckets with a statistically significant difference; see the GenerateTestData class for
 * details.
 *
 * <p>Builds one arrayOfDoublesSketch per bucket at query time (filtered on {@code label} being
 * "test" or "control") and feeds both into an arrayOfDoublesSketchTTest post-aggregation. The
 * resulting p-value is expected to be within 0.001 of zero.
 */
@Test
public void buildingSketchesAtQueryTimeTwoBucketsTest() throws Exception {
  final File dataFile = new File(
      this.getClass().getClassLoader().getResource("tuple/bucket_test_data.tsv").getFile()
  );

  final String parserJson = String.join(
      "\n",
      "{",
      " \"type\": \"string\",",
      " \"parseSpec\": {",
      " \"format\": \"tsv\",",
      " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMdd\"},",
      " \"dimensionsSpec\": {",
      " \"dimensions\": [\"label\", \"userid\"]",
      " },",
      " \"columns\": [\"timestamp\", \"label\", \"userid\", \"parameter\"]",
      " }",
      "}"
  );

  final String ingestionAggregatorsJson = String.join(
      "\n",
      "[",
      " {\"type\": \"doubleSum\", \"name\": \"parameter\", \"fieldName\": \"parameter\"}",
      "]"
  );

  final String queryJson = String.join(
      "\n",
      "{",
      " \"queryType\": \"groupBy\",",
      " \"dataSource\": \"test_datasource\",",
      " \"granularity\": \"ALL\",",
      " \"dimensions\": [],",
      " \"aggregations\": [",
      " {",
      " \"type\": \"filtered\",",
      " \"filter\": {\"type\": \"selector\", \"dimension\": \"label\", \"value\": \"test\"},",
      " \"aggregator\": {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch-test\", \"fieldName\": \"userid\", \"metricColumns\": [\"parameter\"]}",
      " },",
      " {",
      " \"type\": \"filtered\",",
      " \"filter\": {\"type\": \"selector\", \"dimension\": \"label\", \"value\": \"control\"},",
      " \"aggregator\": {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch-control\", \"fieldName\": \"userid\", \"metricColumns\": [\"parameter\"]}",
      " }",
      " ],",
      " \"postAggregations\": [",
      " {\"type\": \"arrayOfDoublesSketchTTest\",",
      " \"name\": \"p-value\", \"fields\": [",
      " {\"type\": \"fieldAccess\", \"fieldName\": \"sketch-test\"},",
      " {\"type\": \"fieldAccess\", \"fieldName\": \"sketch-control\"}",
      " ]",
      " }",
      " ],",
      " \"intervals\": [\"2017-01-01T00:00:00.000Z/2017-01-31T00:00:00.000Z\"]",
      "}"
  );

  final Sequence<ResultRow> resultSequence = helper.createIndexAndRunQueryOnSegment(
      dataFile,
      parserJson,
      ingestionAggregatorsJson,
      0, // minTimestamp
      Granularities.NONE,
      2000, // maxRowCount
      queryJson
  );

  final List<ResultRow> rows = resultSequence.toList();
  Assert.assertEquals(1, rows.size());

  // p-value: position 2 follows the two filtered aggregations at positions 0 and 1
  final Object pValueHolder = rows.get(0).get(2);
  Assert.assertTrue(pValueHolder instanceof double[]);
  final double[] pValues = (double[]) pValueHolder;
  Assert.assertEquals(1, pValues.length);

  // The test and control buckets were constructed with different means, so the p-value
  // should be very close to zero.
  Assert.assertEquals(0, pValues[0], 0.001);
}
use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
the class ArrayOfDoublesSketchAggregationTest method buildingSketchesAtIngestionTimeTwoValuesAndNumericalKey.
/**
 * Builds an arrayOfDoublesSketch at ingestion time from a numerical key and two metric columns,
 * then checks the results of querying it.
 *
 * <p>The ingestion spec declares {@code key_num} as a {@code long} dimension and uses it as the
 * sketch key, with {@code value1} and {@code value2} as the metric columns. The groupBy query
 * aggregates the pre-built {@code sketch} column (with {@code numberOfValues} set to 2) and
 * applies estimate, quantiles-sketch, set-operation and means post-aggregations to it.
 */
@Test
public void buildingSketchesAtIngestionTimeTwoValuesAndNumericalKey() throws Exception {
// Arguments, in order: input data file, parser spec JSON, ingestion aggregators JSON,
// minTimestamp, granularity, maxRowCount, query JSON.
Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(new File(this.getClass().getClassLoader().getResource("tuple/array_of_doubles_build_data_two_values_and_key_as_number.tsv").getFile()), String.join("\n", "{", " \"type\": \"string\",", " \"parseSpec\": {", " \"format\": \"tsv\",", " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", " \"dimensionsSpec\": {", " \"dimensions\": [\"product\", {\"type\": \"long\", \"name\": \"key_num\"}],", " \"dimensionExclusions\": [],", " \"spatialDimensions\": []", " },", " \"columns\": [\"timestamp\", \"product\", \"key\", \"key_num\", \"value1\", \"value2\"]", " }", "}"), String.join("\n", "[", " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"key_num\", \"metricColumns\": [ \"value1\", \"value2\" ], \"nominalEntries\": 1024}", "]"), // minTimestamp
0, Granularities.NONE, // maxRowCount
10, String.join("\n", "{", " \"queryType\": \"groupBy\",", " \"dataSource\": \"test_datasource\",", " \"granularity\": \"ALL\",", " \"dimensions\": [],", " \"aggregations\": [", " {\"type\": \"arrayOfDoublesSketch\", \"name\": \"sketch\", \"fieldName\": \"sketch\", \"nominalEntries\": 1024, \"numberOfValues\": 2}", " ],", " \"postAggregations\": [", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"estimate\", \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", " {\"type\": \"arrayOfDoublesSketchToQuantilesSketch\", \"name\": \"quantiles-sketch\", \"column\": 2, \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}},", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"union\", \"field\": {", " \"type\": \"arrayOfDoublesSketchSetOp\",", " \"name\": \"union\",", " \"operation\": \"UNION\",", " \"nominalEntries\": 1024,", " \"numberOfValues\": 2,", " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", " }},", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"intersection\", \"field\": {", " \"type\": \"arrayOfDoublesSketchSetOp\",", " \"name\": \"intersection\",", " \"operation\": \"INTERSECT\",", " \"nominalEntries\": 1024,", " \"numberOfValues\": 2,", " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", " }},", " {\"type\": \"arrayOfDoublesSketchToEstimate\", \"name\": \"anotb\", \"field\": {", " \"type\": \"arrayOfDoublesSketchSetOp\",", " \"name\": \"anotb\",", " \"operation\": \"NOT\",", " \"nominalEntries\": 1024,", " \"numberOfValues\": 2,", " \"fields\": [{\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}, {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}]", " }},", " {", " \"type\": \"arrayOfDoublesSketchToMeans\",", " \"name\": \"means\",", " \"field\": {\"type\": \"fieldAccess\", \"fieldName\": \"sketch\"}", " }", " ],", " \"intervals\": 
[\"2015-01-01T00:00:00.000Z/2015-01-31T00:00:00.000Z\"]", "}"));
List<ResultRow> results = seq.toList();
// Granularity ALL with no dimensions: the query is expected to yield exactly one row.
Assert.assertEquals(1, results.size());
ResultRow row = results.get(0);
// Row positions as labelled by the assertion messages below: 0 is the aggregation,
// 1-6 are the post-aggregations in the order the query declares them.
Assert.assertEquals("sketch", 40.0, (double) row.get(0), 0);
Assert.assertEquals("estimate", 40.0, (double) row.get(1), 0);
// Position 2 (quantiles-sketch) is checked further down.
// Union and intersection of the sketch with itself are expected to match its own estimate,
// and "sketch NOT sketch" is expected to be empty.
Assert.assertEquals("union", 40.0, (double) row.get(3), 0);
Assert.assertEquals("intersection", 40.0, (double) row.get(4), 0);
Assert.assertEquals("anotb", 0, (double) row.get(5), 0);
// means: one mean per metric column of the sketch
Object meansObj = row.get(6);
Assert.assertTrue(meansObj instanceof double[]);
double[] means = (double[]) meansObj;
Assert.assertEquals(2, means.length);
Assert.assertEquals(1.0, means[0], 0);
Assert.assertEquals(2.0, means[1], 0);
// quantiles-sketch: built from "column": 2 of the sketch; min and max are both expected to be 2.0
Object obj = row.get(2);
Assert.assertTrue(obj instanceof DoublesSketch);
DoublesSketch ds = (DoublesSketch) obj;
Assert.assertEquals(40, ds.getN());
Assert.assertEquals(2.0, ds.getMinValue(), 0);
Assert.assertEquals(2.0, ds.getMaxValue(), 0);
}
use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
the class FixedBucketsHistogramGroupByQueryTest method constructorFeeder.
/**
 * Supplies the constructor arguments for the parameterized test.
 *
 * <p>Three group-by configurations are used (strategy v1, strategy v1 single-threaded, and
 * strategy v2). For each one a runner factory is created, its closer is registered with
 * {@code RESOURCE_CLOSER}, and one entry is produced per query runner made from that factory.
 *
 * @return entries of the form {@code {testName, factory, runner}}
 */
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() {
  // Strategy v1, with a cap on intermediate rows.
  final GroupByQueryConfig strategyV1 = new GroupByQueryConfig() {
    @Override
    public String toString() {
      return "v1";
    }

    @Override
    public String getDefaultStrategy() {
      return GroupByStrategySelector.STRATEGY_V1;
    }
  };
  strategyV1.setMaxIntermediateRows(10000);

  // Strategy v1 forced to run single-threaded, with the same cap.
  final GroupByQueryConfig strategyV1SingleThreaded = new GroupByQueryConfig() {
    @Override
    public String toString() {
      return "v1SingleThreaded";
    }

    @Override
    public String getDefaultStrategy() {
      return GroupByStrategySelector.STRATEGY_V1;
    }

    @Override
    public boolean isSingleThreaded() {
      return true;
    }
  };
  strategyV1SingleThreaded.setMaxIntermediateRows(10000);

  // Strategy v2, left at its default intermediate-row setting.
  final GroupByQueryConfig strategyV2 = new GroupByQueryConfig() {
    @Override
    public String toString() {
      return "v2";
    }

    @Override
    public String getDefaultStrategy() {
      return GroupByStrategySelector.STRATEGY_V2;
    }
  };

  final List<Object[]> parameterSets = new ArrayList<>();
  for (GroupByQueryConfig queryConfig : ImmutableList.of(strategyV1, strategyV1SingleThreaded, strategyV2)) {
    final Pair<GroupByQueryRunnerFactory, Closer> factoryWithCloser =
        GroupByQueryRunnerTest.makeQueryRunnerFactory(queryConfig);
    // Register the closer so the factory's resources can be released later.
    RESOURCE_CLOSER.register(factoryWithCloser.rhs);
    final GroupByQueryRunnerFactory runnerFactory = factoryWithCloser.lhs;

    for (QueryRunner<ResultRow> queryRunner : QueryRunnerTestHelper.makeQueryRunners(runnerFactory)) {
      final String label = StringUtils.format(
          "config=%s, runner=%s",
          queryConfig.toString(),
          queryRunner.toString()
      );
      parameterSets.add(new Object[] { label, runnerFactory, queryRunner });
    }
  }
  return parameterSets;
}
use of org.apache.druid.query.groupby.ResultRow in project druid by druid-io.
the class GroupByBenchmark method querySingleQueryableIndex.
/**
 * Benchmarks the group-by query against a single queryable index segment, built from the first
 * index held by {@code state}, and passes the last result row to the blackhole.
 *
 * @param blackhole consumer for the final row
 * @param state benchmark state providing the queryable indexes
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
  final SegmentId runnerSegmentId = SegmentId.dummy("qIndex");
  final QueryableIndexSegment segment = new QueryableIndexSegment(
      state.queryableIndexes.get(0),
      SegmentId.dummy("qIndex")
  );
  final QueryRunner<ResultRow> queryRunner =
      QueryBenchmarkUtil.makeQueryRunner(factory, runnerSegmentId, segment);

  final Sequence<ResultRow> rowSequence = GroupByBenchmark.runQuery(factory, queryRunner, query);
  // Walk the whole sequence, keeping only the most recently seen row.
  final ResultRow finalRow = rowSequence.accumulate(null, (previous, current) -> current);
  blackhole.consume(finalRow);
}
Aggregations