Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From class CalciteQueryTest, method testApproxCountDistinct.
@Test
public void testApproxCountDistinct() throws Exception {
  // Cannot vectorize due to virtual columns.
  cannotVectorize();
  testQuery(
      "SELECT\n"
      + "  SUM(cnt),\n"
      + "  APPROX_COUNT_DISTINCT(dim2),\n" // uppercase
      + "  approx_count_distinct(dim2) FILTER(WHERE dim2 <> ''),\n" // lowercase; also, filtered
      + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1)),\n" // on extractionFn
      + "  APPROX_COUNT_DISTINCT(SUBSTRING(dim2, 1, 1) || 'x'),\n" // on expression
      + "  approx_count_distinct(unique_dim1)\n" // on native hyperUnique column
      + "FROM druid.foo",
      ImmutableList.of(
          Druids.newTimeseriesQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE1)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .granularity(Granularities.ALL)
                .virtualColumns(
                    expressionVirtualColumn("v0", "concat(substring(\"dim2\", 0, 1),'x')", ColumnType.STRING)
                )
                .aggregators(
                    aggregators(
                        new LongSumAggregatorFactory("a0", "cnt"),
                        new CardinalityAggregatorFactory(
                            "a1",
                            null,
                            dimensions(new DefaultDimensionSpec("dim2", "dim2")),
                            false,
                            true
                        ),
                        new FilteredAggregatorFactory(
                            new CardinalityAggregatorFactory(
                                "a2",
                                null,
                                dimensions(new DefaultDimensionSpec("dim2", "dim2")),
                                false,
                                true
                            ),
                            not(selector("dim2", "", null))
                        ),
                        new CardinalityAggregatorFactory(
                            "a3",
                            null,
                            dimensions(
                                new ExtractionDimensionSpec(
                                    "dim2",
                                    "dim2",
                                    ColumnType.STRING,
                                    new SubstringDimExtractionFn(0, 1)
                                )
                            ),
                            false,
                            true
                        ),
                        new CardinalityAggregatorFactory(
                            "a4",
                            null,
                            dimensions(new DefaultDimensionSpec("v0", "v0", ColumnType.STRING)),
                            false,
                            true
                        ),
                        new HyperUniquesAggregatorFactory("a5", "unique_dim1", false, true)
                    )
                )
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
      ),
      NullHandling.replaceWithDefault()
      ? ImmutableList.of(new Object[]{6L, 3L, 2L, 2L, 2L, 6L})
      : ImmutableList.of(new Object[]{6L, 3L, 2L, 1L, 1L, 6L})
  );
}
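For reference, a minimal sketch of the two native aggregator factories this SQL plans to, with constructor arguments mirroring the calls in the test above (the output names here are made up):

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;

public class ApproxCountDistinctSketch {
  public static void main(String[] args) {
    // APPROX_COUNT_DISTINCT on a string dimension plans to a cardinality aggregator.
    AggregatorFactory onStringDim = new CardinalityAggregatorFactory(
        "distinct_dim2",            // output name (made up)
        null,                       // deprecated fieldNames; unused when fields are given
        ImmutableList.of(new DefaultDimensionSpec("dim2", "dim2")),
        false,                      // byRow
        true                        // round: SQL rounds approximate counts to whole numbers
    );
    // On a pre-aggregated hyperUnique column it plans to a hyperUnique aggregator instead.
    AggregatorFactory onHllColumn = new HyperUniquesAggregatorFactory(
        "distinct_unique_dim1",     // output name (made up)
        "unique_dim1",              // complex hyperUnique column
        false,                      // isInputHyperUnique
        true                        // round
    );
    System.out.println(onStringDim + " / " + onHllColumn);
  }
}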
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From class TimeseriesQueryRunnerTest, method testTimeseriesCardinalityAggOnHyperUnique.
@Test
public void testTimeseriesCardinalityAggOnHyperUnique() {
  // Cardinality aggregator on complex columns (like hyperUnique) returns 0.
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .intervals(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .aggregators(
          QueryRunnerTestHelper.ROWS_COUNT,
          new CardinalityAggregatorFactory(
              "cardinality",
              ImmutableList.of(DefaultDimensionSpec.of("quality_uniques")),
              false
          ),
          new HyperUniquesAggregatorFactory("hyperUnique", "quality_uniques", false, false)
      )
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .build();
  List<Result<TimeseriesResultValue>> expectedResults = Collections.singletonList(
      new Result<>(
          DateTimes.of("2011-04-01"),
          new TimeseriesResultValue(
              ImmutableMap.of(
                  "rows", 26L,
                  "cardinality", NullHandling.replaceWithDefault() ? 1.0002442201269182 : 0.0d,
                  "hyperUnique", 9.019833517963864d
              )
          )
      )
  );
  Iterable<Result<TimeseriesResultValue>> results = runner.run(QueryPlus.wrap(query)).toList();
  assertExpectedResults(expectedResults, results);
}
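The lesson of the expected values: CardinalityAggregatorFactory hashes string dimension values and cannot interpret a complex hyperUnique column, so its output here is a meaningless near-zero estimate. To count the distinct values such a column has already sketched, use HyperUniquesAggregatorFactory instead; a minimal sketch of such a query, reusing the test's helpers:

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.Druids;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
import org.apache.druid.query.timeseries.TimeseriesQuery;

// Merge the stored HLL sketches instead of hashing the complex column's values.
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
    .dataSource(QueryRunnerTestHelper.DATA_SOURCE)
    .intervals(QueryRunnerTestHelper.FIRST_TO_THIRD)
    .granularity(Granularities.ALL)
    .aggregators(new HyperUniquesAggregatorFactory("quality_uniques_estimate", "quality_uniques"))
    .build();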
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From class SegmentAnalyzerTest, method testAnalyzingSegmentWithNonExistentAggregator.
/**
 * This test verifies that if a segment was created using an unknown or invalid aggregator
 * (which can happen when an aggregator is removed in a later version), analyzing the
 * segment does not fail, and the analysis of the affected complex column is reported
 * as an error.
 *
 * @throws IOException
 */
@Test
public void testAnalyzingSegmentWithNonExistentAggregator() throws IOException {
  final URL resource = SegmentAnalyzerTest.class.getClassLoader().getResource("druid.sample.numeric.tsv");
  CharSource source = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
  String invalid_aggregator = "invalid_aggregator";
  AggregatorFactory[] metrics = new AggregatorFactory[]{
      new DoubleSumAggregatorFactory(TestIndex.DOUBLE_METRICS[0], "index"),
      new HyperUniquesAggregatorFactory("quality_uniques", "quality"),
      new InvalidAggregatorFactory(invalid_aggregator, "quality")
  };
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
      .withTimestampSpec(new TimestampSpec("ds", "auto", null))
      .withDimensionsSpec(TestIndex.DIMENSIONS_SPEC)
      .withMetrics(metrics)
      .withRollup(true)
      .build();
  final IncrementalIndex retVal = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(schema)
      .setMaxRowCount(10000)
      .build();
  IncrementalIndex incrementalIndex = TestIndex.loadIncrementalIndex(retVal, source);
  // Analyze the in-memory segment.
  {
    SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
    IncrementalIndexSegment segment = new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("ds"));
    Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
    ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator);
    Assert.assertFalse(columnAnalysis.isError());
    Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType());
    Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature());
  }
  // Persist the index.
  final File segmentFile = TestIndex.INDEX_MERGER.persist(
      incrementalIndex,
      temporaryFolder.newFolder(),
      TestIndex.INDEX_SPEC,
      null
  );
  // Unload the complex serde, then analyze the persisted segment.
  ComplexMetrics.unregisterSerde(InvalidAggregatorFactory.TYPE);
  {
    SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
    QueryableIndexSegment segment = new QueryableIndexSegment(
        TestIndex.INDEX_IO.loadIndex(segmentFile),
        SegmentId.dummy("ds")
    );
    Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
    ColumnAnalysis invalidColumnAnalysis = analyses.get(invalid_aggregator);
    Assert.assertTrue(invalidColumnAnalysis.isError());
    Assert.assertEquals("error:unknown_complex_invalid_complex_column_type", invalidColumnAnalysis.getErrorMessage());
    // Also run a segment metadata query to verify it doesn't break.
    final List<SegmentAnalysis> results = getSegmentAnalysises(segment, EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
    for (SegmentAnalysis result : results) {
      Assert.assertTrue(result.getColumns().get(invalid_aggregator).isError());
    }
  }
}
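The error path above hinges on the complex-serde registry: once the serde registered under InvalidAggregatorFactory.TYPE is unregistered, the persisted column's type can no longer be resolved, and its analysis comes back as an error. A minimal sketch of the registry calls involved, using the real hyperUnique serde as a stand-in (its exact constructor can vary across Druid versions):

import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
import org.apache.druid.segment.serde.ComplexMetricSerde;
import org.apache.druid.segment.serde.ComplexMetrics;

public class SerdeRegistrySketch {
  public static void main(String[] args) {
    // Complex serdes are registered per type name; segment loading looks them up by name.
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    // A lookup for a type with no registered serde returns null, which is what
    // SegmentAnalyzer surfaces as an error analysis for the column.
    ComplexMetricSerde serde = ComplexMetrics.getSerdeForType("hyperUnique");
    System.out.println(serde != null ? "registered" : "unknown type");
  }
}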
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From class GroupByQueryRunnerTest, method testGroupByCardinalityAggOnHyperUnique.
@Test
public void testGroupByCardinalityAggOnHyperUnique() {
  // Cardinality aggregator on complex columns (like hyperUnique) returns 0.
  GroupByQuery query = makeQueryBuilder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .setAggregatorSpecs(
          QueryRunnerTestHelper.ROWS_COUNT,
          new CardinalityAggregatorFactory(
              "cardinality",
              ImmutableList.of(DefaultDimensionSpec.of("quality_uniques")),
              false
          ),
          new HyperUniquesAggregatorFactory("hyperUnique", "quality_uniques", false, false)
      )
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .build();
  List<ResultRow> expectedResults = Collections.singletonList(
      makeRow(
          query,
          "2011-04-01",
          "rows", 26L,
          "cardinality", NullHandling.replaceWithDefault() ? 1.0002442201269182 : 0.0d,
          "hyperUnique", 9.019833517963864d
      )
  );
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, results, "cardinality-agg");
}
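Why the expected "cardinality" value is 1.0002442201269182 rather than exactly 1.0: HyperLogLog produces floating-point estimates, and that number is its estimate after seeing a single distinct value. A minimal sketch at the collector level (direct usage assumed; not taken from the test):

import org.apache.druid.hll.HyperLogLogCollector;
import org.apache.druid.hll.HyperLogLogHash;

public class HllEstimateSketch {
  public static void main(String[] args) {
    // One distinct value added to an empty collector...
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add(HyperLogLogHash.getDefault().hash("automotive"));
    // ...yields an estimate of roughly 1.00024, not exactly 1.0.
    System.out.println(collector.estimateCardinality());
  }
}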
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From class GroupByQueryRunnerTest, method testSubqueryWithHyperUniques.
@Test
public void testSubqueryWithHyperUniques() {
  GroupByQuery subquery = makeQueryBuilder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .setDimensions(new DefaultDimensionSpec("quality", "alias"))
      .setAggregatorSpecs(
          QueryRunnerTestHelper.ROWS_COUNT,
          new LongSumAggregatorFactory("idx", "index"),
          new HyperUniquesAggregatorFactory("quality_uniques", "quality_uniques")
      )
      .setGranularity(QueryRunnerTestHelper.DAY_GRAN)
      .build();
  GroupByQuery query = makeQueryBuilder()
      .setDataSource(subquery)
      .setQuerySegmentSpec(QueryRunnerTestHelper.FIRST_TO_THIRD)
      .setDimensions(new DefaultDimensionSpec("alias", "alias"))
      .setAggregatorSpecs(
          new LongSumAggregatorFactory("rows", "rows"),
          new LongSumAggregatorFactory("idx", "idx"),
          new HyperUniquesAggregatorFactory("uniq", "quality_uniques")
      )
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .build();
  List<ResultRow> expectedResults = Arrays.asList(
      makeRow(query, "2011-04-01", "alias", "automotive", "rows", 2L, "idx", 282L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "business", "rows", 2L, "idx", 230L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "entertainment", "rows", 2L, "idx", 324L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "health", "rows", 2L, "idx", 233L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 5317L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "news", "rows", 2L, "idx", 235L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "premium", "rows", 6L, "idx", 5405L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "technology", "rows", 2L, "idx", 175L, "uniq", 1.0002442201269182),
      makeRow(query, "2011-04-01", "alias", "travel", "rows", 2L, "idx", 245L, "uniq", 1.0002442201269182)
  );
  // Subqueries are handled by the ToolChest.
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, results, "subquery-hyperunique");
}
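In the outer query, HyperUniquesAggregatorFactory("uniq", "quality_uniques") re-aggregates the complex column produced by the subquery by folding the per-row sketches together. A minimal sketch of that merge at the collector level (direct usage assumed; not taken from the test):

import org.apache.druid.hll.HyperLogLogCollector;
import org.apache.druid.hll.HyperLogLogHash;

public class HllFoldSketch {
  public static void main(String[] args) {
    // Two sketches that have each seen one distinct value...
    HyperLogLogCollector left = HyperLogLogCollector.makeLatestCollector();
    HyperLogLogCollector right = HyperLogLogCollector.makeLatestCollector();
    left.add(HyperLogLogHash.getDefault().hash("automotive"));
    right.add(HyperLogLogHash.getDefault().hash("business"));
    // ...fold into one sketch estimating the cardinality of the union (~2).
    left.fold(right);
    System.out.println(left.estimateCardinality());
  }
}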