use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class MetricManipulatorFnsTest method constructorFeeder.
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() {
  final ArrayList<Object[]> constructorArrays = new ArrayList<>();
  final long longVal = 13789;
  LongMinAggregator longMinAggregator = new LongMinAggregator(new TestLongColumnSelector() {
    @Override
    public long get() {
      return longVal;
    }
  });
  LongMinAggregatorFactory longMinAggregatorFactory = new LongMinAggregatorFactory(NAME, FIELD);
  constructorArrays.add(new Object[] {
      longMinAggregatorFactory, longMinAggregator, longMinAggregator, longMinAggregator, longVal, longVal
  });
  HyperUniquesAggregatorFactory hyperUniquesAggregatorFactory = new HyperUniquesAggregatorFactory(NAME, FIELD);
  HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  collector.add((short) 1, (byte) 5);
  constructorArrays.add(new Object[] {
      hyperUniquesAggregatorFactory, collector, collector, collector.estimateCardinality(), collector.toByteArray(), collector
  });
  LongSumAggregatorFactory longSumAggregatorFactory = new LongSumAggregatorFactory(NAME, FIELD);
  LongSumAggregator longSumAggregator = new LongSumAggregator(new TestLongColumnSelector() {
    @Override
    public long get() {
      return longVal;
    }
  });
  constructorArrays.add(new Object[] {
      longSumAggregatorFactory, longSumAggregator, longSumAggregator, longSumAggregator, longVal, longVal
  });
  for (Object[] argList : constructorArrays) {
    Assert.assertEquals(
        String.format("Arglist %s is too short. Expected 6 found %d", Arrays.toString(argList), argList.length),
        6,
        argList.length
    );
  }
  return constructorArrays;
}
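The hyperUniques arg list above pairs the raw collector with its finalized form (estimateCardinality()) and its serialized form (toByteArray()). Below is a minimal sketch of that collector lifecycle outside the test; it assumes Guava's murmur3_128 as the hash function (the hyperUnique column hashes raw values similarly at ingest), and the class name is purely illustrative.

import com.google.common.hash.Hashing;
import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class CollectorLifecycleSketch {
  public static void main(String[] args) {
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    // add() takes a pre-hashed value; the hyperUnique aggregator hashes raw values for you.
    collector.add(Hashing.murmur3_128().hashString("some-value", StandardCharsets.UTF_8).asBytes());
    double estimate = collector.estimateCardinality();  // finalized form
    byte[] serialized = collector.toByteArray();        // serialized form, as stored in segments
    // Round-trip: rebuild a collector from its serialized bytes.
    HyperLogLogCollector restored = HyperLogLogCollector.makeCollector(ByteBuffer.wrap(serialized));
    System.out.println(estimate + " == " + restored.estimateCardinality());
  }
}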
use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class SchemaEvolutionTest method setUp.
@Before
public void setUp() throws IOException {
  // Index1: c1 is a string, c2 nonexistent, "uniques" nonexistent
  index1 = IndexBuilder.create()
      .tmpDir(temporaryFolder.newFolder())
      .schema(new IncrementalIndexSchema.Builder()
          .withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("cnt") })
          .withRollup(false)
          .build())
      .rows(inputRowsWithDimensions(ImmutableList.of("c1")))
      .buildMMappedIndex();
  // Index2: c1 is a long, c2 is a string, "uniques" is uniques on c2
  index2 = IndexBuilder.create()
      .tmpDir(temporaryFolder.newFolder())
      .schema(new IncrementalIndexSchema.Builder()
          .withMetrics(new AggregatorFactory[] {
              new CountAggregatorFactory("cnt"),
              new LongSumAggregatorFactory("c1", "c1"),
              new HyperUniquesAggregatorFactory("uniques", "c2")
          })
          .withRollup(false)
          .build())
      .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
      .buildMMappedIndex();
  // Index3: c1 is a float, c2 is a string, "uniques" is uniques on c2
  index3 = IndexBuilder.create()
      .tmpDir(temporaryFolder.newFolder())
      .schema(new IncrementalIndexSchema.Builder()
          .withMetrics(new AggregatorFactory[] {
              new CountAggregatorFactory("cnt"),
              new DoubleSumAggregatorFactory("c1", "c1"),
              new HyperUniquesAggregatorFactory("uniques", "c2")
          })
          .withRollup(false)
          .build())
      .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
      .buildMMappedIndex();
  // Index4: c1 is nonexistent, c2 is uniques on c2
  index4 = IndexBuilder.create()
      .tmpDir(temporaryFolder.newFolder())
      .schema(new IncrementalIndexSchema.Builder()
          .withMetrics(new AggregatorFactory[] { new HyperUniquesAggregatorFactory("c2", "c2") })
          .withRollup(false)
          .build())
      .rows(inputRowsWithDimensions(ImmutableList.<String>of()))
      .buildMMappedIndex();
  if (index4.getAvailableDimensions().size() != 0) {
    // Just double-checking that the exclusions are working properly
    throw new ISE("WTF?! Expected no dimensions in index4");
  }
}
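inputRowsWithDimensions is a helper defined elsewhere in this test, so its exact output is not shown here. As a rough guide, a plausible shape for one of the rows it produces can be sketched with Druid's MapBasedInputRow (timestamp and values below are illustrative, not the test's actual data):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;

public class InputRowSketch {
  public static void main(String[] args) {
    // One row carrying a single string dimension "c2"; index2/index3 build their
    // "uniques" metric by hashing this column at ingest time.
    InputRow row = new MapBasedInputRow(
        1000L,                           // event timestamp in millis (illustrative)
        ImmutableList.of("c2"),          // dimensions to index from the event
        ImmutableMap.<String, Object>of("c1", 9L, "c2", "a")
    );
    System.out.println(row.getDimension("c2"));
  }
}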
use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class SchemaEvolutionTest method testHyperUniqueEvolutionTimeseries.
@Test
public void testHyperUniqueEvolutionTimeseries() {
  final TimeseriesQueryRunnerFactory factory = QueryRunnerTestHelper.newTimeseriesQueryRunnerFactory();
  final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource(DATA_SOURCE)
      .intervals("1000/3000")
      .aggregators(ImmutableList.<AggregatorFactory>of(new HyperUniquesAggregatorFactory("uniques", "uniques")))
      .build();
  // index1 has no "uniques" column
  Assert.assertEquals(
      timeseriesResult(ImmutableMap.of("uniques", 0)),
      runQuery(query, factory, ImmutableList.of(index1))
  );
  // index1 (no uniques) + index2 and index3 (yes uniques); we should be able to combine
  Assert.assertEquals(
      timeseriesResult(ImmutableMap.of("uniques", 4.003911343725148d)),
      runQuery(query, factory, ImmutableList.of(index1, index2, index3))
  );
}
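The second assertion works because the factory can merge partial results from segments that do and do not carry the column: collectors are combined by folding, and finalization turns the merged collector into the double estimate asserted above. A brief sketch of those two factory hooks (the collectors here are empty, so the values are illustrative only):

import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;

public class CombineFinalizeSketch {
  public static void main(String[] args) {
    HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("uniques", "uniques");
    HyperLogLogCollector lhs = HyperLogLogCollector.makeLatestCollector();
    HyperLogLogCollector rhs = HyperLogLogCollector.makeLatestCollector();
    Object merged = factory.combine(lhs, rhs);              // folds one collector into the other
    Object estimate = factory.finalizeComputation(merged);  // Double cardinality estimate
    System.out.println(estimate);
  }
}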
use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class TopNTypeInterfaceBenchmark method setupQueries.
private void setupQueries() {
  // queries for the basic schema
  Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
  BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
  {
    // basic.A
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
    queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
    queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
    queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
    // Use an IdentityExtractionFn to force usage of DimExtractionTopNAlgorithm
    TopNQueryBuilder queryBuilderString = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance()))
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    // DimExtractionTopNAlgorithm is always used for numeric columns
    TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("metLongUniform")
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("metFloatNormal")
        .metric("sumFloatNormal")
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("string", queryBuilderString);
    basicQueries.put("long", queryBuilderLong);
    basicQueries.put("float", queryBuilderFloat);
  }
  {
    // basic.numericSort
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("numericSort", queryBuilderA);
  }
  {
    // basic.alphanumericSort
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
    List<AggregatorFactory> queryAggs = new ArrayList<>();
    queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
    TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
        .dataSource("blah")
        .granularity(Granularities.ALL)
        .dimension("dimUniform")
        .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
        .intervals(intervalSpec)
        .aggregators(queryAggs);
    basicQueries.put("alphanumericSort", queryBuilderA);
  }
  SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
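The builders stored in SCHEMA_QUERY_MAP are not complete queries yet: a TopN query also needs a threshold, which the benchmark presumably sets per trial. A sketch of turning one stored builder into a runnable query (threshold value illustrative):

import io.druid.query.topn.TopNQuery;
import io.druid.query.topn.TopNQueryBuilder;

// Assuming the benchmark's SCHEMA_QUERY_MAP from above is in scope:
TopNQueryBuilder builder = SCHEMA_QUERY_MAP.get("basic").get("string");
TopNQuery query = builder
    .threshold(10)   // required: a TopNQuery rejects a non-positive threshold
    .build();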
use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
the class GroupByQueryRunnerTest method testSubqueryWithHyperUniquesPostAggregator.
@Test
public void testSubqueryWithHyperUniquesPostAggregator() {
  GroupByQuery subquery = GroupByQuery.builder()
      .setDataSource(QueryRunnerTestHelper.dataSource)
      .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
      .setDimensions(Lists.<DimensionSpec>newArrayList())
      .setAggregatorSpecs(Arrays.asList(
          QueryRunnerTestHelper.rowsCount,
          new LongSumAggregatorFactory("idx", "index"),
          new HyperUniquesAggregatorFactory("quality_uniques_inner", "quality_uniques")))
      .setPostAggregatorSpecs(Arrays.<PostAggregator>asList(
          new FieldAccessPostAggregator("quality_uniques_inner_post", "quality_uniques_inner")))
      .setGranularity(QueryRunnerTestHelper.dayGran)
      .build();
  GroupByQuery query = GroupByQuery.builder()
      .setDataSource(subquery)
      .setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird)
      .setDimensions(Lists.<DimensionSpec>newArrayList())
      .setAggregatorSpecs(Arrays.asList(
          new LongSumAggregatorFactory("rows", "rows"),
          new LongSumAggregatorFactory("idx", "idx"),
          new HyperUniquesAggregatorFactory("quality_uniques_outer", "quality_uniques_inner_post")))
      .setPostAggregatorSpecs(Arrays.<PostAggregator>asList(
          new HyperUniqueFinalizingPostAggregator("quality_uniques_outer_post", "quality_uniques_outer")))
      .setGranularity(QueryRunnerTestHelper.allGran)
      .build();
  List<Row> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow(
          "2011-04-01",
          "rows", 26L,
          "idx", 12446L,
          "quality_uniques_outer", 9.019833517963864,
          "quality_uniques_outer_post", 9.019833517963864));
  // Subqueries are handled by the ToolChest
  Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
  TestHelper.assertExpectedObjects(expectedResults, results, "");
}
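HyperUniqueFinalizingPostAggregator is what lets the outer query expose the estimate while the raw collector stays available for merging. A minimal sketch of what it does at compute time (the map key mirrors the field name it reads; the empty collector makes the value illustrative):

import com.google.common.collect.ImmutableMap;
import io.druid.query.aggregation.hyperloglog.HyperLogLogCollector;
import io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator;

public class FinalizingPostAggSketch {
  public static void main(String[] args) {
    HyperUniqueFinalizingPostAggregator post = new HyperUniqueFinalizingPostAggregator(
        "quality_uniques_outer_post", "quality_uniques_outer");
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    // compute() looks up the raw collector by field name and returns its double estimate.
    Object estimate = post.compute(ImmutableMap.<String, Object>of("quality_uniques_outer", collector));
    System.out.println(estimate);
  }
}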