Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class TopNQueryQueryToolChestTest, the method doTestCacheStrategyOrderByPost:
private void doTestCacheStrategyOrderByPost(final ColumnType valueType, final Object dimValue) throws IOException {
    CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> strategy = new TopNQueryQueryToolChest(null, null).getCacheStrategy(
        new TopNQuery(
            new TableDataSource("dummy"),
            VirtualColumns.EMPTY,
            new DefaultDimensionSpec("test", "test", valueType),
            new NumericTopNMetricSpec("post"),
            3,
            new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2015-01-01/2015-01-02"))),
            null,
            Granularities.ALL,
            ImmutableList.of(new HyperUniquesAggregatorFactory("metric1", "test", false, false), new CountAggregatorFactory("metric2")),
            ImmutableList.of(new ArithmeticPostAggregator("post", "+", ImmutableList.of(new FinalizingFieldAccessPostAggregator("metric1", "metric1"), new FieldAccessPostAggregator("metric2", "metric2")))),
            null
        )
    );
    HyperLogLogCollector collector = getIntermediateHllCollector(valueType.getType(), dimValue);
    // test timestamps that result in integer size millis
    final Result<TopNResultValue> result1 = new Result<>(
        DateTimes.utc(123L),
        new TopNResultValue(Collections.singletonList(
            ImmutableMap.of("test", dimValue, "metric1", collector, "metric2", 2, "post", collector.estimateCardinality() + 2)
        ))
    );
    Object preparedValue = strategy.prepareForSegmentLevelCache().apply(result1);
    ObjectMapper objectMapper = TestHelper.makeJsonMapper();
    Object fromCacheValue = objectMapper.readValue(objectMapper.writeValueAsBytes(preparedValue), strategy.getCacheObjectClazz());
    Result<TopNResultValue> fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue);
    Assert.assertEquals(result1, fromCacheResult);
    // test timestamps that result in integer size millis
    final Result<TopNResultValue> resultLevelCacheResult = new Result<>(
        DateTimes.utc(123L),
        new TopNResultValue(Collections.singletonList(
            ImmutableMap.of("test", dimValue, "metric1", collector.estimateCardinality(), "metric2", 2, "post", collector.estimateCardinality() + 2)
        ))
    );
    Object preparedResultCacheValue = strategy.prepareForCache(true).apply(resultLevelCacheResult);
    Object fromResultCacheValue = objectMapper.readValue(objectMapper.writeValueAsBytes(preparedResultCacheValue), strategy.getCacheObjectClazz());
    Result<TopNResultValue> fromResultCacheResult = strategy.pullFromCache(true).apply(fromResultCacheValue);
    Assert.assertEquals(resultLevelCacheResult, fromResultCacheResult);
}
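The helper getIntermediateHllCollector used above is not shown on this page. A minimal sketch of what such a helper could look like, assuming the HyperLogLogCollector and Guava Hashing APIs that the other examples on this page already use; hashing every value through its string form is an illustrative simplification, not necessarily what the real test does:

// Hypothetical helper: build an intermediate HLL collector containing a single dimension value.
private HyperLogLogCollector getIntermediateHllCollector(final ValueType valueType, final Object dimValue) {
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    // Feed the hashed raw value to the collector, as HyperUniques aggregation does at ingestion time.
    // (valueType is ignored in this sketch; a real helper may handle numeric types without string conversion.)
    collector.add(Hashing.murmur3_128().hashString(String.valueOf(dimValue), StandardCharsets.UTF_8).asBytes());
    return collector;
}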
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class BatchDeltaIngestionTest, the method testReindexingWithNewAggregators:
/**
 * By default, re-indexing expects the same aggregators as were used by the original indexing job. But with the
 * additional flag "useNewAggs" in DatasourcePathSpec, the user can optionally supply any set of aggregators.
 * See https://github.com/apache/druid/issues/5277.
 */
@Test
public void testReindexingWithNewAggregators() throws Exception {
    List<WindowedDataSegment> segments = ImmutableList.of(new WindowedDataSegment(SEGMENT, INTERVAL_FULL));
    AggregatorFactory[] aggregators = new AggregatorFactory[] {
        new LongSumAggregatorFactory("visited_sum2", "visited_sum"),
        new HyperUniquesAggregatorFactory("unique_hosts2", "unique_hosts")
    };
    Map<String, Object> inputSpec = ImmutableMap.of(
        "type", "dataSource",
        "ingestionSpec", ImmutableMap.of("dataSource", "testds", "interval", INTERVAL_FULL),
        "segments", segments,
        "useNewAggs", true
    );
    File tmpDir = temporaryFolder.newFolder();
    HadoopDruidIndexerConfig config = makeHadoopDruidIndexerConfig(inputSpec, tmpDir, aggregators);
    List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
        ImmutableMap.of("time", DateTimes.of("2014-10-22T00:00:00.000Z"), "host", ImmutableList.of("a.example.com"), "visited_sum2", 100L, "unique_hosts2", 1.0d),
        ImmutableMap.of("time", DateTimes.of("2014-10-22T01:00:00.000Z"), "host", ImmutableList.of("b.example.com"), "visited_sum2", 150L, "unique_hosts2", 1.0d),
        ImmutableMap.of("time", DateTimes.of("2014-10-22T02:00:00.000Z"), "host", ImmutableList.of("c.example.com"), "visited_sum2", 200L, "unique_hosts2", 1.0d)
    );
    testIngestion(config, expectedRows, Iterables.getOnlyElement(segments), ImmutableList.of("host"), ImmutableList.of("visited_sum2", "unique_hosts2"));
}
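Note that the re-indexing aggregators above read the already-aggregated columns of the existing segment rather than raw input columns: the long sum is taken over "visited_sum" (a sum of sums) and the HyperUniques factory over "unique_hosts" (a merge of stored HLL sketches). A hedged sketch of that contrast; the raw column names on the original-indexing side are illustrative guesses, not something shown on this page:

// Aggregators the original indexing job might have used (raw column names are hypothetical).
AggregatorFactory[] originalAggregators = new AggregatorFactory[] {
    new LongSumAggregatorFactory("visited_sum", "visited_num"),
    new HyperUniquesAggregatorFactory("unique_hosts", "host")
};
// Re-indexing aggregators from the test above: they consume the pre-aggregated columns, so the
// long sum re-sums "visited_sum" and the HyperUniques factory merges the stored sketches.
AggregatorFactory[] reindexAggregators = new AggregatorFactory[] {
    new LongSumAggregatorFactory("visited_sum2", "visited_sum"),
    new HyperUniquesAggregatorFactory("unique_hosts2", "unique_hosts")
};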
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class SchemaEvolutionTest, the method setUp:
@Before
public void setUp() throws IOException {
    NullHandling.initializeForTests();
    // Index1: c1 is a string, c2 nonexistent, "uniques" nonexistent
    index1 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(new IncrementalIndexSchema.Builder().withMetrics(new CountAggregatorFactory("cnt")).withRollup(false).build())
        .rows(inputRowsWithDimensions(ImmutableList.of("c1")))
        .buildMMappedIndex();
    // Index2: c1 is a long, c2 is a string, "uniques" is uniques on c2, "longmin" is min on c1
    index2 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new LongSumAggregatorFactory("c1", "c1"),
                    new HyperUniquesAggregatorFactory("uniques", "c2"),
                    new LongMinAggregatorFactory("longmin", "c1")
                )
                .withRollup(false)
                .build()
        )
        .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
        .buildMMappedIndex();
    // Index3: c1 is a float, c2 is a string, "uniques" is uniques on c2
    index3 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new DoubleSumAggregatorFactory("c1", "c1"),
                    new HyperUniquesAggregatorFactory("uniques", "c2")
                )
                .withRollup(false)
                .build()
        )
        .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
        .buildMMappedIndex();
    // Index4: c1 is nonexistent, c2 is uniques on c2
    index4 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(new IncrementalIndexSchema.Builder().withMetrics(new HyperUniquesAggregatorFactory("c2", "c2")).withRollup(false).build())
        .rows(inputRowsWithDimensions(ImmutableList.of()))
        .buildMMappedIndex();
    if (index4.getAvailableDimensions().size() != 0) {
        // Just double-checking that the exclusions are working properly
        throw new ISE("Expected no dimensions in index4");
    }
}
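These four indexes give the schema-evolution tests segments whose columns appear, disappear, and change type from one segment to the next. A minimal sketch of the kind of query that exercises this, assuming the Druids.newTimeseriesQueryBuilder API; the data source name and interval are placeholders:

// Hypothetical query merging HyperUniques across segments where the "uniques" column exists
// (index2, index3) and where it does not (index1, index4); segments without the column
// contribute an empty (or null) sketch to the merged estimate.
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
    .dataSource("foo")        // placeholder data source name
    .intervals("1000/3000")   // placeholder interval
    .aggregators(new HyperUniquesAggregatorFactory("uniques", "uniques"))
    .build();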
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class MetricManipulatorFnsTest, the method constructorFeeder:
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() {
    final ArrayList<Object[]> constructorArrays = new ArrayList<>();
    final long longVal = 13789;

    LongMinAggregator longMinAggregator = new LongMinAggregator(new TestLongColumnSelector() {
        @Override
        public long getLong() {
            return longVal;
        }

        @Override
        public boolean isNull() {
            return false;
        }
    });
    LongMinAggregatorFactory longMinAggregatorFactory = new LongMinAggregatorFactory(NAME, FIELD);
    constructorArrays.add(new Object[] { longMinAggregatorFactory, longMinAggregator, longMinAggregator, longMinAggregator, longVal, longVal });

    HyperUniquesAggregatorFactory hyperUniquesAggregatorFactory = new HyperUniquesAggregatorFactory(NAME, FIELD);
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add((short) 1, (byte) 5);
    constructorArrays.add(new Object[] { hyperUniquesAggregatorFactory, collector, collector, collector.estimateCardinality(), collector.toByteArray(), collector });

    LongSumAggregatorFactory longSumAggregatorFactory = new LongSumAggregatorFactory(NAME, FIELD);
    LongSumAggregator longSumAggregator = new LongSumAggregator(new TestLongColumnSelector() {
        @Override
        public long getLong() {
            return longVal;
        }

        @Override
        public boolean isNull() {
            return false;
        }
    });
    constructorArrays.add(new Object[] { longSumAggregatorFactory, longSumAggregator, longSumAggregator, longSumAggregator, longVal, longVal });

    for (Object[] argList : constructorArrays) {
        Assert.assertEquals(StringUtils.format("Arglist %s is too short. Expected 6 found %d", Arrays.toString(argList), argList.length), 6, argList.length);
    }
    return constructorArrays;
}
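For context, the six-element arglists above are consumed by the test body (not shown on this page), which runs each entry through the metric manipulation functions. A rough sketch of the expected behavior for the HyperUniques entry, assuming the MetricManipulatorFns / MetricManipulationFn API; this illustrates the intent rather than the test's actual assertions:

// Hedged illustration of how the HyperUniques entries line up with the manipulator fns.
HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory(NAME, FIELD);
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
collector.add((short) 1, (byte) 5);
// identity: the aggregated object passes through unchanged.
Object identity = MetricManipulatorFns.identity().manipulate(factory, collector);
// finalizing: the collector is finalized into its cardinality estimate (a Double).
Object finalized = MetricManipulatorFns.finalizing().manipulate(factory, collector);
// deserializing: a serialized form (here, collector.toByteArray()) becomes a collector again.
Object deserialized = MetricManipulatorFns.deserializing().manipulate(factory, collector.toByteArray());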
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class CachingClusteredClientTest, the method testGroupByCaching:
@Test
public void testGroupByCaching() {
    List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
        .addAll(AGGS)
        .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
        .build();

    final HashFunction hashFn = Hashing.murmur3_128();

    GroupByQuery.Builder builder = new GroupByQuery.Builder()
        .setDataSource(DATA_SOURCE)
        .setQuerySegmentSpec(SEG_SPEC)
        .setDimFilter(DIM_FILTER)
        .setGranularity(GRANULARITY)
        .setDimensions(new DefaultDimensionSpec("a", "a"))
        .setAggregatorSpecs(aggsWithUniques)
        .setPostAggregatorSpecs(POST_AGGS)
        .setContext(CONTEXT);

    final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add(hashFn.hashString("abc123", StandardCharsets.UTF_8).asBytes());
    collector.add(hashFn.hashString("123abc", StandardCharsets.UTF_8).asBytes());

    final GroupByQuery query = builder.randomQueryId().build();

    testQueryCaching(
        getDefaultQueryRunner(),
        query,
        Intervals.of("2011-01-01/2011-01-02"),
        makeGroupByResults(query, DateTimes.of("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)),
        Intervals.of("2011-01-02/2011-01-03"),
        makeGroupByResults(query, DateTimes.of("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)),
        Intervals.of("2011-01-05/2011-01-10"),
        makeGroupByResults(
            query,
            DateTimes.of("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
        ),
        Intervals.of("2011-01-05/2011-01-10"),
        makeGroupByResults(
            query,
            DateTimes.of("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
        )
    );

    QueryRunner runner = new FinalizeResultsQueryRunner(getDefaultQueryRunner(), WAREHOUSE.getToolChest(query));
    TestHelper.assertExpectedObjects(
        makeGroupByResults(
            query,
            DateTimes.of("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
            DateTimes.of("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
        ),
        runner.run(QueryPlus.wrap(builder.randomQueryId().setInterval("2011-01-05/2011-01-10").build())),
        ""
    );
}
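One detail worth noting: the expected "uniques" values above are still HyperLogLogCollector instances even though the runner is wrapped in FinalizeResultsQueryRunner, which suggests that the shared CONTEXT disables finalization (an assumption; that constant is not shown on this page). When finalization is enabled, each factory's finalizeComputation is applied to the raw value, so a HyperUniques collector is reduced to its cardinality estimate:

// Hedged illustration: finalizing a HyperUniques value by hand.
HyperUniquesAggregatorFactory uniquesFactory = new HyperUniquesAggregatorFactory("uniques", "uniques");
Object finalized = uniquesFactory.finalizeComputation(collector); // a Double, roughly 2.0 for the two hashed strings above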