Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class TopNQueryQueryToolChestTest, the method doTestCacheStrategyOrderByPost:
private void doTestCacheStrategyOrderByPost(final ColumnType valueType, final Object dimValue) throws IOException {
    CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> strategy = new TopNQueryQueryToolChest(null, null).getCacheStrategy(
        new TopNQuery(
            new TableDataSource("dummy"),
            VirtualColumns.EMPTY,
            new DefaultDimensionSpec("test", "test", valueType),
            new NumericTopNMetricSpec("post"),
            3,
            new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2015-01-01/2015-01-02"))),
            null,
            Granularities.ALL,
            ImmutableList.of(new HyperUniquesAggregatorFactory("metric1", "test", false, false), new CountAggregatorFactory("metric2")),
            ImmutableList.of(new ArithmeticPostAggregator("post", "+", ImmutableList.of(new FinalizingFieldAccessPostAggregator("metric1", "metric1"), new FieldAccessPostAggregator("metric2", "metric2")))),
            null
        )
    );
    HyperLogLogCollector collector = getIntermediateHllCollector(valueType.getType(), dimValue);
    // test timestamps that result in integer size millis
    final Result<TopNResultValue> result1 = new Result<>(
        DateTimes.utc(123L),
        new TopNResultValue(Collections.singletonList(
            ImmutableMap.of("test", dimValue, "metric1", collector, "metric2", 2, "post", collector.estimateCardinality() + 2)
        ))
    );
    Object preparedValue = strategy.prepareForSegmentLevelCache().apply(result1);
    ObjectMapper objectMapper = TestHelper.makeJsonMapper();
    Object fromCacheValue = objectMapper.readValue(objectMapper.writeValueAsBytes(preparedValue), strategy.getCacheObjectClazz());
    Result<TopNResultValue> fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue);
    Assert.assertEquals(result1, fromCacheResult);
    // test timestamps that result in integer size millis
    final Result<TopNResultValue> resultLevelCacheResult = new Result<>(
        DateTimes.utc(123L),
        new TopNResultValue(Collections.singletonList(
            ImmutableMap.of("test", dimValue, "metric1", collector.estimateCardinality(), "metric2", 2, "post", collector.estimateCardinality() + 2)
        ))
    );
    Object preparedResultCacheValue = strategy.prepareForCache(true).apply(resultLevelCacheResult);
    Object fromResultCacheValue = objectMapper.readValue(objectMapper.writeValueAsBytes(preparedResultCacheValue), strategy.getCacheObjectClazz());
    Result<TopNResultValue> fromResultCacheResult = strategy.pullFromCache(true).apply(fromResultCacheValue);
    Assert.assertEquals(resultLevelCacheResult, fromResultCacheResult);
}
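The helper getIntermediateHllCollector used above is not shown on this page. A minimal sketch of what such a helper could look like, assuming the HyperLogLogCollector and Guava Hashing APIs that the other examples on this page already use; hashing every value through its string form is an illustrative simplification, not necessarily what the real test does:

// Hypothetical helper: build an intermediate HLL collector containing a single dimension value.
private HyperLogLogCollector getIntermediateHllCollector(final ValueType valueType, final Object dimValue) {
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    // Feed the hashed raw value to the collector, as HyperUniques aggregation does at ingestion time.
    // (valueType is ignored in this sketch; a real helper may handle numeric types without string conversion.)
    collector.add(Hashing.murmur3_128().hashString(String.valueOf(dimValue), StandardCharsets.UTF_8).asBytes());
    return collector;
}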
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class BatchDeltaIngestionTest, the method testReindexingWithNewAggregators:
/**
 * By default, re-indexing expects the same aggregators as were used by the original indexing job. But with the
 * additional flag "useNewAggs" in DatasourcePathSpec, the user can optionally supply any set of aggregators.
 * See https://github.com/apache/druid/issues/5277.
 */
@Test
public void testReindexingWithNewAggregators() throws Exception {
    List<WindowedDataSegment> segments = ImmutableList.of(new WindowedDataSegment(SEGMENT, INTERVAL_FULL));
    AggregatorFactory[] aggregators = new AggregatorFactory[] {
        new LongSumAggregatorFactory("visited_sum2", "visited_sum"),
        new HyperUniquesAggregatorFactory("unique_hosts2", "unique_hosts")
    };
    Map<String, Object> inputSpec = ImmutableMap.of(
        "type", "dataSource",
        "ingestionSpec", ImmutableMap.of("dataSource", "testds", "interval", INTERVAL_FULL),
        "segments", segments,
        "useNewAggs", true
    );
    File tmpDir = temporaryFolder.newFolder();
    HadoopDruidIndexerConfig config = makeHadoopDruidIndexerConfig(inputSpec, tmpDir, aggregators);
    List<ImmutableMap<String, Object>> expectedRows = ImmutableList.of(
        ImmutableMap.of("time", DateTimes.of("2014-10-22T00:00:00.000Z"), "host", ImmutableList.of("a.example.com"), "visited_sum2", 100L, "unique_hosts2", 1.0d),
        ImmutableMap.of("time", DateTimes.of("2014-10-22T01:00:00.000Z"), "host", ImmutableList.of("b.example.com"), "visited_sum2", 150L, "unique_hosts2", 1.0d),
        ImmutableMap.of("time", DateTimes.of("2014-10-22T02:00:00.000Z"), "host", ImmutableList.of("c.example.com"), "visited_sum2", 200L, "unique_hosts2", 1.0d)
    );
    testIngestion(config, expectedRows, Iterables.getOnlyElement(segments), ImmutableList.of("host"), ImmutableList.of("visited_sum2", "unique_hosts2"));
}
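Note that the re-indexing aggregators above read the already-aggregated columns of the existing segment rather than raw input columns: the long sum is taken over "visited_sum" (a sum of sums) and the HyperUniques factory over "unique_hosts" (a merge of stored HLL sketches). A hedged sketch of that contrast; the raw column names on the original-indexing side are illustrative guesses, not something shown on this page:

// Aggregators the original indexing job might have used (raw column names are hypothetical).
AggregatorFactory[] originalAggregators = new AggregatorFactory[] {
    new LongSumAggregatorFactory("visited_sum", "visited_num"),
    new HyperUniquesAggregatorFactory("unique_hosts", "host")
};
// Re-indexing aggregators from the test above: they consume the pre-aggregated columns, so the
// long sum re-sums "visited_sum" and the HyperUniques factory merges the stored sketches.
AggregatorFactory[] reindexAggregators = new AggregatorFactory[] {
    new LongSumAggregatorFactory("visited_sum2", "visited_sum"),
    new HyperUniquesAggregatorFactory("unique_hosts2", "unique_hosts")
};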
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class SchemaEvolutionTest, the method setUp:
@Before
public void setUp() throws IOException {
    NullHandling.initializeForTests();
    // Index1: c1 is a string, c2 nonexistent, "uniques" nonexistent
    index1 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(new IncrementalIndexSchema.Builder().withMetrics(new CountAggregatorFactory("cnt")).withRollup(false).build())
        .rows(inputRowsWithDimensions(ImmutableList.of("c1")))
        .buildMMappedIndex();
    // Index2: c1 is a long, c2 is a string, "uniques" is uniques on c2, "longmin" is min on c1
    index2 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new LongSumAggregatorFactory("c1", "c1"),
                    new HyperUniquesAggregatorFactory("uniques", "c2"),
                    new LongMinAggregatorFactory("longmin", "c1")
                )
                .withRollup(false)
                .build()
        )
        .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
        .buildMMappedIndex();
    // Index3: c1 is a float, c2 is a string, "uniques" is uniques on c2
    index3 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(
            new IncrementalIndexSchema.Builder()
                .withMetrics(
                    new CountAggregatorFactory("cnt"),
                    new DoubleSumAggregatorFactory("c1", "c1"),
                    new HyperUniquesAggregatorFactory("uniques", "c2")
                )
                .withRollup(false)
                .build()
        )
        .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
        .buildMMappedIndex();
    // Index4: c1 is nonexistent, c2 is uniques on c2
    index4 = IndexBuilder.create()
        .tmpDir(temporaryFolder.newFolder())
        .schema(new IncrementalIndexSchema.Builder().withMetrics(new HyperUniquesAggregatorFactory("c2", "c2")).withRollup(false).build())
        .rows(inputRowsWithDimensions(ImmutableList.of()))
        .buildMMappedIndex();
    if (index4.getAvailableDimensions().size() != 0) {
        // Just double-checking that the exclusions are working properly
        throw new ISE("Expected no dimensions in index4");
    }
}
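These four indexes give the schema-evolution tests segments whose columns appear, disappear, and change type from one segment to the next. A minimal sketch of the kind of query that exercises this, assuming the Druids.newTimeseriesQueryBuilder API; the data source name and interval are placeholders:

// Hypothetical query merging HyperUniques across segments where the "uniques" column exists
// (index2, index3) and where it does not (index1, index4); segments without the column
// contribute an empty (or null) sketch to the merged estimate.
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
    .dataSource("foo")        // placeholder data source name
    .intervals("1000/3000")   // placeholder interval
    .aggregators(new HyperUniquesAggregatorFactory("uniques", "uniques"))
    .build();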
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class MetricManipulatorFnsTest, the method constructorFeeder:
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() {
    final ArrayList<Object[]> constructorArrays = new ArrayList<>();
    final long longVal = 13789;

    LongMinAggregator longMinAggregator = new LongMinAggregator(new TestLongColumnSelector() {
        @Override
        public long getLong() {
            return longVal;
        }

        @Override
        public boolean isNull() {
            return false;
        }
    });
    LongMinAggregatorFactory longMinAggregatorFactory = new LongMinAggregatorFactory(NAME, FIELD);
    constructorArrays.add(new Object[] { longMinAggregatorFactory, longMinAggregator, longMinAggregator, longMinAggregator, longVal, longVal });

    HyperUniquesAggregatorFactory hyperUniquesAggregatorFactory = new HyperUniquesAggregatorFactory(NAME, FIELD);
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add((short) 1, (byte) 5);
    constructorArrays.add(new Object[] { hyperUniquesAggregatorFactory, collector, collector, collector.estimateCardinality(), collector.toByteArray(), collector });

    LongSumAggregatorFactory longSumAggregatorFactory = new LongSumAggregatorFactory(NAME, FIELD);
    LongSumAggregator longSumAggregator = new LongSumAggregator(new TestLongColumnSelector() {
        @Override
        public long getLong() {
            return longVal;
        }

        @Override
        public boolean isNull() {
            return false;
        }
    });
    constructorArrays.add(new Object[] { longSumAggregatorFactory, longSumAggregator, longSumAggregator, longSumAggregator, longVal, longVal });

    for (Object[] argList : constructorArrays) {
        Assert.assertEquals(StringUtils.format("Arglist %s is too short. Expected 6 found %d", Arrays.toString(argList), argList.length), 6, argList.length);
    }
    return constructorArrays;
}
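For context, the six-element arglists above are consumed by the test body (not shown on this page), which runs each entry through the metric manipulation functions. A rough sketch of the expected behavior for the HyperUniques entry, assuming the MetricManipulatorFns / MetricManipulationFn API; this illustrates the intent rather than the test's actual assertions:

// Hedged illustration of how the HyperUniques entries line up with the manipulator fns.
HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory(NAME, FIELD);
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
collector.add((short) 1, (byte) 5);
// identity: the aggregated object passes through unchanged.
Object identity = MetricManipulatorFns.identity().manipulate(factory, collector);
// finalizing: the collector is finalized into its cardinality estimate (a Double).
Object finalized = MetricManipulatorFns.finalizing().manipulate(factory, collector);
// deserializing: a serialized form (here, collector.toByteArray()) becomes a collector again.
Object deserialized = MetricManipulatorFns.deserializing().manipulate(factory, collector.toByteArray());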
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class CachingClusteredClientTest, the method testGroupByCaching:
@Test
public void testGroupByCaching() {
    List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
        .addAll(AGGS)
        .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
        .build();

    final HashFunction hashFn = Hashing.murmur3_128();

    GroupByQuery.Builder builder = new GroupByQuery.Builder()
        .setDataSource(DATA_SOURCE)
        .setQuerySegmentSpec(SEG_SPEC)
        .setDimFilter(DIM_FILTER)
        .setGranularity(GRANULARITY)
        .setDimensions(new DefaultDimensionSpec("a", "a"))
        .setAggregatorSpecs(aggsWithUniques)
        .setPostAggregatorSpecs(POST_AGGS)
        .setContext(CONTEXT);

    final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add(hashFn.hashString("abc123", StandardCharsets.UTF_8).asBytes());
    collector.add(hashFn.hashString("123abc", StandardCharsets.UTF_8).asBytes());

    final GroupByQuery query = builder.randomQueryId().build();

    testQueryCaching(
        getDefaultQueryRunner(),
        query,
        Intervals.of("2011-01-01/2011-01-02"),
        makeGroupByResults(query, DateTimes.of("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)),
        Intervals.of("2011-01-02/2011-01-03"),
        makeGroupByResults(query, DateTimes.of("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)),
        Intervals.of("2011-01-05/2011-01-10"),
        makeGroupByResults(
            query,
            DateTimes.of("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
        ),
        Intervals.of("2011-01-05/2011-01-10"),
        makeGroupByResults(
            query,
            DateTimes.of("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
        )
    );

    QueryRunner runner = new FinalizeResultsQueryRunner(getDefaultQueryRunner(), WAREHOUSE.getToolChest(query));
    TestHelper.assertExpectedObjects(
        makeGroupByResults(
            query,
            DateTimes.of("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            DateTimes.of("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            DateTimes.of("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            DateTimes.of("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            DateTimes.of("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
            DateTimes.of("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
        ),
        runner.run(QueryPlus.wrap(builder.randomQueryId().setInterval("2011-01-05/2011-01-10").build())),
        ""
    );
}
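One detail worth noting: the expected "uniques" values above are still HyperLogLogCollector instances even though the runner is wrapped in FinalizeResultsQueryRunner, which suggests that the shared CONTEXT disables finalization (an assumption; that constant is not shown on this page). When finalization is enabled, each factory's finalizeComputation is applied to the raw value, so a HyperUniques collector is reduced to its cardinality estimate:

// Hedged illustration: finalizing a HyperUniques value by hand.
HyperUniquesAggregatorFactory uniquesFactory = new HyperUniquesAggregatorFactory("uniques", "uniques");
Object finalized = uniquesFactory.finalizeComputation(collector); // a Double, roughly 2.0 for the two hashed strings above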