Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class SchemaEvolutionTest, method testHyperUniqueEvolutionTimeseries.
@Test
@Parameters(method = "doVectorize")
public void testHyperUniqueEvolutionTimeseries(boolean doVectorize) {
final TimeseriesQueryRunnerFactory factory = QueryRunnerTestHelper.newTimeseriesQueryRunnerFactory();
final TimeseriesQuery query = Druids
    .newTimeseriesQueryBuilder()
    .dataSource(DATA_SOURCE)
    .intervals("1000/3000")
    .aggregators(ImmutableList.of(new HyperUniquesAggregatorFactory("uniques", "uniques")))
    .context(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, doVectorize))
    .build();
// index1 has no "uniques" column
Assert.assertEquals(
    timeseriesResult(ImmutableMap.of("uniques", 0d)),
    runQuery(query, factory, ImmutableList.of(index1))
);
// index1 (no uniques) + index2 and index3 (yes uniques); we should be able to combine
Assert.assertEquals(
    timeseriesResult(ImmutableMap.of("uniques", 4.003911343725148d)),
    runQuery(query, factory, ImmutableList.of(index1, index2, index3))
);
}
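In the query above, HyperUniquesAggregatorFactory("uniques", "uniques") names the output metric first and the stored hyperUnique column second, and the aggregation result is a cardinality estimate reported as a double (hence the 0d and 4.003911343725148d expectations). Below is a minimal sketch of reading that estimate back, assuming the test helper runQuery returns the standard List<Result<TimeseriesResultValue>> rows produced by Druid timeseries queries; the variable names are illustrative, not part of the test.

// Minimal sketch (illustrative names); assumes runQuery returns the usual
// List<Result<TimeseriesResultValue>> rows of a Druid timeseries query.
Result<TimeseriesResultValue> row =
    runQuery(query, factory, ImmutableList.of(index1, index2, index3)).get(0);
// The hyperUnique estimate comes back as a double, e.g. ~4.0 for four distinct values.
double estimatedUniques = row.getValue().getDoubleMetric("uniques");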
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class BatchDeltaIngestionTest, method makeHadoopDruidIndexerConfig.
private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(
    Map<String, Object> inputSpec,
    File tmpDir,
    AggregatorFactory[] aggregators
) throws Exception {
HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
    new HadoopIngestionSpec(
        new DataSchema(
            "website",
            MAPPER.convertValue(
                new StringInputRowParser(
                    new CSVParseSpec(
                        new TimestampSpec("timestamp", "yyyyMMddHH", null),
                        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))),
                        null,
                        ImmutableList.of("timestamp", "host", "host2", "visited_num"),
                        false,
                        0
                    ),
                    null
                ),
                Map.class
            ),
            aggregators != null
                ? aggregators
                : new AggregatorFactory[] {
                    new LongSumAggregatorFactory("visited_sum", "visited_num"),
                    new HyperUniquesAggregatorFactory("unique_hosts", "host2")
                },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
            null,
            MAPPER
        ),
        new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()),
        new HadoopTuningConfig(
            tmpDir.getCanonicalPath(),
            null, null, null, null, null, null, null, null,
            false, false, false, false,
            null, false, false,
            null, null, false, false,
            null, null, null, null, null
        )
    )
);
config.setShardSpecs(
    ImmutableMap.of(
        INTERVAL_FULL.getStartMillis(),
        ImmutableList.of(
            new HadoopyShardSpec(
                new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER),
                0
            )
        )
    )
);
config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
return config;
}
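The aggregator array above is the ingestion-time side of the picture: here HyperUniquesAggregatorFactory is pointed at the raw input column "host2" and produces a complex hyperUnique metric column named "unique_hosts", which query-time hyperUnique aggregators (as in the timeseries example earlier) can then fold together. A distilled sketch of just that default metricsSpec, with the surrounding Hadoop configuration omitted:

// Sketch of the default metricsSpec used above: a plain long-sum rollup plus a
// hyperUnique metric built from the raw "host2" values at ingestion time.
AggregatorFactory[] defaultMetrics = new AggregatorFactory[] {
    new LongSumAggregatorFactory("visited_sum", "visited_num"),
    new HyperUniquesAggregatorFactory("unique_hosts", "host2")
};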
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class IndexGeneratorCombinerTest, method setUp.
@Before
public void setUp() throws Exception {
HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
    new HadoopIngestionSpec(
        new DataSchema(
            "website",
            HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                new StringInputRowParser(
                    new TimeAndDimsParseSpec(
                        new TimestampSpec("timestamp", "yyyyMMddHH", null),
                        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")))
                    ),
                    null
                ),
                Map.class
            ),
            new AggregatorFactory[] {
                new LongSumAggregatorFactory("visited_sum", "visited"),
                new HyperUniquesAggregatorFactory("unique_hosts", "host")
            },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(Intervals.of("2010/2011"))),
            null,
            HadoopDruidIndexerConfig.JSON_MAPPER
        ),
        new HadoopIOConfig(ImmutableMap.of("paths", "/tmp/dummy", "type", "static"), null, "/tmp/dummy"),
        HadoopTuningConfig.makeDefaultTuningConfig().withWorkingPath("/tmp/work").withVersion("ver")
    )
);
Configuration hadoopConfig = new Configuration();
hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
EasyMock.expect(context.getConfiguration()).andReturn(hadoopConfig);
EasyMock.replay(context);
aggregators = config.getSchema().getDataSchema().getAggregators();
combiner = new IndexGeneratorJob.IndexGeneratorCombiner();
combiner.setup(context);
}
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class InputRowSerdeTest, method testThrowParseExceptions.
@Test
public void testThrowParseExceptions() {
InputRow in = new MapBasedInputRow(timestamp, dims, event);
AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
    new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
    new DoubleSumAggregatorFactory("m1out", "m1"),
    new LongSumAggregatorFactory("m2out", "m2"),
    new HyperUniquesAggregatorFactory("m3out", "m3"),
    // Unparseable from String to Long
    new LongSumAggregatorFactory("unparseable", "m3")
};
DimensionsSpec dimensionsSpec = new DimensionsSpec(
    Arrays.asList(
        new StringDimensionSchema("d1"),
        new StringDimensionSchema("d2"),
        new LongDimensionSchema("d3"),
        new FloatDimensionSchema("d4"),
        new DoubleDimensionSchema("d5")
    )
);
InputRowSerde.SerializeResult result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories);
Assert.assertEquals(Collections.singletonList("Unable to parse value[m3v] for field[m3]"), result.getParseExceptionMessages());
}
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class InputRowSerdeTest, method testSerde.
@Test
public void testSerde() {
// Prepare the mocks & set close() call count expectation to 1
final Aggregator mockedAggregator = EasyMock.createMock(DoubleSumAggregator.class);
EasyMock.expect(mockedAggregator.isNull()).andReturn(false).times(1);
EasyMock.expect(mockedAggregator.getDouble()).andReturn(0d).times(1);
mockedAggregator.aggregate();
EasyMock.expectLastCall().times(1);
mockedAggregator.close();
EasyMock.expectLastCall().times(1);
EasyMock.replay(mockedAggregator);
final Aggregator mockedNullAggregator = EasyMock.createMock(DoubleSumAggregator.class);
EasyMock.expect(mockedNullAggregator.isNull()).andReturn(true).times(1);
mockedNullAggregator.aggregate();
EasyMock.expectLastCall().times(1);
mockedNullAggregator.close();
EasyMock.expectLastCall().times(1);
EasyMock.replay(mockedNullAggregator);
final AggregatorFactory mockedAggregatorFactory = EasyMock.createMock(AggregatorFactory.class);
EasyMock.expect(mockedAggregatorFactory.factorize(EasyMock.anyObject(ColumnSelectorFactory.class))).andReturn(mockedAggregator);
EasyMock.expect(mockedAggregatorFactory.getIntermediateType()).andReturn(ColumnType.DOUBLE).anyTimes();
EasyMock.expect(mockedAggregatorFactory.getName()).andReturn("mockedAggregator").anyTimes();
final AggregatorFactory mockedNullAggregatorFactory = EasyMock.createMock(AggregatorFactory.class);
EasyMock.expect(mockedNullAggregatorFactory.factorize(EasyMock.anyObject(ColumnSelectorFactory.class))).andReturn(mockedNullAggregator);
EasyMock.expect(mockedNullAggregatorFactory.getName()).andReturn("mockedNullAggregator").anyTimes();
EasyMock.expect(mockedNullAggregatorFactory.getIntermediateType()).andReturn(ColumnType.DOUBLE).anyTimes();
EasyMock.replay(mockedAggregatorFactory, mockedNullAggregatorFactory);
InputRow in = new MapBasedInputRow(timestamp, dims, event);
AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
    new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
    new DoubleSumAggregatorFactory("m1out", "m1"),
    new LongSumAggregatorFactory("m2out", "m2"),
    new HyperUniquesAggregatorFactory("m3out", "m3"),
    // Unparseable from String to Long
    new LongSumAggregatorFactory("unparseable", "m3"),
    mockedAggregatorFactory,
    mockedNullAggregatorFactory
};
DimensionsSpec dimensionsSpec = new DimensionsSpec(
    Arrays.asList(
        new StringDimensionSchema("d1"),
        new StringDimensionSchema("d2"),
        new LongDimensionSchema("d3"),
        new FloatDimensionSchema("d4"),
        new DoubleDimensionSchema("d5")
    )
);
byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories).getSerializedRow();
InputRow out = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), data, aggregatorFactories);
Assert.assertEquals(timestamp, out.getTimestampFromEpoch());
Assert.assertEquals(dims, out.getDimensions());
Assert.assertEquals(Collections.emptyList(), out.getDimension("dim_non_existing"));
Assert.assertEquals(ImmutableList.of("d1v"), out.getDimension("d1"));
Assert.assertEquals(ImmutableList.of("d2v1", "d2v2"), out.getDimension("d2"));
Assert.assertEquals(200L, out.getRaw("d3"));
Assert.assertEquals(300.1f, out.getRaw("d4"));
Assert.assertEquals(400.5d, out.getRaw("d5"));
Assert.assertEquals(NullHandling.defaultDoubleValue(), out.getMetric("agg_non_existing"));
Assert.assertEquals(5.0f, out.getMetric("m1out").floatValue(), 0.00001);
Assert.assertEquals(100L, out.getMetric("m2out"));
Assert.assertEquals(1, ((HyperLogLogCollector) out.getRaw("m3out")).estimateCardinality(), 0.001);
Assert.assertEquals(NullHandling.defaultLongValue(), out.getMetric("unparseable"));
EasyMock.verify(mockedAggregator);
EasyMock.verify(mockedNullAggregator);
}
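The m3out assertion above shows how the serialized hyperUnique metric is consumed after a round trip: getRaw returns a HyperLogLogCollector, and estimateCardinality() turns it into an approximate distinct count. A minimal sketch of that read path, assuming out is the deserialized row from the test; the variable names are illustrative.

// Minimal sketch: the deserialized hyperUnique metric comes back as a
// HyperLogLogCollector, whose estimateCardinality() yields the approximate count.
HyperLogLogCollector collector = (HyperLogLogCollector) out.getRaw("m3out");
double approxDistinct = collector.estimateCardinality(); // ~1.0 in this test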