Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class SchemaEvolutionTest, method testHyperUniqueEvolutionTimeseries.
@Test
@Parameters(method = "doVectorize")
public void testHyperUniqueEvolutionTimeseries(boolean doVectorize) {
final TimeseriesQueryRunnerFactory factory = QueryRunnerTestHelper.newTimeseriesQueryRunnerFactory();
final TimeseriesQuery query = Druids
    .newTimeseriesQueryBuilder()
    .dataSource(DATA_SOURCE)
    .intervals("1000/3000")
    .aggregators(ImmutableList.of(new HyperUniquesAggregatorFactory("uniques", "uniques")))
    .context(ImmutableMap.of(QueryContexts.VECTORIZE_KEY, doVectorize))
    .build();
// index1 has no "uniques" column
Assert.assertEquals(
    timeseriesResult(ImmutableMap.of("uniques", 0d)),
    runQuery(query, factory, ImmutableList.of(index1))
);
// index1 (no uniques) + index2 and index3 (yes uniques); we should be able to combine
Assert.assertEquals(
    timeseriesResult(ImmutableMap.of("uniques", 4.003911343725148d)),
    runQuery(query, factory, ImmutableList.of(index1, index2, index3))
);
}
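In the query above, HyperUniquesAggregatorFactory("uniques", "uniques") names the output metric first and the stored hyperUnique column second, and the aggregation result is a cardinality estimate reported as a double (hence the 0d and 4.003911343725148d expectations). Below is a minimal sketch of reading that estimate back, assuming the test helper runQuery returns the standard List<Result<TimeseriesResultValue>> rows produced by Druid timeseries queries; the variable names are illustrative, not part of the test.

// Minimal sketch (illustrative names); assumes runQuery returns the usual
// List<Result<TimeseriesResultValue>> rows of a Druid timeseries query.
Result<TimeseriesResultValue> row =
    runQuery(query, factory, ImmutableList.of(index1, index2, index3)).get(0);
// The hyperUnique estimate comes back as a double, e.g. ~4.0 for four distinct values.
double estimatedUniques = row.getValue().getDoubleMetric("uniques");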
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class BatchDeltaIngestionTest, method makeHadoopDruidIndexerConfig.
private HadoopDruidIndexerConfig makeHadoopDruidIndexerConfig(
    Map<String, Object> inputSpec,
    File tmpDir,
    AggregatorFactory[] aggregators
) throws Exception {
HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
    new HadoopIngestionSpec(
        new DataSchema(
            "website",
            MAPPER.convertValue(
                new StringInputRowParser(
                    new CSVParseSpec(
                        new TimestampSpec("timestamp", "yyyyMMddHH", null),
                        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host"))),
                        null,
                        ImmutableList.of("timestamp", "host", "host2", "visited_num"),
                        false,
                        0
                    ),
                    null
                ),
                Map.class
            ),
            aggregators != null
                ? aggregators
                : new AggregatorFactory[] {
                    new LongSumAggregatorFactory("visited_sum", "visited_num"),
                    new HyperUniquesAggregatorFactory("unique_hosts", "host2")
                },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(INTERVAL_FULL)),
            null,
            MAPPER
        ),
        new HadoopIOConfig(inputSpec, null, tmpDir.getCanonicalPath()),
        new HadoopTuningConfig(
            tmpDir.getCanonicalPath(),
            null, null, null, null, null, null, null, null,
            false, false, false, false,
            null, false, false,
            null, null, false, false,
            null, null, null, null, null
        )
    )
);
config.setShardSpecs(
    ImmutableMap.of(
        INTERVAL_FULL.getStartMillis(),
        ImmutableList.of(
            new HadoopyShardSpec(
                new HashBasedNumberedShardSpec(0, 1, 0, 1, null, HashPartitionFunction.MURMUR3_32_ABS, HadoopDruidIndexerConfig.JSON_MAPPER),
                0
            )
        )
    )
);
config = HadoopDruidIndexerConfig.fromSpec(config.getSchema());
return config;
}
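The aggregator array above is the ingestion-time side of the picture: here HyperUniquesAggregatorFactory is pointed at the raw input column "host2" and produces a complex hyperUnique metric column named "unique_hosts", which query-time hyperUnique aggregators (as in the timeseries example earlier) can then fold together. A distilled sketch of just that default metricsSpec, with the surrounding Hadoop configuration omitted:

// Sketch of the default metricsSpec used above: a plain long-sum rollup plus a
// hyperUnique metric built from the raw "host2" values at ingestion time.
AggregatorFactory[] defaultMetrics = new AggregatorFactory[] {
    new LongSumAggregatorFactory("visited_sum", "visited_num"),
    new HyperUniquesAggregatorFactory("unique_hosts", "host2")
};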
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class IndexGeneratorCombinerTest, method setUp.
@Before
public void setUp() throws Exception {
HadoopDruidIndexerConfig config = new HadoopDruidIndexerConfig(
    new HadoopIngestionSpec(
        new DataSchema(
            "website",
            HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                new StringInputRowParser(
                    new TimeAndDimsParseSpec(
                        new TimestampSpec("timestamp", "yyyyMMddHH", null),
                        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("host", "keywords")))
                    ),
                    null
                ),
                Map.class
            ),
            new AggregatorFactory[] {
                new LongSumAggregatorFactory("visited_sum", "visited"),
                new HyperUniquesAggregatorFactory("unique_hosts", "host")
            },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, ImmutableList.of(Intervals.of("2010/2011"))),
            null,
            HadoopDruidIndexerConfig.JSON_MAPPER
        ),
        new HadoopIOConfig(ImmutableMap.of("paths", "/tmp/dummy", "type", "static"), null, "/tmp/dummy"),
        HadoopTuningConfig.makeDefaultTuningConfig().withWorkingPath("/tmp/work").withVersion("ver")
    )
);
Configuration hadoopConfig = new Configuration();
hadoopConfig.set(HadoopDruidIndexerConfig.CONFIG_PROPERTY, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(config));
Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
EasyMock.expect(context.getConfiguration()).andReturn(hadoopConfig);
EasyMock.replay(context);
aggregators = config.getSchema().getDataSchema().getAggregators();
combiner = new IndexGeneratorJob.IndexGeneratorCombiner();
combiner.setup(context);
}
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class InputRowSerdeTest, method testThrowParseExceptions.
@Test
public void testThrowParseExceptions() {
InputRow in = new MapBasedInputRow(timestamp, dims, event);
AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
    new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
    new DoubleSumAggregatorFactory("m1out", "m1"),
    new LongSumAggregatorFactory("m2out", "m2"),
    new HyperUniquesAggregatorFactory("m3out", "m3"),
    // Unparseable from String to Long
    new LongSumAggregatorFactory("unparseable", "m3")
};
DimensionsSpec dimensionsSpec = new DimensionsSpec(
    Arrays.asList(
        new StringDimensionSchema("d1"),
        new StringDimensionSchema("d2"),
        new LongDimensionSchema("d3"),
        new FloatDimensionSchema("d4"),
        new DoubleDimensionSchema("d5")
    )
);
InputRowSerde.SerializeResult result = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories);
Assert.assertEquals(Collections.singletonList("Unable to parse value[m3v] for field[m3]"), result.getParseExceptionMessages());
}
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.
From the class InputRowSerdeTest, method testSerde.
@Test
public void testSerde() {
// Prepare the mocks & set close() call count expectation to 1
final Aggregator mockedAggregator = EasyMock.createMock(DoubleSumAggregator.class);
EasyMock.expect(mockedAggregator.isNull()).andReturn(false).times(1);
EasyMock.expect(mockedAggregator.getDouble()).andReturn(0d).times(1);
mockedAggregator.aggregate();
EasyMock.expectLastCall().times(1);
mockedAggregator.close();
EasyMock.expectLastCall().times(1);
EasyMock.replay(mockedAggregator);
final Aggregator mockedNullAggregator = EasyMock.createMock(DoubleSumAggregator.class);
EasyMock.expect(mockedNullAggregator.isNull()).andReturn(true).times(1);
mockedNullAggregator.aggregate();
EasyMock.expectLastCall().times(1);
mockedNullAggregator.close();
EasyMock.expectLastCall().times(1);
EasyMock.replay(mockedNullAggregator);
final AggregatorFactory mockedAggregatorFactory = EasyMock.createMock(AggregatorFactory.class);
EasyMock.expect(mockedAggregatorFactory.factorize(EasyMock.anyObject(ColumnSelectorFactory.class))).andReturn(mockedAggregator);
EasyMock.expect(mockedAggregatorFactory.getIntermediateType()).andReturn(ColumnType.DOUBLE).anyTimes();
EasyMock.expect(mockedAggregatorFactory.getName()).andReturn("mockedAggregator").anyTimes();
final AggregatorFactory mockedNullAggregatorFactory = EasyMock.createMock(AggregatorFactory.class);
EasyMock.expect(mockedNullAggregatorFactory.factorize(EasyMock.anyObject(ColumnSelectorFactory.class))).andReturn(mockedNullAggregator);
EasyMock.expect(mockedNullAggregatorFactory.getName()).andReturn("mockedNullAggregator").anyTimes();
EasyMock.expect(mockedNullAggregatorFactory.getIntermediateType()).andReturn(ColumnType.DOUBLE).anyTimes();
EasyMock.replay(mockedAggregatorFactory, mockedNullAggregatorFactory);
InputRow in = new MapBasedInputRow(timestamp, dims, event);
AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
    new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
    new DoubleSumAggregatorFactory("m1out", "m1"),
    new LongSumAggregatorFactory("m2out", "m2"),
    new HyperUniquesAggregatorFactory("m3out", "m3"),
    // Unparseable from String to Long
    new LongSumAggregatorFactory("unparseable", "m3"),
    mockedAggregatorFactory,
    mockedNullAggregatorFactory
};
DimensionsSpec dimensionsSpec = new DimensionsSpec(
    Arrays.asList(
        new StringDimensionSchema("d1"),
        new StringDimensionSchema("d2"),
        new LongDimensionSchema("d3"),
        new FloatDimensionSchema("d4"),
        new DoubleDimensionSchema("d5")
    )
);
byte[] data = InputRowSerde.toBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), in, aggregatorFactories).getSerializedRow();
InputRow out = InputRowSerde.fromBytes(InputRowSerde.getTypeHelperMap(dimensionsSpec), data, aggregatorFactories);
Assert.assertEquals(timestamp, out.getTimestampFromEpoch());
Assert.assertEquals(dims, out.getDimensions());
Assert.assertEquals(Collections.emptyList(), out.getDimension("dim_non_existing"));
Assert.assertEquals(ImmutableList.of("d1v"), out.getDimension("d1"));
Assert.assertEquals(ImmutableList.of("d2v1", "d2v2"), out.getDimension("d2"));
Assert.assertEquals(200L, out.getRaw("d3"));
Assert.assertEquals(300.1f, out.getRaw("d4"));
Assert.assertEquals(400.5d, out.getRaw("d5"));
Assert.assertEquals(NullHandling.defaultDoubleValue(), out.getMetric("agg_non_existing"));
Assert.assertEquals(5.0f, out.getMetric("m1out").floatValue(), 0.00001);
Assert.assertEquals(100L, out.getMetric("m2out"));
Assert.assertEquals(1, ((HyperLogLogCollector) out.getRaw("m3out")).estimateCardinality(), 0.001);
Assert.assertEquals(NullHandling.defaultLongValue(), out.getMetric("unparseable"));
EasyMock.verify(mockedAggregator);
EasyMock.verify(mockedNullAggregator);
}
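The m3out assertion above shows how the serialized hyperUnique metric is consumed after a round trip: getRaw returns a HyperLogLogCollector, and estimateCardinality() turns it into an approximate distinct count. A minimal sketch of that read path, assuming out is the deserialized row from the test; the variable names are illustrative.

// Minimal sketch: the deserialized hyperUnique metric comes back as a
// HyperLogLogCollector, whose estimateCardinality() yields the approximate count.
HyperLogLogCollector collector = (HyperLogLogCollector) out.getRaw("m3out");
double approxDistinct = collector.estimateCardinality(); // ~1.0 in this test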