Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.
From the class HyperUniquesAggregatorFactoryTest, method testCompare2.
@Test
public void testCompare2() {
  Random rand = new Random(0);
  HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
  Comparator comparator = factory.getComparator();
  for (int i = 1; i < 1000; ++i) {
    HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
    int j = rand.nextInt(50);
    for (int l = 0; l < j; ++l) {
      collector1.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
    int k = j + 1 + rand.nextInt(5);
    for (int l = 0; l < k; ++l) {
      collector2.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    Assert.assertEquals(
        Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()),
        comparator.compare(collector1, collector2)
    );
  }
  for (int i = 1; i < 100; ++i) {
    HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
    int j = rand.nextInt(500);
    for (int l = 0; l < j; ++l) {
      collector1.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
    int k = j + 2 + rand.nextInt(5);
    for (int l = 0; l < k; ++l) {
      collector2.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    Assert.assertEquals(
        Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()),
        comparator.compare(collector1, collector2)
    );
  }
  for (int i = 1; i < 10; ++i) {
    HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
    int j = rand.nextInt(100000);
    for (int l = 0; l < j; ++l) {
      collector1.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
    int k = j + 20000 + rand.nextInt(100000);
    for (int l = 0; l < k; ++l) {
      collector2.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    Assert.assertEquals(
        Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()),
        comparator.compare(collector1, collector2)
    );
  }
}
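The test above relies on a hash-function field fn that is defined elsewhere in the test class and not shown in the snippet. A minimal sketch of that setup, assuming Guava's 128-bit MurmurHash3, which produces hashes of the width HyperLogLogCollector.add(byte[]) expects:

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

// Assumed field from the surrounding test class (not part of the snippet above).
private final HashFunction fn = Hashing.murmur3_128();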
Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.
From the class HyperUniquesAggregatorFactoryTest, method testEstimateCardinalityForZeroCardinality.
@Test
public void testEstimateCardinalityForZeroCardinality() {
  HyperLogLogCollector emptyHyperLogLogCollector = HyperUniquesBufferAggregator.doGet(
      ByteBuffer.allocate(HyperLogLogCollector.getLatestNumBytesForDenseStorage()),
      0
  );
  Assert.assertEquals(0L, HyperUniquesAggregatorFactory.estimateCardinality(null, true));
  Assert.assertEquals(0d, HyperUniquesAggregatorFactory.estimateCardinality(null, false));
  Assert.assertEquals(0L, HyperUniquesAggregatorFactory.estimateCardinality(emptyHyperLogLogCollector, true));
  Assert.assertEquals(0d, HyperUniquesAggregatorFactory.estimateCardinality(emptyHyperLogLogCollector, false));
  Assert.assertEquals(
      HyperUniquesAggregatorFactory.estimateCardinality(emptyHyperLogLogCollector, true).getClass(),
      HyperUniquesAggregatorFactory.estimateCardinality(null, true).getClass()
  );
  Assert.assertEquals(
      HyperUniquesAggregatorFactory.estimateCardinality(emptyHyperLogLogCollector, false).getClass(),
      HyperUniquesAggregatorFactory.estimateCardinality(null, false).getClass()
  );
}
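A hedged illustration of the round flag the test exercises: with round = true the factory is expected to return a rounded Long estimate, with round = false a raw Double (the values shown assume an empty collector, as in the test):

HyperLogLogCollector empty = HyperLogLogCollector.makeLatestCollector();
// round = true: rounded estimate, returned as a Long (0L for an empty collector)
Object rounded = HyperUniquesAggregatorFactory.estimateCardinality(empty, true);
// round = false: raw estimate, returned as a Double (0.0d for an empty collector)
Object raw = HyperUniquesAggregatorFactory.estimateCardinality(empty, false);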
Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.
From the class LargeColumnSupportedComplexColumnSerializerTest, method testSanity.
@Test
public void testSanity() throws IOException {
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = { 1000, 5000, 10000, 20000 };
  int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Long.BYTES, 2500 * Long.BYTES };
  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = temporaryFolder.newFolder();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
      try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(segmentWriteOutMedium, "test", serde.getObjectStrategy(), columnSize);
        serializer.open();
        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(new ObjectColumnSelector() {
            @Nullable
            @Override
            public Object getObject() {
              return collector;
            }

            @Override
            public Class classOfObject() {
              return HyperLogLogCollector.class;
            }

            @Override
            public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
              // doesn't matter in tests
            }
          });
        }
        try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeTo(channel, v9Smoosher);
        }
      }
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder()
          .setType(ValueType.COMPLEX)
          .setHasMultipleValues(false)
          .setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder, null);
      ColumnHolder columnHolder = builder.build();
      ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
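The round trip above boils down to a fold-then-estimate pattern: each per-row collector is folded into a base collector, and the combined estimate must survive serialization and deserialization. A minimal standalone sketch of that pattern, assuming Guava's MurmurHash3 for the input hashes (loop bounds and variable names are illustrative):

import com.google.common.hash.Hashing;
import org.apache.druid.hll.HyperLogLogCollector;

HyperLogLogCollector total = HyperLogLogCollector.makeLatestCollector();
for (long id = 0; id < 1000; id++) {
  HyperLogLogCollector perRow = HyperLogLogCollector.makeLatestCollector();
  perRow.add(Hashing.murmur3_128().hashLong(id).asBytes());
  // fold() merges the per-row sketch into the running sketch
  total.fold(perRow);
}
// Roughly 1000, within the HLL error bounds
double estimate = total.estimateCardinality();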
Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.
From the class HyperUniquesSerdeForTest, method getExtractor.
@Override
public ComplexMetricExtractor getExtractor() {
  return new ComplexMetricExtractor() {
    @Override
    public Class<HyperLogLogCollector> extractedClass() {
      return HyperLogLogCollector.class;
    }

    @Override
    public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
      Object rawValue = inputRow.getRaw(metricName);
      if (rawValue instanceof HyperLogLogCollector) {
        return (HyperLogLogCollector) rawValue;
      } else {
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        List<String> dimValues = inputRow.getDimension(metricName);
        if (dimValues == null) {
          return collector;
        }
        for (String dimensionValue : dimValues) {
          collector.add(hashFn.hashBytes(StringUtils.toUtf8(dimensionValue)).asBytes());
        }
        return collector;
      }
    }
  };
}
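A hedged usage sketch of the extractor above, assuming serde is an instance of HyperUniquesSerdeForTest and the row is a MapBasedInputRow whose metric column still holds raw string values (the column name and row contents are illustrative):

InputRow row = new MapBasedInputRow(
    DateTimes.nowUtc().getMillis(),
    ImmutableList.of("user"),
    ImmutableMap.of("user", ImmutableList.of("alice", "bob"))
);
// The raw value is not already a collector, so the extractor hashes each
// dimension value into a freshly created HyperLogLogCollector.
HyperLogLogCollector collector =
    (HyperLogLogCollector) serde.getExtractor().extractValue(row, "user");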
Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.
From the class BatchDeltaIngestionTest, method verifyRows.
private void verifyRows(
    List<ImmutableMap<String, Object>> expectedRows,
    List<InputRow> actualRows,
    List<String> expectedDimensions,
    List<String> expectedMetrics
) {
  Assert.assertEquals(expectedRows.size(), actualRows.size());
  for (int i = 0; i < expectedRows.size(); i++) {
    Map<String, Object> expected = expectedRows.get(i);
    InputRow actual = actualRows.get(i);
    Assert.assertEquals(expected.get("time"), actual.getTimestamp());
    Assert.assertEquals(expectedDimensions, actual.getDimensions());
    expectedDimensions.forEach(s -> Assert.assertEquals(expected.get(s), actual.getDimension(s)));
    for (String metric : expectedMetrics) {
      Object actualValue = actual.getRaw(metric);
      if (actualValue instanceof HyperLogLogCollector) {
        Assert.assertEquals(
            (Double) expected.get(metric),
            (Double) HyperUniquesAggregatorFactory.estimateCardinality(actualValue, false),
            0.001
        );
      } else {
        Assert.assertEquals(expected.get(metric), actual.getMetric(metric));
      }
    }
  }
}
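For the HyperLogLogCollector branch above, the expected value is stored as a plain Double and compared against the estimated cardinality with a 0.001 tolerance. A hypothetical expected-row entry might look like this (all keys and values are illustrative, not taken from the test's data files):

ImmutableMap<String, Object> expectedRow = ImmutableMap.of(
    "time", DateTimes.of("2014-10-22T00:00:00.000Z"),
    "host", ImmutableList.of("a.example.com"),
    "visited_sum", 100L,
    "unique_hosts", 1.0d  // compared via HyperUniquesAggregatorFactory.estimateCardinality(..., false)
);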