Use of io.trino.orc.metadata.CompressedMetadataWriter in project trino by trinodb.
From the class TestOrcBloomFilters, method testOrcHiveBloomFilterSerde.
/**
 * Serializes a single bloom filter with {@code CompressedMetadataWriter.writeBloomFilters}
 * and checks that it survives the round trip in two ways: once via
 * {@code OrcMetadataReader.readBloomFilterIndexes}, and once by parsing the raw bytes
 * as an {@code OrcProto.BloomFilterIndex}.
 *
 * @throws Exception if writing or reading the serialized bloom filter fails
 */
@Test
public void testOrcHiveBloomFilterSerde()
        throws Exception
{
    // Populate the filter with one entry and confirm both lookup paths find it
    BloomFilter written = new BloomFilter(1000L, 0.05);
    written.add(TEST_STRING);
    assertTrue(written.test(TEST_STRING));
    assertTrue(written.testSlice(wrappedBuffer(TEST_STRING)));

    // Serialize without compression
    OrcMetadataWriter orcMetadataWriter = new OrcMetadataWriter(WriterIdentification.TRINO);
    CompressedMetadataWriter compressedWriter = new CompressedMetadataWriter(orcMetadataWriter, CompressionKind.NONE, 1024);
    Slice serialized = compressedWriter.writeBloomFilters(ImmutableList.of(written));

    // Path 1: decode through the metadata reader
    OrcMetadataReader reader = new OrcMetadataReader();
    InputStream serializedInput = serialized.getInput();
    List<BloomFilter> decodedFilters = reader.readBloomFilterIndexes(serializedInput);
    assertEquals(decodedFilters.size(), 1);

    BloomFilter decoded = decodedFilters.get(0);
    // The written entry must be found; the entry that was never added must not be
    assertTrue(decoded.test(TEST_STRING));
    assertTrue(decoded.testSlice(wrappedBuffer(TEST_STRING)));
    assertFalse(decoded.test(TEST_STRING_NOT_WRITTEN));
    assertFalse(decoded.testSlice(wrappedBuffer(TEST_STRING_NOT_WRITTEN)));

    // NOTE(review): the assertEquals calls below pass the written value first, whereas the
    // size checks pass the literal second — confirm which order the assertion library expects.
    assertEquals(written.getNumBits(), decoded.getNumBits());
    assertEquals(written.getNumHashFunctions(), decoded.getNumHashFunctions());

    // The decoded bit set must match the written one
    assertTrue(Arrays.equals(decoded.getBitSet(), written.getBitSet()));

    // Path 2: parse the protobuf message directly, which allows closer inspection of the bit sets
    CodedInputStream protoInput = CodedInputStream.newInstance(serialized.getBytes());
    OrcProto.BloomFilterIndex protoIndex = OrcProto.BloomFilterIndex.parseFrom(protoInput);
    List<OrcProto.BloomFilter> protoFilters = protoIndex.getBloomFilterList();
    assertEquals(protoFilters.size(), 1);

    OrcProto.BloomFilter protoFilter = protoFilters.get(0);

    // Bit set contents of the serialized ORC bloom filter
    long[] protoBits = Longs.toArray(protoFilter.getBitsetList());
    assertTrue(Arrays.equals(protoBits, written.getBitSet()));

    // Number of hash functions
    assertEquals(written.getNumHashFunctions(), protoFilter.getNumHashFunctions());

    // Number of longs in the bit set
    assertEquals(written.getBitSet().length, protoFilter.getBitsetCount());
}
Aggregations