Use of io.prestosql.orc.protobuf.CodedInputStream in the project hetu-core by openLooKeng:
class OrcMetadataReader, method readStripeFooter.
@Override
public StripeFooter readStripeFooter(ColumnMetadata<OrcType> types, InputStream inputStream, ZoneId legacyFileTimeZone) throws IOException {
    // Decode the protobuf-encoded stripe footer directly from the stream.
    CodedInputStream codedInput = CodedInputStream.newInstance(inputStream);
    OrcProto.StripeFooter footer = OrcProto.StripeFooter.parseFrom(codedInput);

    // Use the writer-recorded time zone when present; an empty value falls
    // back to the file-level legacy time zone supplied by the caller.
    ZoneId writerTimeZone = Optional.ofNullable(emptyToNull(footer.getWriterTimezone()))
            .map(zone -> TimeZone.getTimeZone(zone).toZoneId())
            .orElse(legacyFileTimeZone);

    return new StripeFooter(
            toStream(footer.getStreamsList()),
            toColumnEncoding(footer.getColumnsList()),
            writerTimeZone);
}
Use of io.prestosql.orc.protobuf.CodedInputStream in the project hetu-core by openLooKeng:
class TestOrcBloomFilters, method testOrcHiveBloomFilterSerde.
@Test
public void testOrcHiveBloomFilterSerde() throws Exception {
    // Build a bloom filter and verify it recognizes the inserted value.
    HashableBloomFilter writtenFilter = new HashableBloomFilter(1000L, 0.05);
    writtenFilter.add(TEST_STRING);
    assertTrue(writtenFilter.test(TEST_STRING));
    assertTrue(writtenFilter.test(wrappedBuffer(TEST_STRING)));

    // Serialize it through the ORC protobuf representation.
    OrcProto.BloomFilter.Builder protoBuilder = OrcProto.BloomFilter.newBuilder();
    protoBuilder.addAllBitset(Longs.asList(writtenFilter.getBitSet()));
    protoBuilder.setNumHashFunctions(writtenFilter.getNumHashFunctions());
    byte[] serialized = serializeBloomFilterToIndex(protoBuilder.build(), OrcProto.BloomFilterIndex.getDefaultInstance());

    // Round-trip through the metadata reader.
    OrcMetadataReader metadataReader = new OrcMetadataReader();
    List<HashableBloomFilter> readFilters = metadataReader.readBloomFilterIndexes(new ByteArrayInputStream(serialized));
    assertEquals(readFilters.size(), 1);
    HashableBloomFilter readFilter = readFilters.get(0);
    assertTrue(readFilter.test(TEST_STRING));
    assertTrue(readFilter.test(wrappedBuffer(TEST_STRING)));
    assertFalse(readFilter.test(TEST_STRING_NOT_WRITTEN));
    assertFalse(readFilter.test(wrappedBuffer(TEST_STRING_NOT_WRITTEN)));
    assertEquals(writtenFilter.getNumBits(), readFilter.getNumBits());
    assertEquals(writtenFilter.getNumHashFunctions(), readFilter.getNumHashFunctions());
    // The raw bit set must survive the round trip unchanged.
    assertTrue(Arrays.equals(readFilter.getBitSet(), writtenFilter.getBitSet()));

    // Deserialize directly via protobuf: allows better inspection of the
    // bit sets (helped to fix a lot of bugs).
    CodedInputStream codedInput = CodedInputStream.newInstance(serialized);
    OrcProto.BloomFilterIndex deserializedIndex = OrcProto.BloomFilterIndex.parseFrom(codedInput);
    List<OrcProto.BloomFilter> protoFilters = deserializedIndex.getBloomFilterList();
    assertEquals(protoFilters.size(), 1);
    OrcProto.BloomFilter protoFilter = protoFilters.get(0);
    // Validate contents of the ORC bloom filter bit set.
    assertTrue(Arrays.equals(Longs.toArray(protoFilter.getBitsetList()), writtenFilter.getBitSet()));
    // Hash function count.
    assertEquals(writtenFilter.getNumHashFunctions(), protoFilter.getNumHashFunctions());
    // Bit set size.
    assertEquals(writtenFilter.getBitSet().length, protoFilter.getBitsetCount());
}
Use of io.prestosql.orc.protobuf.CodedInputStream in the project hetu-core by openLooKeng:
class OrcMetadataReader, method readFooter.
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream) throws IOException {
    // Parse the protobuf file footer, capping the message size so a corrupt
    // length field cannot trigger an oversized allocation.
    CodedInputStream codedInput = CodedInputStream.newInstance(inputStream);
    codedInput.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
    OrcProto.Footer protoFooter = OrcProto.Footer.parseFrom(codedInput);

    return new Footer(
            protoFooter.getNumberOfRows(),
            protoFooter.getRowIndexStride(),
            toStripeInformation(protoFooter.getStripesList()),
            toType(protoFooter.getTypesList()),
            toColumnStatistics(hiveWriterVersion, protoFooter.getStatisticsList(), false),
            toUserMetadata(protoFooter.getMetadataList()));
}
Use of io.prestosql.orc.protobuf.CodedInputStream in the project hetu-core by openLooKeng:
class OrcMetadataReader, method readBloomFilterIndexes.
@Override
public List<HashableBloomFilter> readBloomFilterIndexes(InputStream inputStream) throws IOException {
    // Parse the serialized bloom filter index from the stream.
    CodedInputStream codedInput = CodedInputStream.newInstance(inputStream);
    OrcProto.BloomFilterIndex index = OrcProto.BloomFilterIndex.parseFrom(codedInput);

    ImmutableList.Builder<HashableBloomFilter> filters = ImmutableList.builder();
    for (OrcProto.BloomFilter protoFilter : index.getBloomFilterList()) {
        if (!protoFilter.hasUtf8Bitset()) {
            // Older representation: the bit set is a repeated 64-bit field.
            filters.add(new HashableBloomFilter(Longs.toArray(protoFilter.getBitsetList()), protoFilter.getNumHashFunctions()));
            continue;
        }
        // Newer representation: the bit set is one byte blob, decoded here
        // as little-endian 64-bit words.
        ByteString utf8Bitset = protoFilter.getUtf8Bitset();
        long[] bits = new long[utf8Bitset.size() / 8];
        utf8Bitset.asReadOnlyByteBuffer().order(ByteOrder.LITTLE_ENDIAN).asLongBuffer().get(bits);
        filters.add(new HashableBloomFilter(bits, protoFilter.getNumHashFunctions()));
    }
    return filters.build();
}
Use of io.prestosql.orc.protobuf.CodedInputStream in the project hetu-core by openLooKeng:
class OrcMetadataReader, method readMetadata.
@Override
public Metadata readMetadata(HiveWriterVersion hiveWriterVersion, InputStream inputStream) throws IOException {
    // Parse the protobuf metadata section, capping the message size so a
    // corrupt length field cannot trigger an oversized allocation.
    CodedInputStream codedInput = CodedInputStream.newInstance(inputStream);
    codedInput.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
    OrcProto.Metadata protoMetadata = OrcProto.Metadata.parseFrom(codedInput);
    return new Metadata(toStripeStatistics(hiveWriterVersion, protoMetadata.getStripeStatsList()));
}
Aggregations