Search in sources :

Example 1 with CodedInputStream

use of io.prestosql.orc.protobuf.CodedInputStream in project hetu-core by openlookeng.

the class OrcMetadataReader method readStripeFooter.

/**
 * Parses a protobuf-encoded ORC stripe footer from the given stream.
 *
 * @param types column type metadata; not consulted by this implementation
 * @param inputStream stream supplying the serialized stripe footer
 * @param legacyFileTimeZone zone used when the footer carries no writer time zone
 * @return the stripe footer built from the parsed streams, column encodings and time zone
 * @throws IOException propagated from reading or parsing the stream
 */
@Override
public StripeFooter readStripeFooter(ColumnMetadata<OrcType> types, InputStream inputStream, ZoneId legacyFileTimeZone) throws IOException {
    OrcProto.StripeFooter protoFooter = OrcProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));

    // An empty writer time zone is treated as absent and falls back to the caller-supplied legacy zone.
    String writerZoneId = emptyToNull(protoFooter.getWriterTimezone());
    ZoneId fileTimeZone = (writerZoneId == null)
            ? legacyFileTimeZone
            : TimeZone.getTimeZone(writerZoneId).toZoneId();

    return new StripeFooter(
            toStream(protoFooter.getStreamsList()),
            toColumnEncoding(protoFooter.getColumnsList()),
            fileTimeZone);
}
Also used : CodedInputStream(io.prestosql.orc.protobuf.CodedInputStream) OrcProto(io.prestosql.orc.proto.OrcProto)

Example 2 with CodedInputStream

use of io.prestosql.orc.protobuf.CodedInputStream in project hetu-core by openlookeng.

the class TestOrcBloomFilters method testOrcHiveBloomFilterSerde.

/**
 * Round-trips a bloom filter through the ORC protobuf bloom filter index and checks that both
 * the metadata reader and a direct protobuf parse yield the bit set and hash-function count
 * that were written.
 */
@Test
public void testOrcHiveBloomFilterSerde() throws Exception {
    // Build the filter to serialize and sanity-check it before writing.
    HashableBloomFilter written = new HashableBloomFilter(1000L, 0.05);
    written.add(TEST_STRING);
    assertTrue(written.test(TEST_STRING));
    assertTrue(written.test(wrappedBuffer(TEST_STRING)));

    OrcProto.BloomFilter protoFilter = OrcProto.BloomFilter.newBuilder()
            .addAllBitset(Longs.asList(written.getBitSet()))
            .setNumHashFunctions(written.getNumHashFunctions())
            .build();
    byte[] serialized = serializeBloomFilterToIndex(protoFilter, OrcProto.BloomFilterIndex.getDefaultInstance());

    // Path 1: read back through the metadata reader.
    List<HashableBloomFilter> viaReader = new OrcMetadataReader().readBloomFilterIndexes(new ByteArrayInputStream(serialized));
    assertEquals(viaReader.size(), 1);
    HashableBloomFilter restored = viaReader.get(0);
    assertTrue(restored.test(TEST_STRING));
    assertTrue(restored.test(wrappedBuffer(TEST_STRING)));
    assertFalse(restored.test(TEST_STRING_NOT_WRITTEN));
    assertFalse(restored.test(wrappedBuffer(TEST_STRING_NOT_WRITTEN)));
    assertEquals(written.getNumBits(), restored.getNumBits());
    assertEquals(written.getNumHashFunctions(), restored.getNumHashFunctions());
    // The restored bit set must match the written one word for word.
    assertTrue(Arrays.equals(restored.getBitSet(), written.getBitSet()));

    // Path 2: parse the protobuf directly, which allows closer inspection of the bit sets.
    OrcProto.BloomFilterIndex parsedIndex = OrcProto.BloomFilterIndex.parseFrom(CodedInputStream.newInstance(serialized));
    List<OrcProto.BloomFilter> parsedFilters = parsedIndex.getBloomFilterList();
    assertEquals(parsedFilters.size(), 1);
    OrcProto.BloomFilter parsedFilter = parsedFilters.get(0);
    // Contents of the ORC bloom filter bit set.
    assertTrue(Arrays.equals(Longs.toArray(parsedFilter.getBitsetList()), written.getBitSet()));
    // Number of hash functions.
    assertEquals(written.getNumHashFunctions(), parsedFilter.getNumHashFunctions());
    // Number of words in the bit set.
    assertEquals(written.getBitSet().length, parsedFilter.getBitsetCount());
}
Also used : CodedInputStream(io.prestosql.orc.protobuf.CodedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) OrcProto(io.prestosql.orc.proto.OrcProto) OrcMetadataReader(io.prestosql.orc.metadata.OrcMetadataReader) HashableBloomFilter(io.prestosql.orc.metadata.statistics.HashableBloomFilter) TupleDomainOrcPredicate.checkInBloomFilter(io.prestosql.orc.TupleDomainOrcPredicate.checkInBloomFilter) Test(org.testng.annotations.Test)

Example 3 with CodedInputStream

use of io.prestosql.orc.protobuf.CodedInputStream in project hetu-core by openlookeng.

the class OrcMetadataReader method readFooter.

/**
 * Parses a protobuf-encoded ORC file footer from the given stream.
 *
 * @param hiveWriterVersion writer version forwarded to the column statistics conversion
 * @param inputStream stream supplying the serialized footer
 * @return the footer assembled from the parsed row count, row index stride, stripes, types,
 *         column statistics and user metadata
 * @throws IOException propagated from reading or parsing the stream
 */
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream) throws IOException {
    CodedInputStream protoInput = CodedInputStream.newInstance(inputStream);
    // Apply the reader's configured message size limit before parsing.
    protoInput.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
    OrcProto.Footer protoFooter = OrcProto.Footer.parseFrom(protoInput);

    return new Footer(
            protoFooter.getNumberOfRows(),
            protoFooter.getRowIndexStride(),
            toStripeInformation(protoFooter.getStripesList()),
            toType(protoFooter.getTypesList()),
            toColumnStatistics(hiveWriterVersion, protoFooter.getStatisticsList(), false),
            toUserMetadata(protoFooter.getMetadataList()));
}
Also used : CodedInputStream(io.prestosql.orc.protobuf.CodedInputStream) OrcProto(io.prestosql.orc.proto.OrcProto)

Example 4 with CodedInputStream

use of io.prestosql.orc.protobuf.CodedInputStream in project hetu-core by openlookeng.

the class OrcMetadataReader method readBloomFilterIndexes.

/**
 * Parses a protobuf-encoded bloom filter index and converts each entry to a
 * {@link HashableBloomFilter}.
 *
 * @param inputStream stream supplying the serialized bloom filter index
 * @return one bloom filter per entry in the index, in index order
 * @throws IOException propagated from reading or parsing the stream
 */
@Override
public List<HashableBloomFilter> readBloomFilterIndexes(InputStream inputStream) throws IOException {
    OrcProto.BloomFilterIndex index = OrcProto.BloomFilterIndex.parseFrom(CodedInputStream.newInstance(inputStream));
    ImmutableList.Builder<HashableBloomFilter> filters = ImmutableList.builder();

    for (OrcProto.BloomFilter protoFilter : index.getBloomFilterList()) {
        long[] bitSet;
        if (!protoFilter.hasUtf8Bitset()) {
            // The bit set is stored as a list of long values.
            bitSet = Longs.toArray(protoFilter.getBitsetList());
        } else {
            // The bit set is stored as packed bytes; decode them as little-endian 64-bit words.
            ByteString packedBits = protoFilter.getUtf8Bitset();
            bitSet = new long[packedBits.size() / 8];
            packedBits.asReadOnlyByteBuffer().order(ByteOrder.LITTLE_ENDIAN).asLongBuffer().get(bitSet);
        }
        filters.add(new HashableBloomFilter(bitSet, protoFilter.getNumHashFunctions()));
    }
    return filters.build();
}
Also used : CodedInputStream(io.prestosql.orc.protobuf.CodedInputStream) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ByteString(io.prestosql.orc.protobuf.ByteString) OrcProto(io.prestosql.orc.proto.OrcProto) HashableBloomFilter(io.prestosql.orc.metadata.statistics.HashableBloomFilter)

Example 5 with CodedInputStream

use of io.prestosql.orc.protobuf.CodedInputStream in project hetu-core by openlookeng.

the class OrcMetadataReader method readMetadata.

/**
 * Parses the protobuf-encoded ORC metadata section from the given stream.
 *
 * @param hiveWriterVersion writer version forwarded to the stripe statistics conversion
 * @param inputStream stream supplying the serialized metadata section
 * @return metadata wrapping the converted stripe statistics
 * @throws IOException propagated from reading or parsing the stream
 */
@Override
public Metadata readMetadata(HiveWriterVersion hiveWriterVersion, InputStream inputStream) throws IOException {
    CodedInputStream protoInput = CodedInputStream.newInstance(inputStream);
    // Apply the reader's configured message size limit before parsing.
    protoInput.setSizeLimit(PROTOBUF_MESSAGE_MAX_LIMIT);
    OrcProto.Metadata protoMetadata = OrcProto.Metadata.parseFrom(protoInput);

    return new Metadata(
            toStripeStatistics(hiveWriterVersion, protoMetadata.getStripeStatsList()));
}
Also used : CodedInputStream(io.prestosql.orc.protobuf.CodedInputStream) OrcProto(io.prestosql.orc.proto.OrcProto)

Aggregations

OrcProto (io.prestosql.orc.proto.OrcProto)7 CodedInputStream (io.prestosql.orc.protobuf.CodedInputStream)7 HashableBloomFilter (io.prestosql.orc.metadata.statistics.HashableBloomFilter)3 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)2 ByteString (io.prestosql.orc.protobuf.ByteString)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Strings.emptyToNull (com.google.common.base.Strings.emptyToNull)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Longs (com.google.common.primitives.Longs)1 Slice (io.airlift.slice.Slice)1 SliceUtf8.lengthOfCodePoint (io.airlift.slice.SliceUtf8.lengthOfCodePoint)1 SliceUtf8.tryGetCodePointAt (io.airlift.slice.SliceUtf8.tryGetCodePointAt)1 Slices (io.airlift.slice.Slices)1 DataSize (io.airlift.units.DataSize)1 GIGABYTE (io.airlift.units.DataSize.Unit.GIGABYTE)1 TupleDomainOrcPredicate.checkInBloomFilter (io.prestosql.orc.TupleDomainOrcPredicate.checkInBloomFilter)1 ColumnEncodingKind (io.prestosql.orc.metadata.ColumnEncoding.ColumnEncodingKind)1 LZ4 (io.prestosql.orc.metadata.CompressionKind.LZ4)1