Search in sources :

Example 11 with CodedInputStream

use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

the class DwrfMetadataReader method readStripeFooter.

/**
 * Decodes a DWRF stripe footer from the supplied stream.
 *
 * <p>The stream is wrapped in a {@code CodedInputStream} and parsed as a
 * {@code DwrfProto.StripeFooter}; the message's stream list and column list are then
 * converted with {@code toStream} and {@code toColumnEncoding}.
 *
 * @param hiveWriterVersion not consulted by this method
 * @param types forwarded to {@code toColumnEncoding} together with the parsed column list
 * @param inputStream source of the serialized stripe footer
 * @return a {@code StripeFooter} built from the converted streams and column encodings
 * @throws IOException propagated from creating or parsing the protobuf input
 */
@Override
public StripeFooter readStripeFooter(HiveWriterVersion hiveWriterVersion, List<OrcType> types, InputStream inputStream) throws IOException {
    DwrfProto.StripeFooter protoFooter = DwrfProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));
    return new StripeFooter(
            toStream(protoFooter.getStreamsList()),
            toColumnEncoding(types, protoFooter.getColumnsList()));
}
Also used : CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) DwrfProto(com.facebook.presto.orc.proto.DwrfProto)

Example 12 with CodedInputStream

use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

the class DwrfMetadataReader method readFooter.

/**
 * Decodes a DWRF file footer from the supplied stream.
 *
 * <p>The stream is parsed as a {@code DwrfProto.Footer}, and each of its parts (row count,
 * row index stride, stripes, types, statistics, user metadata) is passed through the
 * matching conversion helper before being handed to the {@code Footer} constructor.
 *
 * @param hiveWriterVersion forwarded to {@code toColumnStatistics}
 * @param inputStream source of the serialized footer
 * @return a {@code Footer} built from the parsed message
 * @throws IOException propagated from creating or parsing the protobuf input
 */
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream) throws IOException {
    DwrfProto.Footer protoFooter = DwrfProto.Footer.parseFrom(CodedInputStream.newInstance(inputStream));
    return new Footer(
            protoFooter.getNumberOfRows(),
            protoFooter.getRowIndexStride(),
            toStripeInformation(protoFooter.getStripesList()),
            toType(protoFooter.getTypesList()),
            toColumnStatistics(hiveWriterVersion, protoFooter.getStatisticsList(), false),
            toUserMetadata(protoFooter.getMetadataList()));
}
Also used : CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) DwrfProto(com.facebook.presto.orc.proto.DwrfProto)

Example 13 with CodedInputStream

use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

the class TestOrcBloomFilters method testOrcHiveBloomFilterSerde.

/**
 * Serializes a bloom filter into an ORC bloom filter index and checks that it can be read
 * back two ways: through {@code OrcMetadataReader.readBloomFilterIndexes} and by parsing
 * the protobuf message directly.
 */
@Test
public void testOrcHiveBloomFilterSerde() throws Exception {
    BloomFilter written = new BloomFilter(1000L, 0.05);
    written.addString(TEST_STRING);
    assertTrue(written.testString(TEST_STRING));

    // Copy the written filter into its protobuf form and serialize it inside an index
    OrcProto.BloomFilter protoFilter = OrcProto.BloomFilter.newBuilder()
            .addAllBitset(Longs.asList(written.getBitSet()))
            .setNumHashFunctions(written.getNumHashFunctions())
            .build();
    byte[] serialized = serializeBloomFilterToIndex(protoFilter, OrcProto.BloomFilterIndex.getDefaultInstance());

    // Path 1: read the index back through the metadata reader
    OrcMetadataReader metadataReader = new OrcMetadataReader(new RuntimeStats());
    List<HiveBloomFilter> decodedFilters = metadataReader.readBloomFilterIndexes(new ByteArrayInputStream(serialized));
    assertEquals(decodedFilters.size(), 1);
    HiveBloomFilter decoded = decodedFilters.get(0);
    assertTrue(decoded.testString(TEST_STRING));
    assertFalse(decoded.testString(TEST_STRING_NOT_WRITTEN));
    assertEquals(written.getBitSize(), decoded.getBitSize());
    assertEquals(written.getNumHashFunctions(), decoded.getNumHashFunctions());
    // The decoded bit set must match the written one element for element
    assertTrue(Arrays.equals(decoded.getBitSet(), written.getBitSet()));

    // Path 2: parse the protobuf directly, which exposes the raw bit set for inspection
    OrcProto.BloomFilterIndex parsedIndex = OrcProto.BloomFilterIndex.parseFrom(CodedInputStream.newInstance(serialized));
    List<OrcProto.BloomFilter> parsedFilters = parsedIndex.getBloomFilterList();
    assertEquals(parsedFilters.size(), 1);
    OrcProto.BloomFilter parsed = parsedFilters.get(0);
    // Contents of the ORC bloom filter bit set
    assertTrue(Arrays.equals(Longs.toArray(parsed.getBitsetList()), written.getBitSet()));
    // Number of hash functions
    assertEquals(written.getNumHashFunctions(), parsed.getNumHashFunctions());
    // Number of entries in the bit set array
    assertEquals(written.getBitSet().length, parsed.getBitsetCount());
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) InputStream(java.io.InputStream) RuntimeStats(com.facebook.presto.common.RuntimeStats) OrcProto(com.facebook.presto.orc.proto.OrcProto) OrcMetadataReader(com.facebook.presto.orc.metadata.OrcMetadataReader) HiveBloomFilter(com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) TupleDomainOrcPredicate.checkInBloomFilter(com.facebook.presto.orc.TupleDomainOrcPredicate.checkInBloomFilter) BloomFilter(com.facebook.presto.orc.metadata.statistics.BloomFilter) Test(org.testng.annotations.Test)

Example 14 with CodedInputStream

use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

the class AbstractTestDwrfStripeCaching method readFileFooter.

/**
 * Reads the protobuf footer of the given file by walking backwards from its end: first the
 * one-byte postscript size, then the postscript, then the footer that precedes it.
 *
 * <p>The footer bytes are read through an {@code OrcInputStream} whose decompressor is
 * created for {@code ZLIB}, using the compression block size recorded in the postscript.
 *
 * @param orcFile file to read; its name is also used as the data source id
 * @return the parsed {@code DwrfProto.Footer}
 * @throws UncheckedIOException wrapping any {@code IOException} raised while reading or parsing
 */
static DwrfProto.Footer readFileFooter(File orcFile) {
    try (RandomAccessFile file = new RandomAccessFile(orcFile, "r")) {
        // The last byte of the file is the postscript size (masked to an unsigned value)
        file.seek(file.length() - 1);
        int postScriptSize = file.read() & 0xff;

        // The postscript ends right before that size byte
        long postScriptOffset = file.length() - postScriptSize - 1;
        byte[] rawPostScript = readBytes(file, postScriptOffset, postScriptSize);
        DwrfProto.PostScript postScript = DwrfProto.PostScript.parseFrom(CodedInputStream.newInstance(rawPostScript, 0, postScriptSize));

        // The footer ends right where the postscript begins
        long footerOffset = postScriptOffset - postScript.getFooterLength();
        int footerLength = toIntExact(postScript.getFooterLength());
        byte[] rawFooter = readBytes(file, footerOffset, postScript.getFooterLength());

        // Set up decompression of the footer bytes
        int compressionBlockSize = toIntExact(postScript.getCompressionBlockSize());
        OrcDataSourceId dataSourceId = new OrcDataSourceId(orcFile.getName());
        Optional<OrcDecompressor> decompressor = OrcDecompressor.createOrcDecompressor(dataSourceId, ZLIB, compressionBlockSize);
        InputStream footerStream = new OrcInputStream(
                dataSourceId,
                new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                Slices.wrappedBuffer(rawFooter).slice(0, footerLength).getInput(),
                decompressor,
                Optional.empty(),
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                footerLength);
        return DwrfProto.Footer.parseFrom(footerStream);
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) InputStream(java.io.InputStream) UncheckedIOException(java.io.UncheckedIOException) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) IOException(java.io.IOException) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) RandomAccessFile(java.io.RandomAccessFile)

Example 15 with CodedInputStream

use of com.facebook.presto.orc.protobuf.CodedInputStream in project presto by prestodb.

the class OrcMetadataReader method readStripeFooter.

/**
 * Decodes an ORC stripe footer from the supplied stream and records the thread CPU time
 * spent on the protobuf parse.
 *
 * <p>The CPU time measured around creating the {@code CodedInputStream} and parsing the
 * {@code OrcProto.StripeFooter} is added to {@code runtimeStats} under
 * {@code "OrcReadStripeFooterTimeNanos"}. The conversion to {@code StripeFooter} happens
 * after the metric is recorded and is not included in it.
 *
 * @param orcDataSourceId not consulted by this method
 * @param types not consulted by this method
 * @param inputStream source of the serialized stripe footer
 * @return a {@code StripeFooter} built from the converted streams and column encodings,
 *         with an empty list as its third constructor argument
 * @throws IOException propagated from creating or parsing the protobuf input
 */
@Override
public StripeFooter readStripeFooter(OrcDataSourceId orcDataSourceId, List<OrcType> types, InputStream inputStream) throws IOException {
    long startCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();
    OrcProto.StripeFooter protoFooter = OrcProto.StripeFooter.parseFrom(CodedInputStream.newInstance(inputStream));
    long parseCpuNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime() - startCpuNanos;
    runtimeStats.addMetricValue("OrcReadStripeFooterTimeNanos", parseCpuNanos);

    return new StripeFooter(
            toStream(protoFooter.getStreamsList()),
            toColumnEncoding(protoFooter.getColumnsList()),
            ImmutableList.of());
}
Also used : CodedInputStream(com.facebook.presto.orc.protobuf.CodedInputStream) OrcProto(com.facebook.presto.orc.proto.OrcProto)

Aggregations

CodedInputStream (com.facebook.presto.orc.protobuf.CodedInputStream) 20, DwrfProto (com.facebook.presto.orc.proto.DwrfProto) 10, OrcProto (com.facebook.presto.orc.proto.OrcProto) 10, InputStream (java.io.InputStream) 7, ImmutableList (com.google.common.collect.ImmutableList) 6, Slice (io.airlift.slice.Slice) 6, IOException (java.io.IOException) 6, HiveWriterVersion (com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion) 5, List (java.util.List) 5, ColumnEncodingKind (com.facebook.presto.orc.metadata.ColumnEncoding.ColumnEncodingKind) 4, SNAPPY (com.facebook.presto.orc.metadata.CompressionKind.SNAPPY) 4, ZLIB (com.facebook.presto.orc.metadata.CompressionKind.ZLIB) 4, OrcTypeKind (com.facebook.presto.orc.metadata.OrcType.OrcTypeKind) 4, ORIGINAL (com.facebook.presto.orc.metadata.PostScript.HiveWriterVersion.ORIGINAL) 4, StreamKind (com.facebook.presto.orc.metadata.Stream.StreamKind) 4, HiveBloomFilter (com.facebook.presto.orc.metadata.statistics.HiveBloomFilter) 4, OrcInputStream (com.facebook.presto.orc.stream.OrcInputStream) 4, Preconditions.checkState (com.google.common.base.Preconditions.checkState) 4, ImmutableMap (com.google.common.collect.ImmutableMap) 4, Math.toIntExact (java.lang.Math.toIntExact) 4