Search in sources :

Example 21 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class OrcTester method getFileMetadata.

/**
 * Reads the file footer and the footer of every stripe listed in it.
 *
 * @param inputFile file to read
 * @param encoding encoding handed to the {@link OrcReader} and used to create the metadata
 *     reader that parses each stripe footer
 * @return the file footer together with one stripe footer per stripe, in footer order
 * @throws IOException if reading from the file fails
 */
public static FileMetadata getFileMetadata(File inputFile, OrcEncoding encoding) throws IOException {
    boolean zstdJniDecompressionEnabled = true;
    DataSize dataSize = new DataSize(1, MEGABYTE);
    // Close the data source once all footers are read; previously it was never closed.
    // NOTE(review): assumes Footer/StripeFooter are fully materialized and do not read
    // from the data source after this method returns — confirm.
    try (OrcDataSource orcDataSource = new FileOrcDataSource(inputFile, dataSize, dataSize, dataSize, true)) {
        RuntimeStats runtimeStats = new RuntimeStats();
        OrcReader reader = new OrcReader(orcDataSource, encoding, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, new OrcReaderOptions(dataSize, dataSize, dataSize, zstdJniDecompressionEnabled), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, runtimeStats);
        Footer footer = reader.getFooter();
        Optional<OrcDecompressor> decompressor = createOrcDecompressor(orcDataSource.getId(), reader.getCompressionKind(), reader.getBufferSize(), zstdJniDecompressionEnabled);
        ImmutableList.Builder<StripeFooter> stripes = new ImmutableList.Builder<>();
        for (StripeInformation stripe : footer.getStripes()) {
            // the stripe footer sits right after the stripe's index and data sections
            byte[] tailBuffer = new byte[toIntExact(stripe.getFooterLength())];
            orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), tailBuffer);
            try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT), Slices.wrappedBuffer(tailBuffer).getInput(), decompressor, Optional.empty(), new TestingHiveOrcAggregatedMemoryContext(), tailBuffer.length)) {
                StripeFooter stripeFooter = encoding.createMetadataReader(runtimeStats).readStripeFooter(orcDataSource.getId(), footer.getTypes(), inputStream);
                stripes.add(stripeFooter);
            }
        }
        return new FileMetadata(footer, stripes.build());
    }
}
Also used : OrcInputStream(com.facebook.presto.orc.stream.OrcInputStream) RuntimeStats(com.facebook.presto.common.RuntimeStats) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) InputStream(java.io.InputStream) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) OrcDecompressor.createOrcDecompressor(com.facebook.presto.orc.OrcDecompressor.createOrcDecompressor) SharedBuffer(com.facebook.presto.orc.stream.SharedBuffer) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) DataSize(io.airlift.units.DataSize) Footer(com.facebook.presto.orc.metadata.Footer) StripeInformation(com.facebook.presto.orc.metadata.StripeInformation)

Example 22 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class OrcTester method createCustomOrcSelectiveRecordReader.

/**
 * Opens {@code file} with an {@link OrcReader}, checks that its leading column names match
 * {@code makeColumnNames(types.size())}, and returns a selective record reader created over
 * the range {@code [0, orcDataSource.getSize())}.
 *
 * <p>The filter, subfield, constant-value and column arguments are forwarded unchanged to
 * {@code OrcReader.createSelectiveRecordReader}. The data source is left open because the
 * returned reader is created from the {@link OrcReader} that wraps it.
 *
 * @throws IOException declared by the original signature; raised by the underlying file or reader calls
 */
public static OrcSelectiveRecordReader createCustomOrcSelectiveRecordReader(File file, OrcEncoding orcEncoding, OrcPredicate predicate, List<Type> types, int initialBatchSize, Map<Integer, Map<Subfield, TupleDomainFilter>> filters, List<FilterFunction> filterFunctions, Map<Integer, Integer> filterFunctionInputMapping, Map<Integer, List<Subfield>> requiredSubfields, Map<Integer, Object> constantValues, Map<Integer, Slice> intermediateEncryptionKeys, Map<Integer, Type> includedColumns, List<Integer> outputColumns, boolean mapNullKeysEnabled, OrcAggregatedMemoryContext systemMemoryUsage, boolean appendRowNumber) throws IOException {
    // every size knob used below is the same 1 MB value
    DataSize oneMegabyte = new DataSize(1, MEGABYTE);
    OrcDataSource orcDataSource = new FileOrcDataSource(file, oneMegabyte, oneMegabyte, oneMegabyte, true);

    OrcReaderOptions readerOptions = new OrcReaderOptions(oneMegabyte, oneMegabyte, MAX_BLOCK_SIZE, false, mapNullKeysEnabled, false, appendRowNumber);
    DwrfEncryptionProvider encryptionProvider = new DwrfEncryptionProvider(new UnsupportedEncryptionLibrary(), new TestingEncryptionLibrary());
    OrcReader orcReader = new OrcReader(
            orcDataSource,
            orcEncoding,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            readerOptions,
            false,
            encryptionProvider,
            DwrfKeyProvider.of(intermediateEncryptionKeys),
            new RuntimeStats());

    // only the first types.size() column names are compared against the generated names
    int columnCount = types.size();
    assertEquals(orcReader.getColumnNames().subList(0, columnCount), makeColumnNames(columnCount));

    return orcReader.createSelectiveRecordReader(
            includedColumns,
            outputColumns,
            filters,
            filterFunctions,
            filterFunctionInputMapping,
            requiredSubfields,
            constantValues,
            ImmutableMap.of(),
            predicate,
            0,
            orcDataSource.getSize(),
            HIVE_STORAGE_TIME_ZONE,
            LEGACY_MAP_SUBSCRIPT,
            systemMemoryUsage,
            Optional.empty(),
            initialBatchSize);
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) DataSize(io.airlift.units.DataSize) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource)

Example 23 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class TestStorageOrcFileTailSource method testSkipDwrfStripeCacheIfDisabled.

/**
 * Verifies that the tail source does not read the DWRF stripe cache when it is created with
 * the "read dwrf stripe cache" flag set to {@code false}, even though the post script
 * declares one, and that a single read of {@code tailReadSizeInBytes} bytes is issued.
 */
@Test
public void testSkipDwrfStripeCacheIfDisabled() throws IOException {
    DwrfProto.Footer.Builder footer = DwrfProto.Footer.newBuilder().addAllStripeCacheOffsets(ImmutableList.of(0, 256, 512));
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setCompression(NONE).setCacheMode(BOTH).setCacheSize(512);
    // try-with-resources so the stream is closed even when a write fails
    try (FileOutputStream out = new FileOutputStream(file.getFile())) {
        // beef up the file size so the file is larger than the 512 byte stripe cache declared in the post script
        out.write(new byte[100 * 1000]);
        // write the footer and post script
        writeTail(footer, postScript, out);
    }
    int tailReadSizeInBytes = 256;
    // read the file tail with the disabled "read dwrf stripe cache" feature
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(tailReadSizeInBytes, false);
    TestingOrcDataSource orcDataSource = new TestingOrcDataSource(createFileOrcDataSource());
    OrcFileTail orcFileTail = src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);
    assertEquals(orcFileTail.getMetadataSize(), 0);
    DwrfProto.Footer actualFooter = readFooter(orcFileTail);
    assertEquals(actualFooter, footer.build());
    // make sure the stripe cache has not been read
    assertFalse(orcFileTail.getDwrfStripeCacheData().isPresent());
    // exactly one read, and it covers only the configured tail read size
    assertEquals(orcDataSource.getReadCount(), 1);
    DiskRange lastReadRange = orcDataSource.getLastReadRanges().get(0);
    assertEquals(lastReadRange.getLength(), tailReadSizeInBytes);
}
Also used : FileOutputStream(java.io.FileOutputStream) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) Test(org.testng.annotations.Test)

Example 24 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class TestStorageOrcFileTailSource method testReadDwrfStripeCacheIfEnabled.

/**
 * Verifies that the tail source loads the DWRF stripe cache when it is created with the
 * "read dwrf stripe cache" flag set to {@code true}: the cache must be present and its
 * mode, size and bytes must match what was written to the file.
 */
@Test
public void testReadDwrfStripeCacheIfEnabled() throws IOException {
    // fake stripe cache content: 100 bytes holding 0..99 so the bytes read back can be compared exactly
    byte[] stripeCache = new byte[100];
    for (int i = 0; i < stripeCache.length; i++) {
        stripeCache[i] = (byte) i;
    }
    DwrfProto.Footer.Builder footer = DwrfProto.Footer.newBuilder().addAllStripeCacheOffsets(ImmutableList.of(1, 2, 3));
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setCompression(NONE).setCacheMode(BOTH).setCacheSize(stripeCache.length);
    // try-with-resources so the stream is closed even when a write fails
    try (FileOutputStream out = new FileOutputStream(file.getFile())) {
        // write the fake stripe cache
        out.write(stripeCache);
        // write the footer and post script
        writeTail(footer, postScript, out);
    }
    // read the file tail with the enabled "read dwrf stripe cache" feature
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(FOOTER_READ_SIZE_IN_BYTES, true);
    OrcDataSource orcDataSource = createFileOrcDataSource();
    OrcFileTail orcFileTail = src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);
    assertEquals(orcFileTail.getMetadataSize(), 0);
    DwrfProto.Footer actualFooter = readFooter(orcFileTail);
    assertEquals(actualFooter, footer.build());
    // make sure the stripe cache is loaded correctly
    assertTrue(orcFileTail.getDwrfStripeCacheData().isPresent());
    DwrfStripeCacheData dwrfStripeCacheData = orcFileTail.getDwrfStripeCacheData().get();
    assertEquals(dwrfStripeCacheData.getDwrfStripeCacheMode(), INDEX_AND_FOOTER);
    assertEquals(dwrfStripeCacheData.getDwrfStripeCacheSize(), stripeCache.length);
    assertEquals(dwrfStripeCacheData.getDwrfStripeCacheSlice().getBytes(), stripeCache);
}
Also used : StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DwrfProto(com.facebook.presto.orc.proto.DwrfProto) OrcFileTail(com.facebook.presto.orc.metadata.OrcFileTail) FileOutputStream(java.io.FileOutputStream) DwrfStripeCacheData(com.facebook.presto.orc.metadata.DwrfStripeCacheData) Test(org.testng.annotations.Test)

Example 25 with StorageOrcFileTailSource

use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.

the class TestStorageOrcFileTailSource method testReadExpectedFooterSize.

/**
 * Verifies that the tail source issues a single read whose length equals the expected
 * footer size it was constructed with.
 */
@Test
public void testReadExpectedFooterSize() throws IOException {
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setFooterLength(0).setCompression(NONE);
    // try-with-resources so the stream is closed even when a write fails
    try (FileOutputStream out = new FileOutputStream(file.getFile())) {
        // beef up the file size to make sure it's larger than the expectedFooterSizeInBytes = 567 we will use below
        out.write(new byte[100 * 1000]);
        // write the post script
        writeTail(postScript, out);
    }
    // read the OrcFileTail
    int expectedFooterSizeInBytes = 567;
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(expectedFooterSizeInBytes, false);
    TestingOrcDataSource orcDataSource = new TestingOrcDataSource(createFileOrcDataSource());
    src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);
    // make sure only the configured expectedFooterSizeInBytes bytes have been read
    assertEquals(orcDataSource.getReadCount(), 1);
    DiskRange lastReadRange = orcDataSource.getLastReadRanges().get(0);
    assertEquals(lastReadRange.getLength(), expectedFooterSizeInBytes);
}
Also used : FileOutputStream(java.io.FileOutputStream) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Test(org.testng.annotations.Test)

Aggregations

StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource) 32, RuntimeStats (com.facebook.presto.common.RuntimeStats) 15, Test (org.testng.annotations.Test) 15, StorageStripeMetadataSource (com.facebook.presto.orc.StorageStripeMetadataSource) 13, DataSize (io.airlift.units.DataSize) 12, OrcBatchPageSourceFactory (com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) 8, ParquetPageSourceFactory (com.facebook.presto.hive.parquet.ParquetPageSourceFactory) 7, RcFilePageSourceFactory (com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) 7, OrcFileTail (com.facebook.presto.orc.metadata.OrcFileTail) 7, List (java.util.List) 7, Type (com.facebook.presto.common.type.Type) 6, ImmutableList (com.google.common.collect.ImmutableList) 6, Optional (java.util.Optional) 6, CacheConfig (com.facebook.presto.cache.CacheConfig) 5, ArrayType (com.facebook.presto.common.type.ArrayType) 5, StripeMetadataSourceFactory (com.facebook.presto.orc.StripeMetadataSourceFactory) 5, BIGINT (com.facebook.presto.common.type.BigintType.BIGINT) 4, BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN) 4, DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE) 4, INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER) 4