Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in the presto project by prestodb.
Class OrcTester, method getFileMetadata.
/**
 * Reads the file footer and every stripe footer of the given file.
 *
 * <p>The file footer is obtained through an {@link OrcReader}; each stripe footer is read
 * directly from the data source at {@code offset + indexLength + dataLength} of its stripe
 * and decoded with the metadata reader of the supplied encoding.
 *
 * @param inputFile the file to inspect
 * @param encoding the encoding used to open the file and to decode the stripe footers
 * @return the file footer together with the stripe footers, in the order the footer lists the stripes
 * @throws IOException if the file cannot be read
 */
public static FileMetadata getFileMetadata(File inputFile, OrcEncoding encoding) throws IOException {
    boolean zstdJniDecompressionEnabled = true;
    DataSize dataSize = new DataSize(1, MEGABYTE);
    // The data source is only needed while the footers are being read, so close it before
    // returning instead of leaking the underlying file handle.
    try (OrcDataSource orcDataSource = new FileOrcDataSource(inputFile, dataSize, dataSize, dataSize, true)) {
        RuntimeStats runtimeStats = new RuntimeStats();
        OrcReader reader = new OrcReader(
                orcDataSource,
                encoding,
                new StorageOrcFileTailSource(),
                new StorageStripeMetadataSource(),
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                new OrcReaderOptions(dataSize, dataSize, dataSize, zstdJniDecompressionEnabled),
                false,
                NO_ENCRYPTION,
                DwrfKeyProvider.EMPTY,
                runtimeStats);
        Footer footer = reader.getFooter();
        Optional<OrcDecompressor> decompressor = createOrcDecompressor(
                orcDataSource.getId(),
                reader.getCompressionKind(),
                reader.getBufferSize(),
                zstdJniDecompressionEnabled);

        ImmutableList.Builder<StripeFooter> stripes = new ImmutableList.Builder<>();
        for (StripeInformation stripe : footer.getStripes()) {
            // the stripe footer is stored right after the stripe's index and data sections
            byte[] tailBuffer = new byte[toIntExact(stripe.getFooterLength())];
            orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), tailBuffer);
            try (InputStream inputStream = new OrcInputStream(
                    orcDataSource.getId(),
                    new SharedBuffer(NOOP_ORC_LOCAL_MEMORY_CONTEXT),
                    Slices.wrappedBuffer(tailBuffer).getInput(),
                    decompressor,
                    Optional.empty(),
                    new TestingHiveOrcAggregatedMemoryContext(),
                    tailBuffer.length)) {
                StripeFooter stripeFooter = encoding.createMetadataReader(runtimeStats)
                        .readStripeFooter(orcDataSource.getId(), footer.getTypes(), inputStream);
                stripes.add(stripeFooter);
            }
        }
        return new FileMetadata(footer, stripes.build());
    }
}
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in the presto project by prestodb.
Class OrcTester, method createCustomOrcSelectiveRecordReader.
/**
 * Opens {@code file} with an {@link OrcReader} and creates a selective record reader over the
 * whole file (from offset 0 to the size of the data source) using the supplied filters,
 * subfields, constants and column selections.
 *
 * <p>Before the record reader is created, the first {@code types.size()} column names of the
 * file are asserted to equal {@code makeColumnNames(types.size())}.
 *
 * @throws IOException if the file cannot be opened or read
 */
public static OrcSelectiveRecordReader createCustomOrcSelectiveRecordReader(File file, OrcEncoding orcEncoding, OrcPredicate predicate, List<Type> types, int initialBatchSize, Map<Integer, Map<Subfield, TupleDomainFilter>> filters, List<FilterFunction> filterFunctions, Map<Integer, Integer> filterFunctionInputMapping, Map<Integer, List<Subfield>> requiredSubfields, Map<Integer, Object> constantValues, Map<Integer, Slice> intermediateEncryptionKeys, Map<Integer, Type> includedColumns, List<Integer> outputColumns, boolean mapNullKeysEnabled, OrcAggregatedMemoryContext systemMemoryUsage, boolean appendRowNumber) throws IOException {
    // every size knob of the data source and the first two of the reader options use 1 MB
    DataSize oneMegabyte = new DataSize(1, MEGABYTE);
    OrcDataSource dataSource = new FileOrcDataSource(file, oneMegabyte, oneMegabyte, oneMegabyte, true);

    OrcReaderOptions readerOptions = new OrcReaderOptions(
            oneMegabyte,
            oneMegabyte,
            MAX_BLOCK_SIZE,
            false,
            mapNullKeysEnabled,
            false,
            appendRowNumber);
    DwrfEncryptionProvider encryptionProvider = new DwrfEncryptionProvider(
            new UnsupportedEncryptionLibrary(),
            new TestingEncryptionLibrary());

    OrcReader reader = new OrcReader(
            dataSource,
            orcEncoding,
            new StorageOrcFileTailSource(),
            new StorageStripeMetadataSource(),
            NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
            readerOptions,
            false,
            encryptionProvider,
            DwrfKeyProvider.of(intermediateEncryptionKeys),
            new RuntimeStats());

    // sanity check: the leading columns of the file carry the generated test column names
    assertEquals(reader.getColumnNames().subList(0, types.size()), makeColumnNames(types.size()));

    return reader.createSelectiveRecordReader(
            includedColumns,
            outputColumns,
            filters,
            filterFunctions,
            filterFunctionInputMapping,
            requiredSubfields,
            constantValues,
            ImmutableMap.of(),
            predicate,
            0,
            dataSource.getSize(),
            HIVE_STORAGE_TIME_ZONE,
            LEGACY_MAP_SUBSCRIPT,
            systemMemoryUsage,
            Optional.empty(),
            initialBatchSize);
}
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in the presto project by prestodb.
Class TestStorageOrcFileTailSource, method testSkipDwrfStripeCacheIfDisabled.
/**
 * Verifies that a {@link StorageOrcFileTailSource} created with the stripe-cache flag set to
 * {@code false} does not return DWRF stripe cache data and performs exactly one read of
 * {@code tailReadSizeInBytes} bytes, even though the post script declares a stripe cache.
 */
@Test
public void testSkipDwrfStripeCacheIfDisabled() throws IOException {
    // the footer and post script advertise a stripe cache of 512 bytes
    DwrfProto.Footer.Builder footer = DwrfProto.Footer.newBuilder().addAllStripeCacheOffsets(ImmutableList.of(0, 256, 512));
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setCompression(NONE).setCacheMode(BOTH).setCacheSize(512);

    // try-with-resources closes the stream even when writing the test file fails
    try (FileOutputStream out = new FileOutputStream(file.getFile())) {
        // beef up the file size to make sure the file can fit the declared stripe cache
        out.write(new byte[100 * 1000]);
        // write the footer and post script
        writeTail(footer, postScript, out);
    }

    int tailReadSizeInBytes = 256;
    // read the file tail with the disabled "read dwrf stripe cache" feature
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(tailReadSizeInBytes, false);
    TestingOrcDataSource orcDataSource = new TestingOrcDataSource(createFileOrcDataSource());
    OrcFileTail orcFileTail = src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);

    assertEquals(orcFileTail.getMetadataSize(), 0);
    DwrfProto.Footer actualFooter = readFooter(orcFileTail);
    assertEquals(actualFooter, footer.build());

    // make sure the stripe cache has not been read
    assertFalse(orcFileTail.getDwrfStripeCacheData().isPresent());
    assertEquals(orcDataSource.getReadCount(), 1);
    DiskRange lastReadRange = orcDataSource.getLastReadRanges().get(0);
    assertEquals(lastReadRange.getLength(), tailReadSizeInBytes);
}
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in the presto project by prestodb.
Class TestStorageOrcFileTailSource, method testReadDwrfStripeCacheIfEnabled.
/**
 * Verifies that a {@link StorageOrcFileTailSource} created with the stripe-cache flag set to
 * {@code true} returns the stripe cache bytes written at the start of the file, together with
 * the expected cache mode and size.
 */
@Test
public void testReadDwrfStripeCacheIfEnabled() throws IOException {
    // build a fake stripe cache holding the byte values 0..99
    byte[] stripeCache = new byte[100];
    for (int i = 0; i < stripeCache.length; i++) {
        stripeCache[i] = (byte) i;
    }

    DwrfProto.Footer.Builder footer = DwrfProto.Footer.newBuilder().addAllStripeCacheOffsets(ImmutableList.of(1, 2, 3));
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setCompression(NONE).setCacheMode(BOTH).setCacheSize(stripeCache.length);

    // try-with-resources closes the stream even when writing the test file fails
    try (FileOutputStream out = new FileOutputStream(file.getFile())) {
        // write the fake stripe cache
        out.write(stripeCache);
        // write the footer and post script
        writeTail(footer, postScript, out);
    }

    // read the file tail with the enabled "read dwrf stripe cache" feature
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(FOOTER_READ_SIZE_IN_BYTES, true);
    OrcDataSource orcDataSource = createFileOrcDataSource();
    OrcFileTail orcFileTail = src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);

    assertEquals(orcFileTail.getMetadataSize(), 0);
    DwrfProto.Footer actualFooter = readFooter(orcFileTail);
    assertEquals(actualFooter, footer.build());

    // make sure the stripe cache is loaded correctly
    assertTrue(orcFileTail.getDwrfStripeCacheData().isPresent());
    DwrfStripeCacheData dwrfStripeCacheData = orcFileTail.getDwrfStripeCacheData().get();
    assertEquals(dwrfStripeCacheData.getDwrfStripeCacheMode(), INDEX_AND_FOOTER);
    assertEquals(dwrfStripeCacheData.getDwrfStripeCacheSize(), stripeCache.length);
    assertEquals(dwrfStripeCacheData.getDwrfStripeCacheSlice().getBytes(), stripeCache);
}
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in the presto project by prestodb.
Class TestStorageOrcFileTailSource, method testReadExpectedFooterSize.
/**
 * Verifies that a {@link StorageOrcFileTailSource} configured with an expected footer size
 * reads the file tail with a single read of exactly that many bytes.
 */
@Test
public void testReadExpectedFooterSize() throws IOException {
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setFooterLength(0).setCompression(NONE);

    // try-with-resources closes the stream even when writing the test file fails
    try (FileOutputStream out = new FileOutputStream(file.getFile())) {
        // beef up the file size to make sure it's larger than the expectedFooterSizeInBytes = 567 used below
        out.write(new byte[100 * 1000]);
        // write the post script
        writeTail(postScript, out);
    }

    // read the OrcFileTail
    int expectedFooterSizeInBytes = 567;
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(expectedFooterSizeInBytes, false);
    TestingOrcDataSource orcDataSource = new TestingOrcDataSource(createFileOrcDataSource());
    src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);

    // make sure only the configured expectedFooterSizeInBytes bytes have been read
    assertEquals(orcDataSource.getReadCount(), 1);
    DiskRange lastReadRange = orcDataSource.getLastReadRanges().get(0);
    assertEquals(lastReadRange.getLength(), expectedFooterSizeInBytes);
}
Aggregations