Use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.
The class StorageModule, method createOrcFileTailSource.
@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter)
{
    int expectedFileTailSizeInBytes = toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes());
    boolean dwrfStripeCacheEnabled = orcCacheConfig.isDwrfStripeCacheEnabled();
    OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource(expectedFileTailSizeInBytes, dwrfStripeCacheEnabled);
    if (orcCacheConfig.isFileTailCacheEnabled()) {
        // bound the cache by total tail size in bytes rather than by entry count
        Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
                .weigher((id, tail) -> ((OrcFileTail) tail).getTotalSize())
                .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), TimeUnit.MILLISECONDS)
                .recordStats()
                .build();
        CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache);
        // wrap the storage-backed source in the caching decorator and expose cache stats over JMX
        orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean);
    }
    return orcFileTailSource;
}
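All of these providers follow the same shape: a StorageOrcFileTailSource performs the actual reads and, when file-tail caching is enabled, is wrapped in a CachingOrcFileTailSource backed by a weight-bounded Guava cache whose statistics are exported over JMX. As a reference for the cache behavior itself, here is a minimal, self-contained sketch of the same Guava configuration; the String key, byte[] value, and the 8 MB / 4 hour limits are illustrative placeholders, not Presto's defaults.

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

import java.util.concurrent.TimeUnit;

public class WeightBoundedCacheSketch
{
    public static void main(String[] args)
    {
        // evict by total weight (bytes here), not by entry count, so a few large
        // entries and many small ones are accounted for uniformly
        Cache<String, byte[]> cache = CacheBuilder.newBuilder()
                .maximumWeight(8 * 1024 * 1024)                    // cap total payload at ~8 MB
                .weigher((String id, byte[] tail) -> tail.length)  // weigh each entry by its size
                .expireAfterAccess(4, TimeUnit.HOURS)              // drop entries idle for 4 hours
                .recordStats()                                     // needed for stats reporting
                .build();

        cache.put("file-1", new byte[1024]);
        cache.getIfPresent("file-1");
        System.out.println(cache.stats()); // hitCount=1, missCount=0, ...
    }
}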
Use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.
The class AbstractTestOrcReader, method testCaching.
@Test
public void testCaching()
        throws Exception
{
    Cache<OrcDataSourceId, OrcFileTail> orcFileTailCache = CacheBuilder.newBuilder()
            .maximumWeight(new DataSize(1, MEGABYTE).toBytes())
            .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
            .expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    OrcFileTailSource orcFileTailSource = new CachingOrcFileTailSource(new StorageOrcFileTailSource(), orcFileTailCache);
    Cache<StripeId, Slice> stripeFooterCache = CacheBuilder.newBuilder()
            .maximumWeight(new DataSize(1, MEGABYTE).toBytes())
            .weigher((id, footer) -> ((Slice) footer).length())
            .expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    Cache<StripeStreamId, Slice> stripeStreamCache = CacheBuilder.newBuilder()
            .maximumWeight(new DataSize(1, MEGABYTE).toBytes())
            .weigher((id, stream) -> ((Slice) stream).length())
            .expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    Optional<Cache<StripeStreamId, List<RowGroupIndex>>> rowGroupIndexCache = Optional.of(CacheBuilder.newBuilder()
            .maximumWeight(new DataSize(1, MEGABYTE).toBytes())
            .weigher((id, rowGroupIndices) -> toIntExact(((List<RowGroupIndex>) rowGroupIndices).stream().mapToLong(RowGroupIndex::getRetainedSizeInBytes).sum()))
            .expireAfterAccess(new Duration(10, MINUTES).toMillis(), MILLISECONDS)
            .recordStats()
            .build());
    StripeMetadataSource stripeMetadataSource = new CachingStripeMetadataSource(new StorageStripeMetadataSource(), stripeFooterCache, stripeStreamCache, rowGroupIndexCache);
    try (TempFile tempFile = createTempFile(10001)) {
        // first reader populates the file tail cache: one miss, no hits
        OrcBatchRecordReader storageReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
        assertEquals(orcFileTailCache.stats().missCount(), 1);
        assertEquals(orcFileTailCache.stats().hitCount(), 0);
        // second reader is served from the file tail cache: still one miss, now one hit
        OrcBatchRecordReader cacheReader = createCustomOrcRecordReader(tempFile, ORC, OrcPredicate.TRUE, ImmutableList.of(BIGINT), INITIAL_BATCH_SIZE, orcFileTailSource, stripeMetadataSource, true, ImmutableMap.of(), false);
        assertEquals(orcFileTailCache.stats().missCount(), 1);
        assertEquals(orcFileTailCache.stats().hitCount(), 1);
        // both readers must see an identical view of the file
        assertEquals(storageReader.getRetainedSizeInBytes(), cacheReader.getRetainedSizeInBytes());
        assertEquals(storageReader.getFileRowCount(), cacheReader.getFileRowCount());
        assertEquals(storageReader.getSplitLength(), cacheReader.getSplitLength());
        // the first batch read populates the stripe-level caches
        storageReader.nextBatch();
        assertEquals(stripeFooterCache.stats().missCount(), 1);
        assertEquals(stripeFooterCache.stats().hitCount(), 0);
        assertEquals(stripeStreamCache.stats().missCount(), 2);
        assertEquals(stripeStreamCache.stats().hitCount(), 0);
        assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
        assertEquals(rowGroupIndexCache.get().stats().hitCount(), 0);
        // the cached reader's batch read hits all three stripe-level caches
        cacheReader.nextBatch();
        assertEquals(stripeFooterCache.stats().missCount(), 1);
        assertEquals(stripeFooterCache.stats().hitCount(), 1);
        assertEquals(stripeStreamCache.stats().missCount(), 2);
        assertEquals(stripeStreamCache.stats().hitCount(), 2);
        assertEquals(rowGroupIndexCache.get().stats().missCount(), 1);
        assertEquals(rowGroupIndexCache.get().stats().hitCount(), 1);
        // both paths must decode the same data
        assertEquals(storageReader.readBlock(0).getInt(0), cacheReader.readBlock(0).getInt(0));
    }
}
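The assertions above exercise read-through caching: the first reader takes exactly one miss per cache, the second is served entirely from cache, and both decode identical data. Reduced to its essentials, the decorator looks like the generic sketch below; this is a hypothetical illustration of the pattern, not the real CachingOrcFileTailSource, which delegates with ORC-specific arguments.

import com.google.common.cache.Cache;

import java.util.concurrent.ExecutionException;
import java.util.function.Function;

public class CachingSource<K, V>
{
    private final Function<K, V> delegate;  // the storage-backed source that actually reads
    private final Cache<K, V> cache;

    public CachingSource(Function<K, V> delegate, Cache<K, V> cache)
    {
        this.delegate = delegate;
        this.cache = cache;
    }

    public V get(K key)
    {
        try {
            // first call for a key misses and loads from the delegate;
            // later calls for the same key are hits served from the cache
            return cache.get(key, () -> delegate.apply(key));
        }
        catch (ExecutionException e) {
            throw new RuntimeException(e.getCause());
        }
    }
}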
Use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.
The class HiveClientModule, method createOrcFileTailSource.
@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter)
{
    int expectedFileTailSizeInBytes = toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes());
    boolean dwrfStripeCacheEnabled = orcCacheConfig.isDwrfStripeCacheEnabled();
    OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource(expectedFileTailSizeInBytes, dwrfStripeCacheEnabled);
    if (orcCacheConfig.isFileTailCacheEnabled()) {
        Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
                .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
                .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();
        CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache);
        orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean);
    }
    return orcFileTailSource;
}
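Because each provider calls recordStats(), the exported CacheStatsMBean can report the hit and miss counters asserted in the test above. In plain Guava terms (a small sketch, independent of Presto):

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheStats;

public class CacheStatsSketch
{
    public static void main(String[] args)
    {
        Cache<String, String> cache = CacheBuilder.newBuilder().recordStats().build();
        cache.getIfPresent("absent");          // records one miss
        CacheStats stats = cache.stats();
        System.out.println(stats.missCount()); // 1
        System.out.println(stats.hitRate());   // hits / (hits + misses)
    }
}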
Use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.
The class IcebergModule, method createOrcFileTailSource.
@Singleton
@Provides
public OrcFileTailSource createOrcFileTailSource(OrcCacheConfig orcCacheConfig, MBeanExporter exporter)
{
    int expectedFileTailSizeInBytes = toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes());
    boolean dwrfStripeCacheEnabled = orcCacheConfig.isDwrfStripeCacheEnabled();
    OrcFileTailSource orcFileTailSource = new StorageOrcFileTailSource(expectedFileTailSizeInBytes, dwrfStripeCacheEnabled);
    if (orcCacheConfig.isFileTailCacheEnabled()) {
        Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder()
                .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
                .weigher((id, tail) -> ((OrcFileTail) tail).getFooterSize() + ((OrcFileTail) tail).getMetadataSize())
                .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
                .recordStats()
                .build();
        CacheStatsMBean cacheStatsMBean = new CacheStatsMBean(cache);
        orcFileTailSource = new CachingOrcFileTailSource(orcFileTailSource, cache);
        exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), cacheStatsMBean);
    }
    return orcFileTailSource;
}
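The StorageModule, HiveClientModule, and IcebergModule providers differ only in their weigher and the connector id used for the JMX name; everything else is duplicated. A shared helper could factor this out, roughly as sketched below; this method is hypothetical and not part of Presto.

private static OrcFileTailSource createCachingFileTailSource(
        OrcCacheConfig orcCacheConfig,
        MBeanExporter exporter,
        String connectorId,
        Weigher<OrcDataSourceId, OrcFileTail> weigher)
{
    OrcFileTailSource source = new StorageOrcFileTailSource(
            toIntExact(orcCacheConfig.getExpectedFileTailSize().toBytes()),
            orcCacheConfig.isDwrfStripeCacheEnabled());
    if (!orcCacheConfig.isFileTailCacheEnabled()) {
        return source;
    }
    Cache<OrcDataSourceId, OrcFileTail> cache = CacheBuilder.newBuilder()
            .maximumWeight(orcCacheConfig.getFileTailCacheSize().toBytes())
            .weigher(weigher)  // each connector keeps its own notion of entry weight
            .expireAfterAccess(orcCacheConfig.getFileTailCacheTtlSinceLastAccess().toMillis(), MILLISECONDS)
            .recordStats()
            .build();
    exporter.export(generatedNameOf(CacheStatsMBean.class, connectorId + "_OrcFileTail"), new CacheStatsMBean(cache));
    return new CachingOrcFileTailSource(source, cache);
}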
Use of com.facebook.presto.orc.cache.OrcFileTailSource in project presto by prestodb.
The class OrcBatchPageSourceFactory, method createOrcPageSource.
public static ConnectorPageSource createOrcPageSource(
        OrcEncoding orcEncoding,
        HdfsEnvironment hdfsEnvironment,
        String sessionUser,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long fileSize,
        List<HiveColumnHandle> columns,
        boolean useOrcColumnNames,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        StandardFunctionResolution functionResolution,
        DataSize maxBufferSize,
        DataSize streamBufferSize,
        boolean lazyReadSmallRanges,
        boolean orcBloomFiltersEnabled,
        FileFormatDataSourceStats stats,
        int domainCompactionThreshold,
        OrcFileTailSource orcFileTailSource,
        StripeMetadataSourceFactory stripeMetadataSourceFactory,
        HiveFileContext hiveFileContext,
        OrcReaderOptions orcReaderOptions,
        Optional<EncryptionInformation> encryptionInformation,
        DwrfEncryptionProvider dwrfEncryptionProvider)
{
    checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
    OrcDataSource orcDataSource;
    try {
        // open the file through the HDFS environment and wrap it as an ORC data source
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(sessionUser, path, configuration).openFile(path, hiveFileContext);
        orcDataSource = new HdfsOrcDataSource(
                new OrcDataSourceId(path.toString()),
                fileSize,
                orcReaderOptions.getMaxMergeDistance(),
                maxBufferSize,
                streamBufferSize,
                lazyReadSmallRanges,
                inputStream,
                stats);
    }
    catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext();
    try {
        DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, columns, useOrcColumnNames, path);
        OrcReader reader = new OrcReader(
                orcDataSource,
                orcEncoding,
                orcFileTailSource,
                stripeMetadataSourceFactory,
                new HiveOrcAggregatedMemoryContext(),
                orcReaderOptions,
                hiveFileContext.isCacheable(),
                dwrfEncryptionProvider,
                dwrfKeyProvider,
                hiveFileContext.getStats());
        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader.getTypes(), path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }
        // a split consisting solely of aggregated columns can be answered from footer statistics
        if (!physicalColumns.isEmpty() && physicalColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedOrcPageSource(physicalColumns, reader.getFooter(), typeManager, functionResolution);
        }
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold));
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage, INITIAL_BATCH_SIZE);
        return new OrcBatchPageSource(recordReader, reader.getOrcDataSource(), physicalColumns, typeManager, systemMemoryUsage, stats, hiveFileContext.getStats());
    }
    catch (Exception e) {
        try {
            orcDataSource.close();
        }
        catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
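Two details of the error path are worth noting: the data source is closed quietly so that a failure during close cannot mask the original exception, and BlockMissingException is matched by its simple class name, presumably to avoid a compile-time dependency on the HDFS exception type. The close-and-rethrow idiom in isolation (a hypothetical helper, not Presto code; Callable is java.util.concurrent.Callable):

static <T> T closeOnFailure(AutoCloseable resource, Callable<T> action)
        throws Exception
{
    try {
        return action.call();
    }
    catch (Exception e) {
        try {
            resource.close();
        }
        catch (Exception ignored) {
            // suppress close failures so the original exception propagates
        }
        throw e;
    }
}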