
Example 11 with RuntimeStats

Use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

In the class OrcStorageManager, the method getPageSource:

@Override
public ConnectorPageSource getPageSource(
        HdfsContext hdfsContext,
        HiveFileContext hiveFileContext,
        UUID shardUuid,
        Optional<UUID> deltaShardUuid,
        boolean tableSupportsDeltaDelete,
        OptionalInt bucketNumber,
        List<Long> columnIds,
        List<Type> columnTypes,
        TupleDomain<RaptorColumnHandle> effectivePredicate,
        ReaderAttributes readerAttributes,
        OptionalLong transactionId,
        Optional<Map<String, Type>> allColumnTypes) {
    FileSystem fileSystem = orcDataEnvironment.getFileSystem(hdfsContext);
    OrcDataSource dataSource = openShard(fileSystem, shardUuid, readerAttributes);
    OrcAggregatedMemoryContext systemMemoryUsage = new RaptorOrcAggregatedMemoryContext();
    try {
        OrcReader reader = new OrcReader(
                dataSource,
                ORC,
                orcFileTailSource,
                stripeMetadataSourceFactory,
                new RaptorOrcAggregatedMemoryContext(),
                new OrcReaderOptions(
                        readerAttributes.getMaxMergeDistance(),
                        readerAttributes.getTinyStripeThreshold(),
                        HUGE_MAX_READ_BLOCK_SIZE,
                        readerAttributes.isZstdJniDecompressionEnabled()),
                hiveFileContext.isCacheable(),
                NO_ENCRYPTION,
                DwrfKeyProvider.EMPTY,
                new RuntimeStats());
        Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames());
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
        // Map each requested column ID to its position in the ORC file;
        // hidden columns get special indexes and absent columns read as null.
        for (int i = 0; i < columnIds.size(); i++) {
            long columnId = columnIds.get(i);
            if (isHiddenColumn(columnId)) {
                columnIndexes.add(toSpecialIndex(columnId));
                continue;
            }
            Integer index = indexMap.get(columnId);
            if (index == null) {
                columnIndexes.add(NULL_COLUMN);
            } else {
                columnIndexes.add(index);
                includedColumns.put(index, toOrcFileType(columnTypes.get(i), typeManager));
            }
        }
        OrcPredicate predicate = getPredicate(effectivePredicate, indexMap);
        StorageTypeConverter storageTypeConverter = new StorageTypeConverter(typeManager);
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(
                storageTypeConverter.toStorageTypes(includedColumns.build()),
                predicate,
                DEFAULT_STORAGE_TIMEZONE,
                systemMemoryUsage,
                INITIAL_BATCH_SIZE);
        Optional<ShardRewriter> shardRewriter = Optional.empty();
        if (transactionId.isPresent()) {
            checkState(allColumnTypes.isPresent());
            if (reader.getFooter().getNumberOfRows() >= Integer.MAX_VALUE) {
                throw new PrestoException(RAPTOR_ERROR, "File has too many rows, failed to read file: " + shardUuid);
            }
            shardRewriter = Optional.of(createShardRewriter(
                    hdfsContext,
                    fileSystem,
                    transactionId.getAsLong(),
                    bucketNumber,
                    shardUuid,
                    toIntExact(reader.getFooter().getNumberOfRows()),
                    deltaShardUuid,
                    tableSupportsDeltaDelete,
                    allColumnTypes.get()));
        }
        return new OrcUpdatablePageSource(
                shardRewriter,
                recordReader,
                new OrcPageSource(
                        recordReader,
                        dataSource,
                        columnIds,
                        columnTypes,
                        columnIndexes.build(),
                        shardUuid,
                        bucketNumber,
                        systemMemoryUsage,
                        new DeltaShardLoader(deltaShardUuid, tableSupportsDeltaDelete, this, fileSystem)));
    } catch (IOException | RuntimeException e) {
        closeQuietly(dataSource);
        throw new PrestoException(RAPTOR_ERROR, "Failed to create page source for shard " + shardUuid, e);
    } catch (Throwable t) {
        closeQuietly(dataSource);
        throw t;
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) PrestoException(com.facebook.presto.spi.PrestoException) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) FileSystem(org.apache.hadoop.fs.FileSystem) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) DecimalType(com.facebook.presto.common.type.DecimalType) ArrayType(com.facebook.presto.common.type.ArrayType) RowType(com.facebook.presto.common.type.RowType) TimestampType(com.facebook.presto.common.type.TimestampType) MapType(com.facebook.presto.common.type.MapType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) OptionalLong(java.util.OptionalLong) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate)
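
Each of these call sites hands the reader a fresh RuntimeStats sink; Example 14 below shows the counterpart that folds per-component stats into a task-level view. A minimal sketch of that plumbing, using only the copyOf and mergeWith calls that appear in these examples (the collect helper itself is illustrative, not Presto API):

import com.facebook.presto.common.RuntimeStats;

public final class RuntimeStatsPlumbing {
    private RuntimeStatsPlumbing() {}

    // Combine a task-level stats object with the stats a reader recorded,
    // without mutating the caller's task-level object.
    public static RuntimeStats collect(RuntimeStats taskLevel, RuntimeStats readerLevel) {
        RuntimeStats merged = RuntimeStats.copyOf(taskLevel);
        merged.mergeWith(readerLevel);
        return merged;
    }
}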

Example 12 with RuntimeStats

Use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

In the class TestOrcRecordReaderDwrfStripeCaching, the method assertFileContentCachingEnabled:

private void assertFileContentCachingEnabled(File orcFile, List<DiskRange> forbiddenRanges) throws IOException {
    try (TestingOrcDataSource orcDataSource = new TestingOrcDataSource(createFileOrcDataSource(orcFile))) {
        StripeMetadataSourceFactory delegateSourceFactory = StripeMetadataSourceFactory.of(new StorageStripeMetadataSource());
        DwrfAwareStripeMetadataSourceFactory dwrfAwareFactory = new DwrfAwareStripeMetadataSourceFactory(delegateSourceFactory);
        // set zeroes to avoid file caching and merging of small disk ranges
        OrcReaderOptions orcReaderOptions = new OrcReaderOptions(new DataSize(0, MEGABYTE), new DataSize(0, MEGABYTE), new DataSize(1, MEGABYTE), false);
        OrcReader orcReader = new OrcReader(
                orcDataSource,
                DWRF,
                new StorageOrcFileTailSource(READ_TAIL_SIZE_IN_BYTES, true),
                dwrfAwareFactory,
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                orcReaderOptions,
                false,
                NO_ENCRYPTION,
                DwrfKeyProvider.EMPTY,
                new RuntimeStats());
        assertRecordValues(orcDataSource, orcReader);
        // check that the reader used the cache to read stripe indexes and footers
        assertForbiddenRanges(orcDataSource, forbiddenRanges);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) DataSize(io.airlift.units.DataSize) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource)
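
The zeroed options are what make the range assertions reliable: with no merging and no tiny-stripe read-ahead, every disk read maps one-to-one to a requested range. A sketch of that option pattern, with parameter meanings inferred from the ReaderAttributes getters in Example 11; treat the positional constructor as illustrative:

import com.facebook.presto.orc.OrcReaderOptions;
import io.airlift.units.DataSize;

import static io.airlift.units.DataSize.Unit.MEGABYTE;

final class TestReaderOptions {
    private TestReaderOptions() {}

    // Zero merge distance and zero tiny-stripe threshold force each disk
    // range to be read exactly as requested, so a test can assert which
    // ranges were (or were not) touched.
    static OrcReaderOptions noCoalescingOptions() {
        return new OrcReaderOptions(
                new DataSize(0, MEGABYTE), // max merge distance (inferred name)
                new DataSize(0, MEGABYTE), // tiny stripe threshold (inferred name)
                new DataSize(1, MEGABYTE), // max read block size (inferred name)
                false);                    // zstd JNI decompression
    }
}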

Example 13 with RuntimeStats

Use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

In the class TestOrcRecordReaderDwrfStripeCaching, the method assertFileContentCachingDisabled:

private void assertFileContentCachingDisabled(File orcFile) throws IOException {
    try (TestingOrcDataSource orcDataSource = new TestingOrcDataSource(createFileOrcDataSource(orcFile))) {
        StripeMetadataSourceFactory delegateSourceFactory = StripeMetadataSourceFactory.of(new StorageStripeMetadataSource());
        DwrfAwareStripeMetadataSourceFactory dwrfAwareFactory = new DwrfAwareStripeMetadataSourceFactory(delegateSourceFactory);
        OrcReader orcReader = new OrcReader(
                orcDataSource,
                DWRF,
                new StorageOrcFileTailSource(READ_TAIL_SIZE_IN_BYTES, false),
                dwrfAwareFactory,
                NOOP_ORC_AGGREGATED_MEMORY_CONTEXT,
                OrcReaderTestingUtils.createDefaultTestConfig(),
                false,
                NO_ENCRYPTION,
                DwrfKeyProvider.EMPTY,
                new RuntimeStats());
        assertRecordValues(orcDataSource, orcReader);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource)
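
This test differs from the caching-enabled one only in the second StorageOrcFileTailSource constructor argument, which, as the test names suggest, toggles whether stripe metadata may be served from the cached file tail. A sketch of the two configurations, with an illustrative tail-read size standing in for the test constant:

import com.facebook.presto.orc.cache.StorageOrcFileTailSource;

final class TailSources {
    // Illustrative stand-in for the READ_TAIL_SIZE_IN_BYTES test constant.
    private static final int READ_TAIL_SIZE_IN_BYTES = 256 * 1024;

    // Enabled path: stripe indexes and footers may come from the cached tail.
    static StorageOrcFileTailSource cachingTailSource() {
        return new StorageOrcFileTailSource(READ_TAIL_SIZE_IN_BYTES, true);
    }

    // Disabled path: every stripe index and footer is read from the file.
    static StorageOrcFileTailSource plainTailSource() {
        return new StorageOrcFileTailSource(READ_TAIL_SIZE_IN_BYTES, false);
    }
}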

Example 14 with RuntimeStats

Use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

In the class TaskContext, the method getTaskStats:

public TaskStats getTaskStats() {
    // check for end state to avoid callback ordering problems
    updateStatsIfDone(taskStateMachine.getState());
    List<PipelineStats> pipelineStats = ImmutableList.copyOf(transform(pipelineContexts, PipelineContext::getPipelineStats));
    long lastExecutionEndTime = 0;
    int totalDrivers = 0;
    int queuedDrivers = 0;
    int queuedPartitionedDrivers = 0;
    long queuedPartitionedSplitsWeight = 0;
    int runningDrivers = 0;
    int runningPartitionedDrivers = 0;
    long runningPartitionedSplitsWeight = 0;
    int blockedDrivers = 0;
    int completedDrivers = 0;
    long totalScheduledTime = 0;
    long totalCpuTime = 0;
    long totalBlockedTime = 0;
    long totalAllocation = 0;
    long rawInputDataSize = 0;
    long rawInputPositions = 0;
    long processedInputDataSize = 0;
    long processedInputPositions = 0;
    long outputDataSize = 0;
    long outputPositions = 0;
    long physicalWrittenDataSize = 0;
    RuntimeStats mergedRuntimeStats = RuntimeStats.copyOf(runtimeStats);
    ImmutableSet.Builder<BlockedReason> blockedReasons = ImmutableSet.builder();
    boolean hasRunningPipelines = false;
    boolean runningPipelinesFullyBlocked = true;
    for (PipelineStats pipeline : pipelineStats) {
        if (pipeline.getLastEndTime() != null) {
            lastExecutionEndTime = max(pipeline.getLastEndTime().getMillis(), lastExecutionEndTime);
        }
        if (pipeline.getRunningDrivers() > 0 || pipeline.getRunningPartitionedDrivers() > 0 || pipeline.getBlockedDrivers() > 0) {
            // pipeline is running
            hasRunningPipelines = true;
            runningPipelinesFullyBlocked &= pipeline.isFullyBlocked();
            blockedReasons.addAll(pipeline.getBlockedReasons());
        }
        totalDrivers += pipeline.getTotalDrivers();
        queuedDrivers += pipeline.getQueuedDrivers();
        queuedPartitionedDrivers += pipeline.getQueuedPartitionedDrivers();
        queuedPartitionedSplitsWeight += pipeline.getQueuedPartitionedSplitsWeight();
        runningDrivers += pipeline.getRunningDrivers();
        runningPartitionedDrivers += pipeline.getRunningPartitionedDrivers();
        runningPartitionedSplitsWeight += pipeline.getRunningPartitionedSplitsWeight();
        blockedDrivers += pipeline.getBlockedDrivers();
        completedDrivers += pipeline.getCompletedDrivers();
        totalScheduledTime += pipeline.getTotalScheduledTimeInNanos();
        totalCpuTime += pipeline.getTotalCpuTimeInNanos();
        totalBlockedTime += pipeline.getTotalBlockedTimeInNanos();
        totalAllocation += pipeline.getTotalAllocationInBytes();
        if (pipeline.isInputPipeline()) {
            rawInputDataSize += pipeline.getRawInputDataSizeInBytes();
            rawInputPositions += pipeline.getRawInputPositions();
            processedInputDataSize += pipeline.getProcessedInputDataSizeInBytes();
            processedInputPositions += pipeline.getProcessedInputPositions();
        }
        if (pipeline.isOutputPipeline()) {
            outputDataSize += pipeline.getOutputDataSizeInBytes();
            outputPositions += pipeline.getOutputPositions();
        }
        physicalWrittenDataSize += pipeline.getPhysicalWrittenDataSizeInBytes();
        pipeline.getOperatorSummaries().stream().forEach(stats -> mergedRuntimeStats.mergeWith(stats.getRuntimeStats()));
    }
    long startNanos = this.startNanos.get();
    if (startNanos == 0) {
        startNanos = System.nanoTime();
    }
    long queuedTimeInNanos = startNanos - createNanos;
    long endNanos = this.endNanos.get();
    long elapsedTimeInNanos;
    if (endNanos >= startNanos) {
        elapsedTimeInNanos = endNanos - createNanos;
    } else {
        elapsedTimeInNanos = 0;
    }
    int fullGcCount = getFullGcCount();
    Duration fullGcTime = getFullGcTime();
    long userMemory = taskMemoryContext.getUserMemory();
    long systemMemory = taskMemoryContext.getSystemMemory();
    updatePeakMemory();
    synchronized (cumulativeMemoryLock) {
        if (lastTaskStatCallNanos == 0) {
            lastTaskStatCallNanos = startNanos;
        }
        double sinceLastPeriodMillis = (System.nanoTime() - lastTaskStatCallNanos) / 1_000_000.0;
        long averageUserMemoryForLastPeriod = (userMemory + lastUserMemoryReservation) / 2;
        long averageTotalMemoryForLastPeriod = (userMemory + systemMemory + lastTotalMemoryReservation) / 2;
        cumulativeUserMemory.addAndGet(averageUserMemoryForLastPeriod * sinceLastPeriodMillis);
        cumulativeTotalMemory.addAndGet(averageTotalMemoryForLastPeriod * sinceLastPeriodMillis);
        lastTaskStatCallNanos = System.nanoTime();
        lastUserMemoryReservation = userMemory;
        lastTotalMemoryReservation = systemMemory + userMemory;
    }
    boolean fullyBlocked = hasRunningPipelines && runningPipelinesFullyBlocked;
    return new TaskStats(
            taskStateMachine.getCreatedTime(),
            executionStartTime.get(),
            lastExecutionStartTime.get(),
            lastExecutionEndTime == 0 ? null : new DateTime(lastExecutionEndTime),
            executionEndTime.get(),
            elapsedTimeInNanos,
            queuedTimeInNanos,
            totalDrivers,
            queuedDrivers,
            queuedPartitionedDrivers,
            queuedPartitionedSplitsWeight,
            runningDrivers,
            runningPartitionedDrivers,
            runningPartitionedSplitsWeight,
            blockedDrivers,
            completedDrivers,
            cumulativeUserMemory.get(),
            cumulativeTotalMemory.get(),
            userMemory,
            taskMemoryContext.getRevocableMemory(),
            systemMemory,
            peakTotalMemoryInBytes.get(),
            peakUserMemoryInBytes.get(),
            queryContext.getPeakNodeTotalMemory(),
            totalScheduledTime,
            totalCpuTime,
            totalBlockedTime,
            fullyBlocked && (runningDrivers > 0 || runningPartitionedDrivers > 0),
            blockedReasons.build(),
            totalAllocation,
            rawInputDataSize,
            rawInputPositions,
            processedInputDataSize,
            processedInputPositions,
            outputDataSize,
            outputPositions,
            physicalWrittenDataSize,
            fullGcCount,
            fullGcTime.toMillis(),
            pipelineStats,
            mergedRuntimeStats);
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) Duration(io.airlift.units.Duration) DateTime(org.joda.time.DateTime) ImmutableSet(com.google.common.collect.ImmutableSet)
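
The synchronized block near the end implements a running trapezoidal approximation of memory usage over time: each call adds the average reservation since the previous call multiplied by the elapsed milliseconds, accumulating byte-milliseconds. A self-contained sketch of just that bookkeeping; the class and method names here are illustrative, not Presto API:

final class CumulativeMemoryTracker {
    private long lastCallNanos;
    private long lastReservationBytes;
    private double cumulativeByteMillis;

    CumulativeMemoryTracker(long nowNanos) {
        this.lastCallNanos = nowNanos;
    }

    synchronized void update(long nowNanos, long reservationBytes) {
        // Trapezoid area: average of the two reservations times elapsed time.
        double elapsedMillis = (nowNanos - lastCallNanos) / 1_000_000.0;
        double averageBytes = (reservationBytes + lastReservationBytes) / 2.0;
        cumulativeByteMillis += averageBytes * elapsedMillis;
        lastCallNanos = nowNanos;
        lastReservationBytes = reservationBytes;
    }

    synchronized double cumulativeByteMillis() {
        return cumulativeByteMillis;
    }
}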

Example 15 with RuntimeStats

Use of com.facebook.presto.common.RuntimeStats in project presto by prestodb.

In the class IcebergPageSourceProvider, the method createParquetPageSource:

private static ConnectorPageSource createParquetPageSource(
        HdfsEnvironment hdfsEnvironment,
        String user,
        Configuration configuration,
        Path path,
        long start,
        long length,
        SchemaTableName tableName,
        List<IcebergColumnHandle> regularColumns,
        boolean useParquetColumnNames,
        DataSize maxReadBlockSize,
        boolean batchReaderEnabled,
        boolean verificationEnabled,
        TupleDomain<IcebergColumnHandle> effectivePredicate,
        FileFormatDataSourceStats fileFormatDataSourceStats,
        boolean columnIndexFilterEnabled) {
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        long fileSize = fileStatus.getLen();
        long modificationTime = fileStatus.getModificationTime();
        HiveFileContext hiveFileContext = new HiveFileContext(true, NO_CACHE_CONSTRAINTS, Optional.empty(), Optional.of(fileSize), modificationTime, false);
        FSDataInputStream inputStream = fileSystem.openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, fileFormatDataSourceStats);
        ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, fileSize).getParquetMetadata();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        // Mapping from Iceberg field ID to Parquet fields.
        Map<Integer, org.apache.parquet.schema.Type> parquetIdToField = fileSchema.getFields().stream()
                .filter(field -> field.getId() != null)
                .collect(toImmutableMap(field -> field.getId().intValue(), Function.identity()));
        List<org.apache.parquet.schema.Type> parquetFields = regularColumns.stream().map(column -> {
            if (parquetIdToField.isEmpty()) {
                // This is a migrated table
                return getParquetTypeByName(column.getName(), fileSchema);
            }
            return parquetIdToField.get(column.getId());
        }).collect(toList());
        // TODO: support subfield pushdown
        MessageType requestedSchema = new MessageType(fileSchema.getName(), parquetFields.stream().filter(Objects::nonNull).collect(toImmutableList()));
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        List<BlockMetaData> blocks = new ArrayList<>();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if ((firstDataPage >= start) && (firstDataPage < (start + length)) && predicateMatches(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(
                messageColumnIO,
                blocks,
                dataSource,
                systemMemoryContext,
                maxReadBlockSize,
                batchReaderEnabled,
                verificationEnabled,
                parquetPredicate,
                blockIndexStores,
                columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> prestoTypes = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> internalFields = ImmutableList.builder();
        for (int columnIndex = 0; columnIndex < regularColumns.size(); columnIndex++) {
            IcebergColumnHandle column = regularColumns.get(columnIndex);
            namesBuilder.add(column.getName());
            org.apache.parquet.schema.Type parquetField = parquetFields.get(columnIndex);
            Type prestoType = column.getType();
            prestoTypes.add(prestoType);
            if (parquetField == null) {
                internalFields.add(Optional.empty());
            } else {
                internalFields.add(constructField(column.getType(), messageColumnIO.getChild(parquetField.getName())));
            }
        }
        return new ParquetPageSource(parquetReader, prestoTypes.build(), internalFields.build(), namesBuilder.build(), new RuntimeStats());
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(ICEBERG_BAD_DATA, message, e);
        }
        if (e instanceof BlockMissingException) {
            throw new PrestoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new PrestoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ORC_ICEBERG_ID_KEY(com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) IcebergSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ICEBERG_BAD_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) TypeConverter.toHiveType(com.facebook.presto.iceberg.TypeConverter.toHiveType) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) IcebergSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) ArrayList(java.util.ArrayList) IcebergSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ROOT_COLUMN_ID(com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID) ICEBERG_MISSING_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) Domain(com.facebook.presto.common.predicate.Domain) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) ColumnHandle(com.facebook.presto.spi.ColumnHandle) 
IcebergSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) OrcReader(com.facebook.presto.orc.OrcReader) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) NO_CACHE_CONSTRAINTS(com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) String.format(java.lang.String.format) IcebergSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) Objects(java.util.Objects) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) IntStream(java.util.stream.IntStream) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) ImmutableList(com.google.common.collect.ImmutableList) ICEBERG_CANNOT_OPEN_SPLIT(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) 
Predicate(com.facebook.presto.parquet.predicate.Predicate) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) Field(com.facebook.presto.parquet.Field) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize)
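
The column-resolution rule in this method is worth isolating: prefer the Iceberg field ID embedded in the Parquet schema, and only for migrated tables, whose files carry no IDs, fall back to matching by column name. A simplified sketch of that rule, with plain maps standing in for the Parquet schema types; all names here are illustrative:

import java.util.Map;
import java.util.Optional;

final class IcebergColumnResolution {
    private IcebergColumnResolution() {}

    // fieldsById: Parquet fields keyed by their Iceberg field ID.
    // fieldsByName: the same fields keyed by column name.
    static Optional<String> resolve(
            Map<Integer, String> fieldsById,
            Map<String, String> fieldsByName,
            int icebergId,
            String columnName) {
        if (fieldsById.isEmpty()) {
            // Migrated table: the Parquet schema has no Iceberg field IDs,
            // so fall back to name-based matching.
            return Optional.ofNullable(fieldsByName.get(columnName));
        }
        return Optional.ofNullable(fieldsById.get(icebergId));
    }
}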

Aggregations

RuntimeStats (com.facebook.presto.common.RuntimeStats): 31
DataSize (io.airlift.units.DataSize): 16
StorageOrcFileTailSource (com.facebook.presto.orc.cache.StorageOrcFileTailSource): 15
Type (com.facebook.presto.common.type.Type): 11
OrcReader (com.facebook.presto.orc.OrcReader): 8
ImmutableList (com.google.common.collect.ImmutableList): 8
Block (com.facebook.presto.common.block.Block): 7
PrestoException (com.facebook.presto.spi.PrestoException): 7
IOException (java.io.IOException): 7
OrcDataSource (com.facebook.presto.orc.OrcDataSource): 6
OrcReaderOptions (com.facebook.presto.orc.OrcReaderOptions): 6
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 6
OrcBatchRecordReader (com.facebook.presto.orc.OrcBatchRecordReader): 5
RaptorOrcAggregatedMemoryContext (com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext): 5
ImmutableMap (com.google.common.collect.ImmutableMap): 5
Test (org.testng.annotations.Test): 5
TypeManager (com.facebook.presto.common.type.TypeManager): 4
ArrayType (com.facebook.presto.common.type.ArrayType): 3
DecimalType (com.facebook.presto.common.type.DecimalType): 3
OrcAggregatedMemoryContext (com.facebook.presto.orc.OrcAggregatedMemoryContext): 3