Search in sources :

Example 1 with ROOT_COLUMN_ID

Use of com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID in the presto project by prestodb.

The method getFileOrcColumns of the class IcebergPageSourceProvider.

/**
 * Builds one {@link IcebergOrcColumn} for each direct child of the root type found in
 * the footer of the given ORC file.
 *
 * <p>The column names depend on the kind of the root type:
 * <ul>
 *   <li>STRUCT: each child is named with the field name recorded in the root type;</li>
 *   <li>LIST: a single column named {@code "item"};</li>
 *   <li>MAP: two columns named {@code "key"} and {@code "value"};</li>
 *   <li>UNION: each child is named {@code "field"} followed by its position.</li>
 * </ul>
 * For any other root kind an empty list is returned.
 *
 * @param reader reader whose footer supplies the list of ORC types
 * @return the columns found directly under the root type, possibly empty
 */
private static List<IcebergOrcColumn> getFileOrcColumns(OrcReader reader) {
    List<OrcType> fileTypes = reader.getFooter().getTypes();
    OrcType root = fileTypes.get(ROOT_COLUMN_ID);
    OrcType.OrcTypeKind rootKind = root.getOrcTypeKind();

    if (rootKind == OrcType.OrcTypeKind.STRUCT) {
        return IntStream.range(0, root.getFieldCount())
                .mapToObj(position -> describeRootField(fileTypes, root, position, root.getFieldName(position)))
                .collect(toImmutableList());
    }
    if (rootKind == OrcType.OrcTypeKind.LIST) {
        return ImmutableList.of(describeRootField(fileTypes, root, 0, "item"));
    }
    if (rootKind == OrcType.OrcTypeKind.MAP) {
        return ImmutableList.of(
                describeRootField(fileTypes, root, 0, "key"),
                describeRootField(fileTypes, root, 1, "value"));
    }
    if (rootKind == OrcType.OrcTypeKind.UNION) {
        return IntStream.range(0, root.getFieldCount())
                .mapToObj(position -> describeRootField(fileTypes, root, position, "field" + position))
                .collect(toImmutableList());
    }
    // Root kinds other than STRUCT, LIST, MAP and UNION expose no child columns here.
    return ImmutableList.of();
}

/**
 * Creates the column description for the child at {@code position} of the root type,
 * taking its kind and attributes from the type that the root's field type index points to.
 *
 * @param fileTypes all ORC types from the file footer
 * @param root the root ORC type
 * @param position position of the child within the root type
 * @param name column name to assign
 * @return the column description for that child
 */
private static IcebergOrcColumn describeRootField(List<OrcType> fileTypes, OrcType root, int position, String name) {
    int typeIndex = root.getFieldTypeIndex(position);
    OrcType fieldType = fileTypes.get(typeIndex);
    // The third argument is deliberately left as 'Optional.empty()' for now; the
    // original code marks this as a temporary placeholder.
    return new IcebergOrcColumn(
            position,
            typeIndex,
            Optional.empty(),
            name,
            REGULAR,
            fieldType.getOrcTypeKind(),
            fieldType.getAttributes());
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ORC_ICEBERG_ID_KEY(com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) IcebergSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ICEBERG_BAD_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) 
HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) TypeConverter.toHiveType(com.facebook.presto.iceberg.TypeConverter.toHiveType) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) IcebergSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) ArrayList(java.util.ArrayList) IcebergSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ROOT_COLUMN_ID(com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID) ICEBERG_MISSING_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) Domain(com.facebook.presto.common.predicate.Domain) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) ColumnHandle(com.facebook.presto.spi.ColumnHandle) IcebergSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) OrcReader(com.facebook.presto.orc.OrcReader) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) 
HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) NO_CACHE_CONSTRAINTS(com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) String.format(java.lang.String.format) IcebergSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) Objects(java.util.Objects) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) 
HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) IntStream(java.util.stream.IntStream) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) ImmutableList(com.google.common.collect.ImmutableList) ICEBERG_CANNOT_OPEN_SPLIT(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) 
ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) Field(com.facebook.presto.parquet.Field) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) OrcType(com.facebook.presto.orc.metadata.OrcType)

Aggregations

RuntimeStats (com.facebook.presto.common.RuntimeStats)1 Domain (com.facebook.presto.common.predicate.Domain)1 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)1 StandardTypes (com.facebook.presto.common.type.StandardTypes)1 Type (com.facebook.presto.common.type.Type)1 TypeManager (com.facebook.presto.common.type.TypeManager)1 NO_CACHE_CONSTRAINTS (com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS)1 EncryptionInformation (com.facebook.presto.hive.EncryptionInformation)1 FileFormatDataSourceStats (com.facebook.presto.hive.FileFormatDataSourceStats)1 HdfsContext (com.facebook.presto.hive.HdfsContext)1 HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)1 HiveClientConfig (com.facebook.presto.hive.HiveClientConfig)1 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)1 REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR)1 HiveDwrfEncryptionProvider (com.facebook.presto.hive.HiveDwrfEncryptionProvider)1 HiveFileContext (com.facebook.presto.hive.HiveFileContext)1 HiveOrcAggregatedMemoryContext (com.facebook.presto.hive.HiveOrcAggregatedMemoryContext)1 HiveSessionProperties.getParquetMaxReadBlockSize (com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize)1 HiveSessionProperties.isParquetBatchReaderVerificationEnabled (com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled)1 HiveSessionProperties.isParquetBatchReadsEnabled (com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled)1