Search in sources :

Example 1 with REGULAR

use of com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR in project presto by prestodb.

the class IcebergPageSourceProvider method getFileOrcColumns.

private static List<IcebergOrcColumn> getFileOrcColumns(OrcReader reader) {
    List<OrcType> orcTypes = reader.getFooter().getTypes();
    OrcType rootOrcType = orcTypes.get(ROOT_COLUMN_ID);
    List<IcebergOrcColumn> columnAttributes = ImmutableList.of();
    if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.STRUCT) {
        columnAttributes = IntStream.range(0, rootOrcType.getFieldCount()).mapToObj(fieldId -> new IcebergOrcColumn(fieldId, rootOrcType.getFieldTypeIndex(fieldId), // so we use 'Optional.empty()' temporarily.
        Optional.empty(), rootOrcType.getFieldName(fieldId), REGULAR, orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getOrcTypeKind(), orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getAttributes())).collect(toImmutableList());
    } else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.LIST) {
        columnAttributes = ImmutableList.of(new IcebergOrcColumn(0, rootOrcType.getFieldTypeIndex(0), Optional.empty(), "item", REGULAR, orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getOrcTypeKind(), orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getAttributes()));
    } else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.MAP) {
        columnAttributes = ImmutableList.of(new IcebergOrcColumn(0, rootOrcType.getFieldTypeIndex(0), Optional.empty(), "key", REGULAR, orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getOrcTypeKind(), orcTypes.get(rootOrcType.getFieldTypeIndex(0)).getAttributes()), new IcebergOrcColumn(1, rootOrcType.getFieldTypeIndex(1), Optional.empty(), "value", REGULAR, orcTypes.get(rootOrcType.getFieldTypeIndex(1)).getOrcTypeKind(), orcTypes.get(rootOrcType.getFieldTypeIndex(1)).getAttributes()));
    } else if (rootOrcType.getOrcTypeKind() == OrcType.OrcTypeKind.UNION) {
        columnAttributes = IntStream.range(0, rootOrcType.getFieldCount()).mapToObj(fieldId -> new IcebergOrcColumn(fieldId, rootOrcType.getFieldTypeIndex(fieldId), Optional.empty(), "field" + fieldId, REGULAR, orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getOrcTypeKind(), orcTypes.get(rootOrcType.getFieldTypeIndex(fieldId)).getAttributes())).collect(toImmutableList());
    }
    return columnAttributes;
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ORC_ICEBERG_ID_KEY(com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) IcebergSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ICEBERG_BAD_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) TypeConverter.toHiveType(com.facebook.presto.iceberg.TypeConverter.toHiveType) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) IcebergSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) ArrayList(java.util.ArrayList) IcebergSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ROOT_COLUMN_ID(com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID) ICEBERG_MISSING_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) Domain(com.facebook.presto.common.predicate.Domain) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) ColumnHandle(com.facebook.presto.spi.ColumnHandle) IcebergSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) OrcReader(com.facebook.presto.orc.OrcReader) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) NO_CACHE_CONSTRAINTS(com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) String.format(java.lang.String.format) IcebergSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) Objects(java.util.Objects) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) IntStream(java.util.stream.IntStream) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) ImmutableList(com.google.common.collect.ImmutableList) ICEBERG_CANNOT_OPEN_SPLIT(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) Field(com.facebook.presto.parquet.Field) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) OrcType(com.facebook.presto.orc.metadata.OrcType)

Example 2 with REGULAR

use of com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR in project presto by prestodb.

the class ParquetHiveRecordCursor method createParquetRecordReader.

private ParquetRecordReader<FakeParquetRecord> createParquetRecordReader(HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        ParquetMetadata parquetMetadata = hdfsEnvironment.doAs(sessionUser, () -> ParquetFileReader.readFooter(configuration, path, NO_FILTER));
        List<BlockMetaData> blocks = parquetMetadata.getBlocks();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        PrestoReadSupport readSupport = new PrestoReadSupport(useParquetColumnNames, columns, fileSchema);
        List<parquet.schema.Type> fields = columns.stream().filter(column -> column.getColumnType() == REGULAR).map(column -> getParquetType(column, fileSchema, useParquetColumnNames)).filter(Objects::nonNull).collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        LongArrayList offsets = new LongArrayList(blocks.size());
        for (BlockMetaData block : blocks) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                if (predicatePushdownEnabled) {
                    ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
                    if (predicateMatches(parquetPredicate, block, dataSource, requestedSchema, effectivePredicate)) {
                        offsets.add(block.getStartingPos());
                    }
                } else {
                    offsets.add(block.getStartingPos());
                }
            }
        }
        ParquetInputSplit split = new ParquetInputSplit(path, start, start + length, length, null, offsets.toLongArray());
        TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(configuration, new TaskAttemptID());
        return hdfsEnvironment.doAs(sessionUser, () -> {
            ParquetRecordReader<FakeParquetRecord> realReader = new PrestoParquetRecordReader(readSupport);
            realReader.initialize(split, taskContext);
            return realReader;
        });
    } catch (Exception e) {
        Throwables.propagateIfInstanceOf(e, PrestoException.class);
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
            throw Throwables.propagate(e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    } finally {
        if (dataSource != null) {
            try {
                dataSource.close();
            } catch (IOException ignored) {
            }
        }
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Arrays(java.util.Arrays) Block(com.facebook.presto.spi.block.Block) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_CURSOR_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) Slices.wrappedBuffer(io.airlift.slice.Slices.wrappedBuffer) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) DecimalType(com.facebook.presto.spi.type.DecimalType) DecimalMetadata(parquet.schema.DecimalMetadata) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BigInteger(java.math.BigInteger) PrimitiveType(parquet.schema.PrimitiveType) MAP_KEY_VALUE(parquet.schema.OriginalType.MAP_KEY_VALUE) Decimals(com.facebook.presto.spi.type.Decimals) ReadSupport(parquet.hadoop.api.ReadSupport) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder) Math.min(java.lang.Math.min) Chars.trimSpacesAndTruncateToLength(com.facebook.presto.spi.type.Chars.trimSpacesAndTruncateToLength) Binary(parquet.io.api.Binary) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) ROW(com.facebook.presto.spi.type.StandardTypes.ROW) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) NO_FILTER(parquet.format.converter.ParquetMetadataConverter.NO_FILTER) Optional(java.util.Optional) Math.max(java.lang.Math.max) Varchars.truncateToLength(com.facebook.presto.spi.type.Varchars.truncateToLength) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) RecordMaterializer(parquet.io.api.RecordMaterializer) Converter(parquet.io.api.Converter) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) GroupConverter(parquet.io.api.GroupConverter) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Slice(io.airlift.slice.Slice) ParquetFileReader(parquet.hadoop.ParquetFileReader) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ParquetRecordReader(parquet.hadoop.ParquetRecordReader) PrestoException(com.facebook.presto.spi.PrestoException) PrimitiveConverter(parquet.io.api.PrimitiveConverter) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) MAP(com.facebook.presto.spi.type.StandardTypes.MAP) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) DecimalUtils(com.facebook.presto.hive.util.DecimalUtils) ARRAY(com.facebook.presto.spi.type.StandardTypes.ARRAY) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) HiveUtil.closeWithSuppression(com.facebook.presto.hive.HiveUtil.closeWithSuppression) Type(com.facebook.presto.spi.type.Type) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) Objects.requireNonNull(java.util.Objects.requireNonNull) DECIMAL(parquet.schema.OriginalType.DECIMAL) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus) Dictionary(parquet.column.Dictionary) TIMESTAMP(com.facebook.presto.spi.type.TimestampType.TIMESTAMP) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) MessageType(parquet.schema.MessageType) Properties(java.util.Properties) ParquetPredicateUtils.predicateMatches(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) ContextUtil(parquet.hadoop.util.ContextUtil) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FileMetaData(parquet.hadoop.metadata.FileMetaData) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) Collectors.toList(java.util.stream.Collectors.toList) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) PrestoException(com.facebook.presto.spi.PrestoException) FileSystem(org.apache.hadoop.fs.FileSystem) FileMetaData(parquet.hadoop.metadata.FileMetaData) MessageType(parquet.schema.MessageType) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) LongArrayList(it.unimi.dsi.fastutil.longs.LongArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) IOException(java.io.IOException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) DecimalType(com.facebook.presto.spi.type.DecimalType) PrimitiveType(parquet.schema.PrimitiveType) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) Varchars.isVarcharType(com.facebook.presto.spi.type.Varchars.isVarcharType) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) Type(com.facebook.presto.spi.type.Type) MessageType(parquet.schema.MessageType) HiveUtil.getDecimalType(com.facebook.presto.hive.HiveUtil.getDecimalType) GroupType(parquet.schema.GroupType) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) ParquetInputSplit(parquet.hadoop.ParquetInputSplit)

Example 3 with REGULAR

use of com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR in project presto by prestodb.

the class ParquetPageSourceFactory method createParquetPageSource.

public static ParquetPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, boolean useParquetColumnNames, TypeManager typeManager, boolean predicatePushdownEnabled, TupleDomain<HiveColumnHandle> effectivePredicate) {
    AggregatedMemoryContext systemMemoryContext = new AggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(fileSystem, path);
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        List<parquet.schema.Type> fields = columns.stream().filter(column -> column.getColumnType() == REGULAR).map(column -> getParquetType(column, fileSchema, useParquetColumnNames)).filter(Objects::nonNull).collect(toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);
        List<BlockMetaData> blocks = new ArrayList<>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                blocks.add(block);
            }
        }
        if (predicatePushdownEnabled) {
            ParquetPredicate parquetPredicate = buildParquetPredicate(columns, effectivePredicate, fileMetaData.getSchema(), typeManager);
            final ParquetDataSource finalDataSource = dataSource;
            blocks = blocks.stream().filter(block -> predicateMatches(parquetPredicate, block, finalDataSource, requestedSchema, effectivePredicate)).collect(toList());
        }
        ParquetReader parquetReader = new ParquetReader(fileSchema, requestedSchema, blocks, dataSource, typeManager, systemMemoryContext);
        return new ParquetPageSource(parquetReader, dataSource, fileSchema, requestedSchema, length, schema, columns, effectivePredicate, typeManager, useParquetColumnNames, systemMemoryContext);
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.isParquetOptimizedReaderEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedReaderEnabled) ArrayList(java.util.ArrayList) ParquetReader(com.facebook.presto.hive.parquet.reader.ParquetReader) Inject(javax.inject.Inject) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) Configuration(org.apache.hadoop.conf.Configuration) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) ParquetMetadataReader(com.facebook.presto.hive.parquet.reader.ParquetMetadataReader) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) HiveSessionProperties.isParquetPredicatePushdownEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetPredicatePushdownEnabled) ImmutableSet(com.google.common.collect.ImmutableSet) MessageType(parquet.schema.MessageType) Properties(java.util.Properties) ParquetPredicateUtils.predicateMatches(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.predicateMatches) Set(java.util.Set) IOException(java.io.IOException) AggregatedMemoryContext(com.facebook.presto.hive.parquet.memory.AggregatedMemoryContext) FileMetaData(parquet.hadoop.metadata.FileMetaData) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) HivePageSourceFactory(com.facebook.presto.hive.HivePageSourceFactory) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) Objects(java.util.Objects) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) HiveUtil.getDeserializerClassName(com.facebook.presto.hive.HiveUtil.getDeserializerClassName) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) BlockMetaData(parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(parquet.hadoop.metadata.ParquetMetadata) ArrayList(java.util.ArrayList) ParquetReader(com.facebook.presto.hive.parquet.reader.ParquetReader) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) AggregatedMemoryContext(com.facebook.presto.hive.parquet.memory.AggregatedMemoryContext) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ParquetTypeUtils.getParquetType(com.facebook.presto.hive.parquet.ParquetTypeUtils.getParquetType) MessageType(parquet.schema.MessageType) FileSystem(org.apache.hadoop.fs.FileSystem) FileMetaData(parquet.hadoop.metadata.FileMetaData) MessageType(parquet.schema.MessageType) ParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicate) ParquetPredicateUtils.buildParquetPredicate(com.facebook.presto.hive.parquet.predicate.ParquetPredicateUtils.buildParquetPredicate)

Example 4 with REGULAR

use of com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR in project presto by prestodb.

the class HiveUtil method createRecordReader.

public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    setReadColumns(configuration, readHiveColumnIndexes);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = new JobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
    // propagate serialization configuration to getRecordReader
    schema.stringPropertyNames().stream().filter(name -> name.startsWith("serialization.")).forEach(name -> jobConf.set(name, schema.getProperty(name)));
    try {
        return retry().stopOnIllegalExceptions().run("createRecordReader", () -> inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL));
    } catch (Exception e) {
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), e.getMessage()), e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_DEFAULT_DYNAMIC_PARTITION(com.facebook.presto.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION) DECIMAL_TYPE_NAME(org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME) Short.parseShort(java.lang.Short.parseShort) BIGINT(com.facebook.presto.spi.type.BigintType.BIGINT) BigDecimal(java.math.BigDecimal) FileSplit(org.apache.hadoop.mapred.FileSplit) Matcher(java.util.regex.Matcher) BOOLEAN(com.facebook.presto.spi.type.BooleanType.BOOLEAN) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) BigInteger(java.math.BigInteger) StandardTypes(com.facebook.presto.spi.type.StandardTypes) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Double.parseDouble(java.lang.Double.parseDouble) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) Decimals(com.facebook.presto.spi.type.Decimals) TINYINT(com.facebook.presto.spi.type.TinyintType.TINYINT) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) HIVE_SERDE_NOT_FOUND(com.facebook.presto.hive.HiveErrorCode.HIVE_SERDE_NOT_FOUND) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) InvocationTargetException(java.lang.reflect.InvocationTargetException) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) DateTimePrinter(org.joda.time.format.DateTimePrinter) RecordReader(org.apache.hadoop.mapred.RecordReader) Iterables.filter(com.google.common.collect.Iterables.filter) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DOUBLE(com.facebook.presto.spi.type.DoubleType.DOUBLE) Table(com.facebook.presto.hive.metastore.Table) Slice(io.airlift.slice.Slice) DateTimeFormatterBuilder(org.joda.time.format.DateTimeFormatterBuilder) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Byte.parseByte(java.lang.Byte.parseByte) Type(com.facebook.presto.spi.type.Type) TIMESTAMP(com.facebook.presto.spi.type.TimestampType.TIMESTAMP) Nullable(javax.annotation.Nullable) Properties(java.util.Properties) Reporter(org.apache.hadoop.mapred.Reporter) HiveColumnHandle.pathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle) Throwables(com.google.common.base.Throwables) HiveColumnHandle.isBucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isBucketColumnHandle) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) VarcharType(com.facebook.presto.spi.type.VarcharType) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Long.parseLong(java.lang.Long.parseLong) ReflectionUtils(org.apache.hadoop.util.ReflectionUtils) DateTimeParser(org.joda.time.format.DateTimeParser) HIVE_INVALID_VIEW_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) Float.parseFloat(java.lang.Float.parseFloat) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) DecimalType(com.facebook.presto.spi.type.DecimalType) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) NullableValue(com.facebook.presto.spi.predicate.NullableValue) Method(java.lang.reflect.Method) SliceUtf8(io.airlift.slice.SliceUtf8) DateTimeFormat(org.joda.time.format.DateTimeFormat) ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) RetryDriver.retry(com.facebook.presto.hive.RetryDriver.retry) String.format(java.lang.String.format) RecordCursor(com.facebook.presto.spi.RecordCursor) Base64(java.util.Base64) List(java.util.List) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Chars.trimSpaces(com.facebook.presto.spi.type.Chars.trimSpaces) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) Pattern(java.util.regex.Pattern) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) FileUtils.unescapePathName(org.apache.hadoop.hive.common.FileUtils.unescapePathName) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) Column(com.facebook.presto.hive.metastore.Column) ROUND_UNNECESSARY(java.math.BigDecimal.ROUND_UNNECESSARY) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) Lists.transform(com.google.common.collect.Lists.transform) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) HiveColumnHandle.isPathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) MapredParquetInputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) UTF_8(java.nio.charset.StandardCharsets.UTF_8) SMALLINT(com.facebook.presto.spi.type.SmallintType.SMALLINT) Integer.parseInt(java.lang.Integer.parseInt) JavaUtils(org.apache.hadoop.hive.common.JavaUtils) CharType(com.facebook.presto.spi.type.CharType) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) DATE(com.facebook.presto.spi.type.DateType.DATE) REAL(com.facebook.presto.spi.type.RealType.REAL) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) BigInteger(java.math.BigInteger) PrestoException(com.facebook.presto.spi.PrestoException) FileSplit(org.apache.hadoop.mapred.FileSplit) JobConf(org.apache.hadoop.mapred.JobConf) InvocationTargetException(java.lang.reflect.InvocationTargetException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PrestoException(com.facebook.presto.spi.PrestoException)

Example 5 with REGULAR

use of com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR in project presto by prestodb.

the class ParquetPageSourceFactory method createParquetPageSource.

public static ConnectorPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, SchemaTableName tableName, boolean useParquetColumnNames, DataSize maxReadBlockSize, boolean batchReaderEnabled, boolean verificationEnabled, TypeManager typeManager, StandardFunctionResolution functionResolution, TupleDomain<HiveColumnHandle> effectivePredicate, FileFormatDataSourceStats stats, HiveFileContext hiveFileContext, ParquetMetadataSource parquetMetadataSource, boolean columnIndexFilterEnabled) {
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
        ParquetMetadata parquetMetadata = parquetMetadataSource.getParquetMetadata(dataSource, fileSize, hiveFileContext.isCacheable()).getParquetMetadata();
        if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedParquetPageSource(columns, parquetMetadata, typeManager, functionResolution);
        }
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        Optional<MessageType> message = columns.stream().filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column)).map(column -> getColumnType(typeManager.getType(column.getTypeSignature()), fileSchema, useParquetColumnNames, column, tableName, path)).filter(Optional::isPresent).map(Optional::get).map(type -> new MessageType(fileSchema.getName(), type)).reduce(MessageType::union);
        MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : footerBlocks.build()) {
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
                hiveFileContext.incrementCounter("parquet.blocksRead", 1);
                hiveFileContext.incrementCounter("parquet.rowsRead", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesRead", block.getTotalByteSize());
            } else {
                hiveFileContext.incrementCounter("parquet.blocksSkipped", 1);
                hiveFileContext.incrementCounter("parquet.rowsSkipped", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesSkipped", block.getTotalByteSize());
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks.build(), dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        for (HiveColumnHandle column : columns) {
            checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SYNTHESIZED, "column type must be regular or synthesized column");
            String name = column.getName();
            Type type = typeManager.getType(column.getTypeSignature());
            namesBuilder.add(name);
            typesBuilder.add(type);
            if (column.getColumnType() == SYNTHESIZED) {
                Subfield pushedDownSubfield = getPushedDownSubfield(column);
                List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
                Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
                if (columnIO.isPresent()) {
                    fieldsBuilder.add(constructField(type, columnIO.get()));
                } else {
                    fieldsBuilder.add(Optional.empty());
                }
            } else if (getParquetType(type, fileSchema, useParquetColumnNames, column, tableName, path).isPresent()) {
                String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
                fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
            } else {
                fieldsBuilder.add(Optional.empty());
            }
        }
        return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), hiveFileContext.getStats());
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (e instanceof AccessControlException) {
            throw new PrestoException(PERMISSION_DENIED, e.getMessage(), e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) DateTimeZone(org.joda.time.DateTimeZone) TINYINT(com.facebook.presto.common.type.StandardTypes.TINYINT) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) ROW(com.facebook.presto.common.type.StandardTypes.ROW) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ParquetMetadataSource(com.facebook.presto.parquet.cache.ParquetMetadataSource) Set(java.util.Set) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ARRAY(com.facebook.presto.common.type.StandardTypes.ARRAY) ConnectorSession(com.facebook.presto.spi.ConnectorSession) CHAR(com.facebook.presto.common.type.StandardTypes.CHAR) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) ColumnIO(org.apache.parquet.io.ColumnIO) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) ArrayList(java.util.ArrayList) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) DATE(com.facebook.presto.common.type.StandardTypes.DATE) IOException(java.io.IOException) ParquetTypeUtils.nestedColumnPath(com.facebook.presto.parquet.ParquetTypeUtils.nestedColumnPath) Domain(com.facebook.presto.common.predicate.Domain) INTEGER(com.facebook.presto.common.type.StandardTypes.INTEGER) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) REAL(com.facebook.presto.common.type.StandardTypes.REAL) RowType(com.facebook.presto.common.type.RowType) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) ParquetTypeUtils.lookupColumnByName(com.facebook.presto.parquet.ParquetTypeUtils.lookupColumnByName) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) MAP(com.facebook.presto.common.type.StandardTypes.MAP) Locale(java.util.Locale) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) BIGINT(com.facebook.presto.common.type.StandardTypes.BIGINT) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) ImmutableSet(com.google.common.collect.ImmutableSet) GroupType(org.apache.parquet.schema.GroupType) ImmutableMap(com.google.common.collect.ImmutableMap) Collectors(java.util.stream.Collectors) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) VARCHAR(com.facebook.presto.common.type.StandardTypes.VARCHAR) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) Entry(java.util.Map.Entry) Optional(java.util.Optional) TIMESTAMP(com.facebook.presto.common.type.StandardTypes.TIMESTAMP) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) SMALLINT(com.facebook.presto.common.type.StandardTypes.SMALLINT) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) DECIMAL(com.facebook.presto.common.type.StandardTypes.DECIMAL) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Predicate(com.facebook.presto.parquet.predicate.Predicate) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) VARBINARY(com.facebook.presto.common.type.StandardTypes.VARBINARY) Storage(com.facebook.presto.hive.metastore.Storage) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) PERMISSION_DENIED(com.facebook.presto.spi.StandardErrorCode.PERMISSION_DENIED) Field(com.facebook.presto.parquet.Field) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) AccessControlException(org.apache.hadoop.security.AccessControlException) SYNTHESIZED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) HiveSessionProperties.columnIndexFilterEnabled(com.facebook.presto.hive.HiveSessionProperties.columnIndexFilterEnabled) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) ImmutableList(com.google.common.collect.ImmutableList) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) Predicate(com.facebook.presto.parquet.predicate.Predicate) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) MessageType(org.apache.parquet.schema.MessageType) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ParquetTypeUtils.columnPathFromSubfield(com.facebook.presto.parquet.ParquetTypeUtils.columnPathFromSubfield) HiveColumnHandle.getPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.getPushedDownSubfield) HiveColumnHandle.isPushedDownSubfield(com.facebook.presto.hive.HiveColumnHandle.isPushedDownSubfield) Subfield(com.facebook.presto.common.Subfield) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) Optional(java.util.Optional) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) AccessControlException(org.apache.hadoop.security.AccessControlException) IOException(java.io.IOException) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) ColumnIO(org.apache.parquet.io.ColumnIO) ColumnIOConverter.findNestedColumnIO(org.apache.parquet.io.ColumnIOConverter.findNestedColumnIO) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) AccessControlException(org.apache.hadoop.security.AccessControlException) RowType(com.facebook.presto.common.type.RowType) GroupType(org.apache.parquet.schema.GroupType) MessageType(org.apache.parquet.schema.MessageType) ParquetTypeUtils.getSubfieldType(com.facebook.presto.parquet.ParquetTypeUtils.getSubfieldType) Type(com.facebook.presto.common.type.Type) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream)

Aggregations

REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR)8 PrestoException (com.facebook.presto.spi.PrestoException)7 String.format (java.lang.String.format)7 List (java.util.List)7 Objects.requireNonNull (java.util.Objects.requireNonNull)7 Optional (java.util.Optional)7 Configuration (org.apache.hadoop.conf.Configuration)7 Path (org.apache.hadoop.fs.Path)7 HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)6 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)6 HIVE_CANNOT_OPEN_SPLIT (com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT)6 ImmutableList (com.google.common.collect.ImmutableList)6 IOException (java.io.IOException)6 Type (com.facebook.presto.common.type.Type)5 TypeManager (com.facebook.presto.common.type.TypeManager)5 HdfsParquetDataSource.buildHdfsParquetDataSource (com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource)5 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)5 Map (java.util.Map)5 DateTimeZone (org.joda.time.DateTimeZone)5 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)4