
Example 16 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class HiveMetadata method finishCreateTable.

@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;
    List<PartitionUpdate> partitionUpdates = getPartitionUpdates(session, fragments);
    Map<String, String> tableEncryptionParameters = ImmutableMap.of();
    Map<String, String> partitionEncryptionParameters = ImmutableMap.of();
    if (handle.getEncryptionInformation().isPresent()) {
        EncryptionInformation encryptionInformation = handle.getEncryptionInformation().get();
        if (encryptionInformation.getDwrfEncryptionMetadata().isPresent()) {
            if (handle.getPartitionedBy().isEmpty()) {
                tableEncryptionParameters = ImmutableMap.copyOf(encryptionInformation.getDwrfEncryptionMetadata().get().getExtraMetadata());
            } else {
                partitionEncryptionParameters = ImmutableMap.copyOf(encryptionInformation.getDwrfEncryptionMetadata().get().getExtraMetadata());
            }
        }
    }
    WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    MetastoreContext metastoreContext = getMetastoreContext(session);
    Table table = buildTableObject(session.getQueryId(), handle.getSchemaName(), handle.getTableName(), handle.getTableOwner(), handle.getInputColumns(), handle.getTableStorageFormat(), handle.getPartitionedBy(), handle.getBucketProperty(), handle.getPreferredOrderingColumns(), ImmutableMap.<String, String>builder().putAll(handle.getAdditionalTableParameters()).putAll(tableEncryptionParameters).build(), writeInfo.getTargetPath(), MANAGED_TABLE, prestoVersion, metastoreContext);
    PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(handle.getTableOwner());
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, partitionUpdates);
        // replace partitionUpdates before creating the zero-row files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        HdfsContext hdfsContext = new HdfsContext(session, table.getDatabaseName(), table.getTableName(), table.getStorage().getLocation(), true);
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(partitionObjectBuilder.buildPartitionObject(session, table, partitionUpdate, prestoVersion, partitionEncryptionParameters));
            zeroRowFileCreator.createFiles(session, hdfsContext, partitionUpdate.getWritePath(), getTargetFileNames(partitionUpdate.getFileWriteInfos()), getStorageFormat(partition, table), handle.getCompressionCodec(), getSchema(partition, table));
        }
    }
    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);
    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        HiveBasicStatistics basicStatistics = partitionUpdates.stream().map(PartitionUpdate::getStatistics).reduce((first, second) -> reduce(first, second, ADD)).orElse(createZeroStatistics());
        tableStatistics = createPartitionStatistics(session, basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    } else {
        tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of());
    }
    metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics);
    if (handle.getPartitionedBy().isEmpty()) {
        return Optional.of(new HiveWrittenPartitions(ImmutableList.of(UNPARTITIONED_ID)));
    }
    if (isRespectTableFormat(session)) {
        verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
    }
    for (PartitionUpdate update : partitionUpdates) {
        Map<String, String> partitionParameters = partitionEncryptionParameters;
        if (isPreferManifestsToListFiles(session) && isFileRenamingEnabled(session)) {
            // Store the list of file names and sizes in the partition metadata when prefer_manifests_to_list_files and file_renaming_enabled are both set to true
            partitionParameters = updatePartitionMetadataWithFileNamesAndSizes(update, partitionParameters);
        }
        Partition partition = partitionObjectBuilder.buildPartitionObject(session, table, update, prestoVersion, partitionParameters);
        PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
        metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), table.getStorage().getLocation(), true, partition, update.getWritePath(), partitionStatistics);
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toList())));
}
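
The TypeManager's role in this example is the columnTypes lookup: each HiveColumnHandle carries a HiveType, and HiveType.getType(typeManager) resolves it to a Presto Type before statistics are computed. Below is a minimal sketch of that lookup in isolation; the helper class and method names (ColumnTypeResolver, resolveColumnTypes) are illustrative and not part of HiveMetadata.

import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.HiveColumnHandle;

import java.util.List;
import java.util.Map;

import static com.google.common.collect.ImmutableMap.toImmutableMap;

// Illustrative helper: resolves each column's HiveType to a Presto Type through
// the injected TypeManager, mirroring the columnTypes map built in finishCreateTable.
final class ColumnTypeResolver {
    private ColumnTypeResolver() {}

    static Map<String, Type> resolveColumnTypes(List<HiveColumnHandle> columns, TypeManager typeManager) {
        return columns.stream()
                .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    }
}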

Example 17 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class ParquetFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf conf, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    if (!isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder().setMaxPageSize(getParquetWriterPageSize(session)).setMaxBlockSize(getParquetWriterBlockSize(session)).build();
    CompressionCodecName compressionCodecName = getCompression(conf);
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new ParquetFileWriter(fileSystem.create(path), rollbackAction, fileColumnNames, fileColumnTypes, parquetWriterOptions, fileInputColumnIndexes, compressionCodecName));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
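
Here the TypeManager resolves the table's declared Hive column types (from the serde schema) to Presto Types before the writer is built. Below is a minimal sketch of that step, assuming the same Hive metastore constants; the helper name ParquetSchemaTypes is illustrative and not part of ParquetFileWriterFactory.

import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;

import java.util.List;
import java.util.Properties;

import static com.facebook.presto.hive.HiveType.toHiveTypes;
import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES;

// Illustrative helper: maps the serde-declared column types to Presto Types via
// the TypeManager, the same lookup the writer factory performs above.
final class ParquetSchemaTypes {
    private ParquetSchemaTypes() {}

    static List<Type> fileColumnTypes(Properties schema, TypeManager typeManager) {
        return toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
                .map(hiveType -> hiveType.getType(typeManager))
                .collect(toList());
    }
}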

Example 18 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class ParquetPageSourceFactory method createParquetPageSource.

public static ConnectorPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, SchemaTableName tableName, boolean useParquetColumnNames, DataSize maxReadBlockSize, boolean batchReaderEnabled, boolean verificationEnabled, TypeManager typeManager, StandardFunctionResolution functionResolution, TupleDomain<HiveColumnHandle> effectivePredicate, FileFormatDataSourceStats stats, HiveFileContext hiveFileContext, ParquetMetadataSource parquetMetadataSource, boolean columnIndexFilterEnabled) {
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
        ParquetMetadata parquetMetadata = parquetMetadataSource.getParquetMetadata(dataSource, fileSize, hiveFileContext.isCacheable()).getParquetMetadata();
        if (!columns.isEmpty() && columns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedParquetPageSource(columns, parquetMetadata, typeManager, functionResolution);
        }
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        Optional<MessageType> message = columns.stream().filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column)).map(column -> getColumnType(typeManager.getType(column.getTypeSignature()), fileSchema, useParquetColumnNames, column, tableName, path)).filter(Optional::isPresent).map(Optional::get).map(type -> new MessageType(fileSchema.getName(), type)).reduce(MessageType::union);
        MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : footerBlocks.build()) {
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
                hiveFileContext.incrementCounter("parquet.blocksRead", 1);
                hiveFileContext.incrementCounter("parquet.rowsRead", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesRead", block.getTotalByteSize());
            } else {
                hiveFileContext.incrementCounter("parquet.blocksSkipped", 1);
                hiveFileContext.incrementCounter("parquet.rowsSkipped", block.getRowCount());
                hiveFileContext.incrementCounter("parquet.totalBytesSkipped", block.getTotalByteSize());
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks.build(), dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        for (HiveColumnHandle column : columns) {
            checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SYNTHESIZED, "column type must be regular or synthesized column");
            String name = column.getName();
            Type type = typeManager.getType(column.getTypeSignature());
            namesBuilder.add(name);
            typesBuilder.add(type);
            if (column.getColumnType() == SYNTHESIZED) {
                Subfield pushedDownSubfield = getPushedDownSubfield(column);
                List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
                Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
                if (columnIO.isPresent()) {
                    fieldsBuilder.add(constructField(type, columnIO.get()));
                } else {
                    fieldsBuilder.add(Optional.empty());
                }
            } else if (getParquetType(type, fileSchema, useParquetColumnNames, column, tableName, path).isPresent()) {
                String columnName = useParquetColumnNames ? name : fileSchema.getFields().get(column.getHiveColumnIndex()).getName();
                fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, columnName)));
            } else {
                fieldsBuilder.add(Optional.empty());
            }
        }
        return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), hiveFileContext.getStats());
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(HIVE_BAD_DATA, e);
        }
        if (e instanceof AccessControlException) {
            throw new PrestoException(PERMISSION_DENIED, e.getMessage(), e);
        }
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
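
The TypeManager appears twice above: it backs AggregatedParquetPageSource for aggregated-only scans, and for regular or synthesized columns typeManager.getType(column.getTypeSignature()) yields the Presto Type used to build each reader field. Below is a minimal sketch of that per-column resolution; the helper name ProjectedColumnTypes is illustrative and not part of ParquetPageSourceFactory.

import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.HiveColumnHandle;

import java.util.List;

import static com.google.common.collect.ImmutableList.toImmutableList;

// Illustrative helper: resolves each projected column's TypeSignature to a concrete
// Presto Type through the TypeManager, as done per column in the loop above.
final class ProjectedColumnTypes {
    private ProjectedColumnTypes() {}

    static List<Type> resolve(List<HiveColumnHandle> columns, TypeManager typeManager) {
        return columns.stream()
                .map(column -> typeManager.getType(column.getTypeSignature()))
                .collect(toImmutableList());
    }
}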

Example 19 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class OrcBatchPageSourceFactory method createOrcPageSource.

public static ConnectorPageSource createOrcPageSource(OrcEncoding orcEncoding, HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, boolean useOrcColumnNames, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, StandardFunctionResolution functionResolution, DataSize maxBufferSize, DataSize streamBufferSize, boolean lazyReadSmallRanges, boolean orcBloomFiltersEnabled, FileFormatDataSourceStats stats, int domainCompactionThreshold, OrcFileTailSource orcFileTailSource, StripeMetadataSourceFactory stripeMetadataSourceFactory, HiveFileContext hiveFileContext, OrcReaderOptions orcReaderOptions, Optional<EncryptionInformation> encryptionInformation, DwrfEncryptionProvider dwrfEncryptionProvider) {
    checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
    OrcDataSource orcDataSource;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(sessionUser, path, configuration).openFile(path, hiveFileContext);
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, orcReaderOptions.getMaxMergeDistance(), maxBufferSize, streamBufferSize, lazyReadSmallRanges, inputStream, stats);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext();
    try {
        DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, columns, useOrcColumnNames, path);
        OrcReader reader = new OrcReader(orcDataSource, orcEncoding, orcFileTailSource, stripeMetadataSourceFactory, new HiveOrcAggregatedMemoryContext(), orcReaderOptions, hiveFileContext.isCacheable(), dwrfEncryptionProvider, dwrfKeyProvider, hiveFileContext.getStats());
        List<HiveColumnHandle> physicalColumns = getPhysicalHiveColumnHandles(columns, useOrcColumnNames, reader.getTypes(), path);
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                Type type = typeManager.getType(column.getTypeSignature());
                includedColumns.put(column.getHiveColumnIndex(), type);
                columnReferences.add(new ColumnReference<>(column, column.getHiveColumnIndex(), type));
            }
        }
        if (!physicalColumns.isEmpty() && physicalColumns.stream().allMatch(hiveColumnHandle -> hiveColumnHandle.getColumnType() == AGGREGATED)) {
            return new AggregatedOrcPageSource(physicalColumns, reader.getFooter(), typeManager, functionResolution);
        }
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(effectivePredicate, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold));
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(includedColumns.build(), predicate, start, length, hiveStorageTimeZone, systemMemoryUsage, INITIAL_BATCH_SIZE);
        return new OrcBatchPageSource(recordReader, reader.getOrcDataSource(), physicalColumns, typeManager, systemMemoryUsage, stats, hiveFileContext.getStats());
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) HiveSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.hive.HiveSessionProperties.getOrcTinyStripeThreshold) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) FixedPageSource(com.facebook.presto.spi.FixedPageSource) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) AGGREGATED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.AGGREGATED) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HiveSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) HiveSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) HiveSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.hive.HiveSessionProperties.isOrcZstdJniDecompressionEnabled) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveFileContext(com.facebook.presto.hive.HiveFileContext) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) HiveBatchPageSourceFactory(com.facebook.presto.hive.HiveBatchPageSourceFactory) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) HiveSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxMergeDistance) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) PrestoException(com.facebook.presto.spi.PrestoException) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) HIVE_MISSING_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_MISSING_DATA) HiveSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxReadBlockSize) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getOrcMaxBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcMaxBufferSize) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) Type(com.facebook.presto.common.type.Type) 
DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference) Storage(com.facebook.presto.hive.metastore.Storage) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) HiveSessionProperties.getOrcStreamBufferSize(com.facebook.presto.hive.HiveSessionProperties.getOrcStreamBufferSize) NO_ENCRYPTION(com.facebook.presto.orc.DwrfEncryptionProvider.NO_ENCRYPTION) IOException(java.io.IOException) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HiveUtil.getPhysicalHiveColumnHandles(com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles) OrcReader(com.facebook.presto.orc.OrcReader) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) ImmutableList(com.google.common.collect.ImmutableList) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcDataSource(com.facebook.presto.orc.OrcDataSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate) ColumnReference(com.facebook.presto.orc.TupleDomainOrcPredicate.ColumnReference)
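
The TypeManager-specific part of this example is the column-resolution loop: each regular HiveColumnHandle carries only a TypeSignature, and typeManager.getType(...) turns it into a concrete Type that is put both into the reader's includedColumns map (keyed by the hive column index) and into the ColumnReference list that feeds the TupleDomainOrcPredicate. Below is a minimal sketch of just that pattern, lifted out of the factory; the class and method names are illustrative, not part of Presto.

import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.hive.HiveColumnHandle;
import com.google.common.collect.ImmutableMap;

import java.util.List;

import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR;

final class IncludedColumnsSketch {
    private IncludedColumnsSketch() {}

    // Resolve each regular column's engine Type through the TypeManager, keyed by its
    // hive column index, mirroring the loop in createOrcPageSource above.
    static ImmutableMap<Integer, Type> includedColumns(List<HiveColumnHandle> physicalColumns, TypeManager typeManager) {
        ImmutableMap.Builder<Integer, Type> included = ImmutableMap.builder();
        for (HiveColumnHandle column : physicalColumns) {
            if (column.getColumnType() == REGULAR) {
                included.put(column.getHiveColumnIndex(), typeManager.getType(column.getTypeSignature()));
            }
        }
        return included.build();
    }
}

In the aggregated-pushdown branch the factory skips this map and hands the TypeManager directly to AggregatedOrcPageSource instead.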

Example 20 with TypeManager

use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.

the class IcebergPageSourceProvider method createBatchOrcPageSource.

private static ConnectorPageSource createBatchOrcPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, boolean isCacheable, List<IcebergColumnHandle> regularColumns, TypeManager typeManager, TupleDomain<IcebergColumnHandle> effectivePredicate, OrcReaderOptions options, OrcEncoding orcEncoding, DataSize maxBufferSize, DataSize streamBufferSize, boolean lazyReadSmallRanges, boolean orcBloomFiltersEnabled, int domainCompactionThreshold, OrcFileTailSource orcFileTailSource, StripeMetadataSourceFactory stripeMetadataSourceFactory, FileFormatDataSourceStats stats, Optional<EncryptionInformation> encryptionInformation, DwrfEncryptionProvider dwrfEncryptionProvider) {
    OrcDataSource orcDataSource = null;
    try {
        ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        long fileSize = fileStatus.getLen();
        long modificationTime = fileStatus.getModificationTime();
        HiveFileContext hiveFileContext = new HiveFileContext(true, NO_CACHE_CONSTRAINTS, Optional.empty(), Optional.of(fileSize), modificationTime, false);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(user, () -> fileSystem.openFile(path, hiveFileContext));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, options.getMaxMergeDistance(), maxBufferSize, streamBufferSize, lazyReadSmallRanges, inputStream, stats);
        // Todo: pass real columns to ProjectionBasedDwrfKeyProvider instead of ImmutableList.of()
        DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, ImmutableList.of(), true, path);
        RuntimeStats runtimeStats = new RuntimeStats();
        OrcReader reader = new OrcReader(orcDataSource, orcEncoding, orcFileTailSource, stripeMetadataSourceFactory, new HiveOrcAggregatedMemoryContext(), options, isCacheable, dwrfEncryptionProvider, dwrfKeyProvider, runtimeStats);
        List<HiveColumnHandle> physicalColumnHandles = new ArrayList<>(regularColumns.size());
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<TupleDomainOrcPredicate.ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        List<IcebergOrcColumn> fileOrcColumns = getFileOrcColumns(reader);
        Map<Integer, IcebergOrcColumn> fileOrcColumnByIcebergId = fileOrcColumns.stream().filter(orcColumn -> orcColumn.getAttributes().containsKey(ORC_ICEBERG_ID_KEY)).collect(toImmutableMap(orcColumn -> Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY)), orcColumn -> IcebergOrcColumn.copy(orcColumn).setIcebergColumnId(Optional.of(Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY))))));
        Map<String, IcebergOrcColumn> fileOrcColumnsByName = uniqueIndex(fileOrcColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        int nextMissingColumnIndex = fileOrcColumnsByName.size();
        for (IcebergColumnHandle column : regularColumns) {
            IcebergOrcColumn icebergOrcColumn;
            boolean isExcludeColumn = false;
            if (fileOrcColumnByIcebergId.isEmpty()) {
                icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
            } else {
                icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId());
                if (icebergOrcColumn == null) {
                    // The column id is not present in 'fileOrcColumnByIcebergId', which means schema evolution may have occurred, so fall back to looking up the orc column by name.
                    icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
                    if (icebergOrcColumn != null) {
                        isExcludeColumn = true;
                    }
                }
            }
            if (icebergOrcColumn != null) {
                // TODO: use the orc file column name
                HiveColumnHandle columnHandle = new HiveColumnHandle(column.getName(), toHiveType(column.getType()), column.getType().getTypeSignature(), icebergOrcColumn.getOrcColumnId(), icebergOrcColumn.getColumnType(), Optional.empty(), Optional.empty());
                physicalColumnHandles.add(columnHandle);
                // Skip SchemaEvolution column
                if (!isExcludeColumn) {
                    includedColumns.put(columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature()));
                    columnReferences.add(new TupleDomainOrcPredicate.ColumnReference<>(columnHandle, columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature())));
                }
            } else {
                physicalColumnHandles.add(new HiveColumnHandle(column.getName(), toHiveType(column.getType()), column.getType().getTypeSignature(), nextMissingColumnIndex++, REGULAR, Optional.empty(), Optional.empty()));
            }
        }
        TupleDomain<HiveColumnHandle> hiveColumnHandleTupleDomain = effectivePredicate.transform(column -> {
            IcebergOrcColumn icebergOrcColumn;
            if (fileOrcColumnByIcebergId.isEmpty()) {
                icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
            } else {
                icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId());
                if (icebergOrcColumn == null) {
                    // The column id is not present in 'fileOrcColumnByIcebergId', which means schema evolution may have occurred, so fall back to looking up the orc column by name.
                    icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
                }
            }
            // Note: HiveColumnHandle.hiveColumnIndex starts from 0 while IcebergColumnHandle.id starts from 1
            return new HiveColumnHandle(column.getName(), toHiveType(column.getType()), column.getType().getTypeSignature(), icebergOrcColumn != null ? icebergOrcColumn.getOrcColumnId() : column.getId() - 1, icebergOrcColumn != null ? icebergOrcColumn.getColumnType() : REGULAR, Optional.empty(), Optional.empty());
        });
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(hiveColumnHandleTupleDomain, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold));
        OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext();
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(includedColumns.build(), predicate, start, length, UTC, systemMemoryUsage, INITIAL_BATCH_SIZE);
        return new OrcBatchPageSource(recordReader, orcDataSource, physicalColumnHandles, typeManager, systemMemoryUsage, stats, runtimeStats);
    } catch (Exception e) {
        if (orcDataSource != null) {
            try {
                orcDataSource.close();
            } catch (IOException ignored) {
            }
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof BlockMissingException) {
            throw new PrestoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new PrestoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ORC_ICEBERG_ID_KEY(com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) IcebergSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ICEBERG_BAD_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) TypeConverter.toHiveType(com.facebook.presto.iceberg.TypeConverter.toHiveType) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) IcebergSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) ArrayList(java.util.ArrayList) IcebergSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ROOT_COLUMN_ID(com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID) ICEBERG_MISSING_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) Domain(com.facebook.presto.common.predicate.Domain) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) ColumnHandle(com.facebook.presto.spi.ColumnHandle) 
IcebergSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) OrcReader(com.facebook.presto.orc.OrcReader) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) NO_CACHE_CONSTRAINTS(com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) String.format(java.lang.String.format) IcebergSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) Objects(java.util.Objects) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) IntStream(java.util.stream.IntStream) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) ImmutableList(com.google.common.collect.ImmutableList) ICEBERG_CANNOT_OPEN_SPLIT(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) 
Predicate(com.facebook.presto.parquet.predicate.Predicate) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) Field(com.facebook.presto.parquet.Field) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) FileStatus(org.apache.hadoop.fs.FileStatus) RuntimeStats(com.facebook.presto.common.RuntimeStats) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) PrestoException(com.facebook.presto.spi.PrestoException) HiveFileContext(com.facebook.presto.hive.HiveFileContext) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcDataSource(com.facebook.presto.orc.OrcDataSource) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) IOException(java.io.IOException) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) IOException(java.io.IOException) PrestoException(com.facebook.presto.spi.PrestoException) TypeConverter.toHiveType(com.facebook.presto.iceberg.TypeConverter.toHiveType) MessageType(org.apache.parquet.schema.MessageType) OrcType(com.facebook.presto.orc.metadata.OrcType) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) 
TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) OrcPredicate(com.facebook.presto.orc.OrcPredicate)
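
Compared with the Hive example above, the Iceberg variant adds a column-mapping step before any types are resolved: ORC columns are matched by the Iceberg field id stored in the file's column attributes (ORC_ICEBERG_ID_KEY) when those ids are present, otherwise by lower-cased column name; when an id lookup misses in a file that does carry ids, schema evolution is assumed and the column is resolved by name but excluded from the TypeManager-resolved includedColumns map. Below is a compact sketch of that fallback, written generically since IcebergOrcColumn is internal to the connector; the class, method, and parameter names are illustrative.

import java.util.Locale;
import java.util.Map;
import java.util.Optional;

final class OrcColumnLookupSketch {
    private OrcColumnLookupSketch() {}

    // Prefer the Iceberg field id; fall back to the column name when the file carries no ids
    // at all, or when the id is absent (schema evolution). A name-based hit in a file that does
    // carry ids is the case createBatchOrcPageSource treats as an excluded column above.
    static <C> Optional<C> resolveOrcColumn(int icebergFieldId, String columnName, Map<Integer, C> columnsByIcebergId, Map<String, C> columnsByName) {
        String nameKey = columnName.toLowerCase(Locale.ENGLISH);
        if (columnsByIcebergId.isEmpty()) {
            return Optional.ofNullable(columnsByName.get(nameKey));
        }
        C column = columnsByIcebergId.get(icebergFieldId);
        if (column != null) {
            return Optional.of(column);
        }
        return Optional.ofNullable(columnsByName.get(nameKey));
    }
}

Once a file column is resolved (or a synthetic index is assigned for a missing column), the type resolution itself is the same typeManager.getType(columnHandle.getTypeSignature()) call seen in the Hive example.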

Aggregations

TypeManager (com.facebook.presto.common.type.TypeManager): 33
List (java.util.List): 28
Type (com.facebook.presto.common.type.Type): 27
Optional (java.util.Optional): 27
Objects.requireNonNull (java.util.Objects.requireNonNull): 26
PrestoException (com.facebook.presto.spi.PrestoException): 25
Map (java.util.Map): 23
Path (org.apache.hadoop.fs.Path): 23
ImmutableMap (com.google.common.collect.ImmutableMap): 22
IOException (java.io.IOException): 22
ConnectorSession (com.facebook.presto.spi.ConnectorSession): 21
TupleDomain (com.facebook.presto.common.predicate.TupleDomain): 20
ImmutableList (com.google.common.collect.ImmutableList): 20
String.format (java.lang.String.format): 20
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 18
DateTimeZone (org.joda.time.DateTimeZone): 18
SchemaTableName (com.facebook.presto.spi.SchemaTableName): 17
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 16
Domain (com.facebook.presto.common.predicate.Domain): 15
Set (java.util.Set): 15