Search in sources :

Example 11 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class RcFileFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session) {
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // and index to rearrange columns in the proper order
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen());
                } catch (IOException e) {
                    throw Throwables.propagate(e);
                }
            });
        }
        return Optional.of(new RcFileFileWriter(outputStream, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) TypeManager(com.facebook.presto.spi.type.TypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) Type(com.facebook.presto.spi.type.Type) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) Splitter(com.google.common.base.Splitter) OutputStream(java.io.OutputStream) RcFileDataSource(com.facebook.presto.rcfile.RcFileDataSource) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) RcFilePageSourceFactory.createTextVectorEncoding(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Optional(java.util.Optional) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) OutputStream(java.io.OutputStream) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding) RcFileEncoding(com.facebook.presto.rcfile.RcFileEncoding) IOException(java.io.IOException) IOException(java.io.IOException) Type(com.facebook.presto.spi.type.Type) HdfsRcFileDataSource(com.facebook.presto.hive.rcfile.HdfsRcFileDataSource) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Supplier(java.util.function.Supplier) BinaryRcFileEncoding(com.facebook.presto.rcfile.binary.BinaryRcFileEncoding)

Example 12 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class HiveMetadata method computePartitionUpdatesForMissingBuckets.

private List<PartitionUpdate> computePartitionUpdatesForMissingBuckets(ConnectorSession session, HiveWritableTableHandle handle, Table table, List<PartitionUpdate> partitionUpdates) {
    // avoid creation of PartitionUpdate with empty list of files
    if (!shouldCreateFilesForMissingBuckets(table, session)) {
        return ImmutableList.of();
    }
    HiveStorageFormat storageFormat = table.getPartitionColumns().isEmpty() ? handle.getTableStorageFormat() : handle.getPartitionStorageFormat();
    // empty un-partitioned bucketed table
    if (table.getPartitionColumns().isEmpty() && partitionUpdates.isEmpty()) {
        int bucketCount = handle.getBucketProperty().get().getBucketCount();
        LocationHandle locationHandle = handle.getLocationHandle();
        List<String> fileNamesForMissingBuckets = computeFileNamesForMissingBuckets(session, storageFormat, handle.getCompressionCodec(), bucketCount, ImmutableSet.of());
        return ImmutableList.of(new PartitionUpdate("", (handle instanceof HiveInsertTableHandle) ? APPEND : NEW, locationHandle.getWritePath(), locationHandle.getTargetPath(), fileNamesForMissingBuckets.stream().map(fileName -> new FileWriteInfo(fileName, fileName, Optional.of(0L))).collect(toImmutableList()), 0, 0, 0, isFileRenamingEnabled(session)));
    }
    ImmutableList.Builder<PartitionUpdate> partitionUpdatesForMissingBucketsBuilder = ImmutableList.builder();
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        int bucketCount = handle.getBucketProperty().get().getBucketCount();
        List<String> fileNamesForMissingBuckets = computeFileNamesForMissingBuckets(session, storageFormat, handle.getCompressionCodec(), bucketCount, ImmutableSet.copyOf(getTargetFileNames(partitionUpdate.getFileWriteInfos())));
        partitionUpdatesForMissingBucketsBuilder.add(new PartitionUpdate(partitionUpdate.getName(), partitionUpdate.getUpdateMode(), partitionUpdate.getWritePath(), partitionUpdate.getTargetPath(), fileNamesForMissingBuckets.stream().map(fileName -> new FileWriteInfo(fileName, fileName, Optional.of(0L))).collect(toImmutableList()), 0, 0, 0, isFileRenamingEnabled(session)));
    }
    return partitionUpdatesForMissingBucketsBuilder.build();
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) Statistics.createEmptyPartitionStatistics(com.facebook.presto.hive.metastore.Statistics.createEmptyPartitionStatistics) SORTED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.SORTED_BY_PROPERTY) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) FILE_SIZE_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME) HiveSessionProperties.isPreferManifestsToListFiles(com.facebook.presto.hive.HiveSessionProperties.isPreferManifestsToListFiles) PrestoPrincipal(com.facebook.presto.spi.security.PrestoPrincipal) ComputedStatistics(com.facebook.presto.spi.statistics.ComputedStatistics) HiveSessionProperties.isOfflineDataDebugModeEnabled(com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled) GENERIC_INTERNAL_ERROR(com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) DWRF_ENCRYPTION_PROVIDER(com.facebook.presto.hive.HiveTableProperties.DWRF_ENCRYPTION_PROVIDER) MATERIALIZED_VIEW(com.facebook.presto.hive.metastore.PrestoTableType.MATERIALIZED_VIEW) HiveSessionProperties.isOptimizedPartitionUpdateSerializationEnabled(com.facebook.presto.hive.HiveSessionProperties.isOptimizedPartitionUpdateSerializationEnabled) Statistics.fromComputedStatistics(com.facebook.presto.hive.metastore.Statistics.fromComputedStatistics) MAX_VALUE_SIZE_IN_BYTES(com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES) HiveTableProperties.getPartitionedBy(com.facebook.presto.hive.HiveTableProperties.getPartitionedBy) Map(java.util.Map) LocalProperty(com.facebook.presto.spi.LocalProperty) SystemTable(com.facebook.presto.spi.SystemTable) ENGLISH(java.util.Locale.ENGLISH) DwrfTableEncryptionProperties.forTable(com.facebook.presto.hive.DwrfTableEncryptionProperties.forTable) NullableValue(com.facebook.presto.common.predicate.NullableValue) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HiveUtil.translateHiveUnsupportedTypeForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypeForTemporaryTable) PATH_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_NAME) HivePrivilegeInfo.toHivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.toHivePrivilege) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) Collectors.joining(java.util.stream.Collectors.joining) Stream(java.util.stream.Stream) MIN_VALUE(com.facebook.presto.spi.statistics.ColumnStatisticType.MIN_VALUE) NUMBER_OF_DISTINCT_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) CSV_SEPARATOR(com.facebook.presto.hive.HiveTableProperties.CSV_SEPARATOR) HivePrivilegeInfo(com.facebook.presto.hive.metastore.HivePrivilegeInfo) Joiner(com.google.common.base.Joiner) ConnectorPartitioningHandle(com.facebook.presto.spi.connector.ConnectorPartitioningHandle) Table(com.facebook.presto.hive.metastore.Table) Database(com.facebook.presto.hive.metastore.Database) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) HIVE_BINARY(com.facebook.presto.hive.HiveType.HIVE_BINARY) FULLY_MATERIALIZED(com.facebook.presto.spi.MaterializedViewStatus.MaterializedViewState.FULLY_MATERIALIZED) HiveBucketHandle.createVirtualBucketHandle(com.facebook.presto.hive.HiveBucketHandle.createVirtualBucketHandle) BUCKET_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) HiveUtil.columnExtraInfo(com.facebook.presto.hive.HiveUtil.columnExtraInfo) HiveTableProperties.getBucketProperty(com.facebook.presto.hive.HiveTableProperties.getBucketProperty) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) ConnectorOutputTableHandle(com.facebook.presto.spi.ConnectorOutputTableHandle) TableStatisticType(com.facebook.presto.spi.statistics.TableStatisticType) Supplier(java.util.function.Supplier) PAGEFILE(com.facebook.presto.hive.HiveStorageFormat.PAGEFILE) HiveMaterializedViewUtils.viewToBaseTableOnOuterJoinSideIndirectMappedPartitions(com.facebook.presto.hive.HiveMaterializedViewUtils.viewToBaseTableOnOuterJoinSideIndirectMappedPartitions) OptionalLong(java.util.OptionalLong) MetastoreUtil(com.facebook.presto.hive.metastore.MetastoreUtil) Lists(com.google.common.collect.Lists) MaterializedDataPredicates(com.facebook.presto.spi.MaterializedViewStatus.MaterializedDataPredicates) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) ENCRYPT_COLUMNS(com.facebook.presto.hive.HiveTableProperties.ENCRYPT_COLUMNS) Functions(com.google.common.base.Functions) HiveWriterFactory.computeBucketedFileName(com.facebook.presto.hive.HiveWriterFactory.computeBucketedFileName) RESPECT_TABLE_FORMAT(com.facebook.presto.hive.HiveSessionProperties.RESPECT_TABLE_FORMAT) IOException(java.io.IOException) Domain(com.facebook.presto.common.predicate.Domain) ConnectorTableLayoutResult(com.facebook.presto.spi.ConnectorTableLayoutResult) SchemaTablePrefix(com.facebook.presto.spi.SchemaTablePrefix) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) ConnectorNewTableLayout(com.facebook.presto.spi.ConnectorNewTableLayout) HiveUtil.hiveColumnHandles(com.facebook.presto.hive.HiveUtil.hiveColumnHandles) PRESTO_QUERY_ID_NAME(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_QUERY_ID_NAME) TableLayoutFilterCoverage(com.facebook.presto.spi.TableLayoutFilterCoverage) HiveManifestUtils.getManifestSizeInBytes(com.facebook.presto.hive.HiveManifestUtils.getManifestSizeInBytes) CSV_QUOTE(com.facebook.presto.hive.HiveTableProperties.CSV_QUOTE) ConnectorViewDefinition(com.facebook.presto.spi.ConnectorViewDefinition) RowType(com.facebook.presto.common.type.RowType) ViewNotFoundException(com.facebook.presto.spi.ViewNotFoundException) HiveAnalyzeProperties.getPartitionList(com.facebook.presto.hive.HiveAnalyzeProperties.getPartitionList) NUMBER_OF_TRUE_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_TRUE_VALUES) JsonCodec(com.facebook.airlift.json.JsonCodec) DWRF_ENCRYPTION_ALGORITHM(com.facebook.presto.hive.HiveTableProperties.DWRF_ENCRYPTION_ALGORITHM) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) HiveUtil.encodeMaterializedViewData(com.facebook.presto.hive.HiveUtil.encodeMaterializedViewData) StandardFunctionResolution(com.facebook.presto.spi.function.StandardFunctionResolution) Privilege(com.facebook.presto.spi.security.Privilege) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) HiveSessionProperties.getTemporaryTableSchema(com.facebook.presto.hive.HiveSessionProperties.getTemporaryTableSchema) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ADD(com.facebook.presto.hive.metastore.Statistics.ReduceOperator.ADD) ConnectorTablePartitioning(com.facebook.presto.spi.ConnectorTablePartitioning) Collectors.toMap(java.util.stream.Collectors.toMap) MaterializedViewNotFoundException(com.facebook.presto.spi.MaterializedViewNotFoundException) BUCKETED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) PRESTO_VIEW_FLAG(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_VIEW_FLAG) DiscretePredicates(com.facebook.presto.spi.DiscretePredicates) SpecialFormExpression(com.facebook.presto.spi.relation.SpecialFormExpression) ImmutableSet(com.google.common.collect.ImmutableSet) HIVE_UNKNOWN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR) INVALID_SCHEMA_PROPERTY(com.facebook.presto.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) Collection(java.util.Collection) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) SCHEMA_NOT_EMPTY(com.facebook.presto.spi.StandardErrorCode.SCHEMA_NOT_EMPTY) DwrfTableEncryptionProperties.forPerColumn(com.facebook.presto.hive.DwrfTableEncryptionProperties.forPerColumn) HIVE_STRING(com.facebook.presto.hive.HiveType.HIVE_STRING) RowExpressionService(com.facebook.presto.spi.relation.RowExpressionService) MetastoreUtil.toPartitionValues(com.facebook.presto.hive.metastore.MetastoreUtil.toPartitionValues) LogicalRowExpressions.binaryExpression(com.facebook.presto.expressions.LogicalRowExpressions.binaryExpression) TOTAL_SIZE_IN_BYTES(com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES) IntStream(java.util.stream.IntStream) OVERWRITE(com.facebook.presto.hive.PartitionUpdate.UpdateMode.OVERWRITE) MapType(com.facebook.presto.common.type.MapType) HiveSessionProperties.isRespectTableFormat(com.facebook.presto.hive.HiveSessionProperties.isRespectTableFormat) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) CompletableFuture(java.util.concurrent.CompletableFuture) HiveUtil.verifyPartitionTypeSupported(com.facebook.presto.hive.HiveUtil.verifyPartitionTypeSupported) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) InMemoryRecordSet(com.facebook.presto.spi.InMemoryRecordSet) HashSet(java.util.HashSet) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) HiveMaterializedViewUtils.getViewToBasePartitionMap(com.facebook.presto.hive.HiveMaterializedViewUtils.getViewToBasePartitionMap) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) PREFERRED_ORDERING_COLUMNS(com.facebook.presto.hive.HiveTableProperties.PREFERRED_ORDERING_COLUMNS) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) ALREADY_EXISTS(com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS) SmileCodec(com.facebook.airlift.json.smile.SmileCodec) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) HiveWriteUtils.isWritableType(com.facebook.presto.hive.HiveWriteUtils.isWritableType) NoSuchElementException(java.util.NoSuchElementException) HiveTableProperties.getDwrfEncryptionAlgorithm(com.facebook.presto.hive.HiveTableProperties.getDwrfEncryptionAlgorithm) Type(com.facebook.presto.common.type.Type) ConnectorInsertTableHandle(com.facebook.presto.spi.ConnectorInsertTableHandle) USER(com.facebook.presto.spi.security.PrincipalType.USER) Storage(com.facebook.presto.hive.metastore.Storage) HivePartitioningHandle.createHiveCompatiblePartitioningHandle(com.facebook.presto.hive.HivePartitioningHandle.createHiveCompatiblePartitioningHandle) ROW_COUNT(com.facebook.presto.spi.statistics.TableStatisticType.ROW_COUNT) ConnectorTableLayout(com.facebook.presto.spi.ConnectorTableLayout) HiveBucketing.getHiveBucketHandle(com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle) HiveTableProperties.getOrcBloomFilterColumns(com.facebook.presto.hive.HiveTableProperties.getOrcBloomFilterColumns) MoreFutures.toCompletableFuture(com.facebook.airlift.concurrent.MoreFutures.toCompletableFuture) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HiveUtil.translateHiveUnsupportedTypesForTemporaryTable(com.facebook.presto.hive.HiveUtil.translateHiveUnsupportedTypesForTemporaryTable) HiveMaterializedViewUtils.validateMaterializedViewPartitionColumns(com.facebook.presto.hive.HiveMaterializedViewUtils.validateMaterializedViewPartitionColumns) HiveTableProperties.getOrcBloomFilterFpp(com.facebook.presto.hive.HiveTableProperties.getOrcBloomFilterFpp) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPartitioningMetadata(com.facebook.presto.spi.connector.ConnectorPartitioningMetadata) HiveSessionProperties.getTemporaryTableStorageFormat(com.facebook.presto.hive.HiveSessionProperties.getTemporaryTableStorageFormat) BUCKET_COUNT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SYNTHESIZED(com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) AVRO_SCHEMA_URL_KEY(com.facebook.presto.hive.metastore.MetastoreUtil.AVRO_SCHEMA_URL_KEY) Comparator(java.util.Comparator) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) HIVE_COMPATIBLE(com.facebook.presto.hive.BucketFunctionType.HIVE_COMPATIBLE) NUMBER_OF_NON_NULL_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) HiveSessionProperties.isParquetPushdownFilterEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetPushdownFilterEnabled) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HiveSessionProperties.isStatisticsEnabled(com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled) EXTERNAL_TABLE(com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) AVRO_SCHEMA_URL(com.facebook.presto.hive.HiveTableProperties.AVRO_SCHEMA_URL) TupleDomain.withColumnDomains(com.facebook.presto.common.predicate.TupleDomain.withColumnDomains) HiveSessionProperties.getBucketFunctionTypeForExchange(com.facebook.presto.hive.HiveSessionProperties.getBucketFunctionTypeForExchange) HiveMaterializedViewUtils.getMaterializedDataPredicates(com.facebook.presto.hive.HiveMaterializedViewUtils.getMaterializedDataPredicates) HiveTableProperties.getCsvProperty(com.facebook.presto.hive.HiveTableProperties.getCsvProperty) Predicates.not(com.google.common.base.Predicates.not) NOT_MATERIALIZED(com.facebook.presto.spi.MaterializedViewStatus.MaterializedViewState.NOT_MATERIALIZED) ConnectorMaterializedViewDefinition(com.facebook.presto.spi.ConnectorMaterializedViewDefinition) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) FileWriteInfo(com.facebook.presto.hive.PartitionUpdate.FileWriteInfo) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrivilegeInfo(com.facebook.presto.spi.security.PrivilegeInfo) TEMPORARY_TABLE(com.facebook.presto.hive.metastore.PrestoTableType.TEMPORARY_TABLE) INVALID_TABLE_PROPERTY(com.facebook.presto.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveUtil.decodeMaterializedViewData(com.facebook.presto.hive.HiveUtil.decodeMaterializedViewData) HiveManifestUtils.updatePartitionMetadataWithFileNamesAndSizes(com.facebook.presto.hive.HiveManifestUtils.updatePartitionMetadataWithFileNamesAndSizes) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) PrincipalPrivileges(com.facebook.presto.hive.metastore.PrincipalPrivileges) HIVE_STORAGE_FORMAT(com.facebook.presto.hive.HiveSessionProperties.HIVE_STORAGE_FORMAT) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TypeUtils.isNumericType(com.facebook.presto.common.type.TypeUtils.isNumericType) JsonCodec.jsonCodec(com.facebook.airlift.json.JsonCodec.jsonCodec) ConnectorSession(com.facebook.presto.spi.ConnectorSession) CSV_ESCAPE(com.facebook.presto.hive.HiveTableProperties.CSV_ESCAPE) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HiveTableProperties.getEncryptTable(com.facebook.presto.hive.HiveTableProperties.getEncryptTable) TableStatisticsMetadata(com.facebook.presto.spi.statistics.TableStatisticsMetadata) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) HiveColumnHandle.updateRowIdHandle(com.facebook.presto.hive.HiveColumnHandle.updateRowIdHandle) HiveSessionProperties.getTemporaryTableCompressionCodec(com.facebook.presto.hive.HiveSessionProperties.getTemporaryTableCompressionCodec) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Iterables(com.google.common.collect.Iterables) HiveSessionProperties.isShufflePartitionedColumnsForTableWriteEnabled(com.facebook.presto.hive.HiveSessionProperties.isShufflePartitionedColumnsForTableWriteEnabled) Slice(io.airlift.slice.Slice) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) HiveUtil.getPartitionKeyColumnHandles(com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles) HiveSessionProperties.isCollectColumnStatisticsOnWrite(com.facebook.presto.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite) ENCRYPT_TABLE(com.facebook.presto.hive.HiveTableProperties.ENCRYPT_TABLE) HiveTableProperties.getPreferredOrderingColumns(com.facebook.presto.hive.HiveTableProperties.getPreferredOrderingColumns) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) WriteInfo(com.facebook.presto.hive.LocationService.WriteInfo) HiveStatisticsProvider(com.facebook.presto.hive.statistics.HiveStatisticsProvider) HiveBasicStatistics.createEmptyStatistics(com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics) DATE(com.facebook.presto.common.type.DateType.DATE) Builder(com.google.common.collect.ImmutableMap.Builder) ArrayList(java.util.ArrayList) SortingProperty(com.facebook.presto.spi.SortingProperty) HiveTableProperties.getDwrfEncryptionProvider(com.facebook.presto.hive.HiveTableProperties.getDwrfEncryptionProvider) DwrfTableEncryptionProperties.fromHiveTableProperties(com.facebook.presto.hive.DwrfTableEncryptionProperties.fromHiveTableProperties) HiveUtil.encodeViewData(com.facebook.presto.hive.HiveUtil.encodeViewData) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) HiveTableProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveTableProperties.getHiveStorageFormat) HiveBucketFilter(com.facebook.presto.hive.HiveBucketing.HiveBucketFilter) HiveWriteUtils.checkTableIsWritable(com.facebook.presto.hive.HiveWriteUtils.checkTableIsWritable) ImmutableSortedMap(com.google.common.collect.ImmutableSortedMap) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) ADMIN_ROLE_NAME(com.facebook.presto.hive.security.SqlStandardAccessControl.ADMIN_ROLE_NAME) ConnectorMetadataUpdateHandle(com.facebook.presto.spi.ConnectorMetadataUpdateHandle) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable(com.facebook.presto.hive.HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable) COVERED(com.facebook.presto.spi.TableLayoutFilterCoverage.COVERED) HiveSessionProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveSessionProperties.getHiveStorageFormat) HiveSessionProperties.isOptimizedMismatchedBucketCount(com.facebook.presto.hive.HiveSessionProperties.isOptimizedMismatchedBucketCount) Properties(java.util.Properties) HiveSessionProperties.getCompressionCodec(com.facebook.presto.hive.HiveSessionProperties.getCompressionCodec) HIVE_CONCURRENT_MODIFICATION_DETECTED(com.facebook.presto.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED) Constraint(com.facebook.presto.spi.Constraint) File(java.io.File) PRESTO_MATERIALIZED_VIEW_FLAG(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_MATERIALIZED_VIEW_FLAG) Streams.stream(com.google.common.collect.Streams.stream) ColumnWithStructSubfield(com.facebook.presto.hive.ColumnEncryptionInformation.ColumnWithStructSubfield) INVALID_VIEW(com.facebook.presto.spi.StandardErrorCode.INVALID_VIEW) HivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege) ColumnStatisticType(com.facebook.presto.spi.statistics.ColumnStatisticType) ColumnHandle(com.facebook.presto.spi.ColumnHandle) HiveTableProperties.getEncryptColumns(com.facebook.presto.hive.HiveTableProperties.getEncryptColumns) INVALID_ANALYZE_PROPERTY(com.facebook.presto.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) QueryId(com.facebook.presto.spi.QueryId) HiveTableProperties.getAvroSchemaUrl(com.facebook.presto.hive.HiveTableProperties.getAvroSchemaUrl) HiveMaterializedViewUtils.differenceDataPredicates(com.facebook.presto.hive.HiveMaterializedViewUtils.differenceDataPredicates) URL(java.net.URL) HiveTableProperties.getExternalLocation(com.facebook.presto.hive.HiveTableProperties.getExternalLocation) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) HiveType.toHiveType(com.facebook.presto.hive.HiveType.toHiveType) HiveSessionProperties.isUsePageFileForHiveUnsupportedType(com.facebook.presto.hive.HiveSessionProperties.isUsePageFileForHiveUnsupportedType) FILE_MODIFIED_TIME_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) HiveSessionProperties.isBucketExecutionEnabled(com.facebook.presto.hive.HiveSessionProperties.isBucketExecutionEnabled) Iterables.concat(com.google.common.collect.Iterables.concat) HIVE_COLUMN_ORDER_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH) MANAGED_TABLE(com.facebook.presto.hive.metastore.PrestoTableType.MANAGED_TABLE) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) Path(org.apache.hadoop.fs.Path) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Splitter(com.google.common.base.Splitter) Collectors.toSet(java.util.stream.Collectors.toSet) HIVE_UNSUPPORTED_FORMAT(com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) HiveSessionProperties.isSortedWriteToTempPathEnabled(com.facebook.presto.hive.HiveSessionProperties.isSortedWriteToTempPathEnabled) ORC_BLOOM_FILTER_FPP(com.facebook.presto.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP) ImmutableMap(com.google.common.collect.ImmutableMap) HivePartitioningHandle.createPrestoNativePartitioningHandle(com.facebook.presto.hive.HivePartitioningHandle.createPrestoNativePartitioningHandle) NEW(com.facebook.presto.hive.PartitionUpdate.UpdateMode.NEW) NOT_COVERED(com.facebook.presto.spi.TableLayoutFilterCoverage.NOT_COVERED) Predicate(java.util.function.Predicate) Collections.emptyList(java.util.Collections.emptyList) MAX_VALUE(com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE) HiveSessionProperties.isStreamingAggregationEnabled(com.facebook.presto.hive.HiveSessionProperties.isStreamingAggregationEnabled) Sets(com.google.common.collect.Sets) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) STORAGE_FORMAT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) PrestoTableType(com.facebook.presto.hive.metastore.PrestoTableType) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) RoleGrant(com.facebook.presto.spi.security.RoleGrant) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) HIVE_TIMEZONE_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_TIMEZONE_MISMATCH) ORC_BLOOM_FILTER_COLUMNS(com.facebook.presto.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS) TOO_MANY_PARTITIONS_MISSING(com.facebook.presto.spi.MaterializedViewStatus.MaterializedViewState.TOO_MANY_PARTITIONS_MISSING) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) PRESTO_NATIVE(com.facebook.presto.hive.BucketFunctionType.PRESTO_NATIVE) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) FilterStatsCalculatorService(com.facebook.presto.spi.plan.FilterStatsCalculatorService) HiveWriterFactory.getFileExtension(com.facebook.presto.hive.HiveWriterFactory.getFileExtension) MetastoreUtil.getProtectMode(com.facebook.presto.hive.metastore.MetastoreUtil.getProtectMode) HiveSessionProperties.isSortedWritingEnabled(com.facebook.presto.hive.HiveSessionProperties.isSortedWritingEnabled) Column(com.facebook.presto.hive.metastore.Column) ColumnStatisticMetadata(com.facebook.presto.spi.statistics.ColumnStatisticMetadata) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) PrestoException(com.facebook.presto.spi.PrestoException) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) Partition(com.facebook.presto.hive.metastore.Partition) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) PARTITIONED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) Verify.verify(com.google.common.base.Verify.verify) TypeManager(com.facebook.presto.common.type.TypeManager) HiveSessionProperties.getVirtualBucketCount(com.facebook.presto.hive.HiveSessionProperties.getVirtualBucketCount) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.verifyOnline(com.facebook.presto.hive.metastore.MetastoreUtil.verifyOnline) Suppliers(com.google.common.base.Suppliers) HiveSessionProperties.isFileRenamingEnabled(com.facebook.presto.hive.HiveSessionProperties.isFileRenamingEnabled) SortingColumn(com.facebook.presto.hive.metastore.SortingColumn) RowExpression(com.facebook.presto.spi.relation.RowExpression) VerifyException(com.google.common.base.VerifyException) GrantInfo(com.facebook.presto.spi.security.GrantInfo) HiveTableProperties.isExternalTable(com.facebook.presto.hive.HiveTableProperties.isExternalTable) NOT_APPLICABLE(com.facebook.presto.spi.TableLayoutFilterCoverage.NOT_APPLICABLE) MalformedURLException(java.net.MalformedURLException) HIVE_UNSUPPORTED_ENCRYPTION_OPERATION(com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_ENCRYPTION_OPERATION) Statistics.reduce(com.facebook.presto.hive.metastore.Statistics.reduce) ConnectorOutputMetadata(com.facebook.presto.spi.connector.ConnectorOutputMetadata) EXTERNAL_LOCATION_PROPERTY(com.facebook.presto.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY) HiveSessionProperties.isCreateEmptyBucketFiles(com.facebook.presto.hive.HiveSessionProperties.isCreateEmptyBucketFiles) VIEW_STORAGE_FORMAT(com.facebook.presto.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) HiveUtil.deserializeZstdCompressed(com.facebook.presto.hive.HiveUtil.deserializeZstdCompressed) Maps(com.google.common.collect.Maps) ThriftMetastoreUtil.listEnabledPrincipals(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.listEnabledPrincipals) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) VIRTUAL_VIEW(com.facebook.presto.hive.metastore.PrestoTableType.VIRTUAL_VIEW) ConnectorIdentity(com.facebook.presto.spi.security.ConnectorIdentity) HiveBasicStatistics.createZeroStatistics(com.facebook.presto.hive.HiveBasicStatistics.createZeroStatistics) PARTIALLY_MATERIALIZED(com.facebook.presto.spi.MaterializedViewStatus.MaterializedViewState.PARTIALLY_MATERIALIZED) UUID.randomUUID(java.util.UUID.randomUUID) ThriftMetastoreUtil(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) HIVE_PARTITION_READ_ONLY(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_READ_ONLY) HiveStorageFormat.values(com.facebook.presto.hive.HiveStorageFormat.values) APPEND(com.facebook.presto.hive.PartitionUpdate.UpdateMode.APPEND) StorageFormat.fromHiveStorageFormat(com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) HiveUtil.decodeViewData(com.facebook.presto.hive.HiveUtil.decodeViewData) MaterializedViewStatus(com.facebook.presto.spi.MaterializedViewStatus) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HiveSessionProperties.getOrcCompressionCodec(com.facebook.presto.hive.HiveSessionProperties.getOrcCompressionCodec) Block(com.facebook.presto.common.block.Block) Statistics.createComputedStatisticsToPartitionMap(com.facebook.presto.hive.metastore.Statistics.createComputedStatisticsToPartitionMap) HiveTableProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveTableProperties.getHiveStorageFormat) HiveSessionProperties.getHiveStorageFormat(com.facebook.presto.hive.HiveSessionProperties.getHiveStorageFormat) StorageFormat.fromHiveStorageFormat(com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Constraint(com.facebook.presto.spi.Constraint) FileWriteInfo(com.facebook.presto.hive.PartitionUpdate.FileWriteInfo)

Example 13 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class ParquetFileWriterFactory method createFileWriter.

@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf conf, ConnectorSession session, Optional<EncryptionInformation> encryptionInformation) {
    if (!isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder().setMaxPageSize(getParquetWriterPageSize(session)).setMaxBlockSize(getParquetWriterBlockSize(session)).build();
    CompressionCodecName compressionCodecName = getCompression(conf);
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new ParquetFileWriter(fileSystem.create(path), rollbackAction, fileColumnNames, fileColumnTypes, parquetWriterOptions, fileInputColumnIndexes, compressionCodecName));
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) Inject(com.google.inject.Inject) HiveSessionProperties.getParquetWriterPageSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterPageSize) Callable(java.util.concurrent.Callable) META_TABLE_COLUMNS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS) PrestoException(com.facebook.presto.spi.PrestoException) HiveSessionProperties.isParquetOptimizedWriterEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetOptimizedWriterEnabled) NodeVersion(com.facebook.presto.hive.NodeVersion) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getParquetWriterBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetWriterBlockSize) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) Splitter(com.google.common.base.Splitter) Type(com.facebook.presto.common.type.Type) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) META_TABLE_COLUMN_TYPES(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES) Properties(java.util.Properties) ParquetOutputFormat(org.apache.parquet.hadoop.ParquetOutputFormat) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HiveFileWriterFactory(com.facebook.presto.hive.HiveFileWriterFactory) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions) HiveFileWriter(com.facebook.presto.hive.HiveFileWriter) IOException(java.io.IOException) ConnectorSession(com.facebook.presto.spi.ConnectorSession) JobConf(org.apache.hadoop.mapred.JobConf) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) MapredParquetOutputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat) Optional(java.util.Optional) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) HIVE_WRITER_OPEN_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) HiveType.toHiveTypes(com.facebook.presto.hive.HiveType.toHiveTypes) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) Type(com.facebook.presto.common.type.Type) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) FileSystem(org.apache.hadoop.fs.FileSystem) ParquetWriterOptions(com.facebook.presto.parquet.writer.ParquetWriterOptions)

Example 14 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class TestHivePageSourceProvider method testUseRecordReaderWithInputFormatAnnotationAndCustomSplit.

@Test
public void testUseRecordReaderWithInputFormatAnnotationAndCustomSplit() {
    StorageFormat storageFormat = StorageFormat.create(ParquetHiveSerDe.class.getName(), HoodieParquetInputFormat.class.getName(), "");
    Storage storage = new Storage(storageFormat, "test", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    Map<String, String> customSplitInfo = ImmutableMap.of(CUSTOM_FILE_SPLIT_CLASS_KEY, HoodieRealtimeFileSplit.class.getName(), HUDI_BASEPATH_KEY, "/test/file.parquet", HUDI_DELTA_FILEPATHS_KEY, "/test/.file_100.log", HUDI_MAX_COMMIT_TIME_KEY, "100");
    HiveRecordCursorProvider recordCursorProvider = new MockHiveRecordCursorProvider();
    HiveBatchPageSourceFactory hiveBatchPageSourceFactory = new MockHiveBatchPageSourceFactory();
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(recordCursorProvider), ImmutableSet.of(hiveBatchPageSourceFactory), new Configuration(), new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setUseRecordPageSourceForCustomSplit(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties()), new Path("/test/"), OptionalInt.empty(), 0, 100, 200, Instant.now().toEpochMilli(), storage, TupleDomain.none(), ImmutableList.of(), ImmutableMap.of(), ImmutableList.of(), DateTimeZone.UTC, new TestingTypeManager(), new SchemaTableName("test", "test"), ImmutableList.of(), ImmutableList.of(), ImmutableMap.of(), 0, TableToPartitionMapping.empty(), Optional.empty(), false, null, null, false, null, Optional.empty(), customSplitInfo);
    assertTrue(pageSource.isPresent());
    assertTrue(pageSource.get() instanceof RecordPageSource);
}
Also used : HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Path(org.apache.hadoop.fs.Path) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Configuration(org.apache.hadoop.conf.Configuration) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) RecordPageSource(com.facebook.presto.spi.RecordPageSource) Storage(com.facebook.presto.hive.metastore.Storage) CacheConfig(com.facebook.presto.cache.CacheConfig) TestingTypeManager(com.facebook.presto.common.type.TestingTypeManager) Test(org.testng.annotations.Test)

Example 15 with StorageFormat

use of com.facebook.presto.hive.metastore.StorageFormat in project presto by prestodb.

the class HiveSplitManager method getPartitionMetadata.

private Iterable<HivePartitionMetadata> getPartitionMetadata(SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketHandle> hiveBucketHandle, ConnectorSession session, WarningCollector warningCollector, Optional<Set<HiveColumnHandle>> requestedColumns, Map<String, HiveColumnHandle> predicateColumns, Optional<Map<Subfield, Domain>> domains) {
    if (hivePartitions.isEmpty()) {
        return ImmutableList.of();
    }
    Optional<Set<HiveColumnHandle>> allRequestedColumns = mergeRequestedAndPredicateColumns(requestedColumns, ImmutableSet.copyOf(predicateColumns.values()));
    if (hivePartitions.size() == 1) {
        HivePartition firstPartition = getOnlyElement(hivePartitions);
        if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
            return ImmutableList.of(new HivePartitionMetadata(firstPartition, Optional.empty(), TableToPartitionMapping.empty(), encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns), ImmutableSet.of()));
        }
    }
    StorageFormat storageFormat = table.getStorage().getStorageFormat();
    Optional<HiveStorageFormat> hiveStorageFormat = getHiveStorageFormat(storageFormat);
    Optional<HiveStorageFormat> resolvedHiveStorageFormat;
    if (isUseParquetColumnNames(session)) {
        // Use Hive Storage Format as Parquet if table is of HUDI format
        resolvedHiveStorageFormat = (!hiveStorageFormat.isPresent() && isHudiFormat(storageFormat)) ? Optional.of(PARQUET) : hiveStorageFormat;
    } else {
        resolvedHiveStorageFormat = hiveStorageFormat;
    }
    Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
    Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
        Map<String, PartitionSplitInfo> partitionSplitInfo = getPartitionSplitInfo(session, metastore, tableName, partitionBatch, predicateColumns, domains);
        if (partitionBatch.size() != partitionSplitInfo.size()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitionSplitInfo.size()));
        }
        Map<String, Partition> partitions = partitionSplitInfo.entrySet().stream().collect(toImmutableMap(Entry::getKey, entry -> entry.getValue().getPartition()));
        Optional<Map<String, EncryptionInformation>> encryptionInformationForPartitions = encryptionInformationProvider.getReadEncryptionInformation(session, table, allRequestedColumns, partitions);
        ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
        Map<String, Set<String>> partitionsNotReadable = new HashMap<>();
        int unreadablePartitionsSkipped = 0;
        for (HivePartition hivePartition : partitionBatch) {
            Partition partition = partitions.get(hivePartition.getPartitionId());
            if (partitionSplitInfo.get(hivePartition.getPartitionId()).isPruned()) {
                continue;
            }
            if (partition == null) {
                throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
            }
            String partitionName = makePartName(table.getPartitionColumns(), partition.getValues());
            Optional<EncryptionInformation> encryptionInformation = encryptionInformationForPartitions.map(metadata -> metadata.get(hivePartition.getPartitionId()));
            if (!isOfflineDataDebugModeEnabled(session)) {
                // verify partition is online
                verifyOnline(tableName, Optional.of(partitionName), getProtectMode(partition), partition.getParameters());
                // verify partition is not marked as non-readable
                String reason = partition.getParameters().get(OBJECT_NOT_READABLE);
                if (!isNullOrEmpty(reason)) {
                    if (!shouldIgnoreUnreadablePartition(session) || !partition.isEligibleToIgnore()) {
                        throw new HiveNotReadableException(tableName, Optional.of(partitionName), reason);
                    }
                    unreadablePartitionsSkipped++;
                    if (partitionsNotReadable.size() <= 3) {
                        partitionsNotReadable.putIfAbsent(reason, new HashSet<>(ImmutableSet.of(partitionName)));
                        if (partitionsNotReadable.get(reason).size() <= 3) {
                            partitionsNotReadable.get(reason).add(partitionName);
                        }
                    }
                    continue;
                }
            }
            // Verify that the partition schema matches the table schema.
            // Either adding or dropping columns from the end of the table
            // without modifying existing partitions is allowed, but every
            // column that exists in both the table and partition must have
            // the same type.
            List<Column> tableColumns = table.getDataColumns();
            List<Column> partitionColumns = partition.getColumns();
            if ((tableColumns == null) || (partitionColumns == null)) {
                throw new PrestoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partitionName));
            }
            TableToPartitionMapping tableToPartitionMapping = getTableToPartitionMapping(session, resolvedHiveStorageFormat, tableName, partitionName, tableColumns, partitionColumns);
            if (hiveBucketHandle.isPresent() && !hiveBucketHandle.get().isVirtuallyBucketed()) {
                Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
                if (!partitionBucketProperty.isPresent()) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) is bucketed but partition (%s) is not bucketed", hivePartition.getTableName(), hivePartition.getPartitionId()));
                }
                int tableBucketCount = hiveBucketHandle.get().getTableBucketCount();
                int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
                List<String> tableBucketColumns = hiveBucketHandle.get().getColumns().stream().map(HiveColumnHandle::getName).collect(toImmutableList());
                List<String> partitionBucketColumns = partitionBucketProperty.get().getBucketedBy();
                if (!tableBucketColumns.equals(partitionBucketColumns) || !isBucketCountCompatible(tableBucketCount, partitionBucketCount)) {
                    throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("Hive table (%s) bucketing (columns=%s, buckets=%s) is not compatible with partition (%s) bucketing (columns=%s, buckets=%s)", hivePartition.getTableName(), tableBucketColumns, tableBucketCount, hivePartition.getPartitionId(), partitionBucketColumns, partitionBucketCount));
                }
            }
            results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), tableToPartitionMapping, encryptionInformation, partitionSplitInfo.get(hivePartition.getPartitionId()).getRedundantColumnDomains()));
        }
        if (unreadablePartitionsSkipped > 0) {
            StringBuilder warningMessage = new StringBuilder(format("Table '%s' has %s out of %s partitions unreadable: ", tableName, unreadablePartitionsSkipped, partitionBatch.size()));
            for (Entry<String, Set<String>> entry : partitionsNotReadable.entrySet()) {
                warningMessage.append(String.join(", ", entry.getValue())).append("... are due to ").append(entry.getKey()).append(". ");
            }
            warningCollector.add(new PrestoWarning(PARTITION_NOT_READABLE, warningMessage.toString()));
        }
        return results.build();
    });
    return concat(partitionBatches);
}
Also used : WarningCollector(com.facebook.presto.spi.WarningCollector) HiveStorageFormat.getHiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat) MetastoreUtil.makePartName(com.facebook.presto.hive.metastore.MetastoreUtil.makePartName) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) CounterStat(com.facebook.airlift.stats.CounterStat) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) HiveSessionProperties.isOfflineDataDebugModeEnabled(com.facebook.presto.hive.HiveSessionProperties.isOfflineDataDebugModeEnabled) GENERIC_INTERNAL_ERROR(com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) SERVER_SHUTTING_DOWN(com.facebook.presto.spi.StandardErrorCode.SERVER_SHUTTING_DOWN) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) IntegerStatistics(com.facebook.presto.hive.metastore.IntegerStatistics) Map(java.util.Map) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) ENGLISH(java.util.Locale.ENGLISH) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Decimals.encodeScaledValue(com.facebook.presto.common.type.Decimals.encodeScaledValue) SemiTransactionalHiveMetastore(com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore) HiveSessionProperties.getHiveMaxInitialSplitSize(com.facebook.presto.hive.HiveSessionProperties.getHiveMaxInitialSplitSize) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) TableToPartitionMapping.mapColumnsByIndex(com.facebook.presto.hive.TableToPartitionMapping.mapColumnsByIndex) MetastoreUtil.getMetastoreHeaders(com.facebook.presto.hive.metastore.MetastoreUtil.getMetastoreHeaders) Iterables(com.google.common.collect.Iterables) Table(com.facebook.presto.hive.metastore.Table) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) HiveSessionProperties.shouldIgnoreUnreadablePartition(com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreUnreadablePartition) Collectors.groupingBy(java.util.stream.Collectors.groupingBy) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) HiveSessionProperties.isPartitionStatisticsBasedOptimizationEnabled(com.facebook.presto.hive.HiveSessionProperties.isPartitionStatisticsBasedOptimizationEnabled) Lists(com.google.common.collect.Lists) Managed(org.weakref.jmx.Managed) PrestoWarning(com.facebook.presto.spi.PrestoWarning) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) HiveBucketFilter(com.facebook.presto.hive.HiveBucketing.HiveBucketFilter) Executor(java.util.concurrent.Executor) DoubleStatistics(com.facebook.presto.hive.metastore.DoubleStatistics) AbstractIterator(com.google.common.collect.AbstractIterator) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Collectors.reducing(java.util.stream.Collectors.reducing) Domain(com.facebook.presto.common.predicate.Domain) ColumnHandle(com.facebook.presto.spi.ColumnHandle) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) BucketSplitInfo.createBucketSplitInfo(com.facebook.presto.hive.StoragePartitionLoader.BucketSplitInfo.createBucketSplitInfo) ValueSet(com.facebook.presto.common.predicate.ValueSet) Iterables.transform(com.google.common.collect.Iterables.transform) HoodieParquetRealtimeInputFormat(org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) Float.floatToIntBits(java.lang.Float.floatToIntBits) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Iterables.concat(com.google.common.collect.Iterables.concat) HiveType.getPrimitiveType(com.facebook.presto.hive.HiveType.getPrimitiveType) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) GROUPED_SCHEDULING(com.facebook.presto.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) HiveSessionProperties.getLeaseDuration(com.facebook.presto.hive.HiveSessionProperties.getLeaseDuration) Math.min(java.lang.Math.min) String.format(java.lang.String.format) Range(com.facebook.presto.common.predicate.Range) DateStatistics(com.facebook.presto.hive.metastore.DateStatistics) DataSize(io.airlift.units.DataSize) List(java.util.List) Entry(java.util.Map.Entry) Optional(java.util.Optional) Nested(org.weakref.jmx.Nested) MetastoreUtil.getProtectMode(com.facebook.presto.hive.metastore.MetastoreUtil.getProtectMode) Column(com.facebook.presto.hive.metastore.Column) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) HIVE_PARTITION_DROPPED_DURING_QUERY(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY) HashMap(java.util.HashMap) PrestoException(com.facebook.presto.spi.PrestoException) HiveColumnHandle.isPathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) Partition(com.facebook.presto.hive.metastore.Partition) Inject(javax.inject.Inject) HashSet(java.util.HashSet) UNPARTITIONED_ID(com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) BoundedExecutor(com.facebook.airlift.concurrent.BoundedExecutor) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) PARTITION_NOT_READABLE(com.facebook.presto.hive.HiveWarningCode.PARTITION_NOT_READABLE) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.verifyOnline(com.facebook.presto.hive.metastore.MetastoreUtil.verifyOnline) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) Type(com.facebook.presto.common.type.Type) ExecutorService(java.util.concurrent.ExecutorService) HIVE_TRANSACTION_NOT_FOUND(com.facebook.presto.hive.HiveErrorCode.HIVE_TRANSACTION_NOT_FOUND) Iterator(java.util.Iterator) FixedSplitSource(com.facebook.presto.spi.FixedSplitSource) PRIMITIVE(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Ordering(com.google.common.collect.Ordering) MetastoreUtil.isUserDefinedTypeEncodingEnabled(com.facebook.presto.hive.metastore.MetastoreUtil.isUserDefinedTypeEncodingEnabled) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Set(java.util.Set) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ValueSet(com.facebook.presto.common.predicate.ValueSet) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) ImmutableSet(com.google.common.collect.ImmutableSet) HashSet(java.util.HashSet) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) PrestoException(com.facebook.presto.spi.PrestoException) HiveStorageFormat.getHiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) HiveStorageFormat.getHiveStorageFormat(com.facebook.presto.hive.HiveStorageFormat.getHiveStorageFormat) Column(com.facebook.presto.hive.metastore.Column) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HiveSessionProperties.shouldIgnoreUnreadablePartition(com.facebook.presto.hive.HiveSessionProperties.shouldIgnoreUnreadablePartition) Partition(com.facebook.presto.hive.metastore.Partition) PrestoWarning(com.facebook.presto.spi.PrestoWarning) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Aggregations

StorageFormat (com.facebook.presto.hive.metastore.StorageFormat)20 ConnectorSession (com.facebook.presto.spi.ConnectorSession)15 List (java.util.List)15 Objects.requireNonNull (java.util.Objects.requireNonNull)15 Optional (java.util.Optional)15 Path (org.apache.hadoop.fs.Path)15 PrestoException (com.facebook.presto.spi.PrestoException)14 ImmutableMap (com.google.common.collect.ImmutableMap)14 IOException (java.io.IOException)14 Collectors.toList (java.util.stream.Collectors.toList)14 DateTimeZone (org.joda.time.DateTimeZone)14 SchemaTableName (com.facebook.presto.spi.SchemaTableName)12 Type (com.facebook.presto.common.type.Type)11 Test (org.testng.annotations.Test)11 DWRF (com.facebook.presto.hive.HiveStorageFormat.DWRF)10 ORC (com.facebook.presto.hive.HiveStorageFormat.ORC)10 PARQUET (com.facebook.presto.hive.HiveStorageFormat.PARQUET)10 Column (com.facebook.presto.hive.metastore.Column)10 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)10 REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR)9