Search in sources :

Example 1 with ConnectorTableProperties

use of io.trino.spi.connector.ConnectorTableProperties in project trino by trinodb.

the class HiveMetadata method getTableProperties.

/**
 * Assembles the properties the engine is told about a Hive table: a predicate
 * derived from its partitions, discrete per-partition predicates, bucket-based
 * table partitioning, and local sorting properties.
 *
 * @param session session consulted for the sorting-propagation and bucket-execution switches
 * @param table table handle, cast to {@link HiveTableHandle}
 * @return the assembled {@link ConnectorTableProperties}
 */
@Override
public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) {
    HiveTableHandle handle = (HiveTableHandle) table;
    List<ColumnHandle> partitionKeys = ImmutableList.copyOf(handle.getPartitionColumns());

    TupleDomain<ColumnHandle> enforcedPredicate = TupleDomain.all();
    Optional<DiscretePredicates> discrete = Optional.empty();

    // The predicate and the discrete predicates are only computed when
    // getPartitionNames() reports empty; otherwise the defaults above are kept.
    if (handle.getPartitionNames().isEmpty()) {
        Optional<List<HivePartition>> loaded = handle.getPartitions();
        if (!loaded.isPresent()) {
            // The handle carries no partitions, so try to load them here in order to
            // compute the predicates enforced by the table. The result is not stored
            // back into the table handle, so this work may be repeated many times.
            // TODO: https://github.com/trinodb/trino/issues/10980.
            HivePartitionResult loadResult = partitionManager.getPartitions(
                    metastore,
                    table,
                    new Constraint(handle.getEnforcedConstraint()));
            loaded = partitionManager.canPartitionsBeLoaded(loadResult)
                    ? Optional.of(partitionManager.getPartitionsAsList(loadResult))
                    : Optional.empty();
        }

        if (loaded.isPresent()) {
            List<HivePartition> partitionList = loaded.orElseThrow();
            // With the partitions available as a list, derive the predicate from them.
            enforcedPredicate = createPredicate(partitionKeys, partitionList);

            // Discrete predicates are only produced for tables that have partition columns.
            if (!partitionKeys.isEmpty()) {
                // Build the per-partition domains through a transforming view rather than
                // a materialized collection: the number of partitions can be huge, and
                // this way the domains never all have to be in memory at once.
                Iterable<TupleDomain<ColumnHandle>> domains = Iterables.transform(
                        partitionList,
                        partition -> TupleDomain.fromFixedValues(partition.getKeys()));
                discrete = Optional.of(new DiscretePredicates(partitionKeys, domains));
            }
        }
    }

    Optional<ConnectorTablePartitioning> partitioning = Optional.empty();
    List<LocalProperty<ColumnHandle>> localProperties = ImmutableList.of();

    if (handle.getBucketHandle().isPresent()) {
        if (isPropagateTableScanSortingProperties(session) && !handle.getBucketHandle().get().getSortedBy().isEmpty()) {
            // A SortingProperty is a promise to the engine that the input is already sorted.
            // Mismatches between table-level and partition-level sorted_by are detected in
            // HiveSplitManager#getPartitionMetadata, which fails the query. Results can still
            // be wrong when sorted_by is declared over files that are not actually sorted.
            Map<String, ColumnHandle> handlesByName = getColumnHandles(session, table);
            localProperties = handle.getBucketHandle().get().getSortedBy().stream()
                    .map(sortingColumn -> new SortingProperty<>(
                            handlesByName.get(sortingColumn.getColumnName()),
                            sortingColumn.getOrder().getSortOrder()))
                    .collect(toImmutableList());
        }

        if (isBucketExecutionEnabled(session)) {
            partitioning = handle.getBucketHandle().map(bucketHandle -> {
                HivePartitioningHandle partitioningHandle = new HivePartitioningHandle(
                        bucketHandle.getBucketingVersion(),
                        bucketHandle.getReadBucketCount(),
                        bucketHandle.getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()),
                        OptionalInt.empty(),
                        false);
                List<ColumnHandle> bucketColumns = bucketHandle.getColumns().stream()
                        .map(ColumnHandle.class::cast)
                        .collect(toImmutableList());
                return new ConnectorTablePartitioning(partitioningHandle, bucketColumns);
            });
        }
    }

    return new ConnectorTableProperties(enforcedPredicate, partitioning, Optional.empty(), discrete, localProperties);
}
Also used : HiveTableProperties.getBucketProperty(io.trino.plugin.hive.HiveTableProperties.getBucketProperty) InsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior) HiveSessionProperties.getQueryPartitionFilterRequiredSchemas(io.trino.plugin.hive.HiveSessionProperties.getQueryPartitionFilterRequiredSchemas) ORC_BLOOM_FILTER_FPP(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP) FileSystem(org.apache.hadoop.fs.FileSystem) HiveBucketing.isSupportedBucketing(io.trino.plugin.hive.util.HiveBucketing.isSupportedBucketing) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.hive.HiveTableProperties.ANALYZE_COLUMNS_PROPERTY) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) HiveApplyProjectionUtil.replaceWithNewVariables(io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveTableProperties.getFooterSkipCount(io.trino.plugin.hive.HiveTableProperties.getFooterSkipCount) TABLE_NOT_FOUND(io.trino.spi.StandardErrorCode.TABLE_NOT_FOUND) Map(java.util.Map) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) HiveSessionProperties.isBucketExecutionEnabled(io.trino.plugin.hive.HiveSessionProperties.isBucketExecutionEnabled) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Domain(io.trino.spi.predicate.Domain) HiveSessionProperties.isStatisticsEnabled(io.trino.plugin.hive.HiveSessionProperties.isStatisticsEnabled) NULL_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.NULL_FORMAT_PROPERTY) HiveUtil.verifyPartitionTypeSupported(io.trino.plugin.hive.util.HiveUtil.verifyPartitionTypeSupported) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) Stream(java.util.stream.Stream) 
AcidUtils.deltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deltaSubdir) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) HIVE_CONCURRENT_MODIFICATION_DETECTED(io.trino.plugin.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED) HIVE_UNKNOWN_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR) TableScanRedirectApplicationResult(io.trino.spi.connector.TableScanRedirectApplicationResult) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) HiveSessionProperties.isSortedWritingEnabled(io.trino.plugin.hive.HiveSessionProperties.isSortedWritingEnabled) Joiner(com.google.common.base.Joiner) Partition(io.trino.plugin.hive.metastore.Partition) HiveColumnHandle.updateRowIdColumnHandle(io.trino.plugin.hive.HiveColumnHandle.updateRowIdColumnHandle) HiveUtil(io.trino.plugin.hive.util.HiveUtil) HiveWriterFactory.computeNonTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeNonTransactionalBucketedFilename) BUCKETED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) Variable(io.trino.spi.expression.Variable) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) Supplier(java.util.function.Supplier) HiveTableProperties.getSingleCharacterProperty(io.trino.plugin.hive.HiveTableProperties.getSingleCharacterProperty) TimestampType(io.trino.spi.type.TimestampType) ADD(io.trino.plugin.hive.util.Statistics.ReduceOperator.ADD) 
HiveSessionProperties.getCompressionCodec(io.trino.plugin.hive.HiveSessionProperties.getCompressionCodec) OptionalLong(java.util.OptionalLong) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) PATH_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PATH_COLUMN_NAME) GrantInfo(io.trino.spi.security.GrantInfo) MaterializedViewFreshness(io.trino.spi.connector.MaterializedViewFreshness) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveWriteUtils(io.trino.plugin.hive.util.HiveWriteUtils) HiveAnalyzeProperties.getColumnNames(io.trino.plugin.hive.HiveAnalyzeProperties.getColumnNames) MapType(io.trino.spi.type.MapType) HIVE_UNSUPPORTED_FORMAT(io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) HiveTableProperties.getPartitionedBy(io.trino.plugin.hive.HiveTableProperties.getPartitionedBy) CatalogName(io.trino.plugin.base.CatalogName) HiveSessionProperties.isCollectColumnStatisticsOnWrite(io.trino.plugin.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite) AVRO_SCHEMA_URL(io.trino.plugin.hive.HiveTableProperties.AVRO_SCHEMA_URL) HiveBucketing.getHiveBucketHandle(io.trino.plugin.hive.util.HiveBucketing.getHiveBucketHandle) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) HiveTableProperties.getHeaderSkipCount(io.trino.plugin.hive.HiveTableProperties.getHeaderSkipCount) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) Boolean.parseBoolean(java.lang.Boolean.parseBoolean) HiveWriteUtils.initializeSerializer(io.trino.plugin.hive.util.HiveWriteUtils.initializeSerializer) 
HiveSessionProperties.isCreateEmptyBucketFiles(io.trino.plugin.hive.HiveSessionProperties.isCreateEmptyBucketFiles) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) HiveSessionProperties.isNonTransactionalOptimizeEnabled(io.trino.plugin.hive.HiveSessionProperties.isNonTransactionalOptimizeEnabled) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HivePartitionManager.extractPartitionValues(io.trino.plugin.hive.HivePartitionManager.extractPartitionValues) HiveTableProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveTableProperties.getHiveStorageFormat) HiveTableProperties.getAvroSchemaUrl(io.trino.plugin.hive.HiveTableProperties.getAvroSchemaUrl) CompressionConfigUtil.configureCompression(io.trino.plugin.hive.util.CompressionConfigUtil.configureCompression) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) Database(io.trino.plugin.hive.metastore.Database) DIRECT_TO_TARGET_EXISTING_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) MetadataProvider(io.trino.spi.connector.MetadataProvider) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Collectors.toMap(java.util.stream.Collectors.toMap) Block(io.trino.spi.block.Block) ViewReaderUtil.encodeViewData(io.trino.plugin.hive.ViewReaderUtil.encodeViewData) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) Statistics.reduce(io.trino.plugin.hive.util.Statistics.reduce) ImmutableSet(com.google.common.collect.ImmutableSet) 
SemiTransactionalHiveMetastore.cleanExtraOutputFiles(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore.cleanExtraOutputFiles) Collection(java.util.Collection) VIEW_STORAGE_FORMAT(io.trino.plugin.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ROW_COUNT(io.trino.spi.statistics.TableStatisticType.ROW_COUNT) Constraint.alwaysTrue(io.trino.spi.connector.Constraint.alwaysTrue) PRESTO_VIEW_FLAG(io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) BIGINT(io.trino.spi.type.BigintType.BIGINT) StandardErrorCode(io.trino.spi.StandardErrorCode) SORTED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) AcidTransaction.forCreateTable(io.trino.plugin.hive.acid.AcidTransaction.forCreateTable) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) TableStatisticType(io.trino.spi.statistics.TableStatisticType) JsonCodec(io.airlift.json.JsonCodec) Constraint(io.trino.spi.connector.Constraint) HiveSessionProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveSessionProperties.getHiveStorageFormat) AUTO_PURGE(io.trino.plugin.hive.HiveTableProperties.AUTO_PURGE) NANOSECONDS(io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS) CompletableFuture(java.util.concurrent.CompletableFuture) HiveSessionProperties.isProjectionPushdownEnabled(io.trino.plugin.hive.HiveSessionProperties.isProjectionPushdownEnabled) Statistics.createComputedStatisticsToPartitionMap(io.trino.plugin.hive.util.Statistics.createComputedStatisticsToPartitionMap) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HiveTableProperties.isTransactional(io.trino.plugin.hive.HiveTableProperties.isTransactional) 
HashSet(java.util.HashSet) ViewReaderUtil.createViewReader(io.trino.plugin.hive.ViewReaderUtil.createViewReader) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) ImmutableList(com.google.common.collect.ImmutableList) TEXTFILE_FIELD_SEPARATOR_ESCAPE(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR_ESCAPE) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) NoSuchElementException(java.util.NoSuchElementException) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) RetryMode(io.trino.spi.connector.RetryMode) NEW(io.trino.plugin.hive.PartitionUpdate.UpdateMode.NEW) HIVE_VIEW_TRANSLATION_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_VIEW_TRANSLATION_ERROR) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveWriteUtils.isFileCreatedByQuery(io.trino.plugin.hive.util.HiveWriteUtils.isFileCreatedByQuery) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) JobConf(org.apache.hadoop.mapred.JobConf) HiveTableProperties.getNullFormat(io.trino.plugin.hive.HiveTableProperties.getNullFormat) HiveTableProperties.getOrcBloomFilterFpp(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterFpp) OrcAcidVersion.writeVersionFile(org.apache.hadoop.hive.ql.io.AcidUtils.OrcAcidVersion.writeVersionFile) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) HiveWriterFactory.computeTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeTransactionalBucketedFilename) TypeManager(io.trino.spi.type.TypeManager) SystemTables.getSourceTableNameFromSystemTable(io.trino.plugin.hive.util.SystemTables.getSourceTableNameFromSystemTable) HiveTableProperties.getExternalLocation(io.trino.plugin.hive.HiveTableProperties.getExternalLocation) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) 
ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) HiveAnalyzeProperties.getPartitionList(io.trino.plugin.hive.HiveAnalyzeProperties.getPartitionList) Column(io.trino.plugin.hive.metastore.Column) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PARTITION_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PARTITION_COLUMN_NAME) ORC_BLOOM_FILTER_COLUMNS(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS) Table(io.trino.plugin.hive.metastore.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PARTITIONED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) Set(java.util.Set) MetastoreUtil.getHiveSchema(io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema) SchemaTableName(io.trino.spi.connector.SchemaTableName) SortingProperty(io.trino.spi.connector.SortingProperty) HIVE_COLUMN_ORDER_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) CSV_QUOTE(io.trino.plugin.hive.HiveTableProperties.CSV_QUOTE) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) MetastoreUtil.getProtectMode(io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode) HIVE_INVALID_VIEW_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) ConnectorTablePartitioning(io.trino.spi.connector.ConnectorTablePartitioning) 
RemoteIterator(org.apache.hadoop.fs.RemoteIterator) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) ConfigurationUtils.toJobConf(io.trino.plugin.hive.util.ConfigurationUtils.toJobConf) NON_TRANSACTIONAL_OPTIMIZE_ENABLED(io.trino.plugin.hive.HiveSessionProperties.NON_TRANSACTIONAL_OPTIMIZE_ENABLED) HiveUtil.columnExtraInfo(io.trino.plugin.hive.util.HiveUtil.columnExtraInfo) ArrayList(java.util.ArrayList) PrincipalPrivileges.fromHivePrivilegeInfos(io.trino.plugin.hive.metastore.PrincipalPrivileges.fromHivePrivilegeInfos) ColumnHandle(io.trino.spi.connector.ColumnHandle) HiveUtil.hiveColumnHandles(io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) STORAGE_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) ViewReaderUtil.isPrestoView(io.trino.plugin.hive.ViewReaderUtil.isPrestoView) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveSessionProperties.isRespectTableFormat(io.trino.plugin.hive.HiveSessionProperties.isRespectTableFormat) RetryDriver.retry(io.trino.plugin.hive.util.RetryDriver.retry) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) Properties(java.util.Properties) HiveWriteUtils.checkTableIsWritable(io.trino.plugin.hive.util.HiveWriteUtils.checkTableIsWritable) EXTERNAL_LOCATION_PROPERTY(io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY) TEXTFILE_FIELD_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR) ConnectorSession(io.trino.spi.connector.ConnectorSession) HiveStatisticsProvider(io.trino.plugin.hive.statistics.HiveStatisticsProvider) RoleGrant(io.trino.spi.security.RoleGrant) 
File(java.io.File) HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore(io.trino.plugin.hive.HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore) HiveUtil.isSparkBucketedTable(io.trino.plugin.hive.util.HiveUtil.isSparkBucketedTable) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) Sets.intersection(com.google.common.collect.Sets.intersection) TableType(org.apache.hadoop.hive.metastore.TableType) HiveSessionProperties.isParallelPartitionedBucketedWrites(io.trino.plugin.hive.HiveSessionProperties.isParallelPartitionedBucketedWrites) ViewReaderUtil.isHiveOrPrestoView(io.trino.plugin.hive.ViewReaderUtil.isHiveOrPrestoView) HiveSessionProperties.isQueryPartitionFilterRequired(io.trino.plugin.hive.HiveSessionProperties.isQueryPartitionFilterRequired) HIVE_WRITER_CLOSE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR) URL(java.net.URL) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) HiveBucketing(io.trino.plugin.hive.util.HiveBucketing) Iterables.concat(com.google.common.collect.Iterables.concat) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) ErrorType(io.trino.spi.ErrorType) HIVE_FILESYSTEM_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) Splitter(com.google.common.base.Splitter) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) OVERWRITE(io.trino.plugin.hive.PartitionUpdate.UpdateMode.OVERWRITE) SKIP_HEADER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_HEADER_LINE_COUNT) RowType(io.trino.spi.type.RowType) ImmutableMap(com.google.common.collect.ImmutableMap) AccessControlMetadata(io.trino.plugin.hive.security.AccessControlMetadata) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) 
AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HiveTableProperties.getAnalyzeColumns(io.trino.plugin.hive.HiveTableProperties.getAnalyzeColumns) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Statistics.fromComputedStatistics(io.trino.plugin.hive.util.Statistics.fromComputedStatistics) HiveApplyProjectionUtil.find(io.trino.plugin.hive.HiveApplyProjectionUtil.find) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) MetastoreUtil.verifyOnline(io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline) FILE_SIZE_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME) Assignment(io.trino.spi.connector.Assignment) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) SystemTable(io.trino.spi.connector.SystemTable) CSV_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.CSV_SEPARATOR) HiveWriteUtils.isWritableType(io.trino.plugin.hive.util.HiveWriteUtils.isWritableType) HiveSessionProperties.isOptimizedMismatchedBucketCount(io.trino.plugin.hive.HiveSessionProperties.isOptimizedMismatchedBucketCount) Logger(io.airlift.log.Logger) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) HashMap(java.util.HashMap) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) AcidSchema(io.trino.plugin.hive.acid.AcidSchema) 
HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Verify.verify(com.google.common.base.Verify.verify) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) HiveTableProperties.getOrcBloomFilterColumns(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterColumns) HiveApplyProjectionUtil.extractSupportedProjectedColumns(io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns) Privilege(io.trino.spi.security.Privilege) VerifyException(com.google.common.base.VerifyException) APPEND(io.trino.plugin.hive.PartitionUpdate.UpdateMode.APPEND) HiveSessionProperties.getInsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.getInsertExistingPartitionsBehavior) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) NO_PRIVILEGES(io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES) MalformedURLException(java.net.MalformedURLException) HiveSessionProperties.isPropagateTableScanSortingProperties(io.trino.plugin.hive.HiveSessionProperties.isPropagateTableScanSortingProperties) Statistics.createEmptyPartitionStatistics(io.trino.plugin.hive.util.Statistics.createEmptyPartitionStatistics) TupleDomain(io.trino.spi.predicate.TupleDomain) Maps(com.google.common.collect.Maps) OptimizeTableProcedure(io.trino.plugin.hive.procedure.OptimizeTableProcedure) SKIP_FOOTER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_FOOTER_LINE_COUNT) SYNTHESIZED(io.trino.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) LocalProperty(io.trino.spi.connector.LocalProperty) BUCKET_COUNT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) VisibleForTesting(com.google.common.annotations.VisibleForTesting) 
HiveUtil.getPartitionKeyColumnHandles(io.trino.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles) Collections(java.util.Collections) CSV_ESCAPE(io.trino.plugin.hive.HiveTableProperties.CSV_ESCAPE) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) HiveColumnHandle.updateRowIdColumnHandle(io.trino.plugin.hive.HiveColumnHandle.updateRowIdColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) Constraint(io.trino.spi.connector.Constraint) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) ConnectorTablePartitioning(io.trino.spi.connector.ConnectorTablePartitioning) TupleDomain(io.trino.spi.predicate.TupleDomain) LocalProperty(io.trino.spi.connector.LocalProperty) ImmutableList(com.google.common.collect.ImmutableList) HiveAnalyzeProperties.getPartitionList(io.trino.plugin.hive.HiveAnalyzeProperties.getPartitionList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties)

Example 2 with ConnectorTableProperties

use of io.trino.spi.connector.ConnectorTableProperties in project trino by trinodb.

the class AbstractTestHive method testBucketedTableBigintBoolean.

/**
 * Checks that the table bucketed on (string, bigint, boolean) is reported as bucketed,
 * exposes no local properties, and that a row matching fixed values for all three
 * columns can be read back.
 */
@SuppressWarnings("ConstantConditions")
@Test
public void testBucketedTableBigintBoolean() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);

        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableBucketedBigintBoolean);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        assertTableIsBucketed(tableHandle, transaction, session);

        // trino_test_bucketed_by_bigint_boolean does not define sorting, so the local
        // properties are empty both with sorting propagation switched on and with a
        // plain session.
        ConnectorSession sortingSession = newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true));
        ConnectorTableProperties properties = metadata.getTableProperties(sortingSession, tableHandle);
        assertTrue(properties.getLocalProperties().isEmpty());
        assertTrue(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties().isEmpty());

        String testString = "test";
        Long testBigint = 89L;
        Boolean testBoolean = true;

        // Positions of the three columns the lookup is keyed on.
        int stringPosition = columnIndex.get("t_string");
        int bigintPosition = columnIndex.get("t_bigint");
        int booleanPosition = columnIndex.get("t_boolean");

        ImmutableMap<ColumnHandle, NullableValue> bindings = ImmutableMap.<ColumnHandle, NullableValue>builder()
                .put(columnHandles.get(stringPosition), NullableValue.of(createUnboundedVarcharType(), utf8Slice(testString)))
                .put(columnHandles.get(bigintPosition), NullableValue.of(BIGINT, testBigint))
                .put(columnHandles.get(booleanPosition), NullableValue.of(BOOLEAN, testBoolean))
                .buildOrThrow();

        MaterializedResult result = readTable(
                transaction,
                tableHandle,
                columnHandles,
                session,
                TupleDomain.fromFixedValues(bindings),
                OptionalInt.of(1),
                Optional.empty());

        // Scan until the first row whose three key fields all equal the probe values.
        boolean rowFound = false;
        for (MaterializedRow row : result) {
            rowFound = testString.equals(row.getField(stringPosition))
                    && testBigint.equals(row.getField(bigintPosition))
                    && testBoolean.equals(row.getField(booleanPosition));
            if (rowFound) {
                break;
            }
        }
        assertTrue(rowFound);
    }
}
Also used : HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) NullableValue(io.trino.spi.predicate.NullableValue) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) OptionalLong(java.util.OptionalLong) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) MaterializedRow(io.trino.testing.MaterializedRow) Test(org.testng.annotations.Test)

Example 3 with ConnectorTableProperties

use of io.trino.spi.connector.ConnectorTableProperties in project trino by trinodb.

the class AbstractTestHive method doTestBucketSortedTables.

/**
 * Creates a bucketed, sorted table, fills it with random rows, and verifies both the
 * write path (temporary sort files are produced and then cleaned up) and the read path
 * (every split is ordered by {@code value_asc} ascending, then {@code value_desc} descending).
 *
 * <p>The table is stored as RCBINARY, partitioned by {@code ds}, bucketed by {@code id}
 * into 3 buckets and sorted by {@code value_asc ASC, value_desc DESC}. 50 pages of 1000
 * rows each are written, all into the single partition {@code ds=2018-04-01}.
 *
 * @param table name of the table to create and read back
 * @throws IOException declared for the file-listing helpers used while inspecting the staging directory
 */
private void doTestBucketSortedTables(SchemaTableName table) throws IOException {
    int bucketCount = 3;
    int expectedRowCount = 0;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
                table,
                ImmutableList.<ColumnMetadata>builder()
                        .add(new ColumnMetadata("id", VARCHAR))
                        .add(new ColumnMetadata("value_asc", VARCHAR))
                        .add(new ColumnMetadata("value_desc", BIGINT))
                        .add(new ColumnMetadata("ds", VARCHAR))
                        .build(),
                ImmutableMap.<String, Object>builder()
                        .put(STORAGE_FORMAT_PROPERTY, RCBINARY)
                        .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds"))
                        .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id"))
                        .put(BUCKET_COUNT_PROPERTY, bucketCount)
                        .put(SORTED_BY_PROPERTY, ImmutableList.builder()
                                .add(new SortingColumn("value_asc", ASCENDING))
                                .add(new SortingColumn("value_desc", DESCENDING))
                                .build())
                        .buildOrThrow());
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        List<Type> types = tableMetadata.getColumns().stream().map(ColumnMetadata::getType).collect(toList());
        ThreadLocalRandom random = ThreadLocalRandom.current();
        for (int i = 0; i < 50; i++) {
            MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
            for (int j = 0; j < 1000; j++) {
                // value_asc has only 100 distinct values, so many rows share a value_asc key
                // and the secondary (value_desc) ordering is actually exercised on read
                builder.row(sha256().hashLong(random.nextLong()).toString(), "test" + random.nextInt(100), random.nextLong(100_000), "2018-04-01");
                expectedRowCount++;
            }
            sink.appendPage(builder.build().toPage());
        }
        HdfsContext context = new HdfsContext(session);
        // verify we have enough temporary files per bucket to require multiple passes
        Path stagingPathRoot;
        if (isTemporaryStagingDirectoryEnabled(session)) {
            stagingPathRoot = new Path(getTemporaryStagingDirectoryPath(session).replace("${USER}", context.getIdentity().getUser()));
        } else {
            stagingPathRoot = getStagingPathRoot(outputHandle);
        }
        assertThat(listAllDataFiles(context, stagingPathRoot))
                .filteredOn(file -> file.contains(".tmp-sort."))
                .size()
                .isGreaterThan(bucketCount * getHiveConfig().getMaxOpenSortFiles() * 2);
        // finish the write
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify there are no temporary files
        for (String file : listAllDataFiles(context, stagingPathRoot)) {
            assertThat(file).doesNotContain(".tmp-sort.");
        }
        // finish creating table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // verify that bucket files are sorted
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        // verify local sorting property
        ConnectorTableProperties properties = metadata.getTableProperties(newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true, "bucket_execution_enabled", false)), tableHandle);
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        assertEquals(properties.getLocalProperties(), ImmutableList.of(new SortingProperty<>(columnHandles.get(columnIndex.get("value_asc")), ASC_NULLS_FIRST), new SortingProperty<>(columnHandles.get(columnIndex.get("value_desc")), DESC_NULLS_LAST)));
        // with default session properties no sorting is reported
        assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties()).isEmpty();
        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertThat(splits).hasSize(bucketCount);
        // pages are produced for columnHandles in list order, so the same index map
        // used for the property assertion above also locates the page channels
        int ascChannel = columnIndex.get("value_asc");
        int descChannel = columnIndex.get("value_desc");
        int actualRowCount = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
                // ordering state is per split and carries across page boundaries
                String lastValueAsc = null;
                long lastValueDesc = 0;
                while (!pageSource.isFinished()) {
                    Page page = pageSource.getNextPage();
                    if (page == null) {
                        continue;
                    }
                    Block blockAsc = page.getBlock(ascChannel);
                    Block blockDesc = page.getBlock(descChannel);
                    for (int i = 0; i < page.getPositionCount(); i++) {
                        assertFalse(blockAsc.isNull(i));
                        assertFalse(blockDesc.isNull(i));
                        String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
                        long valueDesc = BIGINT.getLong(blockDesc, i);
                        if (lastValueAsc != null) {
                            assertGreaterThanOrEqual(valueAsc, lastValueAsc);
                            if (valueAsc.equals(lastValueAsc)) {
                                // same primary key as the previous row: secondary key must not increase
                                assertLessThanOrEqual(valueDesc, lastValueDesc);
                            }
                        }
                        // remember value_desc for every row (including the first row of a
                        // value_asc run) so that every adjacent pair within a run is compared
                        lastValueAsc = valueAsc;
                        lastValueDesc = valueDesc;
                        actualRowCount++;
                    }
                }
            }
        }
        assertThat(actualRowCount).isEqualTo(expectedRowCount);
    }
}
Also used : ColumnMetadata(io.trino.spi.connector.ColumnMetadata) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) SortingProperty(io.trino.spi.connector.SortingProperty) Constraint(io.trino.spi.connector.Constraint) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) MapType(io.trino.spi.type.MapType) HiveTestUtils.mapType(io.trino.plugin.hive.HiveTestUtils.mapType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) HiveTestUtils.arrayType(io.trino.plugin.hive.HiveTestUtils.arrayType) HiveTestUtils.rowType(io.trino.plugin.hive.HiveTestUtils.rowType) CharType.createCharType(io.trino.spi.type.CharType.createCharType) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) CharType(io.trino.spi.type.CharType) TableType(org.apache.hadoop.hive.metastore.TableType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) VarcharType(io.trino.spi.type.VarcharType) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) 
Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) Slice(io.airlift.slice.Slice) Block(io.trino.spi.block.Block) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) MaterializedResult(io.trino.testing.MaterializedResult) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorSplit(io.trino.spi.connector.ConnectorSplit)

Example 4 with ConnectorTableProperties

use of io.trino.spi.connector.ConnectorTableProperties in project trino by trinodb.

the class AbstractTestHive method testGetPartitionNamesUnpartitioned.

/**
 * Applies an always-true constraint to the unpartitioned test table, then compares the
 * resulting table properties against a freshly constructed {@code ConnectorTableProperties}
 * and the resulting partitions against {@code tableUnpartitionedPartitions}.
 */
@Test
public void testGetPartitionNamesUnpartitioned() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle unfilteredHandle = getTableHandle(metadata, tableUnpartitioned);
        // keep the filtered handle separate so it is clear which handle each assertion sees
        ConnectorTableHandle filteredHandle = applyFilter(metadata, unfilteredHandle, Constraint.alwaysTrue());
        assertExpectedTableProperties(
                metadata.getTableProperties(newSession(), filteredHandle),
                new ConnectorTableProperties());
        assertExpectedPartitions(filteredHandle, tableUnpartitionedPartitions);
    }
}
Also used : ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 5 with ConnectorTableProperties

use of io.trino.spi.connector.ConnectorTableProperties in project trino by trinodb.

the class AbstractTestHive method testGetPartitionsWithBindings.

/**
 * Applies a constraint fixing {@code intColumn} to the single BIGINT value 5 on the
 * {@code tablePartitionFormat} table, then compares the resulting table properties and
 * partitions against {@code tablePartitionFormatProperties} and
 * {@code tablePartitionFormatPartitions}.
 */
@Test
public void testGetPartitionsWithBindings() {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle unfilteredHandle = getTableHandle(metadata, tablePartitionFormat);
        // binding: intColumn = 5
        Domain onlyFive = Domain.singleValue(BIGINT, 5L);
        Constraint constraint = new Constraint(TupleDomain.withColumnDomains(ImmutableMap.of(intColumn, onlyFive)));
        ConnectorTableHandle filteredHandle = applyFilter(metadata, unfilteredHandle, constraint);
        assertExpectedTableProperties(
                metadata.getTableProperties(newSession(), filteredHandle),
                tablePartitionFormatProperties);
        assertExpectedPartitions(filteredHandle, tablePartitionFormatPartitions);
    }
}
Also used : Constraint(io.trino.spi.connector.Constraint) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Aggregations

ConnectorTableProperties (io.trino.spi.connector.ConnectorTableProperties)16 ColumnHandle (io.trino.spi.connector.ColumnHandle)13 ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)10 ImmutableList (com.google.common.collect.ImmutableList)8 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)7 ColumnMetadata (io.trino.spi.connector.ColumnMetadata)7 ConnectorMetadata (io.trino.spi.connector.ConnectorMetadata)7 ConnectorSession (io.trino.spi.connector.ConnectorSession)7 SortingProperty (io.trino.spi.connector.SortingProperty)7 SchemaTableName (io.trino.spi.connector.SchemaTableName)6 Test (org.testng.annotations.Test)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 Constraint (io.trino.spi.connector.Constraint)5 TupleDomain (io.trino.spi.predicate.TupleDomain)5 List (java.util.List)5 Optional (java.util.Optional)5 ImmutableSet (com.google.common.collect.ImmutableSet)4 Slice (io.airlift.slice.Slice)4 HiveColumnHandle.bucketColumnHandle (io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle)4 ConnectorOutputTableHandle (io.trino.spi.connector.ConnectorOutputTableHandle)4