Search in sources :

Example 31 with TypeManager

use of io.trino.spi.type.TypeManager in project trino by trinodb.

From the class HiveMetadata, method finishCreateTable.

/**
 * Finishes a table-creating write by registering the new table, and any partitions that were
 * written, with the metastore.
 *
 * <p>The JSON-encoded partition updates in {@code fragments} are decoded and merged. If the table
 * is bucketed and {@code isCreateEmptyBucketFiles(session)} is true, empty files are created for
 * the buckets that received no data. Statistics are then assembled and the table (plus one
 * partition per merged update, for partitioned tables) is handed to the metastore.
 *
 * @param session the session issuing the write
 * @param tableHandle the output handle; cast unconditionally to {@link HiveOutputTableHandle}
 * @param fragments JSON-serialized {@link PartitionUpdate}s produced by the writers
 * @param computedStatistics statistics collected during the write
 * @return a {@link HiveWrittenPartitions} carrying the names of the merged partition updates
 */
@Override
public Optional<ConnectorOutputMetadata> finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle;

    List<PartitionUpdate> partitionUpdates = fragments.stream()
            .map(Slice::getBytes)
            .map(partitionUpdateCodec::fromJson)
            .collect(toImmutableList());

    WriteInfo writeInfo = locationService.getQueryWriteInfo(handle.getLocationHandle());
    Table table = buildTableObject(
            session.getQueryId(),
            handle.getSchemaName(),
            handle.getTableName(),
            handle.getTableOwner(),
            handle.getInputColumns(),
            handle.getTableStorageFormat(),
            handle.getPartitionedBy(),
            handle.getBucketProperty(),
            handle.getAdditionalTableParameters(),
            Optional.of(writeInfo.getTargetPath()),
            handle.isExternal(),
            prestoVersion,
            accessControlMetadata.isUsingSystemSecurity());
    PrincipalPrivileges principalPrivileges = accessControlMetadata.isUsingSystemSecurity() ? NO_PRIVILEGES : buildInitialPrivilegeSet(handle.getTableOwner());

    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);

    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, true, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rolling back
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
        if (handle.isTransactional()) {
            AcidTransaction transaction = handle.getTransaction();
            List<String> partitionNames = partitionUpdates.stream()
                    .map(PartitionUpdate::getName)
                    .collect(toImmutableList());
            metastore.addDynamicPartitions(handle.getSchemaName(), handle.getTableName(), partitionNames, transaction.getAcidTransactionId(), transaction.getWriteId(), AcidOperation.CREATE_TABLE);
        }
    }

    Map<String, Type> columnTypes = handle.getInputColumns().stream()
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, handle.getPartitionedBy(), columnTypes);

    PartitionStatistics tableStatistics;
    if (table.getPartitionColumns().isEmpty()) {
        // no partition columns: fold the basic statistics of all updates into table-level statistics
        HiveBasicStatistics basicStatistics = partitionUpdates.stream()
                .map(PartitionUpdate::getStatistics)
                .reduce((first, second) -> reduce(first, second, ADD))
                .orElse(createZeroStatistics());
        tableStatistics = createPartitionStatistics(basicStatistics, columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
    } else {
        // partition columns present: table-level statistics are left empty, statistics are attached per partition below
        tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of());
    }

    if (handle.getPartitionedBy().isEmpty()) {
        List<String> fileNames;
        if (partitionUpdates.isEmpty()) {
            // creating empty table via CTAS ... WITH NO DATA
            fileNames = ImmutableList.of();
        } else {
            fileNames = getOnlyElement(partitionUpdates).getFileNames();
        }
        metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), Optional.of(fileNames), false, tableStatistics, handle.isRetriesEnabled());
    } else {
        metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), Optional.empty(), false, tableStatistics, false);

        if (isRespectTableFormat(session)) {
            verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat());
        }
        for (PartitionUpdate update : partitionUpdates) {
            // build the partition once and use it both for the statistics lookup and for registration
            Partition partition = buildPartitionObject(session, table, update);
            PartitionStatistics partitionStatistics = createPartitionStatistics(update.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, update.getWritePath(), Optional.of(update.getFileNames()), partitionStatistics, handle.isRetriesEnabled());
        }
    }

    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream()
            .map(PartitionUpdate::getName)
            .collect(toImmutableList())));
}
Also used : HiveTableProperties.getBucketProperty(io.trino.plugin.hive.HiveTableProperties.getBucketProperty) InsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior) HiveSessionProperties.getQueryPartitionFilterRequiredSchemas(io.trino.plugin.hive.HiveSessionProperties.getQueryPartitionFilterRequiredSchemas) ORC_BLOOM_FILTER_FPP(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP) FileSystem(org.apache.hadoop.fs.FileSystem) HiveBucketing.isSupportedBucketing(io.trino.plugin.hive.util.HiveBucketing.isSupportedBucketing) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.hive.HiveTableProperties.ANALYZE_COLUMNS_PROPERTY) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) HiveApplyProjectionUtil.replaceWithNewVariables(io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveTableProperties.getFooterSkipCount(io.trino.plugin.hive.HiveTableProperties.getFooterSkipCount) TABLE_NOT_FOUND(io.trino.spi.StandardErrorCode.TABLE_NOT_FOUND) Map(java.util.Map) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) HiveSessionProperties.isBucketExecutionEnabled(io.trino.plugin.hive.HiveSessionProperties.isBucketExecutionEnabled) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Domain(io.trino.spi.predicate.Domain) HiveSessionProperties.isStatisticsEnabled(io.trino.plugin.hive.HiveSessionProperties.isStatisticsEnabled) NULL_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.NULL_FORMAT_PROPERTY) HiveUtil.verifyPartitionTypeSupported(io.trino.plugin.hive.util.HiveUtil.verifyPartitionTypeSupported) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) Stream(java.util.stream.Stream) 
AcidUtils.deltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deltaSubdir) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) HIVE_CONCURRENT_MODIFICATION_DETECTED(io.trino.plugin.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED) HIVE_UNKNOWN_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR) TableScanRedirectApplicationResult(io.trino.spi.connector.TableScanRedirectApplicationResult) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) HiveSessionProperties.isSortedWritingEnabled(io.trino.plugin.hive.HiveSessionProperties.isSortedWritingEnabled) Joiner(com.google.common.base.Joiner) Partition(io.trino.plugin.hive.metastore.Partition) HiveColumnHandle.updateRowIdColumnHandle(io.trino.plugin.hive.HiveColumnHandle.updateRowIdColumnHandle) HiveUtil(io.trino.plugin.hive.util.HiveUtil) HiveWriterFactory.computeNonTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeNonTransactionalBucketedFilename) BUCKETED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) Variable(io.trino.spi.expression.Variable) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) Supplier(java.util.function.Supplier) HiveTableProperties.getSingleCharacterProperty(io.trino.plugin.hive.HiveTableProperties.getSingleCharacterProperty) TimestampType(io.trino.spi.type.TimestampType) ADD(io.trino.plugin.hive.util.Statistics.ReduceOperator.ADD) 
HiveSessionProperties.getCompressionCodec(io.trino.plugin.hive.HiveSessionProperties.getCompressionCodec) OptionalLong(java.util.OptionalLong) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) PATH_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PATH_COLUMN_NAME) GrantInfo(io.trino.spi.security.GrantInfo) MaterializedViewFreshness(io.trino.spi.connector.MaterializedViewFreshness) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveWriteUtils(io.trino.plugin.hive.util.HiveWriteUtils) HiveAnalyzeProperties.getColumnNames(io.trino.plugin.hive.HiveAnalyzeProperties.getColumnNames) MapType(io.trino.spi.type.MapType) HIVE_UNSUPPORTED_FORMAT(io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) HiveTableProperties.getPartitionedBy(io.trino.plugin.hive.HiveTableProperties.getPartitionedBy) CatalogName(io.trino.plugin.base.CatalogName) HiveSessionProperties.isCollectColumnStatisticsOnWrite(io.trino.plugin.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite) AVRO_SCHEMA_URL(io.trino.plugin.hive.HiveTableProperties.AVRO_SCHEMA_URL) HiveBucketing.getHiveBucketHandle(io.trino.plugin.hive.util.HiveBucketing.getHiveBucketHandle) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) HiveTableProperties.getHeaderSkipCount(io.trino.plugin.hive.HiveTableProperties.getHeaderSkipCount) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) Boolean.parseBoolean(java.lang.Boolean.parseBoolean) HiveWriteUtils.initializeSerializer(io.trino.plugin.hive.util.HiveWriteUtils.initializeSerializer) 
HiveSessionProperties.isCreateEmptyBucketFiles(io.trino.plugin.hive.HiveSessionProperties.isCreateEmptyBucketFiles) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) HiveSessionProperties.isNonTransactionalOptimizeEnabled(io.trino.plugin.hive.HiveSessionProperties.isNonTransactionalOptimizeEnabled) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HivePartitionManager.extractPartitionValues(io.trino.plugin.hive.HivePartitionManager.extractPartitionValues) HiveTableProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveTableProperties.getHiveStorageFormat) HiveTableProperties.getAvroSchemaUrl(io.trino.plugin.hive.HiveTableProperties.getAvroSchemaUrl) CompressionConfigUtil.configureCompression(io.trino.plugin.hive.util.CompressionConfigUtil.configureCompression) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) Database(io.trino.plugin.hive.metastore.Database) DIRECT_TO_TARGET_EXISTING_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) MetadataProvider(io.trino.spi.connector.MetadataProvider) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Collectors.toMap(java.util.stream.Collectors.toMap) Block(io.trino.spi.block.Block) ViewReaderUtil.encodeViewData(io.trino.plugin.hive.ViewReaderUtil.encodeViewData) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) Statistics.reduce(io.trino.plugin.hive.util.Statistics.reduce) ImmutableSet(com.google.common.collect.ImmutableSet) 
SemiTransactionalHiveMetastore.cleanExtraOutputFiles(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore.cleanExtraOutputFiles) Collection(java.util.Collection) VIEW_STORAGE_FORMAT(io.trino.plugin.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ROW_COUNT(io.trino.spi.statistics.TableStatisticType.ROW_COUNT) Constraint.alwaysTrue(io.trino.spi.connector.Constraint.alwaysTrue) PRESTO_VIEW_FLAG(io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) BIGINT(io.trino.spi.type.BigintType.BIGINT) StandardErrorCode(io.trino.spi.StandardErrorCode) SORTED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) AcidTransaction.forCreateTable(io.trino.plugin.hive.acid.AcidTransaction.forCreateTable) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) TableStatisticType(io.trino.spi.statistics.TableStatisticType) JsonCodec(io.airlift.json.JsonCodec) Constraint(io.trino.spi.connector.Constraint) HiveSessionProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveSessionProperties.getHiveStorageFormat) AUTO_PURGE(io.trino.plugin.hive.HiveTableProperties.AUTO_PURGE) NANOSECONDS(io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS) CompletableFuture(java.util.concurrent.CompletableFuture) HiveSessionProperties.isProjectionPushdownEnabled(io.trino.plugin.hive.HiveSessionProperties.isProjectionPushdownEnabled) Statistics.createComputedStatisticsToPartitionMap(io.trino.plugin.hive.util.Statistics.createComputedStatisticsToPartitionMap) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HiveTableProperties.isTransactional(io.trino.plugin.hive.HiveTableProperties.isTransactional) 
HashSet(java.util.HashSet) ViewReaderUtil.createViewReader(io.trino.plugin.hive.ViewReaderUtil.createViewReader) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) ImmutableList(com.google.common.collect.ImmutableList) TEXTFILE_FIELD_SEPARATOR_ESCAPE(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR_ESCAPE) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) NoSuchElementException(java.util.NoSuchElementException) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) RetryMode(io.trino.spi.connector.RetryMode) NEW(io.trino.plugin.hive.PartitionUpdate.UpdateMode.NEW) HIVE_VIEW_TRANSLATION_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_VIEW_TRANSLATION_ERROR) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveWriteUtils.isFileCreatedByQuery(io.trino.plugin.hive.util.HiveWriteUtils.isFileCreatedByQuery) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) JobConf(org.apache.hadoop.mapred.JobConf) HiveTableProperties.getNullFormat(io.trino.plugin.hive.HiveTableProperties.getNullFormat) HiveTableProperties.getOrcBloomFilterFpp(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterFpp) OrcAcidVersion.writeVersionFile(org.apache.hadoop.hive.ql.io.AcidUtils.OrcAcidVersion.writeVersionFile) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) HiveWriterFactory.computeTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeTransactionalBucketedFilename) TypeManager(io.trino.spi.type.TypeManager) SystemTables.getSourceTableNameFromSystemTable(io.trino.plugin.hive.util.SystemTables.getSourceTableNameFromSystemTable) HiveTableProperties.getExternalLocation(io.trino.plugin.hive.HiveTableProperties.getExternalLocation) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) 
ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) HiveAnalyzeProperties.getPartitionList(io.trino.plugin.hive.HiveAnalyzeProperties.getPartitionList) Column(io.trino.plugin.hive.metastore.Column) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PARTITION_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PARTITION_COLUMN_NAME) ORC_BLOOM_FILTER_COLUMNS(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS) Table(io.trino.plugin.hive.metastore.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PARTITIONED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) Set(java.util.Set) MetastoreUtil.getHiveSchema(io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema) SchemaTableName(io.trino.spi.connector.SchemaTableName) SortingProperty(io.trino.spi.connector.SortingProperty) HIVE_COLUMN_ORDER_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) CSV_QUOTE(io.trino.plugin.hive.HiveTableProperties.CSV_QUOTE) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) MetastoreUtil.getProtectMode(io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode) HIVE_INVALID_VIEW_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) ConnectorTablePartitioning(io.trino.spi.connector.ConnectorTablePartitioning) 
RemoteIterator(org.apache.hadoop.fs.RemoteIterator) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) ConfigurationUtils.toJobConf(io.trino.plugin.hive.util.ConfigurationUtils.toJobConf) NON_TRANSACTIONAL_OPTIMIZE_ENABLED(io.trino.plugin.hive.HiveSessionProperties.NON_TRANSACTIONAL_OPTIMIZE_ENABLED) HiveUtil.columnExtraInfo(io.trino.plugin.hive.util.HiveUtil.columnExtraInfo) ArrayList(java.util.ArrayList) PrincipalPrivileges.fromHivePrivilegeInfos(io.trino.plugin.hive.metastore.PrincipalPrivileges.fromHivePrivilegeInfos) ColumnHandle(io.trino.spi.connector.ColumnHandle) HiveUtil.hiveColumnHandles(io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) STORAGE_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) ViewReaderUtil.isPrestoView(io.trino.plugin.hive.ViewReaderUtil.isPrestoView) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveSessionProperties.isRespectTableFormat(io.trino.plugin.hive.HiveSessionProperties.isRespectTableFormat) RetryDriver.retry(io.trino.plugin.hive.util.RetryDriver.retry) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) Properties(java.util.Properties) HiveWriteUtils.checkTableIsWritable(io.trino.plugin.hive.util.HiveWriteUtils.checkTableIsWritable) EXTERNAL_LOCATION_PROPERTY(io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY) TEXTFILE_FIELD_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR) ConnectorSession(io.trino.spi.connector.ConnectorSession) HiveStatisticsProvider(io.trino.plugin.hive.statistics.HiveStatisticsProvider) RoleGrant(io.trino.spi.security.RoleGrant) 
File(java.io.File) HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore(io.trino.plugin.hive.HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore) HiveUtil.isSparkBucketedTable(io.trino.plugin.hive.util.HiveUtil.isSparkBucketedTable) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) Sets.intersection(com.google.common.collect.Sets.intersection) TableType(org.apache.hadoop.hive.metastore.TableType) HiveSessionProperties.isParallelPartitionedBucketedWrites(io.trino.plugin.hive.HiveSessionProperties.isParallelPartitionedBucketedWrites) ViewReaderUtil.isHiveOrPrestoView(io.trino.plugin.hive.ViewReaderUtil.isHiveOrPrestoView) HiveSessionProperties.isQueryPartitionFilterRequired(io.trino.plugin.hive.HiveSessionProperties.isQueryPartitionFilterRequired) HIVE_WRITER_CLOSE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR) URL(java.net.URL) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) HiveBucketing(io.trino.plugin.hive.util.HiveBucketing) Iterables.concat(com.google.common.collect.Iterables.concat) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) ErrorType(io.trino.spi.ErrorType) HIVE_FILESYSTEM_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) Splitter(com.google.common.base.Splitter) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) OVERWRITE(io.trino.plugin.hive.PartitionUpdate.UpdateMode.OVERWRITE) SKIP_HEADER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_HEADER_LINE_COUNT) RowType(io.trino.spi.type.RowType) ImmutableMap(com.google.common.collect.ImmutableMap) AccessControlMetadata(io.trino.plugin.hive.security.AccessControlMetadata) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) 
AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HiveTableProperties.getAnalyzeColumns(io.trino.plugin.hive.HiveTableProperties.getAnalyzeColumns) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Statistics.fromComputedStatistics(io.trino.plugin.hive.util.Statistics.fromComputedStatistics) HiveApplyProjectionUtil.find(io.trino.plugin.hive.HiveApplyProjectionUtil.find) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) MetastoreUtil.verifyOnline(io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline) FILE_SIZE_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME) Assignment(io.trino.spi.connector.Assignment) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) SystemTable(io.trino.spi.connector.SystemTable) CSV_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.CSV_SEPARATOR) HiveWriteUtils.isWritableType(io.trino.plugin.hive.util.HiveWriteUtils.isWritableType) HiveSessionProperties.isOptimizedMismatchedBucketCount(io.trino.plugin.hive.HiveSessionProperties.isOptimizedMismatchedBucketCount) Logger(io.airlift.log.Logger) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) HashMap(java.util.HashMap) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) AcidSchema(io.trino.plugin.hive.acid.AcidSchema) 
HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Verify.verify(com.google.common.base.Verify.verify) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) HiveTableProperties.getOrcBloomFilterColumns(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterColumns) HiveApplyProjectionUtil.extractSupportedProjectedColumns(io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns) Privilege(io.trino.spi.security.Privilege) VerifyException(com.google.common.base.VerifyException) APPEND(io.trino.plugin.hive.PartitionUpdate.UpdateMode.APPEND) HiveSessionProperties.getInsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.getInsertExistingPartitionsBehavior) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) NO_PRIVILEGES(io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES) MalformedURLException(java.net.MalformedURLException) HiveSessionProperties.isPropagateTableScanSortingProperties(io.trino.plugin.hive.HiveSessionProperties.isPropagateTableScanSortingProperties) Statistics.createEmptyPartitionStatistics(io.trino.plugin.hive.util.Statistics.createEmptyPartitionStatistics) TupleDomain(io.trino.spi.predicate.TupleDomain) Maps(com.google.common.collect.Maps) OptimizeTableProcedure(io.trino.plugin.hive.procedure.OptimizeTableProcedure) SKIP_FOOTER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_FOOTER_LINE_COUNT) SYNTHESIZED(io.trino.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) LocalProperty(io.trino.spi.connector.LocalProperty) BUCKET_COUNT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) VisibleForTesting(com.google.common.annotations.VisibleForTesting) 
HiveUtil.getPartitionKeyColumnHandles(io.trino.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles) Collections(java.util.Collections) CSV_ESCAPE(io.trino.plugin.hive.HiveTableProperties.CSV_ESCAPE) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) Partition(io.trino.plugin.hive.metastore.Partition) AcidTransaction.forCreateTable(io.trino.plugin.hive.acid.AcidTransaction.forCreateTable) SystemTables.getSourceTableNameFromSystemTable(io.trino.plugin.hive.util.SystemTables.getSourceTableNameFromSystemTable) Table(io.trino.plugin.hive.metastore.Table) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isSparkBucketedTable(io.trino.plugin.hive.util.HiveUtil.isSparkBucketedTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) SystemTable(io.trino.spi.connector.SystemTable) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) TimestampType(io.trino.spi.type.TimestampType) MapType(io.trino.spi.type.MapType) TableStatisticType(io.trino.spi.statistics.TableStatisticType) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) TableType(org.apache.hadoop.hive.metastore.TableType) ErrorType(io.trino.spi.ErrorType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) HiveWriteUtils.isWritableType(io.trino.plugin.hive.util.HiveWriteUtils.isWritableType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) Statistics.createEmptyPartitionStatistics(io.trino.plugin.hive.util.Statistics.createEmptyPartitionStatistics) Slice(io.airlift.slice.Slice) 
ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) Statistics.fromComputedStatistics(io.trino.plugin.hive.util.Statistics.fromComputedStatistics) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) ImmutableList(com.google.common.collect.ImmutableList) HiveAnalyzeProperties.getPartitionList(io.trino.plugin.hive.HiveAnalyzeProperties.getPartitionList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List)

Example 32 with TypeManager

use of io.trino.spi.type.TypeManager in project trino by trinodb.

From the class HiveSplitManager, method getSplits.

/**
 * Produces the split source for scanning a Hive table.
 *
 * <p>The table is looked up through the transaction's metastore and rejected if it carries a
 * non-empty {@code OBJECT_NOT_READABLE} parameter. If no partitions are selected, a fixed, empty
 * split source is returned. Otherwise a {@link BackgroundHiveSplitLoader} is created over the
 * partitions (sorted by partition id, descending), attached to a {@link HiveSplitSource} chosen
 * by the scheduling strategy, and started.
 *
 * @param transaction the transaction whose metastore is used
 * @param session the session issuing the scan
 * @param tableHandle the table to scan; cast unconditionally to {@link HiveTableHandle}
 * @param splitSchedulingStrategy grouped or ungrouped scheduling
 * @param dynamicFilter dynamic filter passed through to the split loader
 * @return the split source feeding the scan
 * @throws TableNotFoundException if the metastore does not return the table
 * @throws HiveNotReadableException if the table is marked as not readable
 * @throws TrinoException if grouped scheduling is requested but the handle has no bucket handle
 * @throws IllegalArgumentException if the scheduling strategy is not one of the handled values
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
    HiveTableHandle hiveHandle = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveHandle.getSchemaTableName();

    // look the table up through the metastore bound to this transaction
    SemiTransactionalHiveMetastore metastore = transactionManager.get(transaction, session.getIdentity()).getMetastore();
    Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(schemaTableName));

    // refuse to read a table that has been flagged as non-readable
    String notReadableReason = table.getParameters().get(OBJECT_NOT_READABLE);
    if (!isNullOrEmpty(notReadableReason)) {
        throw new HiveNotReadableException(schemaTableName, Optional.empty(), notReadableReason);
    }

    List<HivePartition> partitions = partitionManager.getOrLoadPartitions(metastore, hiveHandle);

    // nothing to scan: hand back an empty, fixed split source
    if (partitions.isEmpty()) {
        if (!hiveHandle.isRecordScannedFiles()) {
            return new FixedSplitSource(ImmutableList.of());
        }
        return new FixedSplitSource(ImmutableList.of(), ImmutableList.of());
    }

    // bucket filter and bucket handle both come from the table handle
    Optional<HiveBucketFilter> bucketFilter = hiveHandle.getBucketFilter();
    Optional<HiveBucketHandle> bucketHandle = hiveHandle.getBucketHandle();

    // grouped scheduling is only valid when a bucket handle is present
    if ((splitSchedulingStrategy == GROUPED_SCHEDULING) && bucketHandle.isEmpty()) {
        throw new TrinoException(GENERIC_INTERNAL_ERROR, "SchedulingPolicy is bucketed, but BucketHandle is not present");
    }

    // order partitions by partition id, descending
    List<HivePartition> sortedPartitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
    Iterable<HivePartitionMetadata> partitionMetadata = getPartitionMetadata(
            session,
            metastore,
            table,
            schemaTableName,
            sortedPartitions,
            bucketHandle.map(HiveBucketHandle::toTableBucketProperty));

    // Only one thread per partition is usable when a table is not transactional
    int concurrency = splitLoaderConcurrency;
    if (!isTransactionalTable(table.getParameters())) {
        concurrency = min(splitLoaderConcurrency, sortedPartitions.size());
    }

    HiveSplitLoader splitLoader = new BackgroundHiveSplitLoader(
            table,
            hiveHandle.getTransaction(),
            partitionMetadata,
            hiveHandle.getCompactEffectivePredicate(),
            dynamicFilter,
            getDynamicFilteringWaitTimeout(session),
            typeManager,
            createBucketSplitInfo(bucketHandle, bucketFilter),
            session,
            hdfsEnvironment,
            namenodeStats,
            directoryLister,
            executor,
            concurrency,
            recursiveDfsWalkerEnabled,
            !hiveHandle.getPartitionColumns().isEmpty() && isIgnoreAbsentPartitions(session),
            isOptimizeSymlinkListing(session),
            metastore.getValidWriteIds(session, hiveHandle)
                    .map(writeIds -> writeIds.getTableValidWriteIdList(table.getDatabaseName() + "." + table.getTableName())),
            hiveHandle.getMaxScannedFileSize());

    HiveSplitSource result;
    switch (splitSchedulingStrategy) {
        case UNGROUPED_SCHEDULING:
            result = HiveSplitSource.allAtOnce(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    splitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveHandle.isRecordScannedFiles());
            break;
        case GROUPED_SCHEDULING:
            result = HiveSplitSource.bucketed(
                    session,
                    table.getDatabaseName(),
                    table.getTableName(),
                    maxInitialSplits,
                    maxOutstandingSplits,
                    maxOutstandingSplitsSize,
                    maxSplitsPerSecond,
                    splitLoader,
                    executor,
                    highMemorySplitSourceCounter,
                    hiveHandle.isRecordScannedFiles());
            break;
        default:
            throw new IllegalArgumentException("Unknown splitSchedulingStrategy: " + splitSchedulingStrategy);
    }

    // start loading only once the loader has a source to feed
    splitLoader.start(result);
    return result;
}
Also used : ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Iterables.transform(com.google.common.collect.Iterables.transform) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) HiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter) HiveSessionProperties.isOptimizeSymlinkListing(io.trino.plugin.hive.HiveSessionProperties.isOptimizeSymlinkListing) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) HiveSessionProperties.isIgnoreAbsentPartitions(io.trino.plugin.hive.HiveSessionProperties.isIgnoreAbsentPartitions) HiveSessionProperties.isUseParquetColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseParquetColumnNames) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) BoundedExecutor(io.airlift.concurrent.BoundedExecutor) HIVE_PARTITION_SCHEMA_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) Iterables.concat(com.google.common.collect.Iterables.concat) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Map(java.util.Map) ENGLISH(java.util.Locale.ENGLISH) Table(io.trino.plugin.hive.metastore.Table) ImmutableMap(com.google.common.collect.ImmutableMap) TableToPartitionMapping.mapColumnsByIndex(io.trino.plugin.hive.TableToPartitionMapping.mapColumnsByIndex) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TrinoException(io.trino.spi.TrinoException) Math.min(java.lang.Math.min) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) DataSize(io.airlift.units.DataSize) List(java.util.List) MetastoreUtil.verifyOnline(io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline) VersionEmbedder(io.trino.spi.VersionEmbedder) DynamicFilter(io.trino.spi.connector.DynamicFilter) 
MetastoreUtil.getProtectMode(io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode) HiveStorageFormat.getHiveStorageFormat(io.trino.plugin.hive.HiveStorageFormat.getHiveStorageFormat) SERVER_SHUTTING_DOWN(io.trino.spi.StandardErrorCode.SERVER_SHUTTING_DOWN) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) HiveCoercionPolicy.canCoerce(io.trino.plugin.hive.util.HiveCoercionPolicy.canCoerce) Partition(io.trino.plugin.hive.metastore.Partition) HiveUtil(io.trino.plugin.hive.util.HiveUtil) Nested(org.weakref.jmx.Nested) BucketSplitInfo.createBucketSplitInfo(io.trino.plugin.hive.BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) CounterStat(io.airlift.stats.CounterStat) UNPARTITIONED_ID(io.trino.plugin.hive.HivePartition.UNPARTITIONED_ID) HiveSessionProperties.isUseOrcColumnNames(io.trino.plugin.hive.HiveSessionProperties.isUseOrcColumnNames) HIVE_PARTITION_DROPPED_DURING_QUERY(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY) Inject(javax.inject.Inject) GROUPED_SCHEDULING(io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.GROUPED_SCHEDULING) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Managed(org.weakref.jmx.Managed) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) Nullable(javax.annotation.Nullable) ExecutorService(java.util.concurrent.ExecutorService) Iterator(java.util.Iterator) Executor(java.util.concurrent.Executor) 
HiveSessionProperties.isPropagateTableScanSortingProperties(io.trino.plugin.hive.HiveSessionProperties.isPropagateTableScanSortingProperties) AbstractIterator(com.google.common.collect.AbstractIterator) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ConnectorSession(io.trino.spi.connector.ConnectorSession) GENERIC_INTERNAL_ERROR(io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Ordering(com.google.common.collect.Ordering) HiveSessionProperties.getDynamicFilteringWaitTimeout(io.trino.plugin.hive.HiveSessionProperties.getDynamicFilteringWaitTimeout) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) TypeManager(io.trino.spi.type.TypeManager) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) Table(io.trino.plugin.hive.metastore.Table) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) SchemaTableName(io.trino.spi.connector.SchemaTableName) HiveBucketFilter(io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) FixedSplitSource(io.trino.spi.connector.FixedSplitSource) TrinoException(io.trino.spi.TrinoException)

Example 33 with TypeManager

use of io.trino.spi.type.TypeManager in project trino by trinodb.

the class HiveWriterFactory method createWriter.

/**
 * Creates a {@link HiveWriter} for the partition described by the row at {@code position}
 * of {@code partitionColumns}, optionally targeting a specific bucket.
 *
 * <p>The method decides, from whether the table and the partition already exist and from
 * {@code insertExistingPartitionsBehavior}, which {@link UpdateMode}, schema, write location and
 * storage format to use. It then picks the first {@link HiveFileWriterFactory} able to produce a
 * file writer (falling back to {@link RecordFileWriter}) and, when {@code sortedBy} is not empty,
 * wraps that writer in a {@link SortingFileWriter}.
 *
 * @param partitionColumns page holding the partition column values
 * @param position row within {@code partitionColumns} whose values identify the partition
 * @param bucketNumber bucket to write; must be present exactly when the table is bucketed
 * @return a writer bound to the computed write path and target path
 * @throws IllegalArgumentException if {@code bucketNumber} is inconsistent with the table's bucketing,
 *         or the configured insert behavior is not one of the handled values
 * @throws TrinoException if the target directory of a new partition already exists, the table or
 *         partition may not be written under the configured insert behavior, the partition schema
 *         differs from the table schema, a sorting column is missing, or the temporary sort
 *         file system cannot be opened
 */
public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt bucketNumber) {
    if (bucketCount.isPresent()) {
        checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table");
        checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(), "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount);
    } else {
        checkArgument(bucketNumber.isEmpty(), "Bucket number provided for table that is not bucketed");
    }
    List<String> partitionValues = createPartitionValues(partitionColumnTypes, partitionColumns, position);
    Optional<String> partitionName;
    if (!partitionColumnNames.isEmpty()) {
        partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues));
    } else {
        partitionName = Optional.empty();
    }
    // attempt to get the existing partition (if this is an existing partitioned table)
    Optional<Partition> partition = Optional.empty();
    if (!partitionValues.isEmpty() && table != null) {
        partition = pageSinkMetadataProvider.getPartition(partitionValues);
    }
    UpdateMode updateMode;
    Properties schema;
    WriteInfo writeInfo;
    StorageFormat outputStorageFormat;
    if (partition.isEmpty()) {
        if (table == null) {
            // Write to: a new partition in a new partitioned table,
            // or a new unpartitioned table.
            updateMode = UpdateMode.NEW;
            schema = new Properties();
            schema.setProperty(IOConstants.COLUMNS, dataColumns.stream().map(DataColumn::getName).collect(joining(",")));
            schema.setProperty(IOConstants.COLUMNS_TYPES, dataColumns.stream().map(DataColumn::getHiveType).map(HiveType::getHiveTypeName).map(HiveTypeName::toString).collect(joining(":")));
            if (partitionName.isEmpty()) {
                // new unpartitioned table
                writeInfo = locationService.getTableWriteInfo(locationHandle, false);
            } else {
                // a new partition in a new partitioned table
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
                if (!writeInfo.getWriteMode().isWritePathSameAsTargetPath()) {
                    // verify that the target directory for the partition does not already exist
                    if (HiveWriteUtils.pathExists(new HdfsContext(session), hdfsEnvironment, writeInfo.getTargetPath())) {
                        // unwrap the Optional so the message shows the partition name rather than "Optional[...]"
                        throw new TrinoException(HIVE_PATH_ALREADY_EXISTS, format("Target directory for new partition '%s' of table '%s.%s' already exists: %s", partitionName.get(), schemaName, tableName, writeInfo.getTargetPath()));
                    }
                }
            }
        } else {
            // Write to: a new partition in an existing partitioned table,
            // or an existing unpartitioned table
            if (partitionName.isPresent()) {
                // a new partition in an existing partitioned table
                updateMode = UpdateMode.NEW;
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
            } else {
                switch(insertExistingPartitionsBehavior) {
                    case APPEND:
                        updateMode = UpdateMode.APPEND;
                        writeInfo = locationService.getTableWriteInfo(locationHandle, false);
                        break;
                    case OVERWRITE:
                        updateMode = UpdateMode.OVERWRITE;
                        writeInfo = locationService.getTableWriteInfo(locationHandle, true);
                        break;
                    case ERROR:
                        throw new TrinoException(HIVE_TABLE_READ_ONLY, "Unpartitioned Hive tables are immutable");
                    default:
                        throw new IllegalArgumentException("Unsupported insert existing table behavior: " + insertExistingPartitionsBehavior);
                }
            }
            schema = getHiveSchema(table);
        }
        if (partitionName.isPresent()) {
            // Write to a new partition
            outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
        } else {
            // Write to a new/existing unpartitioned table
            outputStorageFormat = fromHiveStorageFormat(tableStorageFormat);
        }
    } else {
        switch(insertExistingPartitionsBehavior) {
            // Write to: an existing partition in an existing partitioned table
            case APPEND:
                // Append to an existing partition
                updateMode = UpdateMode.APPEND;
                // Check the column types in partition schema match the column types in table schema
                List<Column> tableColumns = table.getDataColumns();
                List<Column> existingPartitionColumns = partition.get().getColumns();
                // only the columns present in both schemas are compared
                for (int i = 0; i < min(existingPartitionColumns.size(), tableColumns.size()); i++) {
                    HiveType tableType = tableColumns.get(i).getType();
                    HiveType partitionType = existingPartitionColumns.get(i).getType();
                    if (!tableType.equals(partitionType)) {
                        throw new TrinoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" + "You are trying to write into an existing partition in a table. " + "The table schema has changed since the creation of the partition. " + "Inserting rows into such partition is not supported. " + "The column '%s' in table '%s' is declared as type '%s', " + "but partition '%s' declared column '%s' as type '%s'.", tableColumns.get(i).getName(), tableName, tableType, partitionName.get(), existingPartitionColumns.get(i).getName(), partitionType));
                    }
                }
                HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get());
                outputStorageFormat = partition.get().getStorage().getStorageFormat();
                schema = getHiveSchema(partition.get(), table);
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
                break;
            case OVERWRITE:
                // Overwrite an existing partition
                //
                // The behavior of overwrite considered as if first dropping the partition and inserting a new partition, thus:
                // * No partition writable check is required.
                // * Table schema and storage format is used for the new partition (instead of existing partition schema and storage format).
                updateMode = UpdateMode.OVERWRITE;
                outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
                schema = getHiveSchema(table);
                writeInfo = locationService.getPartitionWriteInfo(locationHandle, Optional.empty(), partitionName.get());
                break;
            case ERROR:
                throw new TrinoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionName.get());
            default:
                throw new IllegalArgumentException(format("Unsupported insert existing partitions behavior: %s", insertExistingPartitionsBehavior));
        }
    }
    additionalTableParameters.forEach(schema::setProperty);
    validateSchema(partitionName, schema);
    // unbucketed writes use bucket 0 when building the ACID bucket path
    int bucketToUse = bucketNumber.isEmpty() ? 0 : bucketNumber.getAsInt();
    Path path;
    String fileNameWithExtension;
    if (transaction.isAcidTransactionRunning()) {
        String subdir = computeAcidSubdir(transaction);
        Path subdirPath = new Path(writeInfo.getWritePath(), subdir);
        // NOTE(review): table is dereferenced without a null check here, unlike the useAcidSchema
        // computation below - presumably an ACID transaction only runs against an existing table; confirm
        path = createHiveBucketPath(subdirPath, bucketToUse, table.getParameters());
        fileNameWithExtension = path.getName();
    } else {
        String fileName = computeFileName(bucketNumber);
        fileNameWithExtension = fileName + getFileExtension(conf, outputStorageFormat);
        path = new Path(writeInfo.getWritePath(), fileNameWithExtension);
    }
    boolean useAcidSchema = isCreateTransactionalTable || (table != null && isFullAcidTable(table.getParameters()));
    // the first factory that returns a writer wins
    FileWriter hiveFileWriter = null;
    for (HiveFileWriterFactory fileWriterFactory : fileWriterFactories) {
        Optional<FileWriter> fileWriter = fileWriterFactory.createFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, conf, session, bucketNumber, transaction, useAcidSchema, WriterKind.INSERT);
        if (fileWriter.isPresent()) {
            hiveFileWriter = fileWriter.get();
            break;
        }
    }
    if (hiveFileWriter == null) {
        // no factory produced a writer: fall back to the generic record writer
        hiveFileWriter = new RecordFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, partitionStorageFormat.getEstimatedWriterMemoryUsage(), conf, typeManager, parquetTimeZone, session);
    }
    // captured before the optional SortingFileWriter wrapping below, so the event reports the underlying writer class
    String writerImplementation = hiveFileWriter.getClass().getName();
    Consumer<HiveWriter> onCommit = hiveWriter -> {
        Optional<Long> size;
        try {
            size = Optional.of(hiveWriter.getWrittenBytes());
        } catch (RuntimeException e) {
            // Do not fail the query if file system is not available
            size = Optional.empty();
        }
        eventClient.post(new WriteCompletedEvent(session.getQueryId(), path.toString(), schemaName, tableName, partitionName.orElse(null), outputStorageFormat.getOutputFormat(), writerImplementation, nodeManager.getCurrentNode().getVersion(), nodeManager.getCurrentNode().getHost(), session.getIdentity().getPrincipal().map(Principal::getName).orElse(null), nodeManager.getEnvironment(), sessionProperties, size.orElse(null), hiveWriter.getRowCount()));
    };
    if (!sortedBy.isEmpty()) {
        FileSystem fileSystem;
        Path tempFilePath;
        if (sortedWritingTempStagingPathEnabled) {
            String tempPrefix = sortedWritingTempStagingPath.replace("${USER}", new HdfsContext(session).getIdentity().getUser());
            tempFilePath = new Path(tempPrefix, ".tmp-sort." + path.getParent().getName() + "." + path.getName());
        } else {
            tempFilePath = new Path(path.getParent(), ".tmp-sort." + path.getName());
        }
        try {
            Configuration configuration = new Configuration(conf);
            // Explicitly set the default FS to local file system to avoid getting HDFS when sortedWritingTempStagingPath specifies no scheme
            configuration.set(FS_DEFAULT_NAME_KEY, "file:///");
            fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), tempFilePath, configuration);
        } catch (IOException e) {
            throw new TrinoException(HIVE_WRITER_OPEN_ERROR, e);
        }
        List<Type> types = dataColumns.stream().map(column -> column.getHiveType().getType(typeManager, getTimestampPrecision(session))).collect(toImmutableList());
        // map each data column name to its position so sorting columns can be resolved to field indexes
        Map<String, Integer> columnIndexes = new HashMap<>();
        for (int i = 0; i < dataColumns.size(); i++) {
            columnIndexes.put(dataColumns.get(i).getName(), i);
        }
        List<Integer> sortFields = new ArrayList<>();
        List<SortOrder> sortOrders = new ArrayList<>();
        for (SortingColumn column : sortedBy) {
            Integer index = columnIndexes.get(column.getColumnName());
            if (index == null) {
                throw new TrinoException(HIVE_INVALID_METADATA, format("Sorting column '%s' does not exist in table '%s.%s'", column.getColumnName(), schemaName, tableName));
            }
            sortFields.add(index);
            sortOrders.add(column.getOrder().getSortOrder());
        }
        hiveFileWriter = new SortingFileWriter(fileSystem, tempFilePath, hiveFileWriter, sortBufferSize, maxOpenSortFiles, types, sortFields, sortOrders, pageSorter, typeManager.getTypeOperators(), OrcFileWriterFactory::createOrcDataSink);
    }
    return new HiveWriter(hiveFileWriter, partitionName, updateMode, fileNameWithExtension, writeInfo.getWritePath().toString(), writeInfo.getTargetPath().toString(), onCommit, hiveWriterStats);
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) InsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior) OrcFileWriterFactory(io.trino.plugin.hive.orc.OrcFileWriterFactory) FileSystem(org.apache.hadoop.fs.FileSystem) NodeManager(io.trino.spi.NodeManager) CompressionConfigUtil.configureCompression(io.trino.plugin.hive.util.CompressionConfigUtil.configureCompression) DIRECT_TO_TARGET_EXISTING_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY) AcidUtils.isInsertOnlyTable(org.apache.hadoop.hive.ql.io.AcidUtils.isInsertOnlyTable) HiveUtil.getColumnNames(io.trino.plugin.hive.util.HiveUtil.getColumnNames) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Matcher(java.util.regex.Matcher) Column(io.trino.plugin.hive.metastore.Column) HiveIgnoreKeyTextOutputFormat(org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat) Collectors.toMap(java.util.stream.Collectors.toMap) HIVE_PARTITION_SCHEMA_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) PageSorter(io.trino.spi.PageSorter) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HIVE_FILESYSTEM_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) ImmutableSet(com.google.common.collect.ImmutableSet) Table(io.trino.plugin.hive.metastore.Table) ImmutableMap(com.google.common.collect.ImmutableMap) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) 
HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) EventClient(io.airlift.event.client.EventClient) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) UUID(java.util.UUID) Math.min(java.lang.Math.min) HIVE_PATH_ALREADY_EXISTS(io.trino.plugin.hive.HiveErrorCode.HIVE_PATH_ALREADY_EXISTS) MetastoreUtil.getHiveSchema(io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema) AcidUtils.deleteDeltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deleteDeltaSubdir) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) Collectors.joining(java.util.stream.Collectors.joining) ReflectionUtil(org.apache.hive.common.util.ReflectionUtil) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Principal(java.security.Principal) AcidUtils.deltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deltaSubdir) HivePageSinkMetadataProvider(io.trino.plugin.hive.metastore.HivePageSinkMetadataProvider) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) FileUtils(org.apache.hadoop.hive.common.FileUtils) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) Partition(io.trino.plugin.hive.metastore.Partition) Type(io.trino.spi.type.Type) ConfigurationUtils.toJobConf(io.trino.plugin.hive.util.ConfigurationUtils.toJobConf) Page(io.trino.spi.Page) HashMap(java.util.HashMap) HiveWriteUtils.createPartitionValues(io.trino.plugin.hive.util.HiveWriteUtils.createPartitionValues) IOConstants(org.apache.hadoop.hive.ql.io.IOConstants) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) OptionalInt(java.util.OptionalInt) HIVE_PARTITION_READ_ONLY(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_READ_ONLY) 
ArrayList(java.util.ArrayList) HiveSessionProperties.getCompressionCodec(io.trino.plugin.hive.HiveSessionProperties.getCompressionCodec) HashSet(java.util.HashSet) Strings(com.google.common.base.Strings) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList(com.google.common.collect.ImmutableList) COMPRESSRESULT(org.apache.hadoop.hive.conf.HiveConf.ConfVars.COMPRESSRESULT) Objects.requireNonNull(java.util.Objects.requireNonNull) HiveWriteUtils(io.trino.plugin.hive.util.HiveWriteUtils) HiveSessionProperties.isTemporaryStagingDirectoryEnabled(io.trino.plugin.hive.HiveSessionProperties.isTemporaryStagingDirectoryEnabled) HIVE_WRITER_OPEN_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) HiveSessionProperties.getInsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.getInsertExistingPartitionsBehavior) HiveUtil.getColumnTypes(io.trino.plugin.hive.util.HiveUtil.getColumnTypes) Properties(java.util.Properties) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HIVE_UNSUPPORTED_FORMAT(io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) SortOrder(io.trino.spi.connector.SortOrder) JobConf(org.apache.hadoop.mapred.JobConf) Consumer(java.util.function.Consumer) UUID.randomUUID(java.util.UUID.randomUUID) Collectors.toList(java.util.stream.Collectors.toList) FS_DEFAULT_NAME_KEY(org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) HIVE_TABLE_READ_ONLY(io.trino.plugin.hive.HiveErrorCode.HIVE_TABLE_READ_ONLY) UpdateMode(io.trino.plugin.hive.PartitionUpdate.UpdateMode) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) TypeManager(io.trino.spi.type.TypeManager) Configuration(org.apache.hadoop.conf.Configuration) 
HashMap(java.util.HashMap) UpdateMode(io.trino.plugin.hive.PartitionUpdate.UpdateMode) ArrayList(java.util.ArrayList) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) Properties(java.util.Properties) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) FileSystem(org.apache.hadoop.fs.FileSystem) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) Path(org.apache.hadoop.fs.Path) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) Partition(io.trino.plugin.hive.metastore.Partition) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) Optional(java.util.Optional) SortOrder(io.trino.spi.connector.SortOrder) IOException(java.io.IOException) Type(io.trino.spi.type.Type) TrinoException(io.trino.spi.TrinoException) Principal(java.security.Principal)

Example 34 with TypeManager

use of io.trino.spi.type.TypeManager in project trino by trinodb.

the class MongoClientModule method createMongoSession.

/**
 * Provides the singleton {@link MongoSession}, backed by a {@link MongoClient} configured
 * from {@code config}.
 *
 * <p>Write concern, read preference, connection-pool limits, socket timeouts and the SSL flag
 * are always taken from {@code config}. The cluster is then addressed either through the
 * configured connection URL or, when no URL is set, through the seed hosts together with the
 * first configured credential (if any).
 *
 * @param typeManager type manager handed to the created session
 * @param config Mongo client configuration; must not be null
 * @return a session wrapping the newly created client
 */
@Singleton
@Provides
public static MongoSession createMongoSession(TypeManager typeManager, MongoClientConfig config) {
    requireNonNull(config, "config is null");

    MongoClientSettings.Builder settings = MongoClientSettings.builder();

    // settings that apply regardless of how the cluster is addressed
    settings.writeConcern(config.getWriteConcern().getWriteConcern());
    settings.readPreference(config.getReadPreference().getReadPreference());
    settings.applyToConnectionPoolSettings(pool -> pool
            .maxConnectionIdleTime(config.getMaxConnectionIdleTime(), MILLISECONDS)
            .maxWaitTime(config.getMaxWaitTime(), MILLISECONDS)
            .minSize(config.getMinConnectionsPerHost())
            .maxSize(config.getConnectionsPerHost()));
    settings.applyToSocketSettings(socket -> socket
            .connectTimeout(config.getConnectionTimeout(), MILLISECONDS)
            .readTimeout(config.getSocketTimeout(), MILLISECONDS));
    settings.applyToSslSettings(ssl -> ssl.enabled(config.getSslEnabled()));

    if (config.getRequiredReplicaSetName() != null) {
        settings.applyToClusterSettings(cluster -> cluster.requiredReplicaSetName(config.getRequiredReplicaSetName()));
    }

    if (config.getConnectionUrl().isPresent()) {
        // the connection string is applied after the explicit settings above
        settings.applyConnectionString(new ConnectionString(config.getConnectionUrl().get()));
    } else {
        // no URL configured: connect to the seed hosts with the first credential, when one exists
        settings.applyToClusterSettings(cluster -> cluster.hosts(config.getSeeds()));
        if (!config.getCredentials().isEmpty()) {
            settings.credential(config.getCredentials().get(0));
        }
    }

    MongoClient mongoClient = MongoClients.create(settings.build());
    return new MongoSession(typeManager, mongoClient, config);
}
Also used : Module(com.google.inject.Module) MongoClient(com.mongodb.client.MongoClient) MongoClients(com.mongodb.client.MongoClients) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Singleton(javax.inject.Singleton) Scopes(com.google.inject.Scopes) Provides(com.google.inject.Provides) ConnectionString(com.mongodb.ConnectionString) Binder(com.google.inject.Binder) Objects.requireNonNull(java.util.Objects.requireNonNull) MongoClientSettings(com.mongodb.MongoClientSettings) ConfigBinder.configBinder(io.airlift.configuration.ConfigBinder.configBinder) TypeManager(io.trino.spi.type.TypeManager) MongoClient(com.mongodb.client.MongoClient) MongoClientSettings(com.mongodb.MongoClientSettings) ConnectionString(com.mongodb.ConnectionString) Singleton(javax.inject.Singleton) Provides(com.google.inject.Provides)

Example 35 with TypeManager

use of io.trino.spi.type.TypeManager in project trino by trinodb.

the class RaptorStorageManager method toOrcFileType.

/**
 * Translates a Raptor column type into the type used for the ORC file.
 *
 * <p>{@code TIMESTAMP_MILLIS} is replaced by {@code BIGINT}. Array, map and row
 * types are rebuilt with their element, key/value and field types translated
 * recursively. Any other type is returned unchanged.
 *
 * @param raptorType the Raptor type to translate
 * @param typeManager used to construct the parameterized map type
 * @return the corresponding type for the ORC file
 */
static Type toOrcFileType(Type raptorType, TypeManager typeManager) {
    // TIMESTAMP_MILLIS is stored as BIGINT to avoid the poor encoding in ORC
    if (raptorType.equals(TIMESTAMP_MILLIS)) {
        return BIGINT;
    }

    if (raptorType instanceof ArrayType) {
        ArrayType arrayType = (ArrayType) raptorType;
        Type storedElement = toOrcFileType(arrayType.getElementType(), typeManager);
        return new ArrayType(storedElement);
    }

    if (raptorType instanceof MapType) {
        MapType mapType = (MapType) raptorType;
        TypeSignature storedKey = toOrcFileType(mapType.getKeyType(), typeManager).getTypeSignature();
        TypeSignature storedValue = toOrcFileType(mapType.getValueType(), typeManager).getTypeSignature();
        // The map type is obtained through the type manager from the translated key/value signatures
        return typeManager.getParameterizedType(
                StandardTypes.MAP,
                ImmutableList.of(
                        TypeSignatureParameter.typeParameter(storedKey),
                        TypeSignatureParameter.typeParameter(storedValue)));
    }

    if (raptorType instanceof RowType) {
        RowType rowType = (RowType) raptorType;
        ImmutableList.Builder<Field> storedFields = ImmutableList.builder();
        for (Field original : rowType.getFields()) {
            // Keep the field name, translate only the field type
            storedFields.add(new Field(original.getName(), toOrcFileType(original.getType(), typeManager)));
        }
        List<Field> fields = storedFields.build();
        return RowType.from(fields);
    }

    // Everything else is kept as-is
    return raptorType;
}
Also used : ArrayType(io.trino.spi.type.ArrayType) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) MoreFutures.allAsList(io.airlift.concurrent.MoreFutures.allAsList) OrcPredicate(io.trino.orc.OrcPredicate) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) CharType.createCharType(io.trino.spi.type.CharType.createCharType) RAPTOR_RECOVERY_TIMEOUT(io.trino.plugin.raptor.legacy.RaptorErrorCode.RAPTOR_RECOVERY_TIMEOUT) Future(java.util.concurrent.Future) ShardDelta(io.trino.plugin.raptor.legacy.metadata.ShardDelta) Slices(io.airlift.slice.Slices) Map(java.util.Map) RaptorColumnHandle.isShardUuidColumn(io.trino.plugin.raptor.legacy.RaptorColumnHandle.isShardUuidColumn) FileOrcDataSource(io.trino.orc.FileOrcDataSource) RAPTOR_ERROR(io.trino.plugin.raptor.legacy.RaptorErrorCode.RAPTOR_ERROR) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) BackupStore(io.trino.plugin.raptor.legacy.backup.BackupStore) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) Slice(io.airlift.slice.Slice) OrcFileInfo(io.trino.plugin.raptor.legacy.storage.OrcFileRewriter.OrcFileInfo) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) Page(io.trino.spi.Page) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) ShardRecorder(io.trino.plugin.raptor.legacy.metadata.ShardRecorder) ArrayList(java.util.ArrayList) OptionalLong(java.util.OptionalLong) ShardStats.computeColumnStats(io.trino.plugin.raptor.legacy.storage.ShardStats.computeColumnStats) TupleDomainOrcPredicateBuilder(io.trino.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) AggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) OrcType(io.trino.orc.metadata.OrcType) ATOMIC_MOVE(java.nio.file.StandardCopyOption.ATOMIC_MOVE) Files(java.nio.file.Files) MapType(io.trino.spi.type.MapType) 
StandardTypes(io.trino.spi.type.StandardTypes) IOException(java.io.IOException) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) UTC(org.joda.time.DateTimeZone.UTC) CatalogName(io.trino.plugin.base.CatalogName) Throwables.throwIfInstanceOf(com.google.common.base.Throwables.throwIfInstanceOf) File(java.io.File) RAPTOR_RECOVERY_ERROR(io.trino.plugin.raptor.legacy.RaptorErrorCode.RAPTOR_RECOVERY_ERROR) ExecutionException(java.util.concurrent.ExecutionException) JsonCodec.jsonCodec(io.airlift.json.JsonCodec.jsonCodec) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) ShardInfo(io.trino.plugin.raptor.legacy.metadata.ShardInfo) OrcColumn(io.trino.orc.OrcColumn) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) OrcColumnId(io.trino.orc.metadata.OrcColumnId) NodeManager(io.trino.spi.NodeManager) CompletableFuture.completedFuture(java.util.concurrent.CompletableFuture.completedFuture) TimeoutException(java.util.concurrent.TimeoutException) BackupManager(io.trino.plugin.raptor.legacy.backup.BackupManager) Duration(io.airlift.units.Duration) ColumnInfo(io.trino.plugin.raptor.legacy.metadata.ColumnInfo) PreDestroy(javax.annotation.PreDestroy) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) OrcRecordReader(io.trino.orc.OrcRecordReader) OrcDataSource(io.trino.orc.OrcDataSource) ROOT_COLUMN(io.trino.orc.metadata.OrcColumnId.ROOT_COLUMN) TypeSignature(io.trino.spi.type.TypeSignature) RowType(io.trino.spi.type.RowType) ImmutableSet(com.google.common.collect.ImmutableSet) Field(io.trino.spi.type.RowType.Field) Collection(java.util.Collection) INITIAL_BATCH_SIZE(io.trino.orc.OrcReader.INITIAL_BATCH_SIZE) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) UUID(java.util.UUID) Math.min(java.lang.Math.min) RaptorColumnHandle(io.trino.plugin.raptor.legacy.RaptorColumnHandle) FileNotFoundException(java.io.FileNotFoundException) 
Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) DecimalType(io.trino.spi.type.DecimalType) JsonCodec(io.airlift.json.JsonCodec) RaptorColumnHandle.isHiddenColumn(io.trino.plugin.raptor.legacy.RaptorColumnHandle.isHiddenColumn) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) CompletableFuture(java.util.concurrent.CompletableFuture) TupleDomainOrcPredicate(io.trino.orc.TupleDomainOrcPredicate) OptionalInt(java.util.OptionalInt) ColumnAdaptation(io.trino.plugin.raptor.legacy.storage.RaptorPageSource.ColumnAdaptation) Inject(javax.inject.Inject) ImmutableList(com.google.common.collect.ImmutableList) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Objects.requireNonNull(java.util.Objects.requireNonNull) CompletableFuture.supplyAsync(java.util.concurrent.CompletableFuture.supplyAsync) RowFieldName(io.trino.spi.type.RowFieldName) ExecutorService(java.util.concurrent.ExecutorService) OrcReader(io.trino.orc.OrcReader) RAPTOR_LOCAL_DISK_FULL(io.trino.plugin.raptor.legacy.RaptorErrorCode.RAPTOR_LOCAL_DISK_FULL) PETABYTE(io.airlift.units.DataSize.Unit.PETABYTE) XxHash64(io.airlift.slice.XxHash64) ColumnMetadata(io.trino.orc.metadata.ColumnMetadata) FileInputStream(java.io.FileInputStream) TupleDomain(io.trino.spi.predicate.TupleDomain) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) TimeUnit(java.util.concurrent.TimeUnit) RaptorColumnHandle.isShardRowIdColumn(io.trino.plugin.raptor.legacy.RaptorColumnHandle.isShardRowIdColumn) 
RaptorColumnHandle.isBucketNumberColumn(io.trino.plugin.raptor.legacy.RaptorColumnHandle.isBucketNumberColumn) Collectors.toList(java.util.stream.Collectors.toList) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ColumnStats(io.trino.plugin.raptor.legacy.metadata.ColumnStats) Closeable(java.io.Closeable) VisibleForTesting(com.google.common.annotations.VisibleForTesting) BitSet(java.util.BitSet) TypeManager(io.trino.spi.type.TypeManager) InputStream(java.io.InputStream) Field(io.trino.spi.type.RowType.Field) CharType.createCharType(io.trino.spi.type.CharType.createCharType) OrcType(io.trino.orc.metadata.OrcType) MapType(io.trino.spi.type.MapType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) TypeSignature(io.trino.spi.type.TypeSignature) RowType(io.trino.spi.type.RowType) MapType(io.trino.spi.type.MapType)

Aggregations

TypeManager (io.trino.spi.type.TypeManager)35 List (java.util.List)28 Objects.requireNonNull (java.util.Objects.requireNonNull)28 Optional (java.util.Optional)27 ConnectorSession (io.trino.spi.connector.ConnectorSession)25 TrinoException (io.trino.spi.TrinoException)23 IOException (java.io.IOException)22 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)21 Path (org.apache.hadoop.fs.Path)21 Map (java.util.Map)20 TupleDomain (io.trino.spi.predicate.TupleDomain)19 ImmutableList (com.google.common.collect.ImmutableList)18 ImmutableMap (com.google.common.collect.ImmutableMap)18 Set (java.util.Set)18 FileSystem (org.apache.hadoop.fs.FileSystem)17 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)15 ImmutableSet (com.google.common.collect.ImmutableSet)14 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)14 HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext)14 NOT_SUPPORTED (io.trino.spi.StandardErrorCode.NOT_SUPPORTED)14