Search in sources :

Example 71 with TrinoException

use of io.trino.spi.TrinoException in project trino by trinodb.

the class HiveMetadata method finishInsert.

@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle;
    List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toImmutableList());
    HiveStorageFormat tableStorageFormat = handle.getTableStorageFormat();
    partitionUpdates = PartitionUpdate.mergePartitionUpdates(partitionUpdates);
    Table table = metastore.getTable(handle.getSchemaName(), handle.getTableName()).orElseThrow(() -> new TableNotFoundException(handle.getSchemaTableName()));
    if (!table.getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
        throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
    }
    if (handle.getBucketProperty().isPresent() && isCreateEmptyBucketFiles(session)) {
        List<PartitionUpdate> partitionUpdatesForMissingBuckets = computePartitionUpdatesForMissingBuckets(session, handle, table, false, partitionUpdates);
        // replace partitionUpdates before creating the empty files so that those files will be cleaned up if we end up rollback
        partitionUpdates = PartitionUpdate.mergePartitionUpdates(concat(partitionUpdates, partitionUpdatesForMissingBuckets));
        for (PartitionUpdate partitionUpdate : partitionUpdatesForMissingBuckets) {
            Optional<Partition> partition = table.getPartitionColumns().isEmpty() ? Optional.empty() : Optional.of(buildPartitionObject(session, table, partitionUpdate));
            if (handle.isTransactional() && partition.isPresent()) {
                PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(partitionUpdate.getStatistics()).build();
                metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition.get(), partitionUpdate.getWritePath(), Optional.of(partitionUpdate.getFileNames()), statistics, handle.isRetriesEnabled());
            }
            createEmptyFiles(session, partitionUpdate.getWritePath(), table, partition, partitionUpdate.getFileNames());
        }
    }
    List<String> partitionedBy = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
    Map<String, Type> columnTypes = handle.getInputColumns().stream().collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager)));
    Map<List<String>, ComputedStatistics> partitionComputedStatistics = createComputedStatisticsToPartitionMap(computedStatistics, partitionedBy, columnTypes);
    for (PartitionUpdate partitionUpdate : partitionUpdates) {
        if (partitionUpdate.getName().isEmpty()) {
            // insert into unpartitioned table
            if (!table.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
                throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, ImmutableList.of()));
            if (partitionUpdate.getUpdateMode() == OVERWRITE) {
                // get privileges from existing table
                PrincipalPrivileges principalPrivileges = fromHivePrivilegeInfos(metastore.listTablePrivileges(handle.getSchemaName(), handle.getTableName(), Optional.empty()));
                // first drop it
                metastore.dropTable(session, handle.getSchemaName(), handle.getTableName());
                // create the table with the new location
                metastore.createTable(session, table, principalPrivileges, Optional.of(partitionUpdate.getWritePath()), Optional.of(partitionUpdate.getFileNames()), false, partitionStatistics, handle.isRetriesEnabled());
            } else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == APPEND) {
                // insert into unpartitioned table
                metastore.finishInsertIntoExistingTable(session, handle.getSchemaName(), handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, handle.isRetriesEnabled());
            } else {
                throw new IllegalArgumentException("Unsupported update mode: " + partitionUpdate.getUpdateMode());
            }
        } else if (partitionUpdate.getUpdateMode() == APPEND) {
            // insert into existing partition
            List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
            PartitionStatistics partitionStatistics = createPartitionStatistics(partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partitionValues));
            metastore.finishInsertIntoExistingPartition(session, handle.getSchemaName(), handle.getTableName(), partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), partitionStatistics, handle.isRetriesEnabled());
        } else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) {
            // insert into new partition or overwrite existing partition
            Partition partition = buildPartitionObject(session, table, partitionUpdate);
            if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
                throw new TrinoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
            }
            PartitionStatistics partitionStatistics = createPartitionStatistics(partitionUpdate.getStatistics(), columnTypes, getColumnStatistics(partitionComputedStatistics, partition.getValues()));
            if (partitionUpdate.getUpdateMode() == OVERWRITE) {
                if (handle.getLocationHandle().getWriteMode() == DIRECT_TO_TARGET_EXISTING_DIRECTORY) {
                    removeNonCurrentQueryFiles(session, partitionUpdate.getTargetPath());
                    if (handle.isRetriesEnabled()) {
                        HdfsContext hdfsContext = new HdfsContext(session);
                        cleanExtraOutputFiles(hdfsEnvironment, hdfsContext, session.getQueryId(), partitionUpdate.getTargetPath(), ImmutableSet.copyOf(partitionUpdate.getFileNames()));
                    }
                } else {
                    metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), partition.getValues(), true);
                    metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), Optional.of(partitionUpdate.getFileNames()), partitionStatistics, handle.isRetriesEnabled());
                }
            } else {
                metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath(), Optional.of(partitionUpdate.getFileNames()), partitionStatistics, handle.isRetriesEnabled());
            }
        } else {
            throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode()));
        }
    }
    if (isFullAcidTable(table.getParameters())) {
        HdfsContext context = new HdfsContext(session);
        for (PartitionUpdate update : partitionUpdates) {
            long writeId = handle.getTransaction().getWriteId();
            Path deltaDirectory = new Path(format("%s/%s/%s", table.getStorage().getLocation(), update.getName(), deltaSubdir(writeId, writeId, 0)));
            createOrcAcidVersionFile(context, deltaDirectory);
        }
    }
    return Optional.of(new HiveWrittenPartitions(partitionUpdates.stream().map(PartitionUpdate::getName).collect(toImmutableList())));
}
Also used : HiveTableProperties.getBucketProperty(io.trino.plugin.hive.HiveTableProperties.getBucketProperty) InsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior) HiveSessionProperties.getQueryPartitionFilterRequiredSchemas(io.trino.plugin.hive.HiveSessionProperties.getQueryPartitionFilterRequiredSchemas) ORC_BLOOM_FILTER_FPP(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP) FileSystem(org.apache.hadoop.fs.FileSystem) HiveBucketing.isSupportedBucketing(io.trino.plugin.hive.util.HiveBucketing.isSupportedBucketing) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.hive.HiveTableProperties.ANALYZE_COLUMNS_PROPERTY) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) HiveApplyProjectionUtil.replaceWithNewVariables(io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveTableProperties.getFooterSkipCount(io.trino.plugin.hive.HiveTableProperties.getFooterSkipCount) TABLE_NOT_FOUND(io.trino.spi.StandardErrorCode.TABLE_NOT_FOUND) Map(java.util.Map) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) HiveSessionProperties.isBucketExecutionEnabled(io.trino.plugin.hive.HiveSessionProperties.isBucketExecutionEnabled) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Domain(io.trino.spi.predicate.Domain) HiveSessionProperties.isStatisticsEnabled(io.trino.plugin.hive.HiveSessionProperties.isStatisticsEnabled) NULL_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.NULL_FORMAT_PROPERTY) HiveUtil.verifyPartitionTypeSupported(io.trino.plugin.hive.util.HiveUtil.verifyPartitionTypeSupported) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) Stream(java.util.stream.Stream) AcidUtils.deltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deltaSubdir) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) HIVE_CONCURRENT_MODIFICATION_DETECTED(io.trino.plugin.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED) HIVE_UNKNOWN_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR) TableScanRedirectApplicationResult(io.trino.spi.connector.TableScanRedirectApplicationResult) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) HiveSessionProperties.isSortedWritingEnabled(io.trino.plugin.hive.HiveSessionProperties.isSortedWritingEnabled) Joiner(com.google.common.base.Joiner) Partition(io.trino.plugin.hive.metastore.Partition) HiveColumnHandle.updateRowIdColumnHandle(io.trino.plugin.hive.HiveColumnHandle.updateRowIdColumnHandle) HiveUtil(io.trino.plugin.hive.util.HiveUtil) HiveWriterFactory.computeNonTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeNonTransactionalBucketedFilename) BUCKETED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) Variable(io.trino.spi.expression.Variable) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) Supplier(java.util.function.Supplier) HiveTableProperties.getSingleCharacterProperty(io.trino.plugin.hive.HiveTableProperties.getSingleCharacterProperty) TimestampType(io.trino.spi.type.TimestampType) ADD(io.trino.plugin.hive.util.Statistics.ReduceOperator.ADD) HiveSessionProperties.getCompressionCodec(io.trino.plugin.hive.HiveSessionProperties.getCompressionCodec) OptionalLong(java.util.OptionalLong) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) PATH_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PATH_COLUMN_NAME) GrantInfo(io.trino.spi.security.GrantInfo) MaterializedViewFreshness(io.trino.spi.connector.MaterializedViewFreshness) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveWriteUtils(io.trino.plugin.hive.util.HiveWriteUtils) HiveAnalyzeProperties.getColumnNames(io.trino.plugin.hive.HiveAnalyzeProperties.getColumnNames) MapType(io.trino.spi.type.MapType) HIVE_UNSUPPORTED_FORMAT(io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) HiveTableProperties.getPartitionedBy(io.trino.plugin.hive.HiveTableProperties.getPartitionedBy) CatalogName(io.trino.plugin.base.CatalogName) HiveSessionProperties.isCollectColumnStatisticsOnWrite(io.trino.plugin.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite) AVRO_SCHEMA_URL(io.trino.plugin.hive.HiveTableProperties.AVRO_SCHEMA_URL) HiveBucketing.getHiveBucketHandle(io.trino.plugin.hive.util.HiveBucketing.getHiveBucketHandle) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) HiveTableProperties.getHeaderSkipCount(io.trino.plugin.hive.HiveTableProperties.getHeaderSkipCount) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) Boolean.parseBoolean(java.lang.Boolean.parseBoolean) HiveWriteUtils.initializeSerializer(io.trino.plugin.hive.util.HiveWriteUtils.initializeSerializer) HiveSessionProperties.isCreateEmptyBucketFiles(io.trino.plugin.hive.HiveSessionProperties.isCreateEmptyBucketFiles) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) HiveSessionProperties.isNonTransactionalOptimizeEnabled(io.trino.plugin.hive.HiveSessionProperties.isNonTransactionalOptimizeEnabled) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HivePartitionManager.extractPartitionValues(io.trino.plugin.hive.HivePartitionManager.extractPartitionValues) HiveTableProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveTableProperties.getHiveStorageFormat) HiveTableProperties.getAvroSchemaUrl(io.trino.plugin.hive.HiveTableProperties.getAvroSchemaUrl) CompressionConfigUtil.configureCompression(io.trino.plugin.hive.util.CompressionConfigUtil.configureCompression) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) Database(io.trino.plugin.hive.metastore.Database) DIRECT_TO_TARGET_EXISTING_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) MetadataProvider(io.trino.spi.connector.MetadataProvider) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Collectors.toMap(java.util.stream.Collectors.toMap) Block(io.trino.spi.block.Block) ViewReaderUtil.encodeViewData(io.trino.plugin.hive.ViewReaderUtil.encodeViewData) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) Statistics.reduce(io.trino.plugin.hive.util.Statistics.reduce) ImmutableSet(com.google.common.collect.ImmutableSet) SemiTransactionalHiveMetastore.cleanExtraOutputFiles(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore.cleanExtraOutputFiles) Collection(java.util.Collection) VIEW_STORAGE_FORMAT(io.trino.plugin.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ROW_COUNT(io.trino.spi.statistics.TableStatisticType.ROW_COUNT) Constraint.alwaysTrue(io.trino.spi.connector.Constraint.alwaysTrue) PRESTO_VIEW_FLAG(io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) BIGINT(io.trino.spi.type.BigintType.BIGINT) StandardErrorCode(io.trino.spi.StandardErrorCode) SORTED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) AcidTransaction.forCreateTable(io.trino.plugin.hive.acid.AcidTransaction.forCreateTable) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) TableStatisticType(io.trino.spi.statistics.TableStatisticType) JsonCodec(io.airlift.json.JsonCodec) Constraint(io.trino.spi.connector.Constraint) HiveSessionProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveSessionProperties.getHiveStorageFormat) AUTO_PURGE(io.trino.plugin.hive.HiveTableProperties.AUTO_PURGE) NANOSECONDS(io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS) CompletableFuture(java.util.concurrent.CompletableFuture) HiveSessionProperties.isProjectionPushdownEnabled(io.trino.plugin.hive.HiveSessionProperties.isProjectionPushdownEnabled) Statistics.createComputedStatisticsToPartitionMap(io.trino.plugin.hive.util.Statistics.createComputedStatisticsToPartitionMap) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HiveTableProperties.isTransactional(io.trino.plugin.hive.HiveTableProperties.isTransactional) HashSet(java.util.HashSet) ViewReaderUtil.createViewReader(io.trino.plugin.hive.ViewReaderUtil.createViewReader) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) ImmutableList(com.google.common.collect.ImmutableList) TEXTFILE_FIELD_SEPARATOR_ESCAPE(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR_ESCAPE) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) NoSuchElementException(java.util.NoSuchElementException) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) RetryMode(io.trino.spi.connector.RetryMode) NEW(io.trino.plugin.hive.PartitionUpdate.UpdateMode.NEW) HIVE_VIEW_TRANSLATION_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_VIEW_TRANSLATION_ERROR) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveWriteUtils.isFileCreatedByQuery(io.trino.plugin.hive.util.HiveWriteUtils.isFileCreatedByQuery) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) JobConf(org.apache.hadoop.mapred.JobConf) HiveTableProperties.getNullFormat(io.trino.plugin.hive.HiveTableProperties.getNullFormat) HiveTableProperties.getOrcBloomFilterFpp(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterFpp) OrcAcidVersion.writeVersionFile(org.apache.hadoop.hive.ql.io.AcidUtils.OrcAcidVersion.writeVersionFile) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) HiveWriterFactory.computeTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeTransactionalBucketedFilename) TypeManager(io.trino.spi.type.TypeManager) SystemTables.getSourceTableNameFromSystemTable(io.trino.plugin.hive.util.SystemTables.getSourceTableNameFromSystemTable) HiveTableProperties.getExternalLocation(io.trino.plugin.hive.HiveTableProperties.getExternalLocation) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) HiveAnalyzeProperties.getPartitionList(io.trino.plugin.hive.HiveAnalyzeProperties.getPartitionList) Column(io.trino.plugin.hive.metastore.Column) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PARTITION_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PARTITION_COLUMN_NAME) ORC_BLOOM_FILTER_COLUMNS(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS) Table(io.trino.plugin.hive.metastore.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PARTITIONED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) Set(java.util.Set) MetastoreUtil.getHiveSchema(io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema) SchemaTableName(io.trino.spi.connector.SchemaTableName) SortingProperty(io.trino.spi.connector.SortingProperty) HIVE_COLUMN_ORDER_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) CSV_QUOTE(io.trino.plugin.hive.HiveTableProperties.CSV_QUOTE) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) MetastoreUtil.getProtectMode(io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode) HIVE_INVALID_VIEW_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) ConnectorTablePartitioning(io.trino.spi.connector.ConnectorTablePartitioning) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) ConfigurationUtils.toJobConf(io.trino.plugin.hive.util.ConfigurationUtils.toJobConf) NON_TRANSACTIONAL_OPTIMIZE_ENABLED(io.trino.plugin.hive.HiveSessionProperties.NON_TRANSACTIONAL_OPTIMIZE_ENABLED) HiveUtil.columnExtraInfo(io.trino.plugin.hive.util.HiveUtil.columnExtraInfo) ArrayList(java.util.ArrayList) PrincipalPrivileges.fromHivePrivilegeInfos(io.trino.plugin.hive.metastore.PrincipalPrivileges.fromHivePrivilegeInfos) ColumnHandle(io.trino.spi.connector.ColumnHandle) HiveUtil.hiveColumnHandles(io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) STORAGE_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) ViewReaderUtil.isPrestoView(io.trino.plugin.hive.ViewReaderUtil.isPrestoView) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveSessionProperties.isRespectTableFormat(io.trino.plugin.hive.HiveSessionProperties.isRespectTableFormat) RetryDriver.retry(io.trino.plugin.hive.util.RetryDriver.retry) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) Properties(java.util.Properties) HiveWriteUtils.checkTableIsWritable(io.trino.plugin.hive.util.HiveWriteUtils.checkTableIsWritable) EXTERNAL_LOCATION_PROPERTY(io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY) TEXTFILE_FIELD_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR) ConnectorSession(io.trino.spi.connector.ConnectorSession) HiveStatisticsProvider(io.trino.plugin.hive.statistics.HiveStatisticsProvider) RoleGrant(io.trino.spi.security.RoleGrant) File(java.io.File) HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore(io.trino.plugin.hive.HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore) HiveUtil.isSparkBucketedTable(io.trino.plugin.hive.util.HiveUtil.isSparkBucketedTable) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) Sets.intersection(com.google.common.collect.Sets.intersection) TableType(org.apache.hadoop.hive.metastore.TableType) HiveSessionProperties.isParallelPartitionedBucketedWrites(io.trino.plugin.hive.HiveSessionProperties.isParallelPartitionedBucketedWrites) ViewReaderUtil.isHiveOrPrestoView(io.trino.plugin.hive.ViewReaderUtil.isHiveOrPrestoView) HiveSessionProperties.isQueryPartitionFilterRequired(io.trino.plugin.hive.HiveSessionProperties.isQueryPartitionFilterRequired) HIVE_WRITER_CLOSE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR) URL(java.net.URL) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) HiveBucketing(io.trino.plugin.hive.util.HiveBucketing) Iterables.concat(com.google.common.collect.Iterables.concat) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) ErrorType(io.trino.spi.ErrorType) HIVE_FILESYSTEM_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) Splitter(com.google.common.base.Splitter) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) OVERWRITE(io.trino.plugin.hive.PartitionUpdate.UpdateMode.OVERWRITE) SKIP_HEADER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_HEADER_LINE_COUNT) RowType(io.trino.spi.type.RowType) ImmutableMap(com.google.common.collect.ImmutableMap) AccessControlMetadata(io.trino.plugin.hive.security.AccessControlMetadata) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HiveTableProperties.getAnalyzeColumns(io.trino.plugin.hive.HiveTableProperties.getAnalyzeColumns) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Statistics.fromComputedStatistics(io.trino.plugin.hive.util.Statistics.fromComputedStatistics) HiveApplyProjectionUtil.find(io.trino.plugin.hive.HiveApplyProjectionUtil.find) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) MetastoreUtil.verifyOnline(io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline) FILE_SIZE_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME) Assignment(io.trino.spi.connector.Assignment) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) SystemTable(io.trino.spi.connector.SystemTable) CSV_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.CSV_SEPARATOR) HiveWriteUtils.isWritableType(io.trino.plugin.hive.util.HiveWriteUtils.isWritableType) HiveSessionProperties.isOptimizedMismatchedBucketCount(io.trino.plugin.hive.HiveSessionProperties.isOptimizedMismatchedBucketCount) Logger(io.airlift.log.Logger) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) HashMap(java.util.HashMap) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) AcidSchema(io.trino.plugin.hive.acid.AcidSchema) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Verify.verify(com.google.common.base.Verify.verify) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) HiveTableProperties.getOrcBloomFilterColumns(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterColumns) HiveApplyProjectionUtil.extractSupportedProjectedColumns(io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns) Privilege(io.trino.spi.security.Privilege) VerifyException(com.google.common.base.VerifyException) APPEND(io.trino.plugin.hive.PartitionUpdate.UpdateMode.APPEND) HiveSessionProperties.getInsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.getInsertExistingPartitionsBehavior) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) NO_PRIVILEGES(io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES) MalformedURLException(java.net.MalformedURLException) HiveSessionProperties.isPropagateTableScanSortingProperties(io.trino.plugin.hive.HiveSessionProperties.isPropagateTableScanSortingProperties) Statistics.createEmptyPartitionStatistics(io.trino.plugin.hive.util.Statistics.createEmptyPartitionStatistics) TupleDomain(io.trino.spi.predicate.TupleDomain) Maps(com.google.common.collect.Maps) OptimizeTableProcedure(io.trino.plugin.hive.procedure.OptimizeTableProcedure) SKIP_FOOTER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_FOOTER_LINE_COUNT) SYNTHESIZED(io.trino.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) LocalProperty(io.trino.spi.connector.LocalProperty) BUCKET_COUNT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HiveUtil.getPartitionKeyColumnHandles(io.trino.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles) Collections(java.util.Collections) CSV_ESCAPE(io.trino.plugin.hive.HiveTableProperties.CSV_ESCAPE) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) HiveTableProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveTableProperties.getHiveStorageFormat) HiveSessionProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveSessionProperties.getHiveStorageFormat) ImmutableList(com.google.common.collect.ImmutableList) HiveAnalyzeProperties.getPartitionList(io.trino.plugin.hive.HiveAnalyzeProperties.getPartitionList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) List(java.util.List) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) Path(org.apache.hadoop.fs.Path) Partition(io.trino.plugin.hive.metastore.Partition) AcidTransaction.forCreateTable(io.trino.plugin.hive.acid.AcidTransaction.forCreateTable) SystemTables.getSourceTableNameFromSystemTable(io.trino.plugin.hive.util.SystemTables.getSourceTableNameFromSystemTable) Table(io.trino.plugin.hive.metastore.Table) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isSparkBucketedTable(io.trino.plugin.hive.util.HiveUtil.isSparkBucketedTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) SystemTable(io.trino.spi.connector.SystemTable) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) TimestampType(io.trino.spi.type.TimestampType) MapType(io.trino.spi.type.MapType) TableStatisticType(io.trino.spi.statistics.TableStatisticType) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) TableType(org.apache.hadoop.hive.metastore.TableType) ErrorType(io.trino.spi.ErrorType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) HiveWriteUtils.isWritableType(io.trino.plugin.hive.util.HiveWriteUtils.isWritableType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) Statistics.createEmptyPartitionStatistics(io.trino.plugin.hive.util.Statistics.createEmptyPartitionStatistics) Slice(io.airlift.slice.Slice) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) Statistics.fromComputedStatistics(io.trino.plugin.hive.util.Statistics.fromComputedStatistics) TrinoException(io.trino.spi.TrinoException)

Example 72 with TrinoException

use of io.trino.spi.TrinoException in project trino by trinodb.

the class HiveMetadata method buildColumnDomain.

private static Domain buildColumnDomain(ColumnHandle column, List<HivePartition> partitions) {
    checkArgument(!partitions.isEmpty(), "partitions cannot be empty");
    boolean hasNull = false;
    boolean hasNaN = false;
    List<Object> nonNullValues = new ArrayList<>();
    Type type = ((HiveColumnHandle) column).getType();
    for (HivePartition partition : partitions) {
        NullableValue value = partition.getKeys().get(column);
        if (value == null) {
            throw new TrinoException(HIVE_UNKNOWN_ERROR, format("Partition %s does not have a value for partition column %s", partition, column));
        }
        if (value.isNull()) {
            hasNull = true;
        } else {
            if (isFloatingPointNaN(type, value.getValue())) {
                hasNaN = true;
            }
            nonNullValues.add(value.getValue());
        }
    }
    Domain domain;
    if (nonNullValues.isEmpty()) {
        domain = Domain.none(type);
    } else if (hasNaN) {
        domain = Domain.notNull(type);
    } else {
        domain = Domain.multipleValues(type, nonNullValues);
    }
    if (hasNull) {
        domain = domain.union(Domain.onlyNull(type));
    }
    return domain;
}
Also used : ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) TimestampType(io.trino.spi.type.TimestampType) MapType(io.trino.spi.type.MapType) TableStatisticType(io.trino.spi.statistics.TableStatisticType) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) TableType(org.apache.hadoop.hive.metastore.TableType) ErrorType(io.trino.spi.ErrorType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) HiveWriteUtils.isWritableType(io.trino.plugin.hive.util.HiveWriteUtils.isWritableType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) ArrayList(java.util.ArrayList) NullableValue(io.trino.spi.predicate.NullableValue) TrinoException(io.trino.spi.TrinoException) Domain(io.trino.spi.predicate.Domain) TupleDomain(io.trino.spi.predicate.TupleDomain)

Example 73 with TrinoException

use of io.trino.spi.TrinoException in project trino by trinodb.

the class HiveMetadata method getInsertLayout.

@Override
public Optional<ConnectorTableLayout> getInsertLayout(ConnectorSession session, ConnectorTableHandle tableHandle) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
    Table table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    if (table.getStorage().getBucketProperty().isPresent()) {
        if (!isSupportedBucketing(table)) {
            throw new TrinoException(NOT_SUPPORTED, "Cannot write to a table bucketed on an unsupported type");
        }
    } else // Note: we cannot use hiveTableHandle.isInAcidTransaction() here as transaction is not yet set in HiveTableHandle when getInsertLayout is called
    if (isFullAcidTable(table.getParameters())) {
        table = Table.builder(table).withStorage(storage -> storage.setBucketProperty(Optional.of(new HiveBucketProperty(ImmutableList.of(), HiveBucketing.BucketingVersion.BUCKETING_V2, 1, ImmutableList.of())))).build();
    }
    Optional<HiveBucketHandle> hiveBucketHandle = getHiveBucketHandle(session, table, typeManager);
    List<Column> partitionColumns = table.getPartitionColumns();
    if (hiveBucketHandle.isEmpty()) {
        // return preferred layout which is partitioned by partition columns
        if (partitionColumns.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(new ConnectorTableLayout(partitionColumns.stream().map(Column::getName).collect(toImmutableList())));
    }
    HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty().orElseThrow(() -> new NoSuchElementException("Bucket property should be set"));
    if (!bucketProperty.getSortedBy().isEmpty() && !isSortedWritingEnabled(session)) {
        throw new TrinoException(NOT_SUPPORTED, "Writing to bucketed sorted Hive tables is disabled");
    }
    ImmutableList.Builder<String> partitioningColumns = ImmutableList.builder();
    hiveBucketHandle.get().getColumns().stream().map(HiveColumnHandle::getName).forEach(partitioningColumns::add);
    partitionColumns.stream().map(Column::getName).forEach(partitioningColumns::add);
    HivePartitioningHandle partitioningHandle = new HivePartitioningHandle(hiveBucketHandle.get().getBucketingVersion(), hiveBucketHandle.get().getTableBucketCount(), hiveBucketHandle.get().getColumns().stream().map(HiveColumnHandle::getHiveType).collect(toImmutableList()), OptionalInt.of(hiveBucketHandle.get().getTableBucketCount()), !partitionColumns.isEmpty() && isParallelPartitionedBucketedWrites(session));
    return Optional.of(new ConnectorTableLayout(partitioningHandle, partitioningColumns.build()));
}
Also used : HiveTableProperties.getBucketProperty(io.trino.plugin.hive.HiveTableProperties.getBucketProperty) InsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior) HiveSessionProperties.getQueryPartitionFilterRequiredSchemas(io.trino.plugin.hive.HiveSessionProperties.getQueryPartitionFilterRequiredSchemas) ORC_BLOOM_FILTER_FPP(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP) FileSystem(org.apache.hadoop.fs.FileSystem) HiveBucketing.isSupportedBucketing(io.trino.plugin.hive.util.HiveBucketing.isSupportedBucketing) ANALYZE_COLUMNS_PROPERTY(io.trino.plugin.hive.HiveTableProperties.ANALYZE_COLUMNS_PROPERTY) TypeUtils.isFloatingPointNaN(io.trino.spi.type.TypeUtils.isFloatingPointNaN) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) HiveApplyProjectionUtil.replaceWithNewVariables(io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveTableProperties.getFooterSkipCount(io.trino.plugin.hive.HiveTableProperties.getFooterSkipCount) TABLE_NOT_FOUND(io.trino.spi.StandardErrorCode.TABLE_NOT_FOUND) Map(java.util.Map) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) HiveSessionProperties.isBucketExecutionEnabled(io.trino.plugin.hive.HiveSessionProperties.isBucketExecutionEnabled) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Domain(io.trino.spi.predicate.Domain) HiveSessionProperties.isStatisticsEnabled(io.trino.plugin.hive.HiveSessionProperties.isStatisticsEnabled) NULL_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.NULL_FORMAT_PROPERTY) HiveUtil.verifyPartitionTypeSupported(io.trino.plugin.hive.util.HiveUtil.verifyPartitionTypeSupported) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) Collectors.joining(java.util.stream.Collectors.joining) Stream(java.util.stream.Stream) AcidUtils.deltaSubdir(org.apache.hadoop.hive.ql.io.AcidUtils.deltaSubdir) TrinoPrincipal(io.trino.spi.security.TrinoPrincipal) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) HIVE_CONCURRENT_MODIFICATION_DETECTED(io.trino.plugin.hive.HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED) HIVE_UNKNOWN_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_UNKNOWN_ERROR) TableScanRedirectApplicationResult(io.trino.spi.connector.TableScanRedirectApplicationResult) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) HiveSessionProperties.isSortedWritingEnabled(io.trino.plugin.hive.HiveSessionProperties.isSortedWritingEnabled) Joiner(com.google.common.base.Joiner) Partition(io.trino.plugin.hive.metastore.Partition) HiveColumnHandle.updateRowIdColumnHandle(io.trino.plugin.hive.HiveColumnHandle.updateRowIdColumnHandle) HiveUtil(io.trino.plugin.hive.util.HiveUtil) HiveWriterFactory.computeNonTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeNonTransactionalBucketedFilename) BUCKETED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) INVALID_ANALYZE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) Variable(io.trino.spi.expression.Variable) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) Supplier(java.util.function.Supplier) HiveTableProperties.getSingleCharacterProperty(io.trino.plugin.hive.HiveTableProperties.getSingleCharacterProperty) TimestampType(io.trino.spi.type.TimestampType) ADD(io.trino.plugin.hive.util.Statistics.ReduceOperator.ADD) HiveSessionProperties.getCompressionCodec(io.trino.plugin.hive.HiveSessionProperties.getCompressionCodec) OptionalLong(java.util.OptionalLong) HIVE_INVALID_METADATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_METADATA) PATH_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PATH_COLUMN_NAME) GrantInfo(io.trino.spi.security.GrantInfo) MaterializedViewFreshness(io.trino.spi.connector.MaterializedViewFreshness) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) INVALID_TABLE_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY) HiveWriteUtils(io.trino.plugin.hive.util.HiveWriteUtils) HiveAnalyzeProperties.getColumnNames(io.trino.plugin.hive.HiveAnalyzeProperties.getColumnNames) MapType(io.trino.spi.type.MapType) HIVE_UNSUPPORTED_FORMAT(io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) HiveTableProperties.getPartitionedBy(io.trino.plugin.hive.HiveTableProperties.getPartitionedBy) CatalogName(io.trino.plugin.base.CatalogName) HiveSessionProperties.isCollectColumnStatisticsOnWrite(io.trino.plugin.hive.HiveSessionProperties.isCollectColumnStatisticsOnWrite) AVRO_SCHEMA_URL(io.trino.plugin.hive.HiveTableProperties.AVRO_SCHEMA_URL) HiveBucketing.getHiveBucketHandle(io.trino.plugin.hive.util.HiveBucketing.getHiveBucketHandle) HiveUtil.isHiveSystemSchema(io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema) HiveTableProperties.getHeaderSkipCount(io.trino.plugin.hive.HiveTableProperties.getHeaderSkipCount) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) Boolean.parseBoolean(java.lang.Boolean.parseBoolean) HiveWriteUtils.initializeSerializer(io.trino.plugin.hive.util.HiveWriteUtils.initializeSerializer) HiveSessionProperties.isCreateEmptyBucketFiles(io.trino.plugin.hive.HiveSessionProperties.isCreateEmptyBucketFiles) TableStatisticsMetadata(io.trino.spi.statistics.TableStatisticsMetadata) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) HiveSessionProperties.isNonTransactionalOptimizeEnabled(io.trino.plugin.hive.HiveSessionProperties.isNonTransactionalOptimizeEnabled) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HivePartitionManager.extractPartitionValues(io.trino.plugin.hive.HivePartitionManager.extractPartitionValues) HiveTableProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveTableProperties.getHiveStorageFormat) HiveTableProperties.getAvroSchemaUrl(io.trino.plugin.hive.HiveTableProperties.getAvroSchemaUrl) CompressionConfigUtil.configureCompression(io.trino.plugin.hive.util.CompressionConfigUtil.configureCompression) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) Database(io.trino.plugin.hive.metastore.Database) DIRECT_TO_TARGET_EXISTING_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) MetadataProvider(io.trino.spi.connector.MetadataProvider) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Collectors.toMap(java.util.stream.Collectors.toMap) Block(io.trino.spi.block.Block) ViewReaderUtil.encodeViewData(io.trino.plugin.hive.ViewReaderUtil.encodeViewData) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) Statistics.reduce(io.trino.plugin.hive.util.Statistics.reduce) ImmutableSet(com.google.common.collect.ImmutableSet) SemiTransactionalHiveMetastore.cleanExtraOutputFiles(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore.cleanExtraOutputFiles) Collection(java.util.Collection) VIEW_STORAGE_FORMAT(io.trino.plugin.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) ROW_COUNT(io.trino.spi.statistics.TableStatisticType.ROW_COUNT) Constraint.alwaysTrue(io.trino.spi.connector.Constraint.alwaysTrue) PRESTO_VIEW_FLAG(io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG) INVALID_SCHEMA_PROPERTY(io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) BIGINT(io.trino.spi.type.BigintType.BIGINT) StandardErrorCode(io.trino.spi.StandardErrorCode) SORTED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) BeginTableExecuteResult(io.trino.spi.connector.BeginTableExecuteResult) AcidTransaction.forCreateTable(io.trino.plugin.hive.acid.AcidTransaction.forCreateTable) ProjectedColumnRepresentation(io.trino.plugin.hive.HiveApplyProjectionUtil.ProjectedColumnRepresentation) TableStatisticType(io.trino.spi.statistics.TableStatisticType) JsonCodec(io.airlift.json.JsonCodec) Constraint(io.trino.spi.connector.Constraint) HiveSessionProperties.getHiveStorageFormat(io.trino.plugin.hive.HiveSessionProperties.getHiveStorageFormat) AUTO_PURGE(io.trino.plugin.hive.HiveTableProperties.AUTO_PURGE) NANOSECONDS(io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS) CompletableFuture(java.util.concurrent.CompletableFuture) HiveSessionProperties.isProjectionPushdownEnabled(io.trino.plugin.hive.HiveSessionProperties.isProjectionPushdownEnabled) Statistics.createComputedStatisticsToPartitionMap(io.trino.plugin.hive.util.Statistics.createComputedStatisticsToPartitionMap) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HiveTableProperties.isTransactional(io.trino.plugin.hive.HiveTableProperties.isTransactional) HashSet(java.util.HashSet) ViewReaderUtil.createViewReader(io.trino.plugin.hive.ViewReaderUtil.createViewReader) OpenCSVSerde(org.apache.hadoop.hive.serde2.OpenCSVSerde) ImmutableList(com.google.common.collect.ImmutableList) TEXTFILE_FIELD_SEPARATOR_ESCAPE(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR_ESCAPE) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) NoSuchElementException(java.util.NoSuchElementException) VIRTUAL_VIEW(org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW) RetryMode(io.trino.spi.connector.RetryMode) NEW(io.trino.plugin.hive.PartitionUpdate.UpdateMode.NEW) HIVE_VIEW_TRANSLATION_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_VIEW_TRANSLATION_ERROR) HiveUtil.getRegularColumnHandles(io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles) HiveWriteUtils.isFileCreatedByQuery(io.trino.plugin.hive.util.HiveWriteUtils.isFileCreatedByQuery) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) JobConf(org.apache.hadoop.mapred.JobConf) HiveTableProperties.getNullFormat(io.trino.plugin.hive.HiveTableProperties.getNullFormat) HiveTableProperties.getOrcBloomFilterFpp(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterFpp) OrcAcidVersion.writeVersionFile(org.apache.hadoop.hive.ql.io.AcidUtils.OrcAcidVersion.writeVersionFile) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) HiveWriterFactory.computeTransactionalBucketedFilename(io.trino.plugin.hive.HiveWriterFactory.computeTransactionalBucketedFilename) TypeManager(io.trino.spi.type.TypeManager) SystemTables.getSourceTableNameFromSystemTable(io.trino.plugin.hive.util.SystemTables.getSourceTableNameFromSystemTable) HiveTableProperties.getExternalLocation(io.trino.plugin.hive.HiveTableProperties.getExternalLocation) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) ConnectorTableExecuteHandle(io.trino.spi.connector.ConnectorTableExecuteHandle) HiveAnalyzeProperties.getPartitionList(io.trino.plugin.hive.HiveAnalyzeProperties.getPartitionList) Column(io.trino.plugin.hive.metastore.Column) FILE_MODIFIED_TIME_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) PARTITION_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.PARTITION_COLUMN_NAME) ORC_BLOOM_FILTER_COLUMNS(io.trino.plugin.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS) Table(io.trino.plugin.hive.metastore.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PARTITIONED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) Set(java.util.Set) MetastoreUtil.getHiveSchema(io.trino.plugin.hive.metastore.MetastoreUtil.getHiveSchema) SchemaTableName(io.trino.spi.connector.SchemaTableName) SortingProperty(io.trino.spi.connector.SortingProperty) HIVE_COLUMN_ORDER_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) CSV_QUOTE(io.trino.plugin.hive.HiveTableProperties.CSV_QUOTE) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) MetastoreUtil.getProtectMode(io.trino.plugin.hive.metastore.MetastoreUtil.getProtectMode) HIVE_INVALID_VIEW_DATA(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) ConnectorTablePartitioning(io.trino.spi.connector.ConnectorTablePartitioning) RemoteIterator(org.apache.hadoop.fs.RemoteIterator) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) Iterables(com.google.common.collect.Iterables) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) ConfigurationUtils.toJobConf(io.trino.plugin.hive.util.ConfigurationUtils.toJobConf) NON_TRANSACTIONAL_OPTIMIZE_ENABLED(io.trino.plugin.hive.HiveSessionProperties.NON_TRANSACTIONAL_OPTIMIZE_ENABLED) HiveUtil.columnExtraInfo(io.trino.plugin.hive.util.HiveUtil.columnExtraInfo) ArrayList(java.util.ArrayList) PrincipalPrivileges.fromHivePrivilegeInfos(io.trino.plugin.hive.metastore.PrincipalPrivileges.fromHivePrivilegeInfos) ColumnHandle(io.trino.spi.connector.ColumnHandle) HiveUtil.hiveColumnHandles(io.trino.plugin.hive.util.HiveUtil.hiveColumnHandles) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) STORAGE_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) ViewReaderUtil.isPrestoView(io.trino.plugin.hive.ViewReaderUtil.isPrestoView) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveSessionProperties.isRespectTableFormat(io.trino.plugin.hive.HiveSessionProperties.isRespectTableFormat) RetryDriver.retry(io.trino.plugin.hive.util.RetryDriver.retry) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) Properties(java.util.Properties) HiveWriteUtils.checkTableIsWritable(io.trino.plugin.hive.util.HiveWriteUtils.checkTableIsWritable) EXTERNAL_LOCATION_PROPERTY(io.trino.plugin.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY) TEXTFILE_FIELD_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.TEXTFILE_FIELD_SEPARATOR) ConnectorSession(io.trino.spi.connector.ConnectorSession) HiveStatisticsProvider(io.trino.plugin.hive.statistics.HiveStatisticsProvider) RoleGrant(io.trino.spi.security.RoleGrant) File(java.io.File) HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore(io.trino.plugin.hive.HiveSessionProperties.isDelegateTransactionalManagedTableLocationToMetastore) HiveUtil.isSparkBucketedTable(io.trino.plugin.hive.util.HiveUtil.isSparkBucketedTable) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) Sets.intersection(com.google.common.collect.Sets.intersection) TableType(org.apache.hadoop.hive.metastore.TableType) HiveSessionProperties.isParallelPartitionedBucketedWrites(io.trino.plugin.hive.HiveSessionProperties.isParallelPartitionedBucketedWrites) ViewReaderUtil.isHiveOrPrestoView(io.trino.plugin.hive.ViewReaderUtil.isHiveOrPrestoView) HiveSessionProperties.isQueryPartitionFilterRequired(io.trino.plugin.hive.HiveSessionProperties.isQueryPartitionFilterRequired) HIVE_WRITER_CLOSE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR) URL(java.net.URL) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) HiveBucketing(io.trino.plugin.hive.util.HiveBucketing) Iterables.concat(com.google.common.collect.Iterables.concat) CatalogSchemaName(io.trino.spi.connector.CatalogSchemaName) Path(org.apache.hadoop.fs.Path) ErrorType(io.trino.spi.ErrorType) HIVE_FILESYSTEM_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR) Splitter(com.google.common.base.Splitter) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) OVERWRITE(io.trino.plugin.hive.PartitionUpdate.UpdateMode.OVERWRITE) SKIP_HEADER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_HEADER_LINE_COUNT) RowType(io.trino.spi.type.RowType) ImmutableMap(com.google.common.collect.ImmutableMap) AccessControlMetadata(io.trino.plugin.hive.security.AccessControlMetadata) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HiveTableProperties.getAnalyzeColumns(io.trino.plugin.hive.HiveTableProperties.getAnalyzeColumns) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) Statistics.fromComputedStatistics(io.trino.plugin.hive.util.Statistics.fromComputedStatistics) HiveApplyProjectionUtil.find(io.trino.plugin.hive.HiveApplyProjectionUtil.find) ConnectorOutputMetadata(io.trino.spi.connector.ConnectorOutputMetadata) Sets(com.google.common.collect.Sets) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) MetastoreUtil.buildInitialPrivilegeSet(io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet) MetastoreUtil.verifyOnline(io.trino.plugin.hive.metastore.MetastoreUtil.verifyOnline) FILE_SIZE_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME) Assignment(io.trino.spi.connector.Assignment) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) SystemTable(io.trino.spi.connector.SystemTable) CSV_SEPARATOR(io.trino.plugin.hive.HiveTableProperties.CSV_SEPARATOR) HiveWriteUtils.isWritableType(io.trino.plugin.hive.util.HiveWriteUtils.isWritableType) HiveSessionProperties.isOptimizedMismatchedBucketCount(io.trino.plugin.hive.HiveSessionProperties.isOptimizedMismatchedBucketCount) Logger(io.airlift.log.Logger) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) HashMap(java.util.HashMap) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) AcidSchema(io.trino.plugin.hive.acid.AcidSchema) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Verify.verify(com.google.common.base.Verify.verify) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) Objects.requireNonNull(java.util.Objects.requireNonNull) Suppliers(com.google.common.base.Suppliers) HiveTableProperties.getOrcBloomFilterColumns(io.trino.plugin.hive.HiveTableProperties.getOrcBloomFilterColumns) HiveApplyProjectionUtil.extractSupportedProjectedColumns(io.trino.plugin.hive.HiveApplyProjectionUtil.extractSupportedProjectedColumns) Privilege(io.trino.spi.security.Privilege) VerifyException(com.google.common.base.VerifyException) APPEND(io.trino.plugin.hive.PartitionUpdate.UpdateMode.APPEND) HiveSessionProperties.getInsertExistingPartitionsBehavior(io.trino.plugin.hive.HiveSessionProperties.getInsertExistingPartitionsBehavior) TupleDomain.withColumnDomains(io.trino.spi.predicate.TupleDomain.withColumnDomains) NO_PRIVILEGES(io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES) MalformedURLException(java.net.MalformedURLException) HiveSessionProperties.isPropagateTableScanSortingProperties(io.trino.plugin.hive.HiveSessionProperties.isPropagateTableScanSortingProperties) Statistics.createEmptyPartitionStatistics(io.trino.plugin.hive.util.Statistics.createEmptyPartitionStatistics) TupleDomain(io.trino.spi.predicate.TupleDomain) Maps(com.google.common.collect.Maps) OptimizeTableProcedure(io.trino.plugin.hive.procedure.OptimizeTableProcedure) SKIP_FOOTER_LINE_COUNT(io.trino.plugin.hive.HiveTableProperties.SKIP_FOOTER_LINE_COUNT) SYNTHESIZED(io.trino.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) LocalProperty(io.trino.spi.connector.LocalProperty) BUCKET_COUNT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HiveUtil.getPartitionKeyColumnHandles(io.trino.plugin.hive.util.HiveUtil.getPartitionKeyColumnHandles) Collections(java.util.Collections) CSV_ESCAPE(io.trino.plugin.hive.HiveTableProperties.CSV_ESCAPE) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) AcidTransaction.forCreateTable(io.trino.plugin.hive.acid.AcidTransaction.forCreateTable) SystemTables.getSourceTableNameFromSystemTable(io.trino.plugin.hive.util.SystemTables.getSourceTableNameFromSystemTable) Table(io.trino.plugin.hive.metastore.Table) AcidUtils.isTransactionalTable(org.apache.hadoop.hive.ql.io.AcidUtils.isTransactionalTable) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isSparkBucketedTable(io.trino.plugin.hive.util.HiveUtil.isSparkBucketedTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) SystemTable(io.trino.spi.connector.SystemTable) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Column(io.trino.plugin.hive.metastore.Column) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) HiveBucketing.getHiveBucketHandle(io.trino.plugin.hive.util.HiveBucketing.getHiveBucketHandle) TrinoException(io.trino.spi.TrinoException) NoSuchElementException(java.util.NoSuchElementException)

Example 74 with TrinoException

use of io.trino.spi.TrinoException in project trino by trinodb.

the class RcFileFileWriterFactory method createFileWriter.

@Override
public Optional<FileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, boolean useAcidSchema, WriterKind writerKind) {
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
        rcFileEncoding = new BinaryRcFileEncoding(timeZone);
    } else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
        rcFileEncoding = createTextVectorEncoding(schema);
    } else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session))).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
        OutputStream outputStream = fileSystem.create(path, false);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
                } catch (IOException e) {
                    throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(PRESTO_VERSION_NAME, nodeVersion.toString()).put(PRESTO_QUERY_ID_NAME, session.getQueryId()).buildOrThrow(), validationInputFactory));
    } catch (Exception e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
Also used : HdfsRcFileDataSource(io.trino.plugin.hive.rcfile.HdfsRcFileDataSource) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding) DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Type(io.trino.spi.type.Type) Callable(java.util.concurrent.Callable) RcFileDataSource(io.trino.rcfile.RcFileDataSource) OptionalInt(java.util.OptionalInt) Supplier(java.util.function.Supplier) Inject(javax.inject.Inject) HiveUtil.getColumnNames(io.trino.plugin.hive.util.HiveUtil.getColumnNames) HiveSessionProperties.isRcfileOptimizedWriterValidate(io.trino.plugin.hive.HiveSessionProperties.isRcfileOptimizedWriterValidate) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) OutputStream(java.io.OutputStream) HIVE_WRITER_OPEN_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) HiveUtil.getColumnTypes(io.trino.plugin.hive.util.HiveUtil.getColumnTypes) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) HiveSessionProperties.getTimestampPrecision(io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision) TrinoException(io.trino.spi.TrinoException) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) RcFileEncoding(io.trino.rcfile.RcFileEncoding) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) JobConf(org.apache.hadoop.mapred.JobConf) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) List(java.util.List) RcFilePageSourceFactory.createTextVectorEncoding(io.trino.plugin.hive.rcfile.RcFilePageSourceFactory.createTextVectorEncoding) Collectors.toList(java.util.stream.Collectors.toList) HIVE_WRITE_VALIDATION_FAILED(io.trino.plugin.hive.HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED) Optional(java.util.Optional) PRESTO_VERSION_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) TypeManager(io.trino.spi.type.TypeManager) OutputStream(java.io.OutputStream) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding) RcFileEncoding(io.trino.rcfile.RcFileEncoding) IOException(java.io.IOException) TrinoException(io.trino.spi.TrinoException) IOException(java.io.IOException) Type(io.trino.spi.type.Type) HdfsRcFileDataSource(io.trino.plugin.hive.rcfile.HdfsRcFileDataSource) FileSystem(org.apache.hadoop.fs.FileSystem) TrinoException(io.trino.spi.TrinoException) LazyBinaryColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe) Supplier(java.util.function.Supplier) BinaryRcFileEncoding(io.trino.rcfile.binary.BinaryRcFileEncoding)

Example 75 with TrinoException

use of io.trino.spi.TrinoException in project trino by trinodb.

the class HivePageSink method getWriterIndexes.

private int[] getWriterIndexes(Page page) {
    Page partitionColumns = extractColumns(page, partitionColumnsInputIndex);
    Block bucketBlock = buildBucketBlock(page);
    int[] writerIndexes = pagePartitioner.partitionPage(partitionColumns, bucketBlock);
    if (pagePartitioner.getMaxIndex() >= maxOpenWriters) {
        throw new TrinoException(HIVE_TOO_MANY_OPEN_PARTITIONS, format("Exceeded limit of %s open writers for partitions/buckets", maxOpenWriters));
    }
    // expand writers list to new size
    while (writers.size() <= pagePartitioner.getMaxIndex()) {
        writers.add(null);
    }
    // create missing writers
    for (int position = 0; position < page.getPositionCount(); position++) {
        int writerIndex = writerIndexes[position];
        HiveWriter writer = writers.get(writerIndex);
        if (writer != null) {
            // and file names have no random component (e.g. bucket_00000)
            if (bucketFunction != null || isTransactional || writer.getWrittenBytes() <= targetMaxFileSize) {
                continue;
            }
            // close current writer
            closeWriter(writer);
        }
        OptionalInt bucketNumber = OptionalInt.empty();
        if (bucketBlock != null) {
            bucketNumber = OptionalInt.of(bucketBlock.getInt(position, 0));
        }
        writer = writerFactory.createWriter(partitionColumns, position, bucketNumber);
        writers.set(writerIndex, writer);
    }
    verify(writers.size() == pagePartitioner.getMaxIndex() + 1);
    verify(!writers.contains(null));
    return writerIndexes;
}
Also used : Block(io.trino.spi.block.Block) TrinoException(io.trino.spi.TrinoException) Page(io.trino.spi.Page) OptionalInt(java.util.OptionalInt)

Aggregations

TrinoException (io.trino.spi.TrinoException)623 IOException (java.io.IOException)151 ImmutableList (com.google.common.collect.ImmutableList)105 List (java.util.List)100 Type (io.trino.spi.type.Type)93 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)90 SchemaTableName (io.trino.spi.connector.SchemaTableName)83 Path (org.apache.hadoop.fs.Path)83 Optional (java.util.Optional)79 ArrayList (java.util.ArrayList)77 Map (java.util.Map)76 ImmutableMap (com.google.common.collect.ImmutableMap)70 Objects.requireNonNull (java.util.Objects.requireNonNull)69 ConnectorSession (io.trino.spi.connector.ConnectorSession)63 TableNotFoundException (io.trino.spi.connector.TableNotFoundException)62 ImmutableSet (com.google.common.collect.ImmutableSet)56 VarcharType (io.trino.spi.type.VarcharType)54 Set (java.util.Set)54 Slice (io.airlift.slice.Slice)53 Table (io.trino.plugin.hive.metastore.Table)52