Example 6 with AcidTransaction

Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.

In the class OrcPageSourceFactory, the method createOrcPageSource:

private ConnectorPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment,
        ConnectorIdentity identity,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long estimatedFileSize,
        List<HiveColumnHandle> columns,
        List<HiveColumnHandle> projections,
        boolean useOrcColumnNames,
        boolean isFullAcid,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        DateTimeZone legacyFileTimeZone,
        OrcReaderOptions options,
        Optional<AcidInfo> acidInfo,
        OptionalInt bucketNumber,
        boolean originalFile,
        AcidTransaction transaction,
        FileFormatDataSourceStats stats) {
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == REGULAR, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());
    OrcDataSource orcDataSource;
    boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), estimatedFileSize, options, inputStream, stats);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
    try {
        Optional<OrcReader> optionalOrcReader = OrcReader.createOrcReader(orcDataSource, options);
        if (optionalOrcReader.isEmpty()) {
            return new EmptyPageSource();
        }
        OrcReader reader = optionalOrcReader.get();
        if (!originalFile && acidInfo.isPresent() && !acidInfo.get().isOrcAcidVersionValidated()) {
            validateOrcAcidVersion(path, reader);
        }
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        int actualColumnCount = columns.size() + (isFullAcid ? 3 : 0);
        List<OrcColumn> fileReadColumns = new ArrayList<>(actualColumnCount);
        List<Type> fileReadTypes = new ArrayList<>(actualColumnCount);
        List<OrcReader.ProjectedLayout> fileReadLayouts = new ArrayList<>(actualColumnCount);
        if (isFullAcid && !originalFilesPresent) {
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
            fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_STRUCT.toLowerCase(ENGLISH)).getNestedColumns(), columns);
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadLayouts.add(fullyProjectedLayout());
            fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadLayouts.add(fullyProjectedLayout());
        }
        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }
        Map<String, List<List<String>>> projectionsByColumnName = ImmutableMap.of();
        Map<Integer, List<List<String>>> projectionsByColumnIndex = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            projectionsByColumnName = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseColumnName, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        } else {
            projectionsByColumnIndex = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseHiveColumnIndex, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
        }
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder()
                .setBloomFiltersEnabled(options.isBloomFiltersEnabled())
                .setDomainCompactionThreshold(domainCompactionThreshold);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        for (HiveColumnHandle column : columns) {
            OrcColumn orcColumn = null;
            OrcReader.ProjectedLayout projectedLayout = null;
            Map<Optional<HiveColumnProjectionInfo>, Domain> columnDomains = null;
            if (useOrcColumnNames || isFullAcid) {
                String columnName = column.getName().toLowerCase(ENGLISH);
                orcColumn = fileColumnsByName.get(columnName);
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnName.get(columnName));
                    columnDomains = effectivePredicateDomains.entrySet().stream().filter(columnDomain -> columnDomain.getKey().getBaseColumnName().toLowerCase(ENGLISH).equals(columnName)).collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            } else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getBaseHiveColumnIndex());
                if (orcColumn != null) {
                    projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnIndex.get(column.getBaseHiveColumnIndex()));
                    columnDomains = effectivePredicateDomains.entrySet().stream().filter(columnDomain -> columnDomain.getKey().getBaseHiveColumnIndex() == column.getBaseHiveColumnIndex()).collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
                }
            }
            Type readType = column.getType();
            if (orcColumn != null) {
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                fileReadLayouts.add(projectedLayout);
                // Add predicates on top-level and nested columns
                for (Map.Entry<Optional<HiveColumnProjectionInfo>, Domain> columnDomain : columnDomains.entrySet()) {
                    OrcColumn nestedColumn = getNestedColumn(orcColumn, columnDomain.getKey());
                    if (nestedColumn != null) {
                        predicateBuilder.addColumn(nestedColumn.getColumnId(), columnDomain.getValue());
                    }
                }
            } else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }
        OrcRecordReader recordReader = reader.createRecordReader(
                fileReadColumns,
                fileReadTypes,
                fileReadLayouts,
                predicateBuilder.build(),
                start,
                length,
                legacyFileTimeZone,
                memoryUsage,
                INITIAL_BATCH_SIZE,
                exception -> handleException(orcDataSource.getId(), exception),
                NameBasedFieldMapper::create);
        Optional<OrcDeletedRows> deletedRows = acidInfo.map(info -> new OrcDeletedRows(
                path.getName(),
                new OrcDeleteDeltaPageSourceFactory(options, identity, configuration, hdfsEnvironment, stats),
                identity,
                configuration,
                hdfsEnvironment,
                info,
                bucketNumber,
                memoryUsage));
        Optional<Long> originalFileRowId = acidInfo
                .filter(OrcPageSourceFactory::hasOriginalFiles)
                .map(info -> OriginalFilesUtils.getPrecedingRowCount(
                        acidInfo.get().getOriginalFiles(),
                        path,
                        hdfsEnvironment,
                        identity,
                        options,
                        configuration,
                        stats));
        if (transaction.isDelete()) {
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(ColumnAdaptation.originalFileRowIdColumn(startingRowId, bucket));
            } else {
                columnAdaptations.add(ColumnAdaptation.rowIdColumn());
            }
        } else if (transaction.isUpdate()) {
            HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
            List<HiveColumnHandle> dependencyColumns = projections.stream().filter(HiveColumnHandle::isBaseColumn).collect(toImmutableList());
            if (originalFile) {
                int bucket = bucketNumber.orElse(0);
                long startingRowId = originalFileRowId.orElse(0L);
                columnAdaptations.add(updatedRowColumnsWithOriginalFiles(startingRowId, bucket, updateProcessor, dependencyColumns));
            } else {
                columnAdaptations.add(updatedRowColumns(updateProcessor, dependencyColumns));
            }
        }
        return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, originalFileRowId, memoryUsage, stats);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof TrinoException) {
            throw (TrinoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new TrinoException(HIVE_MISSING_DATA, message, e);
        }
        throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
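The createOrcPageSource snippet groups the requested projections by their base column (projectionsByColumnName / projectionsByColumnIndex) with Collectors.groupingBy plus mapping. A minimal, self-contained sketch of that stream shape, using a hypothetical Projection record in place of HiveColumnHandle (illustrative only, not taken from the Trino sources):

import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toList;

import java.util.List;
import java.util.Map;

public class ProjectionsByBaseColumn {
    // Hypothetical stand-in for HiveColumnHandle: a base column name plus a dereference path
    record Projection(String baseColumnName, List<String> dereferences) {}

    public static void main(String[] args) {
        List<Projection> projections = List.of(
                new Projection("address", List.of("city")),
                new Projection("address", List.of("zip")),
                new Projection("name", List.of()));

        // Same groupingBy/mapping shape as projectionsByColumnName in the snippet above
        Map<String, List<List<String>>> byBaseColumn = projections.stream()
                .collect(groupingBy(Projection::baseColumnName,
                        mapping(Projection::dereferences, toList())));

        // e.g. {address=[[city], [zip]], name=[[]]} (map ordering may vary)
        System.out.println(byBaseColumn);
    }
}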

Example 7 with AcidTransaction

Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.

In the class ThriftHiveMetastore, the method updateTableStatistics:

@Override
public void updateTableStatistics(HiveIdentity identity, String databaseName, String tableName, AcidTransaction transaction, Function<PartitionStatistics, PartitionStatistics> update) {
    Table originalTable = getTable(identity, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    PartitionStatistics currentStatistics = getTableStatistics(identity, originalTable);
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    Table modifiedTable = originalTable.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedTable.setParameters(updateStatisticsParameters(modifiedTable.getParameters(), basicStatistics));
    if (transaction.isAcidTransactionRunning()) {
        modifiedTable.setWriteId(transaction.getWriteId());
    }
    alterTable(identity, databaseName, tableName, modifiedTable);
    io.trino.plugin.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable);
    OptionalLong rowCount = basicStatistics.getRowCount();
    List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream().flatMap(entry -> {
        Optional<Column> column = table.getColumn(entry.getKey());
        if (column.isEmpty() && isAvroTableWithSchemaSet(modifiedTable)) {
            // Avro tables can have a different effective schema than the one declared in the metastore, and the
            // metastore does not allow storing statistics for a column it does not know about.
            return Stream.of();
        }
        HiveType type = column.orElseThrow(() -> new IllegalStateException("Column not found: " + entry.getKey())).getType();
        return Stream.of(createMetastoreColumnStatistics(entry.getKey(), type, entry.getValue(), rowCount));
    }).collect(toImmutableList());
    if (!metastoreColumnStatistics.isEmpty()) {
        setTableColumnStatistics(identity, databaseName, tableName, metastoreColumnStatistics);
    }
    Set<String> removedColumnStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
    removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(identity, databaseName, tableName, column));
}
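updateTableStatistics is a read-modify-write: fetch the current statistics, apply the caller-supplied Function, write the result back, and drop statistics for columns that vanished. A minimal, self-contained sketch of that pattern with plain Java maps, using removeAll in place of Guava's Sets.difference (hypothetical column names and counts, not taken from the Trino sources):

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

public class StatisticsUpdateSketch {
    public static void main(String[] args) {
        // Current column statistics keyed by column name (hypothetical values)
        Map<String, Long> current = Map.of("orderkey", 100L, "custkey", 90L);

        // The caller supplies the update as a function, like the
        // Function<PartitionStatistics, PartitionStatistics> parameter in the snippet above
        Function<Map<String, Long>, Map<String, Long>> update =
                stats -> Map.of("orderkey", 120L);

        Map<String, Long> updated = update.apply(current);

        // Columns present before but absent after the update must have their statistics
        // dropped, mirroring Sets.difference(current.keySet(), updated.keySet())
        Set<String> removed = new HashSet<>(current.keySet());
        removed.removeAll(updated.keySet());

        System.out.println(removed); // [custkey]
    }
}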

Example 8 with AcidTransaction

Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.

In the class OrcFileWriterFactory, the method createFileWriter:

@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf configuration,
        ConnectorSession session,
        OptionalInt bucketNumber,
        AcidTransaction transaction,
        boolean useAcidSchema,
        WriterKind writerKind) {
    if (!OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    CompressionKind compression = getCompression(schema, configuration);
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session))).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    if (transaction.isAcidDeleteOperation(writerKind)) {
        // For delete, set the "row" column to -1
        fileInputColumnIndexes[fileInputColumnIndexes.length - 1] = -1;
    }
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
        OrcDataSink orcDataSink = createOrcDataSink(fileSystem, path);
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSystem.getFileStatus(path).getLen(), new OrcReaderOptions(), fileSystem.open(path), readStats);
                } catch (IOException e) {
                    throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        if (transaction.isInsert() && useAcidSchema) {
            // Only add the ACID columns if the request is for insert-type operations; for delete operations,
            // the columns are added by the caller. This is because the ACID columns for delete operations
            // depend on the rows being deleted, whereas the ACID columns for INSERT are completely determined
            // by bucket and writeId.
            Type rowType = createRowType(fileColumnNames, fileColumnTypes);
            fileColumnNames = ACID_COLUMN_NAMES;
            fileColumnTypes = createAcidColumnPrestoTypes(rowType);
        }
        return Optional.of(new OrcFileWriter(
                orcDataSink,
                writerKind,
                transaction,
                useAcidSchema,
                bucketNumber,
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                createRootOrcType(fileColumnNames, fileColumnTypes),
                compression,
                getOrcWriterOptions(schema, orcWriterOptions)
                        .withStripeMinSize(getOrcOptimizedWriterMinStripeSize(session))
                        .withStripeMaxSize(getOrcOptimizedWriterMaxStripeSize(session))
                        .withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session))
                        .withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)),
                fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .buildOrThrow(),
                validationInputFactory,
                getOrcOptimizedWriterValidateMode(session),
                stats));
    } catch (IOException e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
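fileInputColumnIndexes maps each column of the existing file schema to its position in the writer's input, with -1 for columns the writer does not provide; for an ACID delete the trailing "row" column is forced to -1. A minimal, self-contained sketch of that index mapping (hypothetical column names, not taken from the Trino sources):

import java.util.List;

public class ColumnIndexMapping {
    public static void main(String[] args) {
        // Column order as declared in the existing table or partition schema (hypothetical)
        List<String> fileColumnNames = List.of("orderkey", "custkey", "row");
        // Column order as provided by the writer (hypothetical)
        List<String> inputColumnNames = List.of("custkey", "orderkey");

        // For each file column, find its position in the writer's input (-1 if absent),
        // mirroring fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray()
        int[] fileInputColumnIndexes = fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();

        // For an ACID delete, the last index (the synthetic "row" column) is forced to -1,
        // because the writer does not supply that column
        boolean acidDelete = true;
        if (acidDelete) {
            fileInputColumnIndexes[fileInputColumnIndexes.length - 1] = -1;
        }

        System.out.println(java.util.Arrays.toString(fileInputColumnIndexes)); // [1, 0, -1]
    }
}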

Example 9 with AcidTransaction

Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.

In the class ParquetFileWriterFactory, the method createFileWriter:

@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf conf,
        ConnectorSession session,
        OptionalInt bucketNumber,
        AcidTransaction transaction,
        boolean useAcidSchema,
        WriterKind writerKind) {
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .setBatchSize(HiveSessionProperties.getParquetBatchSize(session))
            .build();
    CompressionCodecName compressionCodecName = getCompression(conf);
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session))).collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(fileColumnTypes, fileColumnNames);
        return Optional.of(new ParquetFileWriter(fileSystem.create(path, false), rollbackAction, fileColumnTypes, schemaConverter.getMessageType(), schemaConverter.getPrimitiveTypes(), parquetWriterOptions, fileInputColumnIndexes, compressionCodecName, nodeVersion.toString()));
    } catch (IOException e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
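Both file writer factories register a rollbackAction, a Callable<Void> that deletes the partially written file if the write has to be abandoned. A minimal, self-contained sketch of the same shape using java.nio.file instead of Hadoop's FileSystem (temporary file only, not taken from the Trino sources):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.Callable;

public class RollbackActionSketch {
    public static void main(String[] args) throws Exception {
        Path target = Files.createTempFile("writer-output", ".tmp");

        // Same shape as rollbackAction in the snippets above: a Callable<Void> that
        // removes the partially written file when the write is abandoned
        Callable<Void> rollbackAction = () -> {
            Files.deleteIfExists(target);
            return null;
        };

        try {
            // ... write the file here ...
            throw new IOException("simulated write failure");
        } catch (IOException e) {
            rollbackAction.call();
        }

        System.out.println(Files.exists(target)); // false
    }
}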

Example 10 with AcidTransaction

Use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.

In the class SemiTransactionalHiveMetastore, the method commitShared:

@GuardedBy("this")
private void commitShared() {
    checkHoldsLock();
    AcidTransaction transaction = currentHiveTransaction.isEmpty() ? NO_ACID_TRANSACTION : currentHiveTransaction.get().getTransaction();
    Committer committer = new Committer(transaction);
    try {
        for (Map.Entry<SchemaTableName, Action<TableAndMore>> entry : tableActions.entrySet()) {
            SchemaTableName schemaTableName = entry.getKey();
            Action<TableAndMore> action = entry.getValue();
            switch(action.getType()) {
                case DROP:
                    committer.prepareDropTable(schemaTableName);
                    break;
                case ALTER:
                    committer.prepareAlterTable(action.getHdfsContext(), action.getQueryId(), action.getData());
                    break;
                case ADD:
                    committer.prepareAddTable(action.getHdfsContext(), action.getQueryId(), action.getData());
                    break;
                case INSERT_EXISTING:
                    committer.prepareInsertExistingTable(action.getHdfsContext(), action.getQueryId(), action.getData());
                    break;
                case DELETE_ROWS:
                    committer.prepareDeleteRowsFromExistingTable(action.getHdfsContext(), action.getData());
                    break;
                case UPDATE:
                    committer.prepareUpdateExistingTable(action.getHdfsContext(), action.getData());
                    break;
                default:
                    throw new IllegalStateException("Unknown action type");
            }
        }
        for (Map.Entry<SchemaTableName, Map<List<String>, Action<PartitionAndMore>>> tableEntry : partitionActions.entrySet()) {
            SchemaTableName schemaTableName = tableEntry.getKey();
            for (Map.Entry<List<String>, Action<PartitionAndMore>> partitionEntry : tableEntry.getValue().entrySet()) {
                List<String> partitionValues = partitionEntry.getKey();
                Action<PartitionAndMore> action = partitionEntry.getValue();
                switch(action.getType()) {
                    case DROP:
                        committer.prepareDropPartition(schemaTableName, partitionValues, true);
                        break;
                    case DROP_PRESERVE_DATA:
                        committer.prepareDropPartition(schemaTableName, partitionValues, false);
                        break;
                    case ALTER:
                        committer.prepareAlterPartition(action.getHdfsContext(), action.getQueryId(), action.getData());
                        break;
                    case ADD:
                        committer.prepareAddPartition(action.getHdfsContext(), action.getQueryId(), action.getData());
                        break;
                    case INSERT_EXISTING:
                        committer.prepareInsertExistingPartition(action.getHdfsContext(), action.getQueryId(), action.getData());
                        break;
                    case UPDATE:
                    case DELETE_ROWS:
                        break;
                    default:
                        throw new IllegalStateException("Unknown action type");
                }
            }
        }
        // Wait for all renames submitted for "INSERT_EXISTING" action to finish
        committer.waitForAsyncRenames();
        // At this point, all file system operations, whether asynchronously issued or not, have completed successfully.
        // We are moving on to metastore operations now.
        committer.executeAddTableOperations();
        committer.executeAlterTableOperations();
        committer.executeAlterPartitionOperations();
        committer.executeAddPartitionOperations(transaction);
        committer.executeUpdateStatisticsOperations(transaction);
    } catch (Throwable t) {
        log.warn("Rolling back due to metastore commit failure: %s", t.getMessage());
        try {
            committer.cancelUnstartedAsyncRenames();
            committer.undoUpdateStatisticsOperations(transaction);
            committer.undoAddPartitionOperations();
            committer.undoAddTableOperations();
            committer.waitForAsyncRenamesSuppressThrowables();
            // fileRenameFutures must all come back before any file system cleanups are carried out.
            // Otherwise, files that should be deleted may be created after cleanup is done.
            committer.executeCleanupTasksForAbort(declaredIntentionsToWrite);
            committer.executeRenameTasksForAbort();
            // Partition directory must be put back before relevant metastore operation can be undone
            committer.undoAlterTableOperations();
            committer.undoAlterPartitionOperations();
            rollbackShared();
        } catch (RuntimeException e) {
            t.addSuppressed(new Exception("Failed to roll back after commit failure", e));
        }
        throw t;
    } finally {
        committer.executeTableInvalidationCallback();
    }
    try {
        // After this line, operations are no longer reversible.
        // The next section will deal with "dropping table/partition". Commit may still fail in
        // this section. Even if commit fails, cleanups, instead of rollbacks, will be executed.
        committer.executeIrreversibleMetastoreOperations();
    // If control flow reached this point, this commit is considered successful no matter
    // what happens later. The only kind of operations that haven't been carried out yet
    // are cleanups.
    // Control flow proceeds to the finally block next, where the cleanup always runs.
    } finally {
        // In this method, all operations are best-effort clean up operations.
        // If any operation fails, the error will be logged and ignored.
        // Additionally, other clean up operations should still be attempted.
        // Execute deletion tasks
        committer.executeDeletionTasksForFinish();
        // Clean up staging directories (that may recursively contain empty directories or stale files from failed attempts)
        committer.pruneAndDeleteStagingDirectories(declaredIntentionsToWrite);
    }
}
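commitShared preserves the original commit failure as the primary error: the compensating rollback runs in a nested try, any rollback failure is attached with addSuppressed, and the original Throwable is rethrown. A minimal, self-contained sketch of that error-handling shape with simulated failures (not taken from the Trino sources):

public class CommitWithRollbackSketch {
    public static void main(String[] args) {
        try {
            runCommit();
        } catch (RuntimeException t) {
            System.out.println("primary failure: " + t.getMessage());
            for (Throwable suppressed : t.getSuppressed()) {
                System.out.println("suppressed: " + suppressed.getMessage());
            }
        }
    }

    // Same shape as commitShared: if the commit fails, attempt the compensating rollback,
    // attach any rollback failure as a suppressed exception, and rethrow the original
    // error so the caller sees the primary cause
    static void runCommit() {
        try {
            commit();
        } catch (Throwable t) {
            try {
                rollback();
            } catch (RuntimeException e) {
                t.addSuppressed(new Exception("Failed to roll back after commit failure", e));
            }
            throw t;
        }
    }

    static void commit() {
        throw new RuntimeException("simulated metastore commit failure");
    }

    static void rollback() {
        throw new RuntimeException("simulated rollback failure");
    }
}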

Aggregations

AcidTransaction (io.trino.plugin.hive.acid.AcidTransaction): 13 usages
TrinoException (io.trino.spi.TrinoException): 12 usages
IOException (java.io.IOException): 10 usages
List (java.util.List): 10 usages
Path (org.apache.hadoop.fs.Path): 10 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 9 usages
Type (io.trino.spi.type.Type): 9 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 9 usages
Optional (java.util.Optional): 9 usages
ConnectorSession (io.trino.spi.connector.ConnectorSession): 8 usages
SchemaTableName (io.trino.spi.connector.SchemaTableName): 8 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 8 usages
ImmutableList (com.google.common.collect.ImmutableList): 7 usages
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 7 usages
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 7 usages
TableNotFoundException (io.trino.spi.connector.TableNotFoundException): 7 usages
Map (java.util.Map): 7 usages
OptionalInt (java.util.OptionalInt): 7 usages
Properties (java.util.Properties): 7 usages
Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 6 usages