use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
the class OrcPageSourceFactory method createOrcPageSource.
private ConnectorPageSource createOrcPageSource(HdfsEnvironment hdfsEnvironment, ConnectorIdentity identity, Configuration configuration, Path path, long start, long length, long estimatedFileSize, List<HiveColumnHandle> columns, List<HiveColumnHandle> projections, boolean useOrcColumnNames, boolean isFullAcid, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone legacyFileTimeZone, OrcReaderOptions options, Optional<AcidInfo> acidInfo, OptionalInt bucketNumber, boolean originalFile, AcidTransaction transaction, FileFormatDataSourceStats stats) {
for (HiveColumnHandle column : columns) {
checkArgument(column.getColumnType() == REGULAR, "column type must be regular: %s", column);
}
checkArgument(!effectivePredicate.isNone());
OrcDataSource orcDataSource;
boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(identity, path, configuration);
FSDataInputStream inputStream = hdfsEnvironment.doAs(identity, () -> fileSystem.open(path));
orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), estimatedFileSize, options, inputStream, stats);
} catch (Exception e) {
if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, e);
}
throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
}
AggregatedMemoryContext memoryUsage = newSimpleAggregatedMemoryContext();
try {
Optional<OrcReader> optionalOrcReader = OrcReader.createOrcReader(orcDataSource, options);
if (optionalOrcReader.isEmpty()) {
return new EmptyPageSource();
}
OrcReader reader = optionalOrcReader.get();
if (!originalFile && acidInfo.isPresent() && !acidInfo.get().isOrcAcidVersionValidated()) {
validateOrcAcidVersion(path, reader);
}
List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
int actualColumnCount = columns.size() + (isFullAcid ? 3 : 0);
List<OrcColumn> fileReadColumns = new ArrayList<>(actualColumnCount);
List<Type> fileReadTypes = new ArrayList<>(actualColumnCount);
List<OrcReader.ProjectedLayout> fileReadLayouts = new ArrayList<>(actualColumnCount);
if (isFullAcid && !originalFilesPresent) {
verifyAcidSchema(reader, path);
Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_STRUCT.toLowerCase(ENGLISH)).getNestedColumns(), columns);
fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
fileReadTypes.add(BIGINT);
fileReadLayouts.add(fullyProjectedLayout());
fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
fileReadTypes.add(INTEGER);
fileReadLayouts.add(fullyProjectedLayout());
fileReadColumns.add(acidColumnsByName.get(AcidSchema.ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
fileReadTypes.add(BIGINT);
fileReadLayouts.add(fullyProjectedLayout());
}
Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
if (useOrcColumnNames || isFullAcid) {
verifyFileHasColumnNames(fileColumns, path);
// Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
}
Map<String, List<List<String>>> projectionsByColumnName = ImmutableMap.of();
Map<Integer, List<List<String>>> projectionsByColumnIndex = ImmutableMap.of();
if (useOrcColumnNames || isFullAcid) {
projectionsByColumnName = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseColumnName, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
} else {
projectionsByColumnIndex = projections.stream().collect(Collectors.groupingBy(HiveColumnHandle::getBaseHiveColumnIndex, mapping(OrcPageSourceFactory::getDereferencesAsList, toList())));
}
TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(options.isBloomFiltersEnabled()).setDomainCompactionThreshold(domainCompactionThreshold);
Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
for (HiveColumnHandle column : columns) {
OrcColumn orcColumn = null;
OrcReader.ProjectedLayout projectedLayout = null;
Map<Optional<HiveColumnProjectionInfo>, Domain> columnDomains = null;
if (useOrcColumnNames || isFullAcid) {
String columnName = column.getName().toLowerCase(ENGLISH);
orcColumn = fileColumnsByName.get(columnName);
if (orcColumn != null) {
projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnName.get(columnName));
columnDomains = effectivePredicateDomains.entrySet().stream().filter(columnDomain -> columnDomain.getKey().getBaseColumnName().toLowerCase(ENGLISH).equals(columnName)).collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
}
} else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
orcColumn = fileColumns.get(column.getBaseHiveColumnIndex());
if (orcColumn != null) {
projectedLayout = createProjectedLayout(orcColumn, projectionsByColumnIndex.get(column.getBaseHiveColumnIndex()));
columnDomains = effectivePredicateDomains.entrySet().stream().filter(columnDomain -> columnDomain.getKey().getBaseHiveColumnIndex() == column.getBaseHiveColumnIndex()).collect(toImmutableMap(columnDomain -> columnDomain.getKey().getHiveColumnProjectionInfo(), Map.Entry::getValue));
}
}
Type readType = column.getType();
if (orcColumn != null) {
int sourceIndex = fileReadColumns.size();
columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
fileReadColumns.add(orcColumn);
fileReadTypes.add(readType);
fileReadLayouts.add(projectedLayout);
// Add predicates on top-level and nested columns
for (Map.Entry<Optional<HiveColumnProjectionInfo>, Domain> columnDomain : columnDomains.entrySet()) {
OrcColumn nestedColumn = getNestedColumn(orcColumn, columnDomain.getKey());
if (nestedColumn != null) {
predicateBuilder.addColumn(nestedColumn.getColumnId(), columnDomain.getValue());
}
}
} else {
columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
}
}
OrcRecordReader recordReader = reader.createRecordReader(fileReadColumns, fileReadTypes, fileReadLayouts, predicateBuilder.build(), start, length, legacyFileTimeZone, memoryUsage, INITIAL_BATCH_SIZE, exception -> handleException(orcDataSource.getId(), exception), NameBasedFieldMapper::create);
Optional<OrcDeletedRows> deletedRows = acidInfo.map(info -> new OrcDeletedRows(path.getName(), new OrcDeleteDeltaPageSourceFactory(options, identity, configuration, hdfsEnvironment, stats), identity, configuration, hdfsEnvironment, info, bucketNumber, memoryUsage));
Optional<Long> originalFileRowId = acidInfo.filter(OrcPageSourceFactory::hasOriginalFiles).map(info -> OriginalFilesUtils.getPrecedingRowCount(acidInfo.get().getOriginalFiles(), path, hdfsEnvironment, identity, options, configuration, stats));
if (transaction.isDelete()) {
if (originalFile) {
int bucket = bucketNumber.orElse(0);
long startingRowId = originalFileRowId.orElse(0L);
columnAdaptations.add(ColumnAdaptation.originalFileRowIdColumn(startingRowId, bucket));
} else {
columnAdaptations.add(ColumnAdaptation.rowIdColumn());
}
} else if (transaction.isUpdate()) {
HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
List<HiveColumnHandle> dependencyColumns = projections.stream().filter(HiveColumnHandle::isBaseColumn).collect(toImmutableList());
if (originalFile) {
int bucket = bucketNumber.orElse(0);
long startingRowId = originalFileRowId.orElse(0L);
columnAdaptations.add(updatedRowColumnsWithOriginalFiles(startingRowId, bucket, updateProcessor, dependencyColumns));
} else {
columnAdaptations.add(updatedRowColumns(updateProcessor, dependencyColumns));
}
}
return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, originalFileRowId, memoryUsage, stats);
} catch (Exception e) {
try {
orcDataSource.close();
} catch (IOException ignored) {
}
if (e instanceof TrinoException) {
throw (TrinoException) e;
}
String message = splitError(e, path, start, length);
if (e instanceof BlockMissingException) {
throw new TrinoException(HIVE_MISSING_DATA, message, e);
}
throw new TrinoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
}
}
use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
the class ThriftHiveMetastore method updateTableStatistics.
@Override
public void updateTableStatistics(HiveIdentity identity, String databaseName, String tableName, AcidTransaction transaction, Function<PartitionStatistics, PartitionStatistics> update) {
Table originalTable = getTable(identity, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
PartitionStatistics currentStatistics = getTableStatistics(identity, originalTable);
PartitionStatistics updatedStatistics = update.apply(currentStatistics);
Table modifiedTable = originalTable.deepCopy();
HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
modifiedTable.setParameters(updateStatisticsParameters(modifiedTable.getParameters(), basicStatistics));
if (transaction.isAcidTransactionRunning()) {
modifiedTable.setWriteId(transaction.getWriteId());
}
alterTable(identity, databaseName, tableName, modifiedTable);
io.trino.plugin.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable);
OptionalLong rowCount = basicStatistics.getRowCount();
List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream().flatMap(entry -> {
Optional<Column> column = table.getColumn(entry.getKey());
if (column.isEmpty() && isAvroTableWithSchemaSet(modifiedTable)) {
// to store statistics for a column it does not know about.
return Stream.of();
}
HiveType type = column.orElseThrow(() -> new IllegalStateException("Column not found: " + entry.getKey())).getType();
return Stream.of(createMetastoreColumnStatistics(entry.getKey(), type, entry.getValue(), rowCount));
}).collect(toImmutableList());
if (!metastoreColumnStatistics.isEmpty()) {
setTableColumnStatistics(identity, databaseName, tableName, metastoreColumnStatistics);
}
Set<String> removedColumnStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(identity, databaseName, tableName, column));
}
use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
the class OrcFileWriterFactory method createFileWriter.
@Override
public Optional<FileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, boolean useAcidSchema, WriterKind writerKind) {
if (!OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
CompressionKind compression = getCompression(schema, configuration);
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session))).collect(toList());
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
if (transaction.isAcidDeleteOperation(writerKind)) {
// For delete, set the "row" column to -1
fileInputColumnIndexes[fileInputColumnIndexes.length - 1] = -1;
}
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
OrcDataSink orcDataSink = createOrcDataSink(fileSystem, path);
Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
if (isOrcOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSystem.getFileStatus(path).getLen(), new OrcReaderOptions(), fileSystem.open(path), readStats);
} catch (IOException e) {
throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
if (transaction.isInsert() && useAcidSchema) {
// Only add the ACID columns if the request is for insert-type operations - - for delete operations,
// the columns are added by the caller. This is because the ACID columns for delete operations
// depend on the rows being deleted, whereas the ACID columns for INSERT are completely determined
// by bucket and writeId.
Type rowType = createRowType(fileColumnNames, fileColumnTypes);
fileColumnNames = ACID_COLUMN_NAMES;
fileColumnTypes = createAcidColumnPrestoTypes(rowType);
}
return Optional.of(new OrcFileWriter(orcDataSink, writerKind, transaction, useAcidSchema, bucketNumber, rollbackAction, fileColumnNames, fileColumnTypes, createRootOrcType(fileColumnNames, fileColumnTypes), compression, getOrcWriterOptions(schema, orcWriterOptions).withStripeMinSize(getOrcOptimizedWriterMinStripeSize(session)).withStripeMaxSize(getOrcOptimizedWriterMaxStripeSize(session)).withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session)).withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session)).withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)), fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(PRESTO_VERSION_NAME, nodeVersion.toString()).put(PRESTO_QUERY_ID_NAME, session.getQueryId()).buildOrThrow(), validationInputFactory, getOrcOptimizedWriterValidateMode(session), stats));
} catch (IOException e) {
throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
}
}
use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
the class ParquetFileWriterFactory method createFileWriter.
@Override
public Optional<FileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf conf, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, boolean useAcidSchema, WriterKind writerKind) {
if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
return Optional.empty();
}
if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder().setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session)).setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session)).setBatchSize(HiveSessionProperties.getParquetBatchSize(session)).build();
CompressionCodecName compressionCodecName = getCompression(conf);
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session))).collect(toList());
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, conf);
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
ParquetSchemaConverter schemaConverter = new ParquetSchemaConverter(fileColumnTypes, fileColumnNames);
return Optional.of(new ParquetFileWriter(fileSystem.create(path, false), rollbackAction, fileColumnTypes, schemaConverter.getMessageType(), schemaConverter.getPrimitiveTypes(), parquetWriterOptions, fileInputColumnIndexes, compressionCodecName, nodeVersion.toString()));
} catch (IOException e) {
throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
}
}
use of io.trino.plugin.hive.acid.AcidTransaction in project trino by trinodb.
the class SemiTransactionalHiveMetastore method commitShared.
@GuardedBy("this")
private void commitShared() {
checkHoldsLock();
AcidTransaction transaction = currentHiveTransaction.isEmpty() ? NO_ACID_TRANSACTION : currentHiveTransaction.get().getTransaction();
Committer committer = new Committer(transaction);
try {
for (Map.Entry<SchemaTableName, Action<TableAndMore>> entry : tableActions.entrySet()) {
SchemaTableName schemaTableName = entry.getKey();
Action<TableAndMore> action = entry.getValue();
switch(action.getType()) {
case DROP:
committer.prepareDropTable(schemaTableName);
break;
case ALTER:
committer.prepareAlterTable(action.getHdfsContext(), action.getQueryId(), action.getData());
break;
case ADD:
committer.prepareAddTable(action.getHdfsContext(), action.getQueryId(), action.getData());
break;
case INSERT_EXISTING:
committer.prepareInsertExistingTable(action.getHdfsContext(), action.getQueryId(), action.getData());
break;
case DELETE_ROWS:
committer.prepareDeleteRowsFromExistingTable(action.getHdfsContext(), action.getData());
break;
case UPDATE:
committer.prepareUpdateExistingTable(action.getHdfsContext(), action.getData());
break;
default:
throw new IllegalStateException("Unknown action type");
}
}
for (Map.Entry<SchemaTableName, Map<List<String>, Action<PartitionAndMore>>> tableEntry : partitionActions.entrySet()) {
SchemaTableName schemaTableName = tableEntry.getKey();
for (Map.Entry<List<String>, Action<PartitionAndMore>> partitionEntry : tableEntry.getValue().entrySet()) {
List<String> partitionValues = partitionEntry.getKey();
Action<PartitionAndMore> action = partitionEntry.getValue();
switch(action.getType()) {
case DROP:
committer.prepareDropPartition(schemaTableName, partitionValues, true);
break;
case DROP_PRESERVE_DATA:
committer.prepareDropPartition(schemaTableName, partitionValues, false);
break;
case ALTER:
committer.prepareAlterPartition(action.getHdfsContext(), action.getQueryId(), action.getData());
break;
case ADD:
committer.prepareAddPartition(action.getHdfsContext(), action.getQueryId(), action.getData());
break;
case INSERT_EXISTING:
committer.prepareInsertExistingPartition(action.getHdfsContext(), action.getQueryId(), action.getData());
break;
case UPDATE:
case DELETE_ROWS:
break;
default:
throw new IllegalStateException("Unknown action type");
}
}
}
// Wait for all renames submitted for "INSERT_EXISTING" action to finish
committer.waitForAsyncRenames();
// At this point, all file system operations, whether asynchronously issued or not, have completed successfully.
// We are moving on to metastore operations now.
committer.executeAddTableOperations();
committer.executeAlterTableOperations();
committer.executeAlterPartitionOperations();
committer.executeAddPartitionOperations(transaction);
committer.executeUpdateStatisticsOperations(transaction);
} catch (Throwable t) {
log.warn("Rolling back due to metastore commit failure: %s", t.getMessage());
try {
committer.cancelUnstartedAsyncRenames();
committer.undoUpdateStatisticsOperations(transaction);
committer.undoAddPartitionOperations();
committer.undoAddTableOperations();
committer.waitForAsyncRenamesSuppressThrowables();
// fileRenameFutures must all come back before any file system cleanups are carried out.
// Otherwise, files that should be deleted may be created after cleanup is done.
committer.executeCleanupTasksForAbort(declaredIntentionsToWrite);
committer.executeRenameTasksForAbort();
// Partition directory must be put back before relevant metastore operation can be undone
committer.undoAlterTableOperations();
committer.undoAlterPartitionOperations();
rollbackShared();
} catch (RuntimeException e) {
t.addSuppressed(new Exception("Failed to roll back after commit failure", e));
}
throw t;
} finally {
committer.executeTableInvalidationCallback();
}
try {
// After this line, operations are no longer reversible.
// The next section will deal with "dropping table/partition". Commit may still fail in
// this section. Even if commit fails, cleanups, instead of rollbacks, will be executed.
committer.executeIrreversibleMetastoreOperations();
// If control flow reached this point, this commit is considered successful no matter
// what happens later. The only kind of operations that haven't been carried out yet
// are cleanups.
// The program control flow will go to finally next. And cleanup will run because
// moveForwardInFinally has been set to false.
} finally {
// In this method, all operations are best-effort clean up operations.
// If any operation fails, the error will be logged and ignored.
// Additionally, other clean up operations should still be attempted.
// Execute deletion tasks
committer.executeDeletionTasksForFinish();
// Clean up staging directories (that may recursively contain empty directories or stale files from failed attempts)
committer.pruneAndDeleteStagingDirectories(declaredIntentionsToWrite);
}
}
Aggregations