
Example 1 with TransactionConflictException

Use of io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException in project trino by trinodb.

In class DeltaLakeMetadata, method finishWrite:

private void finishWrite(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<Slice> fragments) {
    DeltaLakeTableHandle handle = (DeltaLakeTableHandle) tableHandle;
    List<DeltaLakeUpdateResult> updateResults = fragments.stream().map(Slice::getBytes).map(deleteResultJsonCodec::fromJson).collect(toImmutableList());
    String tableLocation = metastore.getTableLocation(handle.getSchemaTableName(), session);
    DeltaLakeTableHandle.WriteType writeType = handle.getWriteType().orElseThrow();
    String operation;
    switch(writeType) {
        case DELETE:
            operation = DELETE_OPERATION;
            break;
        case UPDATE:
            operation = UPDATE_OPERATION;
            break;
        default:
            throw new TrinoException(NOT_SUPPORTED, "Unsupported write type: " + writeType);
    }
    boolean writeCommitted = false;
    try {
        TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, tableLocation);
        long createdTime = Instant.now().toEpochMilli();
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(tableLocation));
        long commitVersion = getMandatoryCurrentVersion(fileSystem, new Path(tableLocation)) + 1;
        if (commitVersion != handle.getReadVersion() + 1) {
            throw new TransactionConflictException(format("Conflicting concurrent writes found. Expected transaction log version: %s, actual version: %s", handle.getReadVersion(), commitVersion - 1));
        }
        Optional<Long> checkpointInterval = handle.getMetadataEntry().getCheckpointInterval();
        transactionLogWriter.appendCommitInfoEntry(new CommitInfoEntry(
                commitVersion, createdTime, session.getUser(), session.getUser(), operation,
                ImmutableMap.of("queryId", session.getQueryId()), null, null,
                "trino-" + nodeVersion + "-" + nodeId,
                0, // TODO Insert fills this in with, probably should do so here too
                ISOLATION_LEVEL, true));
        // TODO: Delta writes another field "operationMetrics" that I haven't
        // seen before. It contains delete/update metrics. Investigate/include it.
        long writeTimestamp = Instant.now().toEpochMilli();
        for (DeltaLakeUpdateResult updateResult : updateResults) {
            transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(updateResult.getOldFile(), writeTimestamp, true));
        }
        appendAddFileEntries(transactionLogWriter, updateResults.stream().map(DeltaLakeUpdateResult::getNewFile).filter(Optional::isPresent).map(Optional::get).collect(toImmutableList()), handle.getMetadataEntry().getOriginalPartitionColumns(), true);
        transactionLogWriter.flush();
        writeCommitted = true;
        writeCheckpointIfNeeded(session, new SchemaTableName(handle.getSchemaName(), handle.getTableName()), checkpointInterval, commitVersion);
    } catch (Exception e) {
        if (!writeCommitted) {
            // TODO perhaps it should happen in a background thread
            cleanupFailedWrite(session, tableLocation, updateResults.stream().map(DeltaLakeUpdateResult::getNewFile).filter(Optional::isPresent).map(Optional::get).collect(toImmutableList()));
        }
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Failed to write Delta Lake transaction log entry", e);
    }
}
Also used: Path(org.apache.hadoop.fs.Path) Optional(java.util.Optional) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) SchemaTableName(io.trino.spi.connector.SchemaTableName) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) IOException(java.io.IOException) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) TrinoException(io.trino.spi.TrinoException) FileNotFoundException(java.io.FileNotFoundException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Slice(io.airlift.slice.Slice) FileSystem(org.apache.hadoop.fs.FileSystem) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry)
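
Both examples on this page hinge on the same optimistic-concurrency check: just before committing, the writer recomputes the current transaction log version and throws TransactionConflictException if it no longer matches the version captured when the table was read. The following is a minimal, self-contained sketch of that check in isolation; the class and method names (TransactionLogConflictCheck, nextCommitVersion) are illustrative only, and the real logic lives inline in DeltaLakeMetadata as shown above.

import io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException;

import static java.lang.String.format;

final class TransactionLogConflictCheck
{
    private TransactionLogConflictCheck() {}

    /**
     * Computes the version a new commit should target and fails fast if another
     * writer has appended to the transaction log since the table was read.
     *
     * @param readVersion    transaction log version observed when the table was read
     * @param currentVersion latest version currently present in the transaction log
     * @return the version this commit should be written as (readVersion + 1)
     */
    static long nextCommitVersion(long readVersion, long currentVersion)
            throws TransactionConflictException
    {
        long commitVersion = currentVersion + 1;
        if (commitVersion != readVersion + 1) {
            // Same message shape as in finishWrite/finishInsert above: another writer won the race
            throw new TransactionConflictException(format(
                    "Conflicting concurrent writes found. Expected transaction log version: %s, actual version: %s",
                    readVersion, commitVersion - 1));
        }
        return commitVersion;
    }
}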

Example 2 with TransactionConflictException

Use of io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException in project trino by trinodb.

In class DeltaLakeMetadata, method finishInsert:

@Override
public Optional<ConnectorOutputMetadata> finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection<Slice> fragments, Collection<ComputedStatistics> computedStatistics) {
    DeltaLakeInsertTableHandle handle = (DeltaLakeInsertTableHandle) insertHandle;
    List<DataFileInfo> dataFileInfos = fragments.stream().map(Slice::getBytes).map(dataFileInfoCodec::fromJson).collect(toImmutableList());
    boolean writeCommitted = false;
    try {
        TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, handle.getLocation());
        long createdTime = Instant.now().toEpochMilli();
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(new HdfsContext(session), new Path(handle.getLocation()));
        long commitVersion = getMandatoryCurrentVersion(fileSystem, new Path(handle.getLocation())) + 1;
        if (commitVersion != handle.getReadVersion() + 1) {
            throw new TransactionConflictException(format("Conflicting concurrent writes found. Expected transaction log version: %s, actual version: %s", handle.getReadVersion(), commitVersion - 1));
        }
        Optional<Long> checkpointInterval = handle.getMetadataEntry().getCheckpointInterval();
        transactionLogWriter.appendCommitInfoEntry(new CommitInfoEntry(
                commitVersion, createdTime, session.getUser(), session.getUser(), INSERT_OPERATION,
                ImmutableMap.of("queryId", session.getQueryId()), null, null,
                "trino-" + nodeVersion + "-" + nodeId,
                // it is not obvious why we need to persist this readVersion
                handle.getReadVersion(),
                ISOLATION_LEVEL, true));
        // Note: during writes we want to preserve original case of partition columns
        List<String> partitionColumns = handle.getMetadataEntry().getOriginalPartitionColumns();
        appendAddFileEntries(transactionLogWriter, dataFileInfos, partitionColumns, true);
        transactionLogWriter.flush();
        writeCommitted = true;
        writeCheckpointIfNeeded(session, new SchemaTableName(handle.getSchemaName(), handle.getTableName()), checkpointInterval, commitVersion);
    } catch (Exception e) {
        if (!writeCommitted) {
            // TODO perhaps it should happen in a background thread
            cleanupFailedWrite(session, handle.getLocation(), dataFileInfos);
        }
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Failed to write Delta Lake transaction log entry", e);
    }
    return Optional.empty();
}
Also used: Path(org.apache.hadoop.fs.Path) TransactionConflictException(io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) SchemaTableName(io.trino.spi.connector.SchemaTableName) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) IOException(java.io.IOException) NotADeltaLakeTableException(io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) TrinoException(io.trino.spi.TrinoException) FileNotFoundException(java.io.FileNotFoundException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Slice(io.airlift.slice.Slice) FileSystem(org.apache.hadoop.fs.FileSystem) HiveWriteUtils.isS3FileSystem(io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) TransactionLogWriter(io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext)
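
In both examples, a TransactionConflictException raised during the commit is caught by the surrounding catch block and rewrapped as a TrinoException with DELTA_LAKE_BAD_WRITE, so the query fails rather than retrying. The sketch below shows one way code that owns the commit loop could instead retry against a refreshed read version; commitWithRetry, CommitAttempt, and MAX_COMMIT_ATTEMPTS are hypothetical names used for illustration and are not part of the Trino connector.

import io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException;

import java.util.function.LongSupplier;

final class CommitRetry
{
    // Hypothetical retry bound, not a value taken from the Trino connector
    private static final int MAX_COMMIT_ATTEMPTS = 3;

    private CommitRetry() {}

    @FunctionalInterface
    interface CommitAttempt
    {
        // Commits against the supplied read version and returns the committed version
        long commit(long readVersion)
                throws TransactionConflictException;
    }

    /**
     * Re-reads the current transaction log version and re-attempts the commit when a
     * TransactionConflictException signals that a concurrent writer committed first.
     */
    static long commitWithRetry(LongSupplier currentVersion, CommitAttempt commitAttempt)
            throws TransactionConflictException
    {
        TransactionConflictException lastConflict = null;
        for (int attempt = 0; attempt < MAX_COMMIT_ATTEMPTS; attempt++) {
            long readVersion = currentVersion.getAsLong();
            try {
                return commitAttempt.commit(readVersion);
            }
            catch (TransactionConflictException e) {
                lastConflict = e; // another writer appended a new version; refresh and retry
            }
        }
        throw lastConflict;
    }
}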

Aggregations

JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException) 2
Slice (io.airlift.slice.Slice) 2
NotADeltaLakeTableException (io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException) 2
CommitInfoEntry (io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) 2
TransactionConflictException (io.trino.plugin.deltalake.transactionlog.writer.TransactionConflictException) 2
TransactionLogWriter (io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriter) 2
HdfsContext (io.trino.plugin.hive.HdfsEnvironment.HdfsContext) 2
HiveWriteUtils.isS3FileSystem (io.trino.plugin.hive.util.HiveWriteUtils.isS3FileSystem) 2
TrinoException (io.trino.spi.TrinoException) 2
CatalogSchemaTableName (io.trino.spi.connector.CatalogSchemaTableName) 2
SchemaNotFoundException (io.trino.spi.connector.SchemaNotFoundException) 2
SchemaTableName (io.trino.spi.connector.SchemaTableName) 2
TableNotFoundException (io.trino.spi.connector.TableNotFoundException) 2
FileNotFoundException (java.io.FileNotFoundException) 2
IOException (java.io.IOException) 2
FileSystem (org.apache.hadoop.fs.FileSystem) 2
Path (org.apache.hadoop.fs.Path) 2
RemoveFileEntry (io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) 1
Optional (java.util.Optional) 1