Use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.
From the class DeltaLakeMetadata, method getTableHandleForOptimize.
private Optional<ConnectorTableExecuteHandle> getTableHandleForOptimize(DeltaLakeTableHandle tableHandle, Map<String, Object> executeProperties)
{
    DataSize maxScannedFileSize = (DataSize) executeProperties.get("file_size_threshold");
    List<DeltaLakeColumnHandle> columns = getColumns(tableHandle.getMetadataEntry()).stream()
            .filter(column -> column.getColumnType() != SYNTHESIZED)
            .collect(toImmutableList());
    return Optional.of(new DeltaLakeTableExecuteHandle(
            tableHandle.getSchemaTableName(),
            OPTIMIZE,
            new DeltaTableOptimizeHandle(tableHandle.getMetadataEntry(), columns, tableHandle.getMetadataEntry().getOriginalPartitionColumns(), maxScannedFileSize, Optional.empty()),
            tableHandle.getLocation()));
}
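The "file_size_threshold" entry is supplied by the engine from the OPTIMIZE procedure's execute properties as an airlift DataSize. A minimal sketch of what that map could look like; the threshold value and the map construction are assumptions for illustration, not taken from the source:

import io.airlift.units.DataSize;
import java.util.Map;

// Illustrative only: the engine assembles this map from the procedure's declared properties.
Map<String, Object> executeProperties = Map.of(
        "file_size_threshold", DataSize.valueOf("100MB")); // assumed threshold value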
Use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.
From the class DeltaLakeMetadata, method beginOptimize.
private BeginTableExecuteResult<ConnectorTableExecuteHandle, ConnectorTableHandle> beginOptimize(ConnectorSession session, DeltaLakeTableExecuteHandle executeHandle, DeltaLakeTableHandle table)
{
    DeltaTableOptimizeHandle optimizeHandle = (DeltaTableOptimizeHandle) executeHandle.getProcedureHandle();

    if (!allowWrite(session, table)) {
        String fileSystem = new Path(table.getLocation()).toUri().getScheme();
        throw new TrinoException(NOT_SUPPORTED, format("Optimize is not supported on the %s filesystem", fileSystem));
    }
    checkSupportedWriterVersion(session, table.getSchemaTableName());

    return new BeginTableExecuteResult<>(
            executeHandle.withProcedureHandle(optimizeHandle.withCurrentVersion(table.getReadVersion())),
            table.forOptimize(true, optimizeHandle.getMaxScannedFileSize()));
}
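The write guard above derives the filesystem scheme from the table location only to build the error message. A simplified sketch of that derivation using plain java.net.URI instead of Hadoop's Path; the location string is an assumed example:

import java.net.URI;

// Assumed example location; the real value comes from table.getLocation().
String location = "s3://bucket/warehouse/sales";
String fileSystem = URI.create(location).getScheme(); // "s3"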
Use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.
From the class DeltaLakeMetadata, method finishOptimize.
private void finishOptimize(ConnectorSession session, DeltaLakeTableExecuteHandle executeHandle, Collection<Slice> fragments, List<Object> splitSourceInfo)
{
    DeltaTableOptimizeHandle optimizeHandle = (DeltaTableOptimizeHandle) executeHandle.getProcedureHandle();
    long readVersion = optimizeHandle.getCurrentVersion().orElseThrow(() -> new IllegalArgumentException("currentVersion not set"));
    Optional<Long> checkpointInterval = optimizeHandle.getMetadataEntry().getCheckpointInterval();
    String tableLocation = executeHandle.getTableLocation();

    // paths to be deleted
    Set<Path> scannedPaths = splitSourceInfo.stream()
            .map(file -> new Path((String) file))
            .collect(toImmutableSet());

    // files to be added
    List<DataFileInfo> dataFileInfos = fragments.stream()
            .map(Slice::getBytes)
            .map(dataFileInfoCodec::fromJson)
            .collect(toImmutableList());

    boolean writeCommitted = false;
    try {
        TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, tableLocation);

        long createdTime = Instant.now().toEpochMilli();
        long commitVersion = readVersion + 1;
        transactionLogWriter.appendCommitInfoEntry(new CommitInfoEntry(
                commitVersion,
                createdTime,
                session.getUser(),
                session.getUser(),
                OPTIMIZE_OPERATION,
                ImmutableMap.of("queryId", session.getQueryId()),
                null,
                null,
                "trino-" + nodeVersion + "-" + nodeId,
                readVersion,
                ISOLATION_LEVEL,
                true));
        // TODO: Delta writes another field "operationMetrics" that I haven't
        // seen before. It contains delete/update metrics. Investigate/include it.

        long writeTimestamp = Instant.now().toEpochMilli();

        for (Path scannedPath : scannedPaths) {
            String relativePath = new Path(tableLocation).toUri().relativize(scannedPath.toUri()).toString();
            transactionLogWriter.appendRemoveFileEntry(new RemoveFileEntry(relativePath, writeTimestamp, false));
        }

        // Note: during writes we want to preserve original case of partition columns
        List<String> partitionColumns = optimizeHandle.getMetadataEntry().getOriginalPartitionColumns();
        appendAddFileEntries(transactionLogWriter, dataFileInfos, partitionColumns, false);

        transactionLogWriter.flush();
        writeCommitted = true;

        writeCheckpointIfNeeded(session, executeHandle.getSchemaTableName(), checkpointInterval, commitVersion);
    }
    catch (Exception e) {
        if (!writeCommitted) {
            // TODO perhaps it should happen in a background thread
            cleanupFailedWrite(session, tableLocation, dataFileInfos);
        }
        throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Failed to write Delta Lake transaction log entry", e);
    }
}
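Before a RemoveFileEntry is appended, each scanned path is relativized against the table location so the transaction log records table-relative paths. A minimal sketch of that step using plain java.net.URI; the paths below are assumed examples, not values from the source:

import java.net.URI;

// Assumed example locations; the real values come from the table handle and split source info.
URI tableLocation = URI.create("s3://bucket/warehouse/sales/");
URI scannedFile = URI.create("s3://bucket/warehouse/sales/part-00000.parquet");
String relativePath = tableLocation.relativize(scannedFile).toString(); // "part-00000.parquet"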
Use of io.trino.plugin.deltalake.procedure.DeltaTableOptimizeHandle in project trino by trinodb.
From the class DeltaLakeMetadata, method getLayoutForOptimize.
private Optional<ConnectorTableLayout> getLayoutForOptimize(DeltaLakeTableExecuteHandle executeHandle)
{
    DeltaTableOptimizeHandle optimizeHandle = (DeltaTableOptimizeHandle) executeHandle.getProcedureHandle();
    List<String> partitionColumnNames = optimizeHandle.getMetadataEntry().getCanonicalPartitionColumns();
    if (partitionColumnNames.isEmpty()) {
        return Optional.empty();
    }
    Map<String, DeltaLakeColumnHandle> columnsByName = optimizeHandle.getTableColumns().stream()
            .collect(toImmutableMap(columnHandle -> columnHandle.getName().toLowerCase(Locale.ENGLISH), identity()));
    ImmutableList.Builder<DeltaLakeColumnHandle> partitioningColumns = ImmutableList.builder();
    for (String columnName : partitionColumnNames) {
        partitioningColumns.add(columnsByName.get(columnName));
    }
    DeltaLakePartitioningHandle partitioningHandle = new DeltaLakePartitioningHandle(partitioningColumns.build());
    return Optional.of(new ConnectorTableLayout(partitioningHandle, partitionColumnNames));
}
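Delta's canonical partition column names are lowercased, while the column handles keep the original case, which is why the lookup above is keyed by lowercased names. A small JDK-only sketch of the same idea (the source uses Guava's toImmutableMap); the column names are assumed examples and plain Strings stand in for DeltaLakeColumnHandle:

import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

// Assumed example column names with mixed case.
List<String> originalNames = List.of("Order_Date", "Region");
Map<String, String> byLowerCaseName = originalNames.stream()
        .collect(toMap(name -> name.toLowerCase(Locale.ENGLISH), identity()));
// byLowerCaseName.get("order_date") returns "Order_Date", matching Delta's lowercased canonical name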