Use of io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle in project trino by trinodb.
From the class IcebergMetadata, method getTableHandleForOptimize:
private Optional<ConnectorTableExecuteHandle> getTableHandleForOptimize(ConnectorSession session, IcebergTableHandle tableHandle, Map<String, Object> executeProperties, RetryMode retryMode)
{
    DataSize maxScannedFileSize = (DataSize) executeProperties.get("file_size_threshold");
    Table icebergTable = catalog.loadTable(session, tableHandle.getSchemaTableName());
    return Optional.of(new IcebergTableExecuteHandle(
            tableHandle.getSchemaTableName(),
            OPTIMIZE,
            new IcebergOptimizeHandle(
                    SchemaParser.toJson(icebergTable.schema()),
                    PartitionSpecParser.toJson(icebergTable.spec()),
                    getColumns(icebergTable.schema(), typeManager),
                    getFileFormat(icebergTable),
                    icebergTable.properties(),
                    maxScannedFileSize,
                    retryMode != NO_RETRIES),
            icebergTable.location()));
}
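The handle stores the Iceberg schema and partition spec as JSON strings so that the ConnectorTableExecuteHandle stays serializable and can be shipped to workers. A minimal, self-contained sketch of that round trip using the Iceberg parser APIs (the two-column schema and the class name are invented for illustration; they do not appear in the Trino source):

import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.types.Types;

public class SchemaJsonRoundTrip
{
    public static void main(String[] args)
    {
        // Hypothetical schema, standing in for icebergTable.schema()
        Schema schema = new Schema(
                Types.NestedField.required(1, "id", Types.LongType.get()),
                Types.NestedField.optional(2, "name", Types.StringType.get()));

        String json = SchemaParser.toJson(schema);     // what goes into IcebergOptimizeHandle
        Schema restored = SchemaParser.fromJson(json); // what the page sink later recovers

        System.out.println(restored.asStruct().equals(schema.asStruct())); // true
    }
}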
Use of io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle in project trino by trinodb.
From the class IcebergPageSinkProvider, method createPageSink:
@Override
public ConnectorPageSink createPageSink(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableExecuteHandle tableExecuteHandle)
{
    IcebergTableExecuteHandle executeHandle = (IcebergTableExecuteHandle) tableExecuteHandle;
    switch (executeHandle.getProcedureId()) {
        case OPTIMIZE:
            HdfsContext hdfsContext = new HdfsContext(session);
            IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
            Schema schema = SchemaParser.fromJson(optimizeHandle.getSchemaAsJson());
            PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, optimizeHandle.getPartitionSpecAsJson());
            LocationProvider locationProvider = getLocationProvider(executeHandle.getSchemaTableName(), executeHandle.getTableLocation(), optimizeHandle.getTableStorageProperties());
            return new IcebergPageSink(
                    schema,
                    partitionSpec,
                    locationProvider,
                    fileWriterFactory,
                    pageIndexerFactory,
                    hdfsEnvironment,
                    hdfsContext,
                    optimizeHandle.getTableColumns(),
                    jsonCodec,
                    session,
                    optimizeHandle.getFileFormat(),
                    optimizeHandle.getTableStorageProperties(),
                    maxOpenPartitions);
    }
    throw new IllegalArgumentException("Unknown procedure: " + executeHandle.getProcedureId());
}
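Note that createPageSink reverses the serialization done in getTableHandleForOptimize, and the order matters: the schema JSON must be parsed first, because the partition spec JSON references its source columns by field id rather than by name. A small sketch of the spec round trip (the schema, partition column, and class name are invented for illustration):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

public class PartitionSpecJsonRoundTrip
{
    public static void main(String[] args)
    {
        Schema schema = new Schema(
                Types.NestedField.required(1, "id", Types.LongType.get()),
                Types.NestedField.optional(2, "event_date", Types.DateType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema)
                .identity("event_date")
                .build();

        String json = PartitionSpecParser.toJson(spec); // stored in the optimize handle
        // fromJson needs the schema: the JSON refers to source columns by field id
        PartitionSpec restored = PartitionSpecParser.fromJson(schema, json);

        System.out.println(restored.equals(spec)); // true
    }
}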
Use of io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle in project trino by trinodb.
From the class IcebergMetadata, method finishOptimize:
private void finishOptimize(ConnectorSession session, IcebergTableExecuteHandle executeHandle, Collection<Slice> fragments, List<Object> splitSourceInfo)
{
    IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
    Table icebergTable = transaction.table();

    // files to be deleted
    Set<DataFile> scannedFiles = splitSourceInfo.stream()
            .map(DataFile.class::cast)
            .collect(toImmutableSet());

    List<CommitTaskData> commitTasks = fragments.stream()
            .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
            .collect(toImmutableList());

    Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
            .map(field -> field.transform().getResultType(icebergTable.schema().findType(field.sourceId())))
            .toArray(Type[]::new);

    Set<DataFile> newFiles = new HashSet<>();
    for (CommitTaskData task : commitTasks) {
        DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
                .withPath(task.getPath())
                .withFileSizeInBytes(task.getFileSizeInBytes())
                .withFormat(optimizeHandle.getFileFormat().toIceberg())
                .withMetrics(task.getMetrics().metrics());
        if (!icebergTable.spec().fields().isEmpty()) {
            String partitionDataJson = task.getPartitionDataJson()
                    .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
            builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
        }
        newFiles.add(builder.build());
    }

    if (scannedFiles.isEmpty() && newFiles.isEmpty()) {
        // Table scan turned out to be empty, nothing to commit
        transaction = null;
        return;
    }

    // try to leave as little garbage as possible behind
    if (optimizeHandle.isRetriesEnabled()) {
        cleanExtraOutputFiles(session, newFiles.stream()
                .map(dataFile -> dataFile.path().toString())
                .collect(toImmutableSet()));
    }

    RewriteFiles rewriteFiles = transaction.newRewrite();
    rewriteFiles.rewriteFiles(scannedFiles, newFiles);
    rewriteFiles.commit();
    transaction.commitTransaction();
    transaction = null;
}
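The commit at the end relies on Iceberg's RewriteFiles operation, which removes the scanned files and adds the compacted replacements in a single new table snapshot, so readers never observe a half-rewritten table. A stripped-down sketch of just that commit sequence against the plain Iceberg API; the table and both file sets are passed in as assumptions rather than derived as above, and the transaction is opened locally here, whereas in Trino it is opened earlier, in beginOptimize:

import java.util.Set;

import org.apache.iceberg.DataFile;
import org.apache.iceberg.RewriteFiles;
import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;

public final class RewriteCommitSketch
{
    // Atomically replaces scannedFiles with newFiles, mirroring finishOptimize
    public static void commitRewrite(Table table, Set<DataFile> scannedFiles, Set<DataFile> newFiles)
    {
        Transaction transaction = table.newTransaction();
        RewriteFiles rewriteFiles = transaction.newRewrite();
        rewriteFiles.rewriteFiles(scannedFiles, newFiles); // delete old, add new
        rewriteFiles.commit();           // stages the rewrite inside the transaction
        transaction.commitTransaction(); // publishes one new table snapshot
    }
}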
Use of io.trino.plugin.iceberg.procedure.IcebergOptimizeHandle in project trino by trinodb.
From the class IcebergMetadata, method beginOptimize:
private BeginTableExecuteResult<ConnectorTableExecuteHandle, ConnectorTableHandle> beginOptimize(ConnectorSession session, IcebergTableExecuteHandle executeHandle, IcebergTableHandle table)
{
    IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
    Table icebergTable = catalog.loadTable(session, table.getSchemaTableName());
    verify(transaction == null, "transaction already set");
    transaction = icebergTable.newTransaction();
    return new BeginTableExecuteResult<>(
            executeHandle,
            table.forOptimize(true, optimizeHandle.getMaxScannedFileSize()));
}
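Taken together, beginOptimize opens the single table-level transaction and finishOptimize later commits it (or discards it when the scan turned out to be empty); the verify guard ensures only one such transaction is in flight at a time. A minimal sketch of that lifecycle, assuming a hypothetical holder class in place of IcebergMetadata's transaction field:

import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;

import static com.google.common.base.Verify.verify;

// Hypothetical holder standing in for IcebergMetadata's transaction field
public class OptimizeTransactionHolder
{
    private Transaction transaction;

    public void begin(Table icebergTable)
    {
        // Matches the guard in beginOptimize: only one transaction at a time
        verify(transaction == null, "transaction already set");
        transaction = icebergTable.newTransaction();
    }

    public void finish()
    {
        transaction.commitTransaction(); // one atomic snapshot, as in finishOptimize
        transaction = null;              // ready for the next OPTIMIZE run
    }
}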