Use of org.apache.iceberg.RewriteFiles in project iceberg by apache.
The class BaseRewriteDataFilesAction, method doReplace:
@VisibleForTesting
void doReplace(Iterable<DataFile> deletedDataFiles, Iterable<DataFile> addedDataFiles, long startingSnapshotId) {
  // Build a rewrite that atomically replaces the compacted-away files with the new
  // ones, validating against commits that landed after startingSnapshotId.
  RewriteFiles rewriteFiles = table.newRewrite()
      .validateFromSnapshot(startingSnapshotId)
      .rewriteFiles(Sets.newHashSet(deletedDataFiles), Sets.newHashSet(addedDataFiles));
  commit(rewriteFiles);
}
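For context, the same fluent chain can be driven directly against a Table handle. A minimal self-contained sketch, assuming an already-loaded table; oldFile and compactedFile are hypothetical placeholders for files produced by a compaction pass:

import com.google.common.collect.Sets;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.RewriteFiles;
import org.apache.iceberg.Table;

public class CompactionSketch {
  // oldFile and compactedFile are hypothetical: in practice they come from planning
  // a scan and rewriting its rows into fewer, larger files.
  static void replaceOneFile(Table table, DataFile oldFile, DataFile compactedFile) {
    long startingSnapshotId = table.currentSnapshot().snapshotId();
    RewriteFiles rewrite = table.newRewrite()
        .validateFromSnapshot(startingSnapshotId)   // start conflict detection from this snapshot
        .rewriteFiles(Sets.newHashSet(oldFile), Sets.newHashSet(compactedFile));
    rewrite.commit();                               // one atomic snapshot swap
  }
}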
Use of org.apache.iceberg.RewriteFiles in project iceberg by apache.
The class RewriteDataFilesCommitManager, method commitFileGroups:
/**
 * Perform a commit operation on the table, adding and removing files as
 * required for this set of file groups.
 *
 * @param fileGroups file sets to commit
 */
public void commitFileGroups(Set<RewriteFileGroup> fileGroups) {
  Set<DataFile> rewrittenDataFiles = Sets.newHashSet();
  Set<DataFile> addedDataFiles = Sets.newHashSet();
  for (RewriteFileGroup group : fileGroups) {
    rewrittenDataFiles = Sets.union(rewrittenDataFiles, group.rewrittenFiles());
    addedDataFiles = Sets.union(addedDataFiles, group.addedFiles());
  }

  RewriteFiles rewrite = table.newRewrite().validateFromSnapshot(startingSnapshotId);
  if (useStartingSequenceNumber) {
    // Commit the new files with the sequence number of the snapshot the rewrite
    // started from, so that later-arriving deletes still apply to them.
    long sequenceNumber = table.snapshot(startingSnapshotId).sequenceNumber();
    rewrite.rewriteFiles(rewrittenDataFiles, addedDataFiles, sequenceNumber);
  } else {
    rewrite.rewriteFiles(rewrittenDataFiles, addedDataFiles);
  }
  rewrite.commit();
}
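The sequence-number branch matters for tables with row-level deletes: equality deletes only apply to data files with a smaller data sequence number, so committing rewritten data at the latest sequence number could resurrect rows deleted concurrently with the rewrite. A minimal sketch of the three-argument overload in isolation, with hypothetical rewritten and added sets supplied by the caller:

import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;

// Hypothetical helper: commit compacted files at the starting snapshot's
// sequence number so concurrent equality deletes keep applying to them.
static void commitAtStartingSequenceNumber(Table table, long startingSnapshotId,
    Set<DataFile> rewritten, Set<DataFile> added) {
  long sequenceNumber = table.snapshot(startingSnapshotId).sequenceNumber();
  table.newRewrite()
      .validateFromSnapshot(startingSnapshotId)
      .rewriteFiles(rewritten, added, sequenceNumber)  // pin the data sequence number
      .commit();
}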
Use of org.apache.iceberg.RewriteFiles in project trino by trinodb.
The class IcebergMetadata, method finishOptimize:
private void finishOptimize(ConnectorSession session, IcebergTableExecuteHandle executeHandle, Collection<Slice> fragments, List<Object> splitSourceInfo) {
  IcebergOptimizeHandle optimizeHandle = (IcebergOptimizeHandle) executeHandle.getProcedureHandle();
  Table icebergTable = transaction.table();

  // data files to be replaced by the rewrite
  Set<DataFile> scannedFiles = splitSourceInfo.stream()
      .map(DataFile.class::cast)
      .collect(toImmutableSet());

  List<CommitTaskData> commitTasks = fragments.stream()
      .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
      .collect(toImmutableList());

  Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
      .map(field -> field.transform().getResultType(icebergTable.schema().findType(field.sourceId())))
      .toArray(Type[]::new);

  // rebuild DataFile metadata for each file written by the optimize tasks
  Set<DataFile> newFiles = new HashSet<>();
  for (CommitTaskData task : commitTasks) {
    DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
        .withPath(task.getPath())
        .withFileSizeInBytes(task.getFileSizeInBytes())
        .withFormat(optimizeHandle.getFileFormat().toIceberg())
        .withMetrics(task.getMetrics().metrics());
    if (!icebergTable.spec().fields().isEmpty()) {
      String partitionDataJson = task.getPartitionDataJson()
          .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
      builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
    }
    newFiles.add(builder.build());
  }

  if (scannedFiles.isEmpty() && newFiles.isEmpty()) {
    // table scan turned out to be empty, nothing to commit
    transaction = null;
    return;
  }

  // try to leave as little garbage behind as possible
  if (optimizeHandle.isRetriesEnabled()) {
    cleanExtraOutputFiles(session, newFiles.stream()
        .map(dataFile -> dataFile.path().toString())
        .collect(toImmutableSet()));
  }

  RewriteFiles rewriteFiles = transaction.newRewrite();
  rewriteFiles.rewriteFiles(scannedFiles, newFiles);
  rewriteFiles.commit();
  transaction.commitTransaction();
  transaction = null;
}
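Unlike the previous examples, the rewrite here is staged on a Transaction rather than committed straight to the table, so it publishes together with the rest of the OPTIMIZE work. A minimal sketch of that pattern, with hypothetical replaced and compacted file sets:

import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.RewriteFiles;
import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;

// Sketch of the transactional pattern above: the rewrite is staged on a transaction
// so it commits atomically with any other pending metadata changes.
static void rewriteInTransaction(Table table, Set<DataFile> replaced, Set<DataFile> compacted) {
  Transaction transaction = table.newTransaction();
  RewriteFiles rewriteFiles = transaction.newRewrite();
  rewriteFiles.rewriteFiles(replaced, compacted);
  rewriteFiles.commit();              // stages the rewrite in the transaction
  transaction.commitTransaction();    // publishes a single new snapshot
}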