Use of org.apache.iceberg.ExpireSnapshots in the Apache incubator-gobblin project.
Class IcebergMetadataWriter, method dropFiles.
/**
 * Handles the two kinds of removal this writer performs for a table:
 * <ul>
 *   <li>Regular file deletion described by the GMCE: the data files to remove are added to the
 *       {@link DeleteFiles} held by {@code tableMetadata}. They are only aggregated here; this
 *       method does not commit that {@link DeleteFiles}.</li>
 *   <li>Snapshot expiration: a task is submitted to {@code parallelRunner} that expires snapshots
 *       older than {@code getExpireSnapshotTime()} and commits that expiration.</li>
 * </ul>
 * Any exception raised inside the submitted expiration task is logged and not rethrown.
 *
 * @param gmce the change event, forwarded to {@code getIcebergDataFilesToBeDeleted}
 * @param oldSpecsMap the old Hive specs, forwarded to {@code getIcebergDataFilesToBeDeleted}
 * @param table the Iceberg table whose partition spec, location, file IO and snapshots are used
 * @param tableMetadata supplies (or lazily creates) the {@link DeleteFiles} being aggregated
 * @param tid table identifier; its string form is used in log messages and is passed as the
 *            second argument of {@code parallelRunner.submitCallable}
 * @throws IOException declared by this method; presumably propagated from
 *                     {@code getIcebergDataFilesToBeDeleted} (confirm against that helper)
 */
protected void dropFiles(GobblinMetadataChangeEvent gmce, Map<String, Collection<HiveSpec>> oldSpecsMap, Table table, TableMetadata tableMetadata, TableIdentifier tid) throws IOException {
  PartitionSpec spec = table.spec();

  // Regular file deletion: record every stale data file on the pending DeleteFiles (no commit here).
  DeleteFiles pendingDeletes = tableMetadata.getOrInitDeleteFiles();
  Set<DataFile> staleDataFiles = getIcebergDataFilesToBeDeleted(gmce, table, new HashMap<>(), oldSpecsMap, spec);
  for (DataFile staleDataFile : staleDataFiles) {
    pendingDeletes.deleteFile(staleDataFile);
  }

  // Snapshot expiration: drop snapshots that are beyond the look-back allowance for time-travel,
  // and commit that expiration in one go from a task handed to the parallel runner.
  parallelRunner.submitCallable(() -> {
    try {
      long olderThan = getExpireSnapshotTime();
      long start = System.currentTimeMillis();
      table.expireSnapshots()
          .deleteWith(path -> {
            // Only remove files that live under this table's location.
            if (path.startsWith(table.location())) {
              table.io().deleteFile(path);
            }
          })
          .expireOlderThan(olderThan)
          .commit();
      // TODO: emit these metrics to Ingraphs, in addition to metrics for publishing new snapshots and other Iceberg metadata operations.
      long elapsedMs = System.currentTimeMillis() - start;
      String cutoffText = new DateTime(olderThan).toString();
      log.info("Spent {} ms to expire snapshots older than {} ({}) in table {}", elapsedMs, cutoffText, olderThan, tid.toString());
    } catch (Exception e) {
      // Best effort: a failed expiration is reported but must not fail the caller.
      log.error(String.format("Fail to expire snapshots for table %s due to exception ", tid.toString()), e);
    }
    return null;
  }, tid.toString());
}
Aggregations