use of org.apache.iceberg.actions.DeleteReachableFiles in project iceberg by apache.
the class TestDeleteReachableFilesAction method testIgnoreMetadataFilesNotFound.
/**
 * Removes an old metadata JSON file through the orphan-files action first, then runs the
 * delete-reachable-files action against the table's current metadata location and checks the
 * reported counts.
 */
@Test
public void testIgnoreMetadataFilesNotFound() {
  table.updateProperties().set(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "1").commit();
  table.newAppend().appendFile(FILE_A).commit();

  // There are three metadata json files at this point
  DeleteOrphanFiles orphanCleanup =
      sparkActions().deleteOrphanFiles(table).olderThan(System.currentTimeMillis());
  DeleteOrphanFiles.Result orphanResult = orphanCleanup.execute();

  Assert.assertEquals(
      "Should delete 1 file", 1, Iterables.size(orphanResult.orphanFileLocations()));

  // Scan the reported orphan locations for the first-version metadata file.
  boolean removedV1Metadata = false;
  for (String location : orphanResult.orphanFileLocations()) {
    if (location.contains("v1.metadata.json")) {
      removedV1Metadata = true;
      break;
    }
  }
  Assert.assertTrue("Should remove v1 file", removedV1Metadata);

  // The reachable-files action runs after v1.metadata.json is already gone.
  DeleteReachableFiles.Result reachableResult =
      sparkActions().deleteReachableFiles(metadataLocation(table)).io(table.io()).execute();
  checkRemoveFilesResults(1, 1, 1, 4, reachableResult);
}
use of org.apache.iceberg.actions.DeleteReachableFiles in project iceberg by apache.
the class TestDeleteReachableFilesAction method testRemoveFilesActionWithDefaultIO.
/**
 * Runs the delete-reachable-files action without calling {@code io(...)} on it and checks the
 * reported counts after two appends.
 */
@Test
public void testRemoveFilesActionWithDefaultIO() {
  table.newAppend().appendFile(FILE_A).commit();
  table.newAppend().appendFile(FILE_B).commit();

  // IO not set explicitly on removeReachableFiles action
  // IO defaults to HadoopFileIO
  DeleteReachableFiles actionWithDefaultIo =
      sparkActions().deleteReachableFiles(metadataLocation(table));
  DeleteReachableFiles.Result outcome = actionWithDefaultIo.execute();

  checkRemoveFilesResults(2, 2, 2, 4, outcome);
}
use of org.apache.iceberg.actions.DeleteReachableFiles in project iceberg by apache.
the class TestDeleteReachableFilesAction method dataFilesCleanupWithParallelTasks.
/**
 * Runs the delete-reachable-files action with a caller-supplied four-thread executor and a
 * recording delete callback, then verifies that:
 *
 * <ul>
 *   <li>the delete callback ran on the threads created by the supplied thread factory,
 *   <li>each of FILE_A, FILE_B, FILE_C and FILE_D was passed to the delete callback, and
 *   <li>the action's result reports the expected counts.
 * </ul>
 */
@Test
public void dataFilesCleanupWithParallelTasks() {
  table.newFastAppend().appendFile(FILE_A).commit();
  table.newFastAppend().appendFile(FILE_B).commit();
  table.newRewrite().rewriteFiles(ImmutableSet.of(FILE_B), ImmutableSet.of(FILE_D)).commit();
  table.newRewrite().rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_C)).commit();

  // Concurrent sets because the delete callback is invoked from several pool threads.
  Set<String> deletedFiles = ConcurrentHashMap.newKeySet();
  Set<String> deleteThreads = ConcurrentHashMap.newKeySet();
  AtomicInteger deleteThreadsIndex = new AtomicInteger(0);

  DeleteReachableFiles.Result result =
      sparkActions()
          .deleteReachableFiles(metadataLocation(table))
          .io(table.io())
          .executeDeleteWith(
              Executors.newFixedThreadPool(
                  4,
                  runnable -> {
                    Thread thread = new Thread(runnable);
                    thread.setName("remove-files-" + deleteThreadsIndex.getAndIncrement());
                    // daemon threads will be terminated abruptly when the JVM exits
                    thread.setDaemon(true);
                    return thread;
                  }))
          .deleteWith(
              s -> {
                deleteThreads.add(Thread.currentThread().getName());
                deletedFiles.add(s);
              })
          .execute();

  // Verifies that the delete methods ran in the threads created by the provided
  // ExecutorService ThreadFactory (expected value first, per the JUnit convention)
  Assert.assertEquals(
      Sets.newHashSet("remove-files-0", "remove-files-1", "remove-files-2", "remove-files-3"),
      deleteThreads);

  // Check every data file, not just FILE_A: the assertion must use the lambda parameter.
  Lists.newArrayList(FILE_A, FILE_B, FILE_C, FILE_D)
      .forEach(
          file ->
              Assert.assertTrue(
                  "Data file should be deleted: " + file.path(),
                  deletedFiles.contains(file.path().toString())));

  checkRemoveFilesResults(4L, 6L, 4L, 6, result);
}
use of org.apache.iceberg.actions.DeleteReachableFiles in project iceberg by apache.
the class TestDeleteReachableFilesAction method testEmptyIOThrowsException.
/**
 * Configures the delete-reachable-files action with a {@code null} IO and expects {@code
 * execute()} to fail with an {@link IllegalArgumentException} whose message contains
 * "File IO cannot be null".
 */
@Test
public void testEmptyIOThrowsException() {
  DeleteReachableFiles actionWithNullIo =
      sparkActions().deleteReachableFiles(metadataLocation(table)).io(null);

  AssertHelpers.assertThrows(
      "FileIO needs to be set to use RemoveFiles action",
      IllegalArgumentException.class,
      "File IO cannot be null",
      actionWithNullIo::execute);
}
use of org.apache.iceberg.actions.DeleteReachableFiles in project iceberg by apache.
the class TestDeleteReachableFilesAction method testRemoveFilesActionWithReducedVersionsTable.
/**
 * Limits the number of retained previous metadata versions to 2, performs five appends, and
 * then checks the counts reported by the delete-reachable-files action.
 */
@Test
public void testRemoveFilesActionWithReducedVersionsTable() {
  table.updateProperties().set(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "2").commit();

  // Five separate append commits, in this order; FILE_B is appended twice.
  // NOTE(review): the duplicate FILE_B append looks intentional given the expected counts
  // below (4 vs 5) - confirm against checkRemoveFilesResults.
  Lists.newArrayList(FILE_A, FILE_B, FILE_B, FILE_C, FILE_D)
      .forEach(dataFile -> table.newAppend().appendFile(dataFile).commit());

  DeleteReachableFiles.Result outcome =
      sparkActions().deleteReachableFiles(metadataLocation(table)).io(table.io()).execute();
  checkRemoveFilesResults(4, 5, 5, 8, outcome);
}
Aggregations