use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
the class TestClientRollback method testFailedRollbackCommit.
/**
* Test cases for the effects of rolling back completed/inflight commits.
*/
@Test
public void testFailedRollbackCommit() throws Exception {
// Let's create some commit files and base files
final String p1 = "2016/05/01";
final String p2 = "2016/05/02";
final String p3 = "2016/05/06";
final String commitTime1 = "20160501010101";
final String commitTime2 = "20160502020601";
final String commitTime3 = "20160506030611";
Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {
{
put(p1, "id11");
put(p2, "id12");
put(p3, "id13");
}
};
Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {
{
put(p1, "id21");
put(p2, "id22");
put(p3, "id23");
}
};
Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {
{
put(p1, "id31");
put(p2, "id32");
put(p3, "id33");
}
};
HoodieTestTable testTable = HoodieTestTable.of(metaClient)
    .withPartitionMetaFiles(p1, p2, p3)
    .addCommit(commitTime1)
    .withBaseFilesInPartitions(partitionAndFileId1)
    .addCommit(commitTime2)
    .withBaseFilesInPartitions(partitionAndFileId2)
    .addInflightCommit(commitTime3)
    .withBaseFilesInPartitions(partitionAndFileId3);
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withRollbackUsingMarkers(false)
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build())
    .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
    .build();
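// LAZY failed-writes cleaning keeps inflight commits from being rolled back automatically,
// so commitTime3 can be rolled back explicitly (and retried) below.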
try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
// Rollback commit3
client.rollback(commitTime3);
assertFalse(testTable.inflightCommitExists(commitTime3));
assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
metaClient.reloadActiveTimeline();
List<HoodieInstant> rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList());
assertEquals(1, rollbackInstants.size());
HoodieInstant rollbackInstant = rollbackInstants.get(0);
// delete the completed rollback meta file and retry the rollback.
FileCreateUtils.deleteRollbackCommit(basePath, rollbackInstant.getTimestamp());
// recreate actual commit files so that we can retry the rollback
testTable.addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3);
// retry rolling back the commit again.
client.rollback(commitTime3);
// verify there are no extra rollback instants
metaClient.reloadActiveTimeline();
rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList());
assertEquals(1, rollbackInstants.size());
assertEquals(rollbackInstants.get(0), rollbackInstant);
final String commitTime4 = "20160507040601";
final String commitTime5 = "20160507050611";
// add an inflight compaction, then roll it back below
testTable.addInflightCompaction(commitTime4, new HoodieCommitMetadata());
HoodieRollbackPlan rollbackPlan = new HoodieRollbackPlan();
rollbackPlan.setRollbackRequests(Collections.emptyList());
rollbackPlan.setInstantToRollback(new HoodieInstantInfo(commitTime4, HoodieTimeline.COMPACTION_ACTION));
testTable.addRequestedRollback(commitTime5, rollbackPlan);
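// a requested (not yet executed) rollback now targets the pending compaction instant commitTime4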
// the compaction instants should be excluded
metaClient.reloadActiveTimeline();
assertEquals(0, client.getPendingRollbackInfos(metaClient).size());
// verify there are no extra rollback instants
client.rollback(commitTime4);
metaClient.reloadActiveTimeline();
rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList());
assertEquals(2, rollbackInstants.size());
}
}
use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
the class TestClientRollback method testAutoRollbackInflightCommit.
/**
* Test auto-rollback of commits which are in flight.
*/
@Test
public void testAutoRollbackInflightCommit() throws Exception {
// Let's create some commit files and base files
final String p1 = "2016/05/01";
final String p2 = "2016/05/02";
final String p3 = "2016/05/06";
final String commitTime1 = "20160501010101";
final String commitTime2 = "20160502020601";
final String commitTime3 = "20160506030611";
Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {
{
put(p1, "id11");
put(p2, "id12");
put(p3, "id13");
}
};
Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {
{
put(p1, "id21");
put(p2, "id22");
put(p3, "id23");
}
};
Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {
{
put(p1, "id31");
put(p2, "id32");
put(p3, "id33");
}
};
// Set the failed-writes cleaning policy to LAZY
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build())
    .build();
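// With LAZY cleaning, failed/inflight writes are left for the cleaner to reclaim later
// instead of being rolled back as soon as the next commit starts.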
HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap1 = new HashMap<>();
partitionAndFileId1.forEach((k, v) -> partitionToFilesNameLengthMap1.put(k, Collections.singletonList(Pair.of(v, 100))));
testTable.doWriteOperation(commitTime1, WriteOperationType.INSERT, Arrays.asList(p1, p2, p3), partitionToFilesNameLengthMap1, false, false);
Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap2 = new HashMap<>();
partitionAndFileId2.forEach((k, v) -> partitionToFilesNameLengthMap2.put(k, Collections.singletonList(Pair.of(v, 200))));
testTable.doWriteOperation(commitTime2, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap2, false, true);
Map<String, List<Pair<String, Integer>>> partitionToFilesNameLengthMap3 = new HashMap<>();
partitionAndFileId3.forEach((k, v) -> partitionToFilesNameLengthMap3.put(k, Collections.singletonList(Pair.of(v, 300))));
testTable.doWriteOperation(commitTime3, WriteOperationType.INSERT, Collections.emptyList(), partitionToFilesNameLengthMap3, false, true);
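// Note: the trailing boolean on the calls for commitTime2 and commitTime3 leaves those commits inflight
// (only commitTime1 completes), which is what the assertions below rely on.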
final String commitTime4 = "20160506030621";
try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
client.startCommitWithTime(commitTime4);
// Check results, nothing changed
assertTrue(testTable.commitExists(commitTime1));
assertTrue(testTable.inflightCommitExists(commitTime2));
assertTrue(testTable.inflightCommitExists(commitTime3));
assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
assertTrue(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
}
// Use the default EAGER failed-writes cleaning policy (no LAZY override in this config)
config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withRollbackUsingMarkers(false)
    .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
    .build();
final String commitTime5 = "20160506030631";
try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
client.startCommitWithTime(commitTime5);
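// With EAGER cleaning, starting commit5 automatically rolls back the pending inflight commits 2 and 3.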
assertTrue(testTable.commitExists(commitTime1));
assertFalse(testTable.inflightCommitExists(commitTime2));
assertFalse(testTable.inflightCommitExists(commitTime3));
assertTrue(testTable.baseFilesExist(partitionAndFileId1, commitTime1));
assertFalse(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
}
}
use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
the class TestFileSystemBackedTableMetadata method testOneLevelPartitionedTable.
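The two tests below reference the ONE_LEVEL_PARTITIONS and MULTI_LEVEL_PARTITIONS constants and a hoodieTestTable field that are declared elsewhere in TestFileSystemBackedTableMetadata and are not shown here. A minimal sketch of what those declarations might look like follows; the partition values are illustrative assumptions, and only the count of three partitions is implied by the assertions.
// Hypothetical declarations assumed by the two partition-listing tests below.
private static final List<String> ONE_LEVEL_PARTITIONS = Arrays.asList("2019", "2020", "2021");
private static final List<String> MULTI_LEVEL_PARTITIONS = Arrays.asList("2019/01", "2020/01", "2021/01");
// Typically initialized in the test setup, e.g. hoodieTestTable = HoodieTestTable.of(metaClient);
private HoodieTestTable hoodieTestTable;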
@Test
public void testOneLevelPartitionedTable() throws Exception {
String instant = "100";
hoodieTestTable = hoodieTestTable.addCommit(instant);
// Generate 10 files under each partition
ONE_LEVEL_PARTITIONS.forEach(p -> {
try {
hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p).withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
} catch (Exception e) {
throw new RuntimeException(e);
}
});
HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
FileSystemBackedTableMetadata fileSystemBackedTableMetadata = new FileSystemBackedTableMetadata(
    localEngineContext, new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
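// the final boolean appears to be the assume-date-partitioning flag; false means partitions are
// discovered by listing the file system rather than assumed from a date-based layout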
Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
Assertions.assertEquals(10, fileSystemBackedTableMetadata.getAllFilesInPartition(new Path(basePath + "/" + ONE_LEVEL_PARTITIONS.get(0))).length);
List<String> fullPartitionPaths = ONE_LEVEL_PARTITIONS.stream().map(p -> basePath + "/" + p).collect(Collectors.toList());
Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
for (String p : fullPartitionPaths) {
Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
}
}
use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
the class TestFileSystemBackedTableMetadata method testMultiLevelPartitionedTable.
@Test
public void testMultiLevelPartitionedTable() throws Exception {
String instant = "100";
hoodieTestTable = hoodieTestTable.addCommit(instant);
// Generate 10 files under each partition
MULTI_LEVEL_PARTITIONS.forEach(p -> {
try {
hoodieTestTable = hoodieTestTable.withPartitionMetaFiles(p).withBaseFilesInPartition(p, IntStream.range(0, 10).toArray());
} catch (Exception e) {
throw new RuntimeException(e);
}
});
HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
FileSystemBackedTableMetadata fileSystemBackedTableMetadata = new FileSystemBackedTableMetadata(
    localEngineContext, new SerializableConfiguration(metaClient.getHadoopConf()), basePath, false);
Assertions.assertEquals(3, fileSystemBackedTableMetadata.getAllPartitionPaths().size());
Assertions.assertEquals(10, fileSystemBackedTableMetadata.getAllFilesInPartition(new Path(basePath + "/" + MULTI_LEVEL_PARTITIONS.get(0))).length);
List<String> fullPartitionPaths = MULTI_LEVEL_PARTITIONS.stream().map(p -> basePath + "/" + p).collect(Collectors.toList());
Map<String, FileStatus[]> partitionToFilesMap = fileSystemBackedTableMetadata.getAllFilesInPartitions(fullPartitionPaths);
for (String p : fullPartitionPaths) {
Assertions.assertEquals(10, partitionToFilesMap.get(p).length);
}
}
use of org.apache.hudi.common.testutils.HoodieTestTable in project hudi by apache.
the class TestCleaner method testCleanWithReplaceCommits.
@Test
public void testCleanWithReplaceCommits() throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withMetadataConfig(HoodieMetadataConfig.newBuilder()
        .withMaxNumDeltaCommitsBeforeCompaction(1)
        .withAssumeDatePartitioning(true).build())
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
        .retainCommits(2).build())
    .build();
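// KEEP_LATEST_COMMITS with retainCommits(2) keeps the file versions needed by the latest two commits;
// older, superseded file slices become candidates for cleaning.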
HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context);
HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
String p0 = "2020/01/01";
String p1 = "2020/01/02";
// make 1 commit, with 1 file per partition
String file1P0C0 = UUID.randomUUID().toString();
String file1P1C0 = UUID.randomUUID().toString();
testTable.addInflightCommit("00000000000001").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
HoodieCommitMetadata commitMetadata = generateCommitMetadata("00000000000001", Collections.unmodifiableMap(new HashMap<String, List<String>>() {
{
put(p0, CollectionUtils.createImmutableList(file1P0C0));
put(p1, CollectionUtils.createImmutableList(file1P1C0));
}
}));
metadataWriter.update(commitMetadata, "00000000000001", false);
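// mark commit 00000000000001 as completed on the timeline, attaching the commit metadata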
metaClient.getActiveTimeline().saveAsComplete(
    new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"),
    Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
metaClient = HoodieTableMetaClient.reload(metaClient);
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files");
assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
// make next replacecommit, with 1 clustering operation. logically delete p0. No change to p1
// notice that clustering generates empty inflight commit files
Map<String, String> partitionAndFileId002 = testTable.forReplaceCommit("00000000000002").getFileIdsWithBaseFilesInPartitions(p0);
String file2P0C1 = partitionAndFileId002.get(p0);
Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> replaceMetadata = generateReplaceCommitMetadata("00000000000002", p0, file1P0C0, file2P0C1);
testTable.addReplaceCommit("00000000000002", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
// run cleaner
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config);
assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions and clean any files");
assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
// make next replacecommit, with 1 clustering operation. Replace data in p1. No change to p0
// notice that clustering generates empty inflight commit files
Map<String, String> partitionAndFileId003 = testTable.forReplaceCommit("00000000000003").getFileIdsWithBaseFilesInPartitions(p1);
String file3P1C2 = partitionAndFileId003.get(p1);
replaceMetadata = generateReplaceCommitMetadata("00000000000003", p1, file1P1C0, file3P1C2);
testTable.addReplaceCommit("00000000000003", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
// run cleaner
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config);
assertEquals(0, hoodieCleanStatsThree.size(), "Must not scan any partitions and clean any files");
assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2));
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
// make next replacecommit, with 1 clustering operation. Replace data in p0 again
// notice that clustering generates empty inflight commit files
Map<String, String> partitionAndFileId004 = testTable.forReplaceCommit("00000000000004").getFileIdsWithBaseFilesInPartitions(p0);
String file4P0C3 = partitionAndFileId004.get(p0);
replaceMetadata = generateReplaceCommitMetadata("00000000000004", p0, file2P0C1, file4P0C3);
testTable.addReplaceCommit("00000000000004", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
// run cleaner
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, 5);
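// file1P0C0 was replaced back in 00000000000002, which is no longer within the retained commits,
// so the cleaner removes it (see the assertFalse below)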
assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3));
assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2));
assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
// file1P1C0 still stays because it is not replaced until commit 00000000000003 and it is the only version available
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
// make next replacecommit, with 1 clustering operation. Replace all data in p1 again
// notice that clustering generates empty inflight commit files
Map<String, String> partitionAndFileId005 = testTable.forReplaceCommit("00000000000006").getFileIdsWithBaseFilesInPartitions(p1);
String file4P1C4 = partitionAndFileId005.get(p1);
replaceMetadata = generateReplaceCommitMetadata("00000000000006", p0, file3P1C2, file4P1C4);
testTable.addReplaceCommit("00000000000006", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, 7);
assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3));
assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2));
assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
assertFalse(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
}