Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.
The class TestInlineCompaction, method testCompactionRetryOnFailureBasedOnNumCommits.
@Test
public void testCompactionRetryOnFailureBasedOnNumCommits() throws Exception {
  // Given: two delta commits; schedule a compaction and leave it in-flight (simulates a failed inline compaction)
  HoodieWriteConfig cfg = getConfigBuilder(false)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withInlineCompaction(false)
          .withMaxNumDeltaCommitsBeforeCompaction(1)
          .build())
      .build();
  List<String> instants = IntStream.range(0, 2)
      .mapToObj(i -> HoodieActiveTimeline.createNewInstantTime())
      .collect(Collectors.toList());
  String instantTime2;
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
    List<HoodieRecord> records = dataGen.generateInserts(instants.get(0), 100);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
    // Schedule compaction at instantTime2 and move it to in-flight (simulates inline compaction failing)
    instantTime2 = HoodieActiveTimeline.createNewInstantTime();
    scheduleCompaction(instantTime2, writeClient, cfg);
    moveCompactionFromRequestedToInflight(instantTime2, cfg);
  }
  // When: a third delta commit happens
  HoodieWriteConfig inlineCfg = getConfigForInlineCompaction(2, 60, CompactionTriggerStrategy.NUM_COMMITS);
  String instantTime3 = HoodieActiveTimeline.createNewInstantTime();
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(inlineCfg)) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    createNextDeltaCommit(instantTime3, dataGen.generateUpdates(instantTime3, 100), writeClient, metaClient, inlineCfg, false);
  }
  // Then: the third delta commit completes and the failed compaction is retried
  metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
  assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
  assertEquals(instantTime2, metaClient.getActiveTimeline().getCommitTimeline()
      .filterCompletedInstants().firstInstant().get().getTimestamp());
}
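These tests lean on a getConfigForInlineCompaction helper from the surrounding test class that is not shown on this page. A minimal sketch of what such a helper could look like, assembled only from the HoodieCompactionConfig builder calls that appear in the configs above (the helper body is an assumption, not necessarily Hudi's actual test code):

private HoodieWriteConfig getConfigForInlineCompaction(int maxDeltaCommits, int maxDeltaSeconds,
    CompactionTriggerStrategy strategy) {
  // Enable inline compaction and wire in the trigger thresholds used by the tests.
  return getConfigBuilder(false)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withInlineCompaction(true)
          .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommits)
          .withMaxDeltaSecondsBeforeCompaction(maxDeltaSeconds)
          .withInlineCompactionTriggerStrategy(strategy)
          .build())
      .build();
}

With a helper like this, getConfigForInlineCompaction(2, 60, CompactionTriggerStrategy.NUM_COMMITS) in the test above yields a config that compacts once two delta commits have accumulated, regardless of elapsed time.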
Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.
The class TestInlineCompaction, method testCompactionRetryOnFailureBasedOnNumAndTime.
@Test
public void testCompactionRetryOnFailureBasedOnNumAndTime() throws Exception {
  // Given: two delta commits; schedule a compaction and leave it in-flight (simulates a failed inline compaction)
  HoodieWriteConfig cfg = getConfigBuilder(false)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withInlineCompaction(false)
          .withMaxDeltaSecondsBeforeCompaction(1)
          .withMaxNumDeltaCommitsBeforeCompaction(1)
          .withInlineCompactionTriggerStrategy(CompactionTriggerStrategy.NUM_AND_TIME)
          .build())
      .build();
  String instantTime;
  List<String> instants = IntStream.range(0, 2)
      .mapToObj(i -> HoodieActiveTimeline.createNewInstantTime())
      .collect(Collectors.toList());
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
    List<HoodieRecord> records = dataGen.generateInserts(instants.get(0), 10);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
    // Schedule compaction at instantTime and move it to in-flight (simulates inline compaction failing)
    instantTime = HoodieActiveTimeline.createNewInstantTime();
    scheduleCompaction(instantTime, writeClient, cfg);
    moveCompactionFromRequestedToInflight(instantTime, cfg);
  }
  // When: a third delta commit happens
  HoodieWriteConfig inlineCfg = getConfigForInlineCompaction(3, 20, CompactionTriggerStrategy.NUM_OR_TIME);
  String instantTime2;
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(inlineCfg)) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    instantTime2 = HoodieActiveTimeline.createNewInstantTime();
    createNextDeltaCommit(instantTime2, dataGen.generateUpdates(instantTime2, 10), writeClient, metaClient, inlineCfg, false);
  }
  // Then: the third delta commit completes and the failed compaction is retried
  metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
  assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
  assertEquals(instantTime, metaClient.getActiveTimeline().getCommitTimeline()
      .filterCompletedInstants().firstInstant().get().getTimestamp());
}
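The scheduleCompaction and moveCompactionFromRequestedToInflight helpers simulate a compaction that was planned but died before completing. A hedged sketch of the scheduling half, assuming the scheduleCompactionAtInstant API exposed by Hudi's write client (the helper body and its error handling are assumptions):

private void scheduleCompaction(String instantTime, SparkRDDWriteClient<?> writeClient,
    HoodieWriteConfig cfg) {
  // Plan a compaction at the given instant; this writes a compaction.requested
  // instant to the timeline without executing it.
  boolean scheduled = writeClient.scheduleCompactionAtInstant(instantTime, Option.empty());
  assertTrue(scheduled, "Compaction should be scheduled at " + instantTime);
}

Moving the instant from requested to in-flight then mimics a compaction that started and failed, which is exactly the state the next inline-compaction run is expected to pick up and retry.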
Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.
The class TestInlineCompaction, method testSuccessfulCompactionBasedOnNumAndTime.
@Test
public void testSuccessfulCompactionBasedOnNumAndTime() throws Exception {
  // Given: make three delta commits with a NUM_AND_TIME trigger (3 commits AND 20 seconds)
  HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 20, CompactionTriggerStrategy.NUM_AND_TIME);
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
    List<HoodieRecord> records = dataGen.generateInserts(HoodieActiveTimeline.createNewInstantTime(), 10);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    List<String> instants = IntStream.range(0, 3)
        .mapToObj(i -> HoodieActiveTimeline.createNewInstantTime())
        .collect(Collectors.toList());
    runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    // Then: no compaction has run yet; the three delta commits meet the count condition,
    // but the 20-second window has not elapsed
    assertEquals(3, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    // A fourth commit, timestamped roughly 20 seconds ahead, satisfies both conditions and triggers compaction
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    String finalInstant = HoodieActiveTimeline.createNewInstantTime(20000);
    createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 10), writeClient, metaClient, cfg, false);
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    // 4 delta commits + 1 compaction commit
    assertEquals(5, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
  }
}
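Across the three tests, the CompactionTriggerStrategy decides when an inline compaction fires. A hypothetical illustration of that decision follows; Hudi's actual check lives inside its compaction scheduling code, so the method below is only a model of the behavior the tests exercise:

// Model of the inline-compaction trigger decision; not Hudi's actual implementation.
static boolean shouldTriggerCompaction(CompactionTriggerStrategy strategy,
    int deltaCommits, long elapsedSeconds, int maxDeltaCommits, long maxDeltaSeconds) {
  boolean numCondition = deltaCommits >= maxDeltaCommits;
  boolean timeCondition = elapsedSeconds >= maxDeltaSeconds;
  switch (strategy) {
    case NUM_COMMITS:  return numCondition;                  // count only
    case TIME_ELAPSED: return timeCondition;                 // time only
    case NUM_AND_TIME: return numCondition && timeCondition; // both must hold
    case NUM_OR_TIME:  return numCondition || timeCondition; // either suffices
    default:           return false;
  }
}

Under this model, the last test's createNewInstantTime(20000) is the key move: it fabricates a fourth commit about 20 seconds ahead, so both the count condition (4 >= 3) and the time condition (20s >= 20s) hold and NUM_AND_TIME finally triggers.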