Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestInlineCompaction, method testSuccessfulCompactionBasedOnTime.
@Test
public void testSuccessfulCompactionBasedOnTime() throws Exception {
  // Given: make one commit
  HoodieWriteConfig cfg = getConfigForInlineCompaction(5, 10, CompactionTriggerStrategy.TIME_ELAPSED);
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
    String instantTime = HoodieActiveTimeline.createNewInstantTime();
    List<HoodieRecord> records = dataGen.generateInserts(instantTime, 10);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    runNextDeltaCommits(writeClient, readClient, Arrays.asList(instantTime), records, cfg, true, new ArrayList<>());
    // When: the next delta commit is timestamped 10s later, exceeding the elapsed-time threshold, so it triggers compaction
    String finalInstant = HoodieActiveTimeline.createNewInstantTime(10000);
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 100), writeClient, metaClient, cfg, false);
    // Then: ensure the file slices are compacted as per policy
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    assertEquals(3, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    assertEquals(HoodieTimeline.COMMIT_ACTION, metaClient.getActiveTimeline().lastInstant().get().getAction());
  }
}
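The time-based tests build their configuration through the TestInlineCompaction helper getConfigForInlineCompaction(maxDeltaCommits, maxDeltaSeconds, strategy). A minimal sketch of what such a helper can look like, assuming the standard HoodieCompactionConfig builder methods (an approximation for illustration, not necessarily the exact helper body from the test class):

  private HoodieWriteConfig getConfigForInlineCompaction(int maxDeltaCommits, int maxDeltaSeconds, CompactionTriggerStrategy strategy) {
    // Inline compaction is enabled and both thresholds are set; the trigger strategy
    // decides whether the commit count, the elapsed time, or a combination fires compaction.
    return getConfigBuilder(false)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withInlineCompaction(true)
            .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommits)
            .withMaxDeltaSecondsBeforeCompaction(maxDeltaSeconds)
            .withInlineCompactionTriggerStrategy(strategy)
            .build())
        .build();
  }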
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestInlineCompaction, method testSuccessfulCompactionBasedOnNumOrTime.
@Test
public void testSuccessfulCompactionBasedOnNumOrTime() throws Exception {
  // Given: make three commits
  HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 20, CompactionTriggerStrategy.NUM_OR_TIME);
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
    List<HoodieRecord> records = dataGen.generateInserts(HoodieActiveTimeline.createNewInstantTime(), 10);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
    runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
    // Then: the third delta commit triggers compaction because the commit count reaches 3
    String finalInstant = HoodieActiveTimeline.createNewInstantTime();
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 10), writeClient, metaClient, cfg, false);
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    // The 4th delta commit triggers compaction again because the elapsed-time threshold is reached
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    finalInstant = HoodieActiveTimeline.createNewInstantTime(20000);
    createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 10), writeClient, metaClient, cfg, false);
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    assertEquals(6, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
  }
}
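Outside of the test harness, the same trigger behaviour is normally expressed as plain write options. A short sketch of a hypothetical helper equivalent to getConfigForInlineCompaction(3, 20, NUM_OR_TIME), assuming the usual Hudi inline-compaction config keys (key names should be verified against the Hudi version in use):

  // Assumed keys: hoodie.compact.inline, hoodie.compact.inline.max.delta.commits,
  // hoodie.compact.inline.max.delta.seconds, hoodie.compact.inline.trigger.strategy.
  static HoodieWriteConfig numOrTimeCompactionConfig(String basePath) {
    java.util.Properties props = new java.util.Properties();
    props.setProperty("hoodie.compact.inline", "true");
    props.setProperty("hoodie.compact.inline.max.delta.commits", "3");
    props.setProperty("hoodie.compact.inline.max.delta.seconds", "20");
    props.setProperty("hoodie.compact.inline.trigger.strategy", "NUM_OR_TIME");
    return HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .fromProperties(props)
        .build();
  }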
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestInlineCompaction, method testCompactionRetryOnFailureBasedOnTime.
@Test
public void testCompactionRetryOnFailureBasedOnTime() throws Exception {
  // Given: two commits; schedule a compaction and leave it failed/in-flight
  HoodieWriteConfig cfg = getConfigBuilder(false)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withInlineCompaction(false)
          .withMaxDeltaSecondsBeforeCompaction(5)
          .withInlineCompactionTriggerStrategy(CompactionTriggerStrategy.TIME_ELAPSED)
          .build())
      .build();
  String instantTime;
  List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
    List<HoodieRecord> records = dataGen.generateInserts(instants.get(0), 100);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
    // Schedule a compaction at instantTime and move it to in-flight (simulates inline compaction failing)
    instantTime = HoodieActiveTimeline.createNewInstantTime(10000);
    scheduleCompaction(instantTime, writeClient, cfg);
    moveCompactionFromRequestedToInflight(instantTime, cfg);
  }
  // When: a new delta commit happens after 10s
  HoodieWriteConfig inlineCfg = getConfigForInlineCompaction(5, 10, CompactionTriggerStrategy.TIME_ELAPSED);
  String instantTime2;
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(inlineCfg)) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    instantTime2 = HoodieActiveTimeline.createNewInstantTime();
    createNextDeltaCommit(instantTime2, dataGen.generateUpdates(instantTime2, 10), writeClient, metaClient, inlineCfg, false);
  }
  // Then: one delta commit is done and the failed compaction is retried
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
  assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
  assertEquals(instantTime, metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().firstInstant().get().getTimestamp());
}
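The retry in this test works because the failed compaction stays on the active timeline as a pending (requested/inflight) instant, and the next inline-compaction run picks it up before scheduling a new one. A small sketch of how pending compactions can be inspected from the timeline, reusing the metaClient wiring from the tests above (illustrative only):

  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
      .setConf(hadoopConf)
      .setBasePath(cfg.getBasePath())
      .build();
  // Pending compactions appear as requested/inflight instants with the compaction action.
  metaClient.getActiveTimeline()
      .filterPendingCompactionTimeline()
      .getInstants()
      .forEach(instant -> System.out.println("Pending compaction at " + instant.getTimestamp()));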
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestInlineCompaction, method testCompactionIsNotScheduledEarly.
@Test
public void testCompactionIsNotScheduledEarly() throws Exception {
  // Given: make two commits
  HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 60, CompactionTriggerStrategy.NUM_COMMITS);
  try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
    List<HoodieRecord> records = dataGen.generateInserts(HoodieActiveTimeline.createNewInstantTime(), 100);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
    runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    // Then: ensure no compaction is executed since there are only 2 delta commits
    assertEquals(2, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
  }
}
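With the NUM_COMMITS strategy, compaction is only scheduled once the number of delta commits since the last compaction reaches the configured threshold (3 in this test), so two delta commits are not enough. A rough sketch of that check using timeline calls, simplified to count all completed delta commits rather than only those since the last compaction (illustrative, not Hudi's actual scheduling code):

  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
      .setConf(hadoopConf)
      .setBasePath(cfg.getBasePath())
      .build();
  // Count completed delta commits and compare against the NUM_COMMITS threshold.
  long completedDeltaCommits = metaClient.getActiveTimeline()
      .getDeltaCommitTimeline()
      .filterCompletedInstants()
      .countInstants();
  boolean shouldTriggerCompaction = completedDeltaCommits >= 3;  // threshold from getConfigForInlineCompaction(3, 60, NUM_COMMITS)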
Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.
The class TestHoodieCompactionStrategy, method testUnboundedPartitionAwareCompactionSimple.
@Test
public void testUnboundedPartitionAwareCompactionSimple() {
  Map<Long, List<Long>> sizesMap = new HashMap<>();
  sizesMap.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
  sizesMap.put(110 * MB, new ArrayList<>());
  sizesMap.put(100 * MB, Collections.singletonList(MB));
  sizesMap.put(80 * MB, Collections.singletonList(MB));
  sizesMap.put(70 * MB, Collections.singletonList(MB));
  sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));
  SimpleDateFormat format = new SimpleDateFormat("yyyy/MM/dd");
  Date today = new Date();
  String currentDay = format.format(today);
  String currentDayMinus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-1));
  String currentDayMinus2 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-2));
  String currentDayMinus3 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-3));
  String currentDayPlus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(1));
  String currentDayPlus5 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(5));
  Map<Long, String> keyToPartitionMap = Collections.unmodifiableMap(new HashMap<Long, String>() {
    {
      put(120 * MB, currentDay);
      put(110 * MB, currentDayMinus1);
      put(100 * MB, currentDayMinus2);
      put(80 * MB, currentDayMinus3);
      put(90 * MB, currentDayPlus1);
      put(70 * MB, currentDayPlus5);
    }
  });
  UnBoundedPartitionAwareCompactionStrategy strategy = new UnBoundedPartitionAwareCompactionStrategy();
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
      .withPath("/tmp")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withCompactionStrategy(strategy)
          .withTargetPartitionsPerDayBasedCompaction(2)
          .build())
      .build();
  List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
  List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
  assertTrue(returned.size() < operations.size(),
      "UnBoundedPartitionAwareCompactionStrategy should not include the last "
          + writeConfig.getTargetPartitionsPerDayBasedCompaction()
          + " partitions or partitions later than today");
  assertEquals(1, returned.size(), "UnBoundedPartitionAwareCompactionStrategy should have resulted in 1 compaction");
}
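The day-based window that separates the two partition-aware strategies can be derived with the same helpers the test uses. A short sketch, assuming the yyyy/MM/dd partition format from the test: BoundedPartitionAwareCompactionStrategy targets the most recent targetPartitionsPerDayBasedCompaction partitions, while UnBoundedPartitionAwareCompactionStrategy keeps the complement (illustrative only):

  // Compute the "recent" partition paths covered by the bounded strategy's window (N = 2 in the test above).
  SimpleDateFormat format = new SimpleDateFormat("yyyy/MM/dd");
  int targetPartitions = 2;
  List<String> recentPartitions = IntStream.range(0, targetPartitions)
      .mapToObj(offset -> format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-offset)))
      .collect(Collectors.toList());
  // Operations in these partitions are candidates for BoundedPartitionAwareCompactionStrategy;
  // UnBoundedPartitionAwareCompactionStrategy.orderAndFilter returns the operations outside this window.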