Example 16 with HoodieReadClient

use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

the class TestInlineCompaction method testCompactionRetryOnFailureBasedOnNumCommits.

@Test
public void testCompactionRetryOnFailureBasedOnNumCommits() throws Exception {
    // Given: two delta commits, plus a compaction that was scheduled but failed and is left in-flight
    HoodieWriteConfig cfg = getConfigBuilder(false).withCompactionConfig(HoodieCompactionConfig.newBuilder().withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1).build()).build();
    List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
    String instantTime2;
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
        List<HoodieRecord> records = dataGen.generateInserts(instants.get(0), 100);
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
        // Schedule compaction at instantTime2, then make it in-flight (simulates inline compaction failing)
        instantTime2 = HoodieActiveTimeline.createNewInstantTime();
        scheduleCompaction(instantTime2, writeClient, cfg);
        moveCompactionFromRequestedToInflight(instantTime2, cfg);
    }
    // When: a third commit happens
    HoodieWriteConfig inlineCfg = getConfigForInlineCompaction(2, 60, CompactionTriggerStrategy.NUM_COMMITS);
    String instantTime3 = HoodieActiveTimeline.createNewInstantTime();
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(inlineCfg)) {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        createNextDeltaCommit(instantTime3, dataGen.generateUpdates(instantTime3, 100), writeClient, metaClient, inlineCfg, false);
    }
    // Then: the new delta commit is done and the failed compaction is retried first.
    // Timeline: 2 delta commits + 1 completed compaction + 1 new delta commit = 4 instants
    // (metaClient here is the shared test-harness field, rebuilt to pick up the new instants)
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    // The retried compaction completes under its original instant time
    assertEquals(instantTime2, metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().firstInstant().get().getTimestamp());
}
Also used : IntStream(java.util.stream.IntStream) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Arrays(java.util.Arrays) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Test(org.junit.jupiter.api.Test) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline)
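
The getConfigForInlineCompaction helper used above is not part of this excerpt. Judging from the config keys exercised in these tests, it plausibly builds a write config along the following lines (a sketch, not the verbatim test code; getConfigBuilder is assumed to come from the same test base class):

// Hypothetical reconstruction of the helper used above; the real TestInlineCompaction
// version may differ in defaults and builder ordering.
private HoodieWriteConfig getConfigForInlineCompaction(int maxDeltaCommits, int maxDeltaSeconds, CompactionTriggerStrategy strategy) {
    return getConfigBuilder(false)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .withInlineCompaction(true)                               // the retry happens inline, on the next write
            .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommits)  // NUM threshold
            .withMaxDeltaSecondsBeforeCompaction(maxDeltaSeconds)     // TIME threshold, in seconds
            .withInlineCompactionTriggerStrategy(strategy)
            .build())
        .build();
}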

Example 17 with HoodieReadClient

use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

the class TestInlineCompaction method testCompactionRetryOnFailureBasedOnNumAndTime.

@Test
public void testCompactionRetryOnFailureBasedOnNumAndTime() throws Exception {
    // Given: two delta commits, plus a compaction that was scheduled but failed and is left in-flight
    HoodieWriteConfig cfg = getConfigBuilder(false).withCompactionConfig(HoodieCompactionConfig.newBuilder().withInlineCompaction(false).withMaxDeltaSecondsBeforeCompaction(1).withMaxNumDeltaCommitsBeforeCompaction(1).withInlineCompactionTriggerStrategy(CompactionTriggerStrategy.NUM_AND_TIME).build()).build();
    String instantTime;
    List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
        List<HoodieRecord> records = dataGen.generateInserts(instants.get(0), 10);
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
        // Schedule compaction instantTime, make it in-flight (simulates inline compaction failing)
        instantTime = HoodieActiveTimeline.createNewInstantTime();
        scheduleCompaction(instantTime, writeClient, cfg);
        moveCompactionFromRequestedToInflight(instantTime, cfg);
    }
    // When: a third commit happens
    HoodieWriteConfig inlineCfg = getConfigForInlineCompaction(3, 20, CompactionTriggerStrategy.NUM_OR_TIME);
    String instantTime2;
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(inlineCfg)) {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        instantTime2 = HoodieActiveTimeline.createNewInstantTime();
        createNextDeltaCommit(instantTime2, dataGen.generateUpdates(instantTime2, 10), writeClient, metaClient, inlineCfg, false);
    }
    // Then: the new delta commit is done and the failed compaction is retried first.
    // Timeline: 2 delta commits + 1 completed compaction + 1 new delta commit = 4 instants
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    // The retried compaction completes under its original instant time
    assertEquals(instantTime, metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().firstInstant().get().getTimestamp());
}
Also used : IntStream(java.util.stream.IntStream) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Arrays(java.util.Arrays) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Test(org.junit.jupiter.api.Test) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline)
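
Both retry tests fake the failure with moveCompactionFromRequestedToInflight, which transitions the scheduled compaction on the timeline without writing any data. A minimal sketch of what such a helper can look like, assuming the transition APIs on HoodieActiveTimeline (the actual helper in Hudi's CompactionTestBase may differ):

// Sketch: leave a scheduled compaction in-flight so the next inline compaction run
// must retry it instead of scheduling a fresh one.
protected void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteConfig cfg) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    // Requested-state compaction instant for this timestamp
    // (HoodieInstant is from org.apache.hudi.common.table.timeline)
    HoodieInstant requested = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
    // Flip requested -> inflight; no base files are produced, so the compaction
    // looks like it started and died mid-way.
    metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(requested);
}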

Example 18 with HoodieReadClient

use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

the class TestInlineCompaction method testSuccessfulCompactionBasedOnNumAndTime.

@Test
public void testSuccessfulCompactionBasedOnNumAndTime() throws Exception {
    // Given: three delta commits under a NUM_AND_TIME inline compaction config (3 commits, 20 seconds)
    HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 20, CompactionTriggerStrategy.NUM_AND_TIME);
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
        List<HoodieRecord> records = dataGen.generateInserts(HoodieActiveTimeline.createNewInstantTime(), 10);
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        List<String> instants = IntStream.range(0, 3).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
        runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        // Then: ensure no compaction has run yet; the NUM condition (3 delta commits) is met,
        // but 20 seconds have not yet elapsed, and NUM_AND_TIME requires both conditions
        assertEquals(3, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
        // 4th commit, timestamped ~20 seconds ahead, satisfies both conditions and triggers compaction
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        String finalInstant = HoodieActiveTimeline.createNewInstantTime(20000);
        createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 10), writeClient, metaClient, cfg, false);
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        // 4 delta commits + 1 compaction commit = 5 instants
        assertEquals(5, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    }
}
Also used : IntStream(java.util.stream.IntStream) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Arrays(java.util.Arrays) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Test(org.junit.jupiter.api.Test) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline)
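
All of these examples obtain their HoodieReadClient through the getHoodieReadClient test helper. For context, a rough standalone usage sketch of the client itself; constructor overloads and generics vary across Hudi versions, and context and recordsRDD are illustrative names, not part of the excerpt:

// Illustrative only, not from TestInlineCompaction
HoodieReadClient readClient = new HoodieReadClient(context, cfg.getBasePath()); // context: a HoodieSparkEngineContext
JavaRDD<HoodieRecord> tagged = readClient.tagLocation(recordsRDD);   // tag records with their current file locations via the index
JavaRDD<HoodieRecord> newOnly = readClient.filterExists(recordsRDD); // drop records already present in the table (dedup)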

Aggregations

HoodieReadClient (org.apache.hudi.client.HoodieReadClient) 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord) 18
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 18
Test (org.junit.jupiter.api.Test) 18
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient) 17
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 17
ArrayList (java.util.ArrayList) 11
Arrays (java.util.Arrays) 8
List (java.util.List) 8
Collectors (java.util.stream.Collectors) 8
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline) 8
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 8
Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals) 8
Assertions.assertFalse (org.junit.jupiter.api.Assertions.assertFalse) 8
IntStream (java.util.stream.IntStream) 7
HoodieCompactionConfig (org.apache.hudi.config.HoodieCompactionConfig) 7
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable) 7
WriteMarkersFactory (org.apache.hudi.table.marker.WriteMarkersFactory) 7
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 6
HoodieTable (org.apache.hudi.table.HoodieTable) 5