Search in sources:

Example 1 with HoodieReadClient

Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

From the class TestAsyncCompaction, method testRollbackInflightIngestionWithPendingCompaction.

@Test
public void testRollbackInflightIngestionWithPendingCompaction() throws Exception {
    // Roll back the inflight ingestion when there is a pending compaction
    HoodieWriteConfig cfg = getConfig(false);
    String firstInstantTime = "001";
    String secondInstantTime = "004";
    String compactionInstantTime = "005";
    String inflightInstantTime = "006";
    String nextInflightInstantTime = "007";
    int numRecs = 2000;
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        // Schedule a compaction but do not run it
        scheduleCompaction(compactionInstantTime, client, cfg);
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
        HoodieInstant inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
        assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
        // This should trigger a rollback of the inflight ingestion
        client.startCommitWithTime(nextInflightInstantTime);
        // Validate
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
        assertEquals(nextInflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
        assertEquals(1, metaClient.getActiveTimeline().filterPendingExcludingCompaction().getInstants().count(), "Expect only one inflight instant");
        // Expect pending Compaction to be present
        pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
    }
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), ArrayList (java.util.ArrayList), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Test (org.junit.jupiter.api.Test)
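
The readClient above comes from the harness helper getHoodieReadClient(basePath), which this page does not show. As a minimal sketch, assuming the HoodieSparkEngineContext-based constructor from Hudi 0.10.x-era APIs (the helper name createReadClient is hypothetical), the construction could look like:

import org.apache.hudi.client.HoodieReadClient;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.spark.api.java.JavaSparkContext;

// Hypothetical helper: build a HoodieReadClient for a table base path.
// Constructor signature assumed from Hudi 0.10.x-era APIs.
static HoodieReadClient createReadClient(JavaSparkContext jsc, String basePath) {
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    return new HoodieReadClient(engineContext, basePath);
}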

Example 2 with HoodieReadClient

Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

From the class TestAsyncCompaction, method testRollbackForInflightCompaction.

@Test
public void testRollbackForInflightCompaction() throws Exception {
    // Roll back an inflight compaction
    HoodieWriteConfig cfg = getConfig(false);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        String firstInstantTime = "001";
        String secondInstantTime = "004";
        String compactionInstantTime = "005";
        int numRecs = 2000;
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        // Schedule a compaction but do not run it
        scheduleCompaction(compactionInstantTime, client, cfg);
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
        assertEquals(State.REQUESTED, pendingCompactionInstant.getState(), "Pending Compaction instant has expected state");
        moveCompactionFromRequestedToInflight(compactionInstantTime, cfg);
        // Reload and roll back the inflight compaction
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context, metaClient);
        hoodieTable.rollbackInflightCompaction(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionInstantTime));
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        pendingCompactionInstant = metaClient.getCommitsAndCompactionTimeline().filterPendingCompactionTimeline().getInstants().findFirst().get();
        assertEquals("compaction", pendingCompactionInstant.getAction());
        assertEquals(State.REQUESTED, pendingCompactionInstant.getState());
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp());
        // We indirectly test for the race condition where an inflight instant was first deleted and then recreated.
        // Every time this happens, the pending compaction instant file in the Hoodie meta path becomes an empty file
        // (note: Hoodie reads the compaction plan from the aux path, which is untouched). To test for regression, we
        // simply get the file status and check the file size
        FileStatus fstatus = metaClient.getFs().getFileStatus(new Path(metaClient.getMetaPath(), pendingCompactionInstant.getFileName()));
        assertTrue(fstatus.getLen() > 0);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), Path (org.apache.hadoop.fs.Path), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), FileStatus (org.apache.hadoop.fs.FileStatus), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieTable (org.apache.hudi.table.HoodieTable), Test (org.junit.jupiter.api.Test)
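
Both examples rebuild the HoodieTableMetaClient before each timeline check because the active timeline is cached at construction. A small helper capturing that repeated probe might look like the following sketch (the helper name firstPendingCompaction is hypothetical; the calls are the same ones used in the examples above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;

// Hypothetical helper: rebuild the meta client and return the first pending
// compaction instant, mirroring the repeated pattern in the tests above.
static HoodieInstant firstPendingCompaction(Configuration hadoopConf, String basePath) {
    HoodieTableMetaClient metaClient =
            HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
    // .get() assumes a pending compaction exists, as the surrounding assertions do
    return metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
}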

Example 3 with HoodieReadClient

Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

From the class TestAsyncCompaction, method testInflightCompaction.

@Test
public void testInflightCompaction() throws Exception {
    // There is an inflight compaction; the subsequent compaction run must work correctly
    HoodieWriteConfig cfg = getConfig(true);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        String firstInstantTime = "001";
        String secondInstantTime = "004";
        String compactionInstantTime = "005";
        String thirdInstantTime = "006";
        String fourthInstantTime = "007";
        int numRecs = 2000;
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        // Schedule and mark compaction instant as inflight
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
        scheduleCompaction(compactionInstantTime, client, cfg);
        moveCompactionFromRequestedToInflight(compactionInstantTime, cfg);
        // Complete ingestions
        runNextDeltaCommits(client, readClient, Arrays.asList(thirdInstantTime, fourthInstantTime), records, cfg, false, Arrays.asList(compactionInstantTime));
        // Execute the inflight compaction
        executeCompaction(compactionInstantTime, client, hoodieTable, cfg, numRecs, true);
    }
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), HoodieTable (org.apache.hudi.table.HoodieTable), ArrayList (java.util.ArrayList), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Test (org.junit.jupiter.api.Test)
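
scheduleCompaction and executeCompaction are helpers defined elsewhere in TestAsyncCompaction. In outline, they wrap the write client's schedule and compact calls, roughly as in this sketch (method names taken from Hudi 0.10.x-era SparkRDDWriteClient; treat the exact signatures as assumptions and verify against your release):

import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.spark.api.java.JavaRDD;

// Sketch of the schedule-then-execute flow the test helpers wrap.
static void scheduleAndRunCompaction(SparkRDDWriteClient client, String instantTime) {
    // Schedule the compaction at a caller-chosen instant time
    client.scheduleCompactionAtInstant(instantTime, Option.empty());
    // Execute it, possibly later and from a separate async process
    HoodieWriteMetadata<JavaRDD<WriteStatus>> result = client.compact(instantTime);
    client.commitCompaction(instantTime, result.getWriteStatuses(), Option.empty());
}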

Example 4 with HoodieReadClient

Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

From the class TestAsyncCompaction, method testCompactionOnReplacedFiles.

@Test
public void testCompactionOnReplacedFiles() throws Exception {
    // Schedule a compaction. Replace those file groups and ensure compaction completes successfully.
    HoodieWriteConfig cfg = getConfig(true);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        String firstInstantTime = "001";
        String secondInstantTime = "004";
        String compactionInstantTime = "005";
        String replaceInstantTime = "006";
        String fourthInstantTime = "007";
        int numRecs = 2000;
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
        scheduleCompaction(compactionInstantTime, client, cfg);
        metaClient.reloadActiveTimeline();
        HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
        Set<HoodieFileGroupId> fileGroupsBeforeReplace = getAllFileGroups(hoodieTable, dataGen.getPartitionPaths());
        // Replace the existing file groups using insertOverwrite
        JavaRDD<HoodieRecord> replaceRecords = jsc.parallelize(dataGen.generateInserts(replaceInstantTime, numRecs), 1);
        client.startCommitWithTime(replaceInstantTime, HoodieTimeline.REPLACE_COMMIT_ACTION);
        client.insertOverwrite(replaceRecords, replaceInstantTime);
        metaClient.reloadActiveTimeline();
        hoodieTable = getHoodieTable(metaClient, cfg);
        Set<HoodieFileGroupId> newFileGroups = getAllFileGroups(hoodieTable, dataGen.getPartitionPaths());
        // Make sure the earlier file groups are no longer visible
        assertEquals(0, newFileGroups.stream().filter(fg -> fileGroupsBeforeReplace.contains(fg)).count());
        // Compaction should still run even though its associated file groups have been replaced
        executeCompactionWithReplacedFiles(compactionInstantTime, client, hoodieTable, cfg, dataGen.getPartitionPaths(), fileGroupsBeforeReplace);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId), HoodieTable (org.apache.hudi.table.HoodieTable), Test (org.junit.jupiter.api.Test)
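
getAllFileGroups is another harness helper. One plausible reading of what it computes, via the public file-system view, is the sketch below (the helper name allFileGroupIds and the exact view construction are assumptions based on Hudi 0.10.x-era APIs):

import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;

// Hypothetical helper: collect every file-group id visible under the
// completed commits timeline, across the given partitions.
static Set<HoodieFileGroupId> allFileGroupIds(HoodieTableMetaClient metaClient, String[] partitions) {
    HoodieTableFileSystemView view = new HoodieTableFileSystemView(
            metaClient, metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());
    return Arrays.stream(partitions)
            .flatMap(view::getAllFileGroups)
            .map(HoodieFileGroup::getFileGroupId)
            .collect(Collectors.toSet());
}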

Example 5 with HoodieReadClient

Use of org.apache.hudi.client.HoodieReadClient in project hudi by apache.

From the class TestAsyncCompaction, method testScheduleCompactionAfterPendingIngestion.

@Test
public void testScheduleCompactionAfterPendingIngestion() throws Exception {
    // Failure case: the earliest inflight ingestion instant time must be later than the compaction time
    HoodieWriteConfig cfg = getConfig(false);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        String firstInstantTime = "001";
        String secondInstantTime = "004";
        String inflightInstantTime = "005";
        String compactionInstantTime = "006";
        int numRecs = 2000;
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieInstant inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
        assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
        assertThrows(IllegalArgumentException.class, () -> {
            // Schedule a compaction but do not run it
            scheduleCompaction(compactionInstantTime, client, cfg);
        }, "Earliest ingestion inflight instant time must be later than compaction time");
    }
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), ArrayList (java.util.ArrayList), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Test (org.junit.jupiter.api.Test)
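
The expected IllegalArgumentException comes from scheduling validation: a compaction instant must sort before every pending ingestion instant already on the timeline. That ordering rule can be expressed with HoodieTimeline's timestamp comparator, roughly as below (a sketch; compareTimestamps and LESSER_THAN are from Hudi 0.10.x-era APIs, and the helper name is hypothetical):

import org.apache.hudi.common.table.timeline.HoodieTimeline;

// Sketch of the ordering rule the scheduler enforces: the proposed compaction
// instant time must be earlier than the earliest pending inflight ingestion.
static boolean canScheduleCompactionAt(String compactionTime, String earliestInflightTime) {
    return HoodieTimeline.compareTimestamps(compactionTime, HoodieTimeline.LESSER_THAN, earliestInflightTime);
}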

Aggregations

HoodieReadClient (org.apache.hudi.client.HoodieReadClient): 18
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 18
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 18
Test (org.junit.jupiter.api.Test): 18
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 17
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 17
ArrayList (java.util.ArrayList): 11
Arrays (java.util.Arrays): 8
List (java.util.List): 8
Collectors (java.util.stream.Collectors): 8
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 8
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 8
Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals): 8
Assertions.assertFalse (org.junit.jupiter.api.Assertions.assertFalse): 8
IntStream (java.util.stream.IntStream): 7
HoodieCompactionConfig (org.apache.hudi.config.HoodieCompactionConfig): 7
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 7
WriteMarkersFactory (org.apache.hudi.table.marker.WriteMarkersFactory): 7
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 6
HoodieTable (org.apache.hudi.table.HoodieTable): 5