
Example 26 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

The class TestAsyncCompaction, method testRollbackInflightIngestionWithPendingCompaction.

@Test
public void testRollbackInflightIngestionWithPendingCompaction() throws Exception {
    // Rollback inflight ingestion when there is pending compaction
    HoodieWriteConfig cfg = getConfig(false);
    String firstInstantTime = "001";
    String secondInstantTime = "004";
    String compactionInstantTime = "005";
    String inflightInstantTime = "006";
    String nextInflightInstantTime = "007";
    int numRecs = 2000;
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        // Schedule compaction but do not run it
        scheduleCompaction(compactionInstantTime, client, cfg);
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
        HoodieInstant inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
        assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
        // Starting the next commit should roll back the inflight ingestion
        client.startCommitWithTime(nextInflightInstantTime);
        // Validate
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
        assertEquals(nextInflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
        assertEquals(1, metaClient.getActiveTimeline().filterPendingExcludingCompaction().getInstants().count(), "Expect only one inflight instant");
        // Expect pending Compaction to be present
        pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
    }
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), ArrayList (java.util.ArrayList), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Test (org.junit.jupiter.api.Test)
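
The assertions above lean on Hudi's timeline filtering API. As a minimal sketch of that pattern, assuming hadoopConf and basePath point at an existing Hudi table (the class and helper names are hypothetical; all calls appear in the test above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;

public class TimelineInspectionSketch {
    // Hypothetical helper: print the first pending compaction and the first
    // pending non-compaction (ingestion) instant, mirroring the assertions above.
    static void printPendingInstants(Configuration hadoopConf, String basePath) {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
                .setConf(hadoopConf).setBasePath(basePath).build();
        HoodieTimeline timeline = metaClient.getActiveTimeline();
        // Compaction instants that are still REQUESTED or INFLIGHT.
        Option<HoodieInstant> compaction = timeline.filterPendingCompactionTimeline().firstInstant();
        if (compaction.isPresent()) {
            System.out.println("Pending compaction at " + compaction.get().getTimestamp());
        }
        // Pending instants other than compactions, e.g. an inflight delta commit.
        Option<HoodieInstant> ingestion = timeline.filterPendingExcludingCompaction().firstInstant();
        if (ingestion.isPresent()) {
            System.out.println("Pending ingestion at " + ingestion.get().getTimestamp());
        }
    }
}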

Example 27 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

The class TestAsyncCompaction, method testRollbackForInflightCompaction.

@Test
public void testRollbackForInflightCompaction() throws Exception {
    // Rollback inflight compaction
    HoodieWriteConfig cfg = getConfig(false);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        String firstInstantTime = "001";
        String secondInstantTime = "004";
        String compactionInstantTime = "005";
        int numRecs = 2000;
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        // Schedule compaction but do not run it
        scheduleCompaction(compactionInstantTime, client, cfg);
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
        assertEquals(State.REQUESTED, pendingCompactionInstant.getState(), "Pending Compaction instant has expected state");
        moveCompactionFromRequestedToInflight(compactionInstantTime, cfg);
        // Reload and rollback inflight compaction
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context, metaClient);
        hoodieTable.rollbackInflightCompaction(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionInstantTime));
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        pendingCompactionInstant = metaClient.getCommitsAndCompactionTimeline().filterPendingCompactionTimeline().getInstants().findFirst().get();
        assertEquals("compaction", pendingCompactionInstant.getAction());
        assertEquals(State.REQUESTED, pendingCompactionInstant.getState());
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp());
        // We indirectly test for the race condition where an inflight instant was first deleted and then re-created.
        // Every time this happens, the pending compaction instant file in the Hoodie meta path becomes an empty file
        // (note: Hoodie reads the compaction plan from the aux path, which is untouched). To test for regression,
        // we simply get the file status and look at the file size.
        FileStatus fstatus = metaClient.getFs().getFileStatus(new Path(metaClient.getMetaPath(), pendingCompactionInstant.getFileName()));
        assertTrue(fstatus.getLen() > 0);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), Path (org.apache.hadoop.fs.Path), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), FileStatus (org.apache.hadoop.fs.FileStatus), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieTable (org.apache.hudi.table.HoodieTable), Test (org.junit.jupiter.api.Test)
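
The rollback step in this test reduces to two calls. A minimal sketch, assuming cfg, context, metaClient, and compactionInstantTime are set up as in the test above (the wrapper class and method names are hypothetical):

import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieSparkTable;
import org.apache.hudi.table.HoodieTable;

public class CompactionRollbackSketch {
    // Hypothetical helper wrapping the rollback step from the test above.
    static void rollbackInflightCompaction(HoodieWriteConfig cfg, HoodieEngineContext context,
            HoodieTableMetaClient metaClient, String compactionInstantTime) {
        HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
        // Identify the inflight compaction by state, action, and instant time, then
        // roll it back; as the test asserts, the instant reverts to REQUESTED on the
        // timeline, so the compaction can be re-attempted later.
        table.rollbackInflightCompaction(
                new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionInstantTime));
    }
}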

Example 28 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

The class TestAsyncCompaction, method testCompactionOnReplacedFiles.

@Test
public void testCompactionOnReplacedFiles() throws Exception {
    // Schedule a compaction. Replace those file groups and ensure compaction completes successfully.
    HoodieWriteConfig cfg = getConfig(true);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        String firstInstantTime = "001";
        String secondInstantTime = "004";
        String compactionInstantTime = "005";
        String replaceInstantTime = "006";
        String fourthInstantTime = "007";
        int numRecs = 2000;
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
        scheduleCompaction(compactionInstantTime, client, cfg);
        metaClient.reloadActiveTimeline();
        HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
        Set<HoodieFileGroupId> fileGroupsBeforeReplace = getAllFileGroups(hoodieTable, dataGen.getPartitionPaths());
        // replace by using insertOverwrite
        JavaRDD<HoodieRecord> replaceRecords = jsc.parallelize(dataGen.generateInserts(replaceInstantTime, numRecs), 1);
        client.startCommitWithTime(replaceInstantTime, HoodieTimeline.REPLACE_COMMIT_ACTION);
        client.insertOverwrite(replaceRecords, replaceInstantTime);
        metaClient.reloadActiveTimeline();
        hoodieTable = getHoodieTable(metaClient, cfg);
        Set<HoodieFileGroupId> newFileGroups = getAllFileGroups(hoodieTable, dataGen.getPartitionPaths());
        // make sure earlier file groups are not visible
        assertEquals(0, newFileGroups.stream().filter(fg -> fileGroupsBeforeReplace.contains(fg)).count());
        // compaction should still complete even though its associated file groups have been replaced
        executeCompactionWithReplacedFiles(compactionInstantTime, client, hoodieTable, cfg, dataGen.getPartitionPaths(), fileGroupsBeforeReplace);
    }
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieFileGroupId (org.apache.hudi.common.model.HoodieFileGroupId), HoodieTable (org.apache.hudi.table.HoodieTable), Test (org.junit.jupiter.api.Test)
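
The replace step in this test is the standard insert-overwrite flow. A minimal sketch, assuming a live SparkRDDWriteClient, a JavaSparkContext, and a list of records as in the test above (the wrapper class and method names are hypothetical; the calls themselves appear verbatim in the test):

import java.util.List;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class InsertOverwriteSketch {
    // Hypothetical helper: perform an insert-overwrite, which writes the records
    // as a replace commit. File groups previously backing the affected partitions
    // stop being visible to readers once the replace commit completes.
    static void replaceWith(SparkRDDWriteClient client, JavaSparkContext jsc,
            List<HoodieRecord> records, String replaceInstantTime) {
        JavaRDD<HoodieRecord> rdd = jsc.parallelize(records, 1);
        client.startCommitWithTime(replaceInstantTime, HoodieTimeline.REPLACE_COMMIT_ACTION);
        client.insertOverwrite(rdd, replaceInstantTime);
    }
}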

Example 29 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

The class TestAsyncCompaction, method testScheduleCompactionAfterPendingIngestion.

@Test
public void testScheduleCompactionAfterPendingIngestion() throws Exception {
    // Failure case: the earliest inflight ingestion instant time must be later than the compaction time
    HoodieWriteConfig cfg = getConfig(false);
    SparkRDDWriteClient client = getHoodieWriteClient(cfg);
    HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
    String firstInstantTime = "001";
    String secondInstantTime = "004";
    String inflightInstantTime = "005";
    String compactionInstantTime = "006";
    int numRecs = 2000;
    List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
    records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);
    metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    HoodieInstant inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
    assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
    assertThrows(IllegalArgumentException.class, () -> {
        // Schedule compaction but do not run it
        scheduleCompaction(compactionInstantTime, client, cfg);
    }, "Earliest ingestion inflight instant time must be later than compaction time");
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), ArrayList (java.util.ArrayList), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Test (org.junit.jupiter.api.Test)
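
Example 29 exercises Hudi's scheduling rule that a compaction instant must sort before every pending ingestion instant. A minimal sketch of that precondition check (the class and helper are hypothetical; it uses only timeline calls shown in the tests, and relies on Hudi instant times being lexicographically ordered timestamp strings):

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;

public class CompactionSchedulingSketch {
    // Hypothetical precondition check mirroring the rule the test expects to fail:
    // the compaction instant time must be earlier than (sort before) the earliest
    // pending ingestion instant on the timeline.
    static boolean canScheduleCompactionAt(HoodieTableMetaClient metaClient, String compactionInstantTime) {
        Option<HoodieInstant> earliestPendingIngestion =
                metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant();
        // Instant times are plain timestamp strings, so string comparison
        // reflects timeline order.
        return !earliestPendingIngestion.isPresent()
                || compactionInstantTime.compareTo(earliestPendingIngestion.get().getTimestamp()) < 0;
    }
}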

Example 30 with HoodieInstant

Use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

The class TestCopyOnWriteRollbackActionExecutor, method testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile.

@Test
public void testCopyOnWriteRollbackActionExecutorForFileListingAsGenerateFile() throws Exception {
    final String p1 = "2015/03/16";
    final String p2 = "2015/03/17";
    final String p3 = "2016/03/15";
    // Let's create some commit files and base files
    HoodieTestTable testTable = HoodieTestTable.of(metaClient).withPartitionMetaFiles(p1, p2, p3).addCommit("001").withBaseFilesInPartition(p1, "id11").withBaseFilesInPartition(p2, "id12").withLogFile(p1, "id11", 3).addCommit("002").withBaseFilesInPartition(p1, "id21").withBaseFilesInPartition(p2, "id22");
    HoodieWriteConfig writeConfig = getConfigBuilder().withRollbackUsingMarkers(false).build();
    HoodieTable table = this.getHoodieTable(metaClient, writeConfig);
    HoodieInstant needRollBackInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "002");
    // execute CopyOnWriteRollbackActionExecutor in file-listing mode
    BaseRollbackPlanActionExecutor copyOnWriteRollbackPlanActionExecutor = new BaseRollbackPlanActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, false, table.getConfig().shouldRollbackUsingMarkers());
    HoodieRollbackPlan rollbackPlan = (HoodieRollbackPlan) copyOnWriteRollbackPlanActionExecutor.execute().get();
    CopyOnWriteRollbackActionExecutor copyOnWriteRollbackActionExecutor = new CopyOnWriteRollbackActionExecutor(context, table.getConfig(), table, "003", needRollBackInstant, true, false);
    List<HoodieRollbackStat> hoodieRollbackStats = copyOnWriteRollbackActionExecutor.executeRollback(rollbackPlan);
    // assert hoodieRollbackStats
    assertEquals(3, hoodieRollbackStats.size());
    for (HoodieRollbackStat stat : hoodieRollbackStats) {
        switch(stat.getPartitionPath()) {
            case p1:
                assertEquals(1, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.EMPTY_MAP, stat.getCommandBlocksCount());
                assertEquals(testTable.forCommit("002").getBaseFilePath(p1, "id21").toString(), this.fs.getScheme() + ":" + stat.getSuccessDeleteFiles().get(0));
                break;
            case p2:
                assertEquals(1, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.EMPTY_MAP, stat.getCommandBlocksCount());
                assertEquals(testTable.forCommit("002").getBaseFilePath(p2, "id22").toString(), this.fs.getScheme() + ":" + stat.getSuccessDeleteFiles().get(0));
                break;
            case p3:
                assertEquals(0, stat.getSuccessDeleteFiles().size());
                assertEquals(0, stat.getFailedDeleteFiles().size());
                assertEquals(Collections.EMPTY_MAP, stat.getCommandBlocksCount());
                break;
            default:
                fail("Unexpected partition: " + stat.getPartitionPath());
        }
    }
    assertTrue(testTable.inflightCommitExists("001"));
    assertTrue(testTable.commitExists("001"));
    assertTrue(testTable.baseFileExists(p1, "001", "id11"));
    assertTrue(testTable.baseFileExists(p2, "001", "id12"));
    assertFalse(testTable.inflightCommitExists("002"));
    assertFalse(testTable.commitExists("002"));
    assertFalse(testTable.baseFileExists(p1, "002", "id21"));
    assertFalse(testTable.baseFileExists(p2, "002", "id22"));
}
Also used: HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant), HoodieRollbackStat (org.apache.hudi.common.HoodieRollbackStat), HoodieRollbackPlan (org.apache.hudi.avro.model.HoodieRollbackPlan), HoodieTestTable (org.apache.hudi.common.testutils.HoodieTestTable), HoodieTable (org.apache.hudi.table.HoodieTable), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)

Aggregations

HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 323
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 129
ArrayList (java.util.ArrayList) 118
List (java.util.List) 116
IOException (java.io.IOException) 112
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 104
Test (org.junit.jupiter.api.Test) 97
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata) 96
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline) 89
Map (java.util.Map) 84
Option (org.apache.hudi.common.util.Option) 84
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 84
Collectors (java.util.stream.Collectors) 83
HashMap (java.util.HashMap) 81
Path (org.apache.hadoop.fs.Path) 78
Pair (org.apache.hudi.common.util.collection.Pair) 71
Logger (org.apache.log4j.Logger) 67
LogManager (org.apache.log4j.LogManager) 66
HoodieIOException (org.apache.hudi.exception.HoodieIOException) 65
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest) 61