Search in sources :

Example 6 with HoodieTimelineArchiver

use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

the class TestHoodieTimelineArchiver method testLoadArchiveTimelineWithUncompletedMergeArchiveFile.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testLoadArchiveTimelineWithUncompletedMergeArchiveFile(boolean enableArchiveMerge) throws Exception {
    HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200);
    for (int i = 1; i < 8; i++) {
        testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
        archiveAndGetCommitsList(writeConfig);
    }
    HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
    FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
    List<String> candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList());
    archiver.reOpenWriter();
    archiver.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1");
    archiver.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList()));
    HoodieLogFormat.Writer writer = archiver.reOpenWriter();
    String s = "Dummy Content";
    // stain the current merged archive file.
    FileIOUtils.createFileInPath(metaClient.getFs(), writer.getLogFile().getPath(), Option.of(s.getBytes()));
    // if there's only a damaged merged archive file, we need to ignore the exception while reading this damaged file.
    HoodieActiveTimeline rawActiveTimeline1 = new HoodieActiveTimeline(metaClient, false);
    HoodieArchivedTimeline archivedTimeLine1 = metaClient.getArchivedTimeline();
    assertEquals(7 * 3, archivedTimeLine1.countInstants() + rawActiveTimeline1.countInstants());
    // if there are a damaged merged archive files and other common damaged archive file.
    // hoodie need throw ioe while loading archived timeline because of parsing the damaged archive file.
    Path damagedFile = new Path(metaClient.getArchivePath(), ".commits_.archive.300_1-0-1");
    FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(s.getBytes()));
    assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload());
}
Also used : Path(org.apache.hadoop.fs.Path) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieException(org.apache.hudi.exception.HoodieException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) Path(org.apache.hadoop.fs.Path) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) List(java.util.List) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodieTestUtils.createCompactionCommitInMetadataTable(org.apache.hudi.common.testutils.HoodieTestUtils.createCompactionCommitInMetadataTable) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) IntStream(java.util.stream.IntStream) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) CsvSource(org.junit.jupiter.params.provider.CsvSource) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) MetadataConversionUtils(org.apache.hudi.client.utils.MetadataConversionUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTableMetadata(org.apache.hudi.metadata.HoodieTableMetadata) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Comparator(java.util.Comparator) Collections(java.util.Collections) Pair(org.apache.hudi.common.util.collection.Pair) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieTable(org.apache.hudi.table.HoodieTable) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 7 with HoodieTimelineArchiver

use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

the class TestHoodieTimelineArchiver method testArchiveCommitSavepointNoHole.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveCommitSavepointNoHole(boolean enableMetadataTable) throws Exception {
    init();
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table").withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build()).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build()).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()).build();
    HoodieTestDataGenerator.createCommitFile(basePath, "100", wrapperFs.getConf());
    HoodieTestDataGenerator.createCommitFile(basePath, "101", wrapperFs.getConf());
    HoodieTestDataGenerator.createSavepointFile(basePath, "101", wrapperFs.getConf());
    HoodieTestDataGenerator.createCommitFile(basePath, "102", wrapperFs.getConf());
    HoodieTestDataGenerator.createCommitFile(basePath, "103", wrapperFs.getConf());
    HoodieTestDataGenerator.createCommitFile(basePath, "104", wrapperFs.getConf());
    HoodieTestDataGenerator.createCommitFile(basePath, "105", wrapperFs.getConf());
    HoodieTable table = HoodieSparkTable.create(cfg, context);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    if (enableMetadataTable) {
        // Simulate a compaction commit in metadata table timeline
        // so the archival in data table can happen
        createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "105");
    }
    HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
    assertTrue(archiver.archiveIfRequired(context));
    timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
    assertEquals(5, timeline.countInstants(), "Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)");
    assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")), "Archived commits should always be safe");
    assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")), "Archived commits should always be safe");
    assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")), "Archived commits should always be safe");
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 8 with HoodieTimelineArchiver

use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

the class TestHoodieTimelineArchiver method testArchiveCommitsWithCompactionCommitInMetadataTableTimeline.

@Test
public void testArchiveCommitsWithCompactionCommitInMetadataTableTimeline() throws Exception {
    HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 4, 20);
    int startInstantTime = 100;
    int numCommits = 15;
    // "100" till "105" should be archived in this case
    int numExpectedArchived = 6;
    for (int i = startInstantTime; i < startInstantTime + numCommits; i++) {
        HoodieTestDataGenerator.createCommitFile(basePath, Integer.toString(i), wrapperFs.getConf());
    }
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "105");
    HoodieTable table = HoodieSparkTable.create(writeConfig, context);
    HoodieTimelineArchiver archiveLog = new HoodieTimelineArchiver(writeConfig, table);
    HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    assertEquals(numCommits, timeline.countInstants(), String.format("Loaded %d commits and the count should match", numCommits));
    assertTrue(archiveLog.archiveIfRequired(context));
    timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
    assertEquals(numCommits - numExpectedArchived, timeline.countInstants(), "Since we have a compaction commit of 105 in metadata table timeline, we should never archive any commit after that");
    for (int i = startInstantTime + numExpectedArchived; i < startInstantTime + numCommits; i++) {
        assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, Integer.toString(i))), String.format("Commit %d should not be archived", i));
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 9 with HoodieTimelineArchiver

use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

the class TestHoodieTimelineArchiver method testArchiveInflightClean.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveInflightClean(boolean enableMetadataTable) throws Exception {
    init();
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table").withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build()).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder().withRemoteServerPort(timelineServicePort).build()).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()).build();
    metaClient = HoodieTableMetaClient.reload(metaClient);
    createCleanMetadata("10", false);
    createCleanMetadata("11", false);
    HoodieInstant notArchivedInstant1 = createCleanMetadata("12", false);
    HoodieInstant notArchivedInstant2 = createCleanMetadata("13", false);
    HoodieInstant notArchivedInstant3 = createCleanMetadata("14", true);
    if (enableMetadataTable) {
        // Simulate a compaction commit in metadata table timeline
        // so the archival in data table can happen
        createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "14");
    }
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    archiver.archiveIfRequired(context);
    List<HoodieInstant> notArchivedInstants = metaClient.getActiveTimeline().reload().getInstants().collect(Collectors.toList());
    assertEquals(3, notArchivedInstants.size(), "Not archived instants should be 3");
    assertEquals(notArchivedInstants, Arrays.asList(notArchivedInstant1, notArchivedInstant2, notArchivedInstant3), "");
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 10 with HoodieTimelineArchiver

use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

the class TestHoodieTimelineArchiver method archiveAndGetCommitsList.

private Pair<List<HoodieInstant>, List<HoodieInstant>> archiveAndGetCommitsList(HoodieWriteConfig writeConfig) throws IOException {
    metaClient.reloadActiveTimeline();
    HoodieTimeline timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
    List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList());
    HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
    archiver.archiveIfRequired(context);
    timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants();
    List<HoodieInstant> commitsAfterArchival = timeline.getInstants().collect(Collectors.toList());
    return Pair.of(originalCommits, commitsAfterArchival);
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieTable(org.apache.hudi.table.HoodieTable)

Aggregations

HoodieTimelineArchiver (org.apache.hudi.client.HoodieTimelineArchiver)16 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)14 HoodieTable (org.apache.hudi.table.HoodieTable)12 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)12 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)11 HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable)8 ValueSource (org.junit.jupiter.params.provider.ValueSource)8 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)7 HashMap (java.util.HashMap)6 Map (java.util.Map)6 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)6 ArrayList (java.util.ArrayList)5 List (java.util.List)5 Path (org.apache.hadoop.fs.Path)5 HoodieArchivedTimeline (org.apache.hudi.common.table.timeline.HoodieArchivedTimeline)5 Test (org.junit.jupiter.api.Test)5 IOException (java.io.IOException)4 Arrays (java.util.Arrays)4 Collections (java.util.Collections)4 Comparator (java.util.Comparator)4