
Example 11 with HoodieTimelineArchiver

Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

The class TestHoodieTimelineArchiver, method testArchiveCommitTimeline.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveCommitTimeline(boolean enableMetadataTable) throws Exception {
    init();
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
            .withPath(basePath)
            .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
            .withParallelism(2, 2)
            .forTable("test-trip-table")
            .withCompactionConfig(HoodieCompactionConfig.newBuilder()
                    .retainCommits(1).archiveCommitsWith(2, 3).build())
            .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
                    .withRemoteServerPort(timelineServicePort).build())
            .withMetadataConfig(HoodieMetadataConfig.newBuilder()
                    .enable(enableMetadataTable).build())
            .build();
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTestDataGenerator.createCommitFile(basePath, "1", wrapperFs.getConf());
    HoodieInstant instant1 = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1");
    HoodieTestDataGenerator.createCommitFile(basePath, "2", wrapperFs.getConf());
    Path markerPath = new Path(metaClient.getMarkerFolderPath("2"));
    wrapperFs.mkdirs(markerPath);
    HoodieInstant instant2 = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "2");
    HoodieTestDataGenerator.createCommitFile(basePath, "3", wrapperFs.getConf());
    HoodieInstant instant3 = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3");
    // add 2 more instants to pass filter criteria set in compaction config above
    HoodieTestDataGenerator.createCommitFile(basePath, "4", wrapperFs.getConf());
    HoodieTestDataGenerator.createCommitFile(basePath, "5", wrapperFs.getConf());
    if (enableMetadataTable) {
        // Simulate a compaction commit in metadata table timeline
        // so the archival in data table can happen
        createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "5");
    }
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    boolean result = archiver.archiveIfRequired(context);
    assertTrue(result);
    HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
    List<HoodieInstant> archivedInstants = Arrays.asList(instant1, instant2, instant3);
    assertEquals(new HashSet<>(archivedInstants), archivedTimeline.filterCompletedInstants().getInstants().collect(Collectors.toSet()));
    assertFalse(wrapperFs.exists(markerPath));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) Path(org.apache.hadoop.fs.Path) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
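
Distilled from the test above, the archival flow reduces to a handful of calls. The sketch below triggers archival programmatically and prints what landed on the archived timeline. It assumes an initialized metaClient, an engine context, and a basePath as provided by the test harness; the helper name archiveAndList is illustrative rather than a Hudi API.

import java.io.IOException;
import org.apache.hudi.client.HoodieTimelineArchiver;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieSparkTable;
import org.apache.hudi.table.HoodieTable;

// Minimal sketch: archive eligible commits, then read the archived timeline back.
static void archiveAndList(HoodieEngineContext context, HoodieTableMetaClient metaClient, String basePath) throws IOException {
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
            .withPath(basePath)
            // Keep the active timeline between 2 and 3 commits; older ones are archived.
            .withCompactionConfig(HoodieCompactionConfig.newBuilder()
                    .retainCommits(1).archiveCommitsWith(2, 3).build())
            .forTable("test-trip-table")
            .build();
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    // A no-op unless the active timeline exceeds the configured bounds.
    archiver.archiveIfRequired(context);
    HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
    archivedTimeline.filterCompletedInstants().getInstants()
            .forEach(instant -> System.out.println("archived: " + instant));
}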

Example 12 with HoodieTimelineArchiver

Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

The class TestHoodieTimelineArchiver, method testArchiveEmptyTable.

@Test
public void testArchiveEmptyTable() throws Exception {
    init();
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
            .withPath(basePath)
            .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
            .withParallelism(2, 2)
            .forTable("test-trip-table")
            .build();
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    boolean result = archiver.archiveIfRequired(context);
    assertTrue(result);
}
Also used : HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
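
A natural follow-up assertion one might append to this test (a sketch reusing the names above): since no commits existed, the archived timeline should remain empty after the call.

    // Sketch: nothing was eligible, so the archived timeline stays empty.
    assertEquals(0, metaClient.getArchivedTimeline().countInstants());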

Example 13 with HoodieTimelineArchiver

Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

The class TestHoodieTimelineArchiver, method testMergeSmallArchiveFilesRecoverFromBuildPlanFailed.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testMergeSmallArchiveFilesRecoverFromBuildPlanFailed(boolean enableArchiveMerge) throws Exception {
    HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200);
    // do ingestion and trigger archive actions here.
    for (int i = 1; i < 8; i++) {
        testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
        archiveAndGetCommitsList(writeConfig);
    }
    // Build a plan to merge small archive files, then overwrite it with dummy content
    // so that the plan cannot be deserialized.
    HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
    FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
    List<String> candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList());
    archiver.reOpenWriter();
    Path plan = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME);
    archiver.buildArchiveMergePlan(candidateFiles, plan, ".commits_.archive.3_1-0-1");
    String s = "Dummy Content";
    // Corrupt the current merge plan file so it cannot be deserialized.
    FileIOUtils.createFileInPath(metaClient.getFs(), plan, Option.of(s.getBytes()));
    // Check that the damaged plan file does not block loading of the archived timeline.
    HoodieActiveTimeline rawActiveTimeline = new HoodieActiveTimeline(metaClient, false);
    HoodieArchivedTimeline archivedTimeLine = metaClient.getArchivedTimeline().reload();
    assertEquals(7 * 3, rawActiveTimeline.countInstants() + archivedTimeLine.countInstants());
    // Trigger several more archive rounds with the damaged merge plan left in place.
    for (int i = 1; i < 10; i++) {
        testTable.doWriteOperation("1000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
        archiveAndGetCommitsList(writeConfig);
    }
    // Loading both the archived and the active timeline should still succeed.
    HoodieActiveTimeline rawActiveTimeline1 = new HoodieActiveTimeline(metaClient, false);
    HoodieArchivedTimeline archivedTimeLine1 = metaClient.getArchivedTimeline().reload();
    // Check the total instant count across the active and archived timelines.
    assertEquals(16 * 3, archivedTimeLine1.countInstants() + rawActiveTimeline1.countInstants());
    // With a damaged archive file in addition to the damaged plan, Hudi should throw an exception while loading the archived timeline.
    Path damagedFile = new Path(metaClient.getArchivePath(), ".commits_.archive.300_1-0-1");
    FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(s.getBytes()));
    assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload());
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) Option(org.apache.hudi.common.util.Option) HoodieException(org.apache.hudi.exception.HoodieException) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) Arrays(java.util.Arrays) Collections(java.util.Collections) List(java.util.List) Collectors(java.util.stream.Collectors) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
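
The glob used above is worth isolating. The following helper, a sketch under the same test assumptions (the name listCandidateArchiveFiles is illustrative, not a Hudi API), returns the small ".commits_.archive*" files that buildArchiveMergePlan takes as merge candidates.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.table.HoodieTableMetaClient;

// List the archive files under the archive path, exactly as the test's glob does.
static List<String> listCandidateArchiveFiles(HoodieTableMetaClient metaClient) throws IOException {
    FileStatus[] statuses = metaClient.getFs()
            .globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
    return Arrays.stream(statuses)
            .map(status -> status.getPath().toString())
            .collect(Collectors.toList());
}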

Example 14 with HoodieTimelineArchiver

Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

The class TestCommitsCommand, method testShowArchivedCommits.

/**
 * Test case of 'commits showarchived' command.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommits(boolean enableMetadataTable) throws Exception {
    // Generate archive
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
            .withPath(tablePath1)
            .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
            .withParallelism(2, 2)
            .withCompactionConfig(HoodieCompactionConfig.newBuilder()
                    .retainCommits(1).archiveCommitsWith(2, 3).build())
            .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
                    .withRemoteServerPort(timelineServicePort).build())
            .withMetadataConfig(HoodieMetadataConfig.newBuilder()
                    .enable(enableMetadataTable).build())
            .forTable("test-trip-table")
            .build();
    // generate data and metadata
    Map<String, Integer[]> data = new LinkedHashMap<>();
    data.put("104", new Integer[] { 20, 10 });
    data.put("103", new Integer[] { 15, 15 });
    data.put("102", new Integer[] { 25, 45 });
    data.put("101", new Integer[] { 35, 15 });
    for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
        String key = entry.getKey();
        Integer[] value = entry.getValue();
        HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath1, key, hadoopConf(), Option.of(value[0]), Option.of(value[1]));
    }
    if (enableMetadataTable) {
        // Simulate a compaction commit in metadata table timeline
        // so the archival in data table can happen
        createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "104");
    }
    // archive
    metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
    HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    archiver.archiveIfRequired(context());
    CommandResult cr = shell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "100", "104"));
    assertTrue(cr.isSuccess());
    // Instants 101 and 102 were archived; generate the expected data.
    assertEquals(2, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(), "There should be 2 instants that are not archived!");
    // Remove the unarchived instants 103 and 104 from the expected data.
    data.remove("103");
    data.remove("104");
    String expected = generateExpectData(1, data);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used : HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) CommandResult(org.springframework.shell.core.CommandResult) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
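
The CLI interaction itself is a single call. Below is a minimal sketch of the same invocation without the assertions, where shell() and context() come from the CLI test harness as in the test above.

    // Sketch: run 'commits showarchived' for a time range and print the rendered table.
    CommandResult cr = shell().executeCommand(
            String.format("commits showarchived --startTs %s --endTs %s", "100", "104"));
    if (cr.isSuccess()) {
        System.out.println(cr.getResult().toString());
    }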

Example 15 with HoodieTimelineArchiver

Use of org.apache.hudi.client.HoodieTimelineArchiver in project hudi by apache.

The class TestCommitsCommand, method testShowArchivedCommitsWithMultiCommitsFile.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommitsWithMultiCommitsFile(boolean enableMetadataTable) throws Exception {
    // Generate archive
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
            .withPath(tablePath1)
            .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
            .withParallelism(2, 2)
            .withCompactionConfig(HoodieCompactionConfig.newBuilder()
                    .retainCommits(1).archiveCommitsWith(2, 3).build())
            .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
                    .withRemoteServerPort(timelineServicePort).build())
            .withMetadataConfig(HoodieMetadataConfig.newBuilder()
                    .enable(enableMetadataTable).build())
            .forTable("test-trip-table")
            .build();
    // generate data and metadata
    Map<String, Integer[]> data = new LinkedHashMap<>();
    for (int i = 194; i >= 154; i--) {
        data.put(String.valueOf(i), new Integer[] { i, i });
    }
    if (enableMetadataTable) {
        // Simulate a compaction commit in metadata table timeline
        // so the archival in data table can happen
        createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "194");
    }
    for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
        String key = entry.getKey();
        Integer[] value = entry.getValue();
        HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath1, key, hadoopConf(), Option.of(value[0]), Option.of(value[1]));
        // archive
        metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
        HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
        // archive after each commit so that multiple archive files are created
        HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
        archiver.archiveIfRequired(context());
    }
    CommandResult cr = shell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "160", "174"));
    assertTrue(cr.isSuccess());
    assertEquals(3, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(), "There should be 3 instants that are not archived!");
    Map<String, Integer[]> data2 = new LinkedHashMap<>();
    for (int i = 174; i >= 161; i--) {
        data2.put(String.valueOf(i), new Integer[] { i, i });
    }
    String expected = generateExpectData(1, data2);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used : HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) CommandResult(org.springframework.shell.core.CommandResult) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
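
Because the archiver runs after every commit with archiveCommitsWith(2, 3), each round trims only a small batch, so the archive directory accumulates many files. Below is a sketch of inspecting that, reusing the glob from Example 13; the exact count depends on the configuration, so it is printed rather than asserted.

    // Sketch: count the archive files produced by the repeated archiving above.
    FileStatus[] archiveFiles = metaClient.getFs()
            .globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
    System.out.println("archive files: " + archiveFiles.length);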

Aggregations

HoodieTimelineArchiver (org.apache.hudi.client.HoodieTimelineArchiver) 16
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig) 14
HoodieTable (org.apache.hudi.table.HoodieTable) 12
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest) 12
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant) 11
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable) 8
ValueSource (org.junit.jupiter.params.provider.ValueSource) 8
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline) 7
HashMap (java.util.HashMap) 6
Map (java.util.Map) 6
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient) 6
ArrayList (java.util.ArrayList) 5
List (java.util.List) 5
Path (org.apache.hadoop.fs.Path) 5
HoodieArchivedTimeline (org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) 5
Test (org.junit.jupiter.api.Test) 5
IOException (java.io.IOException) 4
Arrays (java.util.Arrays) 4
Collections (java.util.Collections) 4
Comparator (java.util.Comparator) 4