
Example 21 with HoodieSparkTable

Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.

From the class TestCommitsCommand, method testShowArchivedCommits.

/**
 * Test case for the 'commits showarchived' command.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommits(boolean enableMetadataTable) throws Exception {
    // Generate archive
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
        .withPath(tablePath1)
        .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
        .withParallelism(2, 2)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .retainCommits(1)
            .archiveCommitsWith(2, 3)
            .build())
        .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
            .withRemoteServerPort(timelineServicePort)
            .build())
        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
            .enable(enableMetadataTable)
            .build())
        .forTable("test-trip-table")
        .build();
    // generate data and metadata
    Map<String, Integer[]> data = new LinkedHashMap<>();
    data.put("104", new Integer[] { 20, 10 });
    data.put("103", new Integer[] { 15, 15 });
    data.put("102", new Integer[] { 25, 45 });
    data.put("101", new Integer[] { 35, 15 });
    for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
        String key = entry.getKey();
        Integer[] value = entry.getValue();
        HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath1, key, hadoopConf(), Option.of(value[0]), Option.of(value[1]));
    }
    if (enableMetadataTable) {
        // Simulate a compaction commit in metadata table timeline
        // so the archival in data table can happen
        createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "104");
    }
    // archive
    metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
    HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    archiver.archiveIfRequired(context());
    CommandResult cr = shell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "100", "104"));
    assertTrue(cr.isSuccess());
    // instants 101 and 102 were archived; generate the expected data
    assertEquals(2, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(), "There should be 2 unarchived instants!");
    // since 101 and 102 were archived, drop the still-active instants 103 and 104
    data.remove("103");
    data.remove("104");
    String expected = generateExpectData(1, data);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used: HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) CommandResult(org.springframework.shell.core.CommandResult) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
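
For orientation, the archival flow this test drives can be reduced to a standalone sketch. This is a minimal illustration, not Hudi's documented entry point: the local Spark setup, the base path, and the omission of a write schema are assumptions for brevity, while the table and archiver calls mirror the example above.

import org.apache.hudi.client.HoodieTimelineArchiver;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieSparkTable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class ArchiveSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical table location; any Hudi table with enough completed commits works.
        String basePath = "file:///tmp/hudi/test-trip-table";
        JavaSparkContext jsc = new JavaSparkContext(
            new SparkConf().setMaster("local[2]").setAppName("archive-sketch"));
        HoodieSparkEngineContext context = new HoodieSparkEngineContext(jsc);
        // Archive once more than 3 commits are active, trimming the timeline down to 2.
        HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
            .withPath(basePath)
            .withCompactionConfig(HoodieCompactionConfig.newBuilder()
                .retainCommits(1)
                .archiveCommitsWith(2, 3)
                .build())
            .forTable("test-trip-table")
            .build();
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
            .setConf(jsc.hadoopConfiguration())
            .setBasePath(basePath)
            .build();
        // The same three calls the test makes: table, archiver, archiveIfRequired.
        HoodieSparkTable table = HoodieSparkTable.create(cfg, context, metaClient);
        HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
        archiver.archiveIfRequired(context);
        jsc.stop();
    }
}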

Example 22 with HoodieSparkTable

Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.

From the class TestCommitsCommand, method testShowArchivedCommitsWithMultiCommitsFile.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testShowArchivedCommitsWithMultiCommitsFile(boolean enableMetadataTable) throws Exception {
    // Generate archive
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
        .withPath(tablePath1)
        .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
        .withParallelism(2, 2)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .retainCommits(1)
            .archiveCommitsWith(2, 3)
            .build())
        .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
            .withRemoteServerPort(timelineServicePort)
            .build())
        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
            .enable(enableMetadataTable)
            .build())
        .forTable("test-trip-table")
        .build();
    // generate data and metadata
    Map<String, Integer[]> data = new LinkedHashMap<>();
    for (int i = 194; i >= 154; i--) {
        data.put(String.valueOf(i), new Integer[] { i, i });
    }
    if (enableMetadataTable) {
        // Simulate a compaction commit in metadata table timeline
        // so the archival in data table can happen
        createCompactionCommitInMetadataTable(hadoopConf(), metaClient.getFs(), tablePath1, "194");
    }
    for (Map.Entry<String, Integer[]> entry : data.entrySet()) {
        String key = entry.getKey();
        Integer[] value = entry.getValue();
        HoodieTestCommitMetadataGenerator.createCommitFileWithMetadata(tablePath1, key, hadoopConf(), Option.of(value[0]), Option.of(value[1]));
        // archive
        metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
        HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
        // archiving after each commit creates multiple archive files
        HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
        archiver.archiveIfRequired(context());
    }
    CommandResult cr = shell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "160", "174"));
    assertTrue(cr.isSuccess());
    assertEquals(3, metaClient.reloadActiveTimeline().getCommitsTimeline().countInstants(), "There should be 3 unarchived instants!");
    Map<String, Integer[]> data2 = new LinkedHashMap<>();
    for (int i = 174; i >= 161; i--) {
        data2.put(String.valueOf(i), new Integer[] { i, i });
    }
    String expected = generateExpectData(1, data2);
    expected = removeNonWordAndStripSpace(expected);
    String got = removeNonWordAndStripSpace(cr.getResult().toString());
    assertEquals(expected, got);
}
Also used: HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) CommandResult(org.springframework.shell.core.CommandResult) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
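
Because this loop runs the archiver after every commit, the archived instants end up spread across multiple archive files, which is exactly what the test wants to exercise. As a rough sketch of what 'commits showarchived' then does with that window, something like the following reads the archived instants back through HoodieArchivedTimeline. The hard-coded timestamps mirror the query above, and the load/clear pattern is an assumption based on how this version's CLI command handles archived instant details.

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;

private static void printArchivedCommits(HoodieTableMetaClient metaClient) {
    // Archived instant details for the queried window are loaded into memory,
    // scanned, and then released again.
    HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
    try {
        archivedTimeline.loadInstantDetailsInMemory("160", "174");
        archivedTimeline.findInstantsInRange("160", "174")
            .getInstants()
            .forEach(instant -> System.out.println(instant.getTimestamp()));
    } finally {
        archivedTimeline.clearInstantDetailsFromMemory("160", "174");
    }
}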

Example 23 with HoodieSparkTable

Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.

From the class TestCompactionCommand, method generateArchive.

private void generateArchive() throws IOException {
    // Generate archive
    HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
        .withPath(tablePath)
        .withSchema(HoodieTestCommitMetadataGenerator.TRIP_EXAMPLE_SCHEMA)
        .withParallelism(2, 2)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            .retainCommits(1)
            .archiveCommitsWith(2, 3)
            .build())
        .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
            .withRemoteServerPort(timelineServicePort)
            .build())
        .forTable("test-trip-table")
        .build();
    // archive
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient());
    HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
    archiver.archiveIfRequired(context());
}
Also used: HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable)
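
A natural follow-up check after generateArchive() is that archive files actually landed on storage. A small sketch, assuming the default layout where archived commits live under <basePath>/.hoodie/archived (configurable via hoodie.archivelog.folder):

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.table.HoodieTableMetaClient;

private static void listArchiveFiles(HoodieTableMetaClient metaClient) throws IOException {
    // metaClient.getArchivePath() resolves the archive folder for this table
    FileStatus[] archiveFiles = metaClient.getFs().listStatus(new Path(metaClient.getArchivePath()));
    for (FileStatus f : archiveFiles) {
        System.out.println(f.getPath().getName() + " (" + f.getLen() + " bytes)");
    }
}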

Example 24 with HoodieSparkTable

Use of org.apache.hudi.table.HoodieSparkTable in project hudi by apache.

From the class SparkMain, method deleteMarker.

protected static int deleteMarker(JavaSparkContext jsc, String instantTime, String basePath) {
    try {
        SparkRDDWriteClient client = createHoodieClient(jsc, basePath);
        HoodieWriteConfig config = client.getConfig();
        HoodieEngineContext context = client.getEngineContext();
        HoodieSparkTable table = HoodieSparkTable.create(config, context, true);
        WriteMarkersFactory.get(config.getMarkersType(), table, instantTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
        return 0;
    } catch (Exception e) {
        LOG.warn(String.format("Failed: Could not clean marker instantTime: \"%s\".", instantTime), e);
        return -1;
    }
}
Also used: SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieEngineContext(org.apache.hudi.common.engine.HoodieEngineContext) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieSavepointException(org.apache.hudi.exception.HoodieSavepointException) IOException(java.io.IOException) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable)
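
Since deleteMarker is a protected static helper, SparkMain normally dispatches it from main() based on CLI arguments. A hypothetical direct invocation, say from a test subclassing SparkMain, could look like the following; the instant time and base path are made-up illustration values.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

JavaSparkContext jsc = new JavaSparkContext(
    new SparkConf().setMaster("local[2]").setAppName("delete-marker-sketch"));
// returns 0 if the marker directory was removed, -1 on failure
int exitCode = deleteMarker(jsc, "20220312091533", "file:///tmp/hudi/test-trip-table");
jsc.stop();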

Aggregations

HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 24
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 22
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 17
Test (org.junit.jupiter.api.Test): 14
Map (java.util.Map): 9
Arrays (java.util.Arrays): 7
HashMap (java.util.HashMap): 7
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 7
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 7
IOException (java.io.IOException): 6
List (java.util.List): 6
Schema (org.apache.avro.Schema): 6
HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord): 6
Option (org.apache.hudi.common.util.Option): 6
ArrayList (java.util.ArrayList): 5
Collectors (java.util.stream.Collectors): 5
Path (org.apache.hadoop.fs.Path): 5
HoodieKey (org.apache.hudi.common.model.HoodieKey): 5
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 5
RawTripTestPayload (org.apache.hudi.common.testutils.RawTripTestPayload): 5