Example 51 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

The class TestInlineCompaction, method testSuccessfulCompactionBasedOnTime.

@Test
public void testSuccessfulCompactionBasedOnTime() throws Exception {
    // Given: make one commit
    HoodieWriteConfig cfg = getConfigForInlineCompaction(5, 10, CompactionTriggerStrategy.TIME_ELAPSED);
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
        String instantTime = HoodieActiveTimeline.createNewInstantTime();
        List<HoodieRecord> records = dataGen.generateInserts(instantTime, 10);
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        runNextDeltaCommits(writeClient, readClient, Arrays.asList(instantTime), records, cfg, true, new ArrayList<>());
        // Create the next instant 10s in the future; the elapsed-time threshold is crossed, so this delta commit triggers compaction
        String finalInstant = HoodieActiveTimeline.createNewInstantTime(10000);
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 100), writeClient, metaClient, cfg, false);
        // Then: ensure the file slices are compacted as per policy
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        assertEquals(3, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
        assertEquals(HoodieTimeline.COMMIT_ACTION, metaClient.getActiveTimeline().lastInstant().get().getAction());
    }
}
Also used: HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Test (org.junit.jupiter.api.Test)
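
The helper getConfigForInlineCompaction(...) is called throughout these tests but never shown in this excerpt. Below is a minimal sketch of what it presumably builds, assuming it wraps the same HoodieCompactionConfig builder calls that appear verbatim in Example 53; withMaxNumDeltaCommitsBeforeCompaction is the standard Hudi builder method for the commit-count threshold, added here as an assumption.

// Hypothetical reconstruction, not the verbatim Hudi test helper.
private HoodieWriteConfig getConfigForInlineCompaction(int maxDeltaCommits, int maxDeltaSeconds, CompactionTriggerStrategy strategy) {
    return getConfigBuilder(false)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            // run compaction inline, right after qualifying delta commits
            .withInlineCompaction(true)
            // commit-count threshold (used by NUM_COMMITS and NUM_OR_TIME)
            .withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommits)
            // elapsed-time threshold in seconds (used by TIME_ELAPSED and NUM_OR_TIME)
            .withMaxDeltaSecondsBeforeCompaction(maxDeltaSeconds)
            .withInlineCompactionTriggerStrategy(strategy)
            .build())
        .build();
}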

Example 52 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

The class TestInlineCompaction, method testSuccessfulCompactionBasedOnNumOrTime.

@Test
public void testSuccessfulCompactionBasedOnNumOrTime() throws Exception {
    // Given: make three commits
    HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 20, CompactionTriggerStrategy.NUM_OR_TIME);
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
        List<HoodieRecord> records = dataGen.generateInserts(HoodieActiveTimeline.createNewInstantTime(), 10);
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
        runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
        // Then: the next delta commit triggers compaction because the commit count reaches 3
        String finalInstant = HoodieActiveTimeline.createNewInstantTime();
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 10), writeClient, metaClient, cfg, false);
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
        // The 4th delta commit triggers compaction again because the 20s elapsed-time threshold is reached
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        finalInstant = HoodieActiveTimeline.createNewInstantTime(20000);
        createNextDeltaCommit(finalInstant, dataGen.generateUpdates(finalInstant, 10), writeClient, metaClient, cfg, false);
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        assertEquals(6, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    }
}
Also used: IntStream (java.util.stream.IntStream), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), Arrays (java.util.Arrays), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), WriteMarkersFactory (org.apache.hudi.table.marker.WriteMarkersFactory), Collectors (java.util.stream.Collectors), ArrayList (java.util.ArrayList), HoodieCompactionConfig (org.apache.hudi.config.HoodieCompactionConfig), Test (org.junit.jupiter.api.Test), HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable), List (java.util.List), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), Assertions.assertFalse (org.junit.jupiter.api.Assertions.assertFalse), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)
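
With NUM_OR_TIME, whichever threshold is crossed first fires the compaction, which is why the write timeline grows to 4 and then 6 instants. An illustrative way to see the progression (this inspection snippet is not from the test) is to dump the timeline after each phase:

// Illustrative sketch: print every instant on the write timeline.
// After the 3rd delta commit: 3 deltacommits + 1 compaction commit = 4 instants (count trigger).
// After the 4th delta commit: 4 deltacommits + 2 compaction commits = 6 instants (time trigger).
metaClient.getActiveTimeline().getWriteTimeline().getInstants()
    .forEach(i -> System.out.println(i.getTimestamp() + " " + i.getAction()));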

Example 53 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

The class TestInlineCompaction, method testCompactionRetryOnFailureBasedOnTime.

@Test
public void testCompactionRetryOnFailureBasedOnTime() throws Exception {
    // Given: two delta commits; schedule a compaction and leave it in-flight (simulating a failed inline compaction)
    HoodieWriteConfig cfg = getConfigBuilder(false).withCompactionConfig(HoodieCompactionConfig.newBuilder().withInlineCompaction(false).withMaxDeltaSecondsBeforeCompaction(5).withInlineCompactionTriggerStrategy(CompactionTriggerStrategy.TIME_ELAPSED).build()).build();
    String instantTime;
    List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
        List<HoodieRecord> records = dataGen.generateInserts(instants.get(0), 100);
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
        // Schedule compaction instantTime, make it in-flight (simulates inline compaction failing)
        instantTime = HoodieActiveTimeline.createNewInstantTime(10000);
        scheduleCompaction(instantTime, writeClient, cfg);
        moveCompactionFromRequestedToInflight(instantTime, cfg);
    }
    // When: a new delta commit happens after the 10s window
    HoodieWriteConfig inlineCfg = getConfigForInlineCompaction(5, 10, CompactionTriggerStrategy.TIME_ELAPSED);
    String instantTime2;
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(inlineCfg)) {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        instantTime2 = HoodieActiveTimeline.createNewInstantTime();
        createNextDeltaCommit(instantTime2, dataGen.generateUpdates(instantTime2, 10), writeClient, metaClient, inlineCfg, false);
    }
    // Then: the delta commit completes and the previously failed compaction is retried
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
    assertEquals(4, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    assertEquals(instantTime, metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants().firstInstant().get().getTimestamp());
}
Also used: IntStream (java.util.stream.IntStream), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), Arrays (java.util.Arrays), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), WriteMarkersFactory (org.apache.hudi.table.marker.WriteMarkersFactory), Collectors (java.util.stream.Collectors), ArrayList (java.util.ArrayList), HoodieCompactionConfig (org.apache.hudi.config.HoodieCompactionConfig), Test (org.junit.jupiter.api.Test), HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable), List (java.util.List), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), Assertions.assertFalse (org.junit.jupiter.api.Assertions.assertFalse), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)
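
The retry works because the compaction scheduled in the first try-block stays pending on the timeline, and the next inline-compaction write executes it instead of scheduling a new one. A hedged follow-up check (an assumption, not part of the excerpt) could confirm the instant is no longer pending:

// Sketch: after the second write, the earlier compaction instant should have completed,
// so it must no longer appear on the pending-compaction timeline.
boolean stillPending = metaClient.getActiveTimeline().filterPendingCompactionTimeline()
    .getInstants().anyMatch(i -> i.getTimestamp().equals(instantTime));
assertFalse(stillPending);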

Example 54 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

The class TestInlineCompaction, method testCompactionIsNotScheduledEarly.

@Test
public void testCompactionIsNotScheduledEarly() throws Exception {
    // Given: make two commits
    HoodieWriteConfig cfg = getConfigForInlineCompaction(3, 60, CompactionTriggerStrategy.NUM_COMMITS);
    try (SparkRDDWriteClient<?> writeClient = getHoodieWriteClient(cfg)) {
        List<HoodieRecord> records = dataGen.generateInserts(HoodieActiveTimeline.createNewInstantTime(), 100);
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        List<String> instants = IntStream.range(0, 2).mapToObj(i -> HoodieActiveTimeline.createNewInstantTime()).collect(Collectors.toList());
        runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>());
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        // Then: ensure no compaction is executed since there are only 2 delta commits
        assertEquals(2, metaClient.getActiveTimeline().getWriteTimeline().countInstants());
    }
}
Also used: IntStream (java.util.stream.IntStream), HoodieRecord (org.apache.hudi.common.model.HoodieRecord), Arrays (java.util.Arrays), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), WriteMarkersFactory (org.apache.hudi.table.marker.WriteMarkersFactory), Collectors (java.util.stream.Collectors), ArrayList (java.util.ArrayList), HoodieCompactionConfig (org.apache.hudi.config.HoodieCompactionConfig), Test (org.junit.jupiter.api.Test), HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable), List (java.util.List), SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient), HoodieReadClient (org.apache.hudi.client.HoodieReadClient), Assertions.assertFalse (org.junit.jupiter.api.Assertions.assertFalse), HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient), Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals), HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline), HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)
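
A complementary assertion, hedged here because it is not in the excerpt, would verify that no compaction was even scheduled, not merely that none executed:

// Sketch: with only 2 delta commits against a NUM_COMMITS threshold of 3,
// the pending-compaction timeline should be empty.
assertFalse(metaClient.getActiveTimeline().filterPendingCompactionTimeline()
    .getInstants().findAny().isPresent());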

Example 55 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

The class TestHoodieCompactionStrategy, method testUnboundedPartitionAwareCompactionSimple.

@Test
public void testUnboundedPartitionAwareCompactionSimple() {
    Map<Long, List<Long>> sizesMap = new HashMap<>();
    sizesMap.put(120 * MB, Arrays.asList(60 * MB, 10 * MB, 80 * MB));
    sizesMap.put(110 * MB, new ArrayList<>());
    sizesMap.put(100 * MB, Collections.singletonList(MB));
    sizesMap.put(80 * MB, Collections.singletonList(MB));
    sizesMap.put(70 * MB, Collections.singletonList(MB));
    sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));
    SimpleDateFormat format = new SimpleDateFormat("yyyy/MM/dd");
    Date today = new Date();
    String currentDay = format.format(today);
    String currentDayMinus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-1));
    String currentDayMinus2 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-2));
    String currentDayMinus3 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(-3));
    String currentDayPlus1 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(1));
    String currentDayPlus5 = format.format(BoundedPartitionAwareCompactionStrategy.getDateAtOffsetFromToday(5));
    Map<Long, String> keyToPartitionMap = Collections.unmodifiableMap(new HashMap<Long, String>() {

        {
            put(120 * MB, currentDay);
            put(110 * MB, currentDayMinus1);
            put(100 * MB, currentDayMinus2);
            put(80 * MB, currentDayMinus3);
            put(90 * MB, currentDayPlus1);
            put(70 * MB, currentDayPlus5);
        }
    });
    UnBoundedPartitionAwareCompactionStrategy strategy = new UnBoundedPartitionAwareCompactionStrategy();
    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetPartitionsPerDayBasedCompaction(2).build()).build();
    List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
    List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
    assertTrue(returned.size() < operations.size(), "UnBoundedPartitionAwareCompactionStrategy should exclude the latest " + writeConfig.getTargetPartitionsPerDayBasedCompaction() + " partitions and any future partitions");
    assertEquals(1, returned.size(), "UnBoundedPartitionAwareCompactionStrategy should have resulted in 1 compaction");
}
Also used: HashMap (java.util.HashMap), HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig), Date (java.util.Date), HoodieCompactionOperation (org.apache.hudi.avro.model.HoodieCompactionOperation), ArrayList (java.util.ArrayList), List (java.util.List), SimpleDateFormat (java.text.SimpleDateFormat), Test (org.junit.jupiter.api.Test)
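
Hudi documents UnBoundedPartitionAwareCompactionStrategy as selecting exactly the operations that BoundedPartitionAwareCompactionStrategy would skip, so it can feed a separate, non-ingestion compaction job. A hedged sanity check of that complement relationship, reusing the names from the test above (the size equality is an assumption about the two strategies, not an assertion from the source):

// Sketch: the bounded and unbounded strategies should partition the operation set between them.
BoundedPartitionAwareCompactionStrategy bounded = new BoundedPartitionAwareCompactionStrategy();
List<HoodieCompactionOperation> boundedOps = bounded.orderAndFilter(writeConfig, operations, new ArrayList<>());
assertEquals(operations.size(), boundedOps.size() + returned.size());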

Aggregations

HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 327
Test (org.junit.jupiter.api.Test): 179
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 173
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 169
ArrayList (java.util.ArrayList): 136
List (java.util.List): 133
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 126
HoodieTable (org.apache.hudi.table.HoodieTable): 117
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 111
HashMap (java.util.HashMap): 93
Path (org.apache.hadoop.fs.Path): 92
WriteStatus (org.apache.hudi.client.WriteStatus): 86
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 84
Collectors (java.util.stream.Collectors): 81
Map (java.util.Map): 76
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 76
Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals): 74
Arrays (java.util.Arrays): 73
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 72
Option (org.apache.hudi.common.util.Option): 69