
Example 61 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class TestCleanPlanExecutor method testKeepXHoursWithCleaning.

/**
 * Tests cleaning service based on number of hours retained.
 */
@ParameterizedTest
@MethodSource("argumentsForTestKeepLatestCommits")
public void testKeepXHoursWithCleaning(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
            .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build())
            .withCompactionConfig(HoodieCompactionConfig.newBuilder()
                    .withIncrementalCleaningMode(enableIncrementalClean)
                    .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER)
                    .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
                    .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS)
                    .cleanerNumHoursRetained(2)
                    .build())
            .build();
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    String p0 = "2020/01/01";
    String p1 = "2020/01/02";
    Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;
    String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p0).get(0).getFileId() : UUID.randomUUID().toString();
    String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p1).get(0).getFileId() : UUID.randomUUID().toString();
    Instant instant = Instant.now();
    ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault());
    int minutesForFirstCommit = 150;
    String firstCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant()));
    testTable.addInflightCommit(firstCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
    HoodieCommitMetadata commitMetadata = generateCommitMetadata(firstCommitTs, Collections.unmodifiableMap(new HashMap<String, List<String>>() {

        {
            put(p0, CollectionUtils.createImmutableList(file1P0C0));
            put(p1, CollectionUtils.createImmutableList(file1P1C0));
        }
    }));
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, firstCommitTs), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    metaClient = HoodieTableMetaClient.reload(metaClient);
    List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
    assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions or clean any files");
    assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
    assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));
    // make next commit, with 1 insert & 1 update per partition
    int minutesForSecondCommit = 90;
    String secondCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant()));
    Map<String, String> partitionAndFileId002 = testTable.addInflightCommit(secondCommitTs).getFileIdsWithBaseFilesInPartitions(p0, p1);
    String file2P0C1 = partitionAndFileId002.get(p0);
    String file2P1C1 = partitionAndFileId002.get(p1);
    testTable.forCommit(secondCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
    commitMetadata = generateCommitMetadata(secondCommitTs, new HashMap<String, List<String>>() {

        {
            put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
            put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1));
        }
    });
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, secondCommitTs), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
    assertEquals(2, hoodieCleanStatsTwo.size(), "Should clean one file from each of the two partitions");
    assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1));
    assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1));
    assertTrue(testTable.baseFileExists(p0, secondCommitTs, file1P0C0));
    assertTrue(testTable.baseFileExists(p1, secondCommitTs, file1P1C0));
    assertFalse(testTable.baseFileExists(p0, firstCommitTs, file1P0C0));
    assertFalse(testTable.baseFileExists(p1, firstCommitTs, file1P1C0));
}
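
Stripped of the test scaffolding, the hours-based retention policy comes down to two builder calls. Below is a minimal sketch using only the config API exercised above, with everything else left at defaults. Note that the first clean run in the test removes nothing even though the first commit is 150 minutes old: the cleaner always retains the latest version of each file group, so a version only becomes a clean candidate once a newer one exists.

// Minimal sketch: clean file versions that fall outside the 2-hour retention
// window, using only builder calls that appear in the test above.
HoodieWriteConfig cleanerConfig = HoodieWriteConfig.newBuilder()
    .withPath(basePath) // table base path, as in the test harness
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS)
        .cleanerNumHoursRetained(2)
        .build())
    .build();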
Also used: HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HashMap(java.util.HashMap) Instant(java.time.Instant) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) ZonedDateTime(java.time.ZonedDateTime) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) List(java.util.List) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 62 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class TestHoodieMergeHandle method testHoodieMergeHandleWriteStatMetrics.

@ParameterizedTest
@MethodSource("testArguments")
public void testHoodieMergeHandleWriteStatMetrics(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled) throws Exception {
    // Insert 100 records
    // Build a common config, varying the spillable disk map type and compression
    Properties properties = new Properties();
    properties.setProperty(HoodieCommonConfig.SPILLABLE_DISK_MAP_TYPE.key(), diskMapType.name());
    properties.setProperty(HoodieCommonConfig.DISK_MAP_BITCASK_COMPRESSION_ENABLED.key(), String.valueOf(isCompressionEnabled));
    HoodieWriteConfig config = getConfigBuilder().withProperties(properties).build();
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        String newCommitTime = "100";
        writeClient.startCommitWithTime(newCommitTime);
        List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
        JavaRDD<HoodieRecord> recordsRDD = jsc.parallelize(records, 1);
        List<WriteStatus> statuses = writeClient.insert(recordsRDD, newCommitTime).collect();
        // All records should be inserts into new parquet
        assertTrue(statuses.stream().filter(status -> status.getStat().getPrevCommit() != HoodieWriteStat.NULL_COMMIT).count() > 0);
        // Num writes should be equal to the number of records inserted
        assertEquals(100, (long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
        // Num update writes should be equal to the number of records updated
        assertEquals(0, (long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
        // Num inserts should equal the number of records inserted (no inserts were converted to updates in this commit)
        assertEquals(100, (long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
        // Update all the 100 records
        metaClient = HoodieTableMetaClient.reload(metaClient);
        newCommitTime = "101";
        writeClient.startCommitWithTime(newCommitTime);
        List<HoodieRecord> updatedRecords = dataGen.generateUpdates(newCommitTime, records);
        JavaRDD<HoodieRecord> updatedRecordsRDD = jsc.parallelize(updatedRecords, 1);
        statuses = writeClient.upsert(updatedRecordsRDD, newCommitTime).collect();
        // All records should be upserts into existing parquet
        assertEquals(0, statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
        // Num writes should be equal to the number of records written (all updates here)
        assertEquals(100, (long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
        // Num update writes should be equal to the number of records updated
        assertEquals(100, (long) statuses.stream().map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get());
        // Num inserts should be zero, since every record updated an existing file
        assertEquals(0, (long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
        newCommitTime = "102";
        writeClient.startCommitWithTime(newCommitTime);
        List<HoodieRecord> allRecords = dataGen.generateInserts(newCommitTime, 100);
        allRecords.addAll(updatedRecords);
        JavaRDD<HoodieRecord> allRecordsRDD = jsc.parallelize(allRecords, 1);
        statuses = writeClient.upsert(allRecordsRDD, newCommitTime).collect();
        // All records should go to existing parquet files (new inserts are routed into existing small files)
        assertEquals(0, (long) statuses.stream().filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count());
        // Num writes should be equal to the total number of records written
        assertEquals(200, (long) statuses.stream().map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get());
        // Num update writes should equal the number of existing records updated
        // Num inserts should equal the number of new records written in this commit
        assertEquals(100, (long) statuses.stream().map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get());
        // Verify all records have location set
        statuses.forEach(writeStatus -> {
            writeStatus.getWrittenRecords().forEach(r -> {
                // Ensure New Location is set
                assertTrue(r.getNewLocation().isPresent());
            });
        });
    }
}
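
A side note on the aggregation idiom: the reduce((a, b) -> a + b).get() chains above work, but mapToLong plus sum() expresses the same aggregation without the Optional unwrap or the boxing. A small equivalent sketch against the same WriteStatus API, given the statuses list from the first commit:

// Equivalent to statuses.stream().map(s -> s.getStat().getNumWrites())
//                        .reduce((a, b) -> a + b).get(), minus the Optional.get()
long totalWrites = statuses.stream()
    .mapToLong(status -> status.getStat().getNumWrites())
    .sum();
assertEquals(100, totalWrites); // e.g. right after the first insert of 100 records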
Also used: BeforeEach(org.junit.jupiter.api.BeforeEach) Dataset(org.apache.spark.sql.Dataset) FileSystem(org.apache.hadoop.fs.FileSystem) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Assertions.assertNotEquals(org.junit.jupiter.api.Assertions.assertNotEquals) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) ArrayList(java.util.ArrayList) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) ExternalSpillableMap(org.apache.hudi.common.util.collection.ExternalSpillableMap) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) Arguments.arguments(org.junit.jupiter.params.provider.Arguments.arguments) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) MethodSource(org.junit.jupiter.params.provider.MethodSource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) Properties(java.util.Properties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Row(org.apache.spark.sql.Row) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) WriteStatus(org.apache.hudi.client.WriteStatus) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieClientTestUtils(org.apache.hudi.testutils.HoodieClientTestUtils) HoodieCommonConfig(org.apache.hudi.common.config.HoodieCommonConfig) FSUtils(org.apache.hudi.common.fs.FSUtils)

Example 63 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class TestHoodieTimelineArchiver method testArchiveRollbacksTestTable.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testArchiveRollbacksTestTable(boolean enableMetadata) throws Exception {
    HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 3, 2);
    for (int i = 1; i < 9; i += 2) {
        testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
        testTable.doRollback("0000000" + i, "0000000" + (i + 1));
        // trigger archival
        Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
        List<HoodieInstant> originalCommits = commitsList.getKey();
        List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
        if (i != 7) {
            assertEquals(originalCommits, commitsAfterArchival);
        } else {
            // the only iteration in which archival kicks in
            List<HoodieInstant> expectedArchivedInstants = new ArrayList<>();
            expectedArchivedInstants.addAll(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000003")));
            expectedArchivedInstants.addAll(getAllArchivedCommitInstants(Arrays.asList("00000002", "00000004"), HoodieTimeline.ROLLBACK_ACTION));
            List<HoodieInstant> expectedActiveInstants = new ArrayList<>();
            expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000005", "00000007")));
            expectedActiveInstants.addAll(getActiveCommitInstants(Arrays.asList("00000006", "00000008"), HoodieTimeline.ROLLBACK_ACTION));
            verifyArchival(expectedArchivedInstants, expectedActiveInstants, commitsAfterArchival);
        }
    }
}
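
initTestTableAndGetWriteConfig is test-local, so the meaning of (enableMetadata, 2, 3, 2) is not spelled out here; reading the assertions, the numbers plausibly map to archival bounds of 2 (min instants to keep) and 3 (max), plus 2 retained commits for the cleaner. A hedged sketch of what an equivalent public configuration might look like, assuming the HoodieCompactionConfig builder methods of this Hudi version and with the argument mapping inferred rather than confirmed:

// Assumed equivalent of initTestTableAndGetWriteConfig(enableMetadata, 2, 3, 2):
// once the active timeline exceeds the max, archival trims it toward the min.
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withMetadataConfig(HoodieMetadataConfig.newBuilder()
        .enable(enableMetadata) // the test's metadata-table toggle
        .build())
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .retainCommits(2)         // cleaner retention (inferred)
        .archiveCommitsWith(2, 3) // minInstantsToKeep, maxInstantsToKeep (inferred)
        .build())
    .build();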
Also used: HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 64 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class TestHoodieTimelineArchiver method testNoArchivalWithInflightCompactionInMiddle.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testNoArchivalWithInflightCompactionInMiddle(boolean enableMetadata) throws Exception {
    HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 4, 2, 2, HoodieTableType.MERGE_ON_READ);
    // when max archival commits is set to 4, even after 7 commits, if there is an inflight compaction in the middle, archival should not kick in.
    HoodieCommitMetadata inflightCompactionMetadata = null;
    for (int i = 1; i < 8; i++) {
        if (i == 2) {
            inflightCompactionMetadata = testTable.doCompaction("0000000" + i, Arrays.asList("p1", "p2"), true);
        } else {
            testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
        }
        // archival
        Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
        List<HoodieInstant> originalCommits = commitsList.getKey();
        List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
        if (enableMetadata) {
            assertEquals(originalCommits, commitsAfterArchival);
        } else {
            if (i != 6) {
                assertEquals(originalCommits, commitsAfterArchival);
            } else {
                // On the 7th commit, archival kicks in, but only one commit is archived since the 2nd commit (the compaction) is still inflight.
                assertEquals(originalCommits.size() - commitsAfterArchival.size(), 1);
                for (int j = 1; j <= 6; j++) {
                    if (j == 1) {
                        // first commit should be archived
                        assertFalse(commitsAfterArchival.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "0000000" + j)));
                    } else if (j == 2) {
                        // 2nd compaction should not be archived
                        assertFalse(commitsAfterArchival.contains(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, "0000000" + j)));
                    } else {
                        // every other commit should not be archived
                        assertTrue(commitsAfterArchival.contains(new HoodieInstant(State.COMPLETED, HoodieTimeline.DELTA_COMMIT_ACTION, "0000000" + j)));
                    }
                }
            }
        }
    }
    // move the inflight compaction to complete and add one regular write commit; archival should then archive more commits.
    // one extra commit is needed because a compaction in the data table will not trigger table services in the metadata table.
    // before this move, the active timeline is: 2_inflight_compaction, 3, 4, 5, 6, 7.
    // after this move and the new commit, only 7 and 8 remain active (1 through 6, including the compaction at 2, are archived).
    testTable.moveInflightCompactionToComplete("00000002", inflightCompactionMetadata);
    testTable.doWriteOperation("00000008", WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), 2);
    Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
    List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
    List<HoodieInstant> archivedInstants = getAllArchivedCommitInstants(Arrays.asList("00000001", "00000003", "00000004", "00000005", "00000006"), HoodieTimeline.DELTA_COMMIT_ACTION);
    archivedInstants.add(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, "00000002"));
    archivedInstants.add(new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "00000002"));
    verifyArchival(archivedInstants, getActiveCommitInstants(Arrays.asList("00000007", "00000008"), HoodieTimeline.DELTA_COMMIT_ACTION), commitsAfterArchival);
}
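
The invariant this test protects is that archival never crosses an inflight compaction: the earliest pending compaction instant pins the active timeline, and nothing at or after it may be archived. The check below is an illustration of that guard using standard Hudi timeline accessors, not the test's own code; archivedInstants stands in for whatever list of archived instants is under inspection:

// Illustration: find the earliest pending compaction; every archived instant
// must sort strictly before it (instant timestamps are zero-padded strings).
Option<HoodieInstant> earliestPending = metaClient.getActiveTimeline()
    .filterPendingCompactionTimeline()
    .firstInstant();
earliestPending.ifPresent(pending ->
    assertTrue(archivedInstants.stream().allMatch(
        archived -> archived.getTimestamp().compareTo(pending.getTimestamp()) < 0)));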
Also used: HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) ArrayList(java.util.ArrayList) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 65 with HoodieWriteConfig

use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

the class TestHoodieTimelineArchiver method testMergeSmallArchiveFilesRecoverFromMergeFailed.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testMergeSmallArchiveFilesRecoverFromMergeFailed(boolean enableArchiveMerge) throws Exception {
    HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200);
    // do ingestion and trigger archive actions here.
    for (int i = 1; i < 8; i++) {
        testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
        archiveAndGetCommitsList(writeConfig);
    }
    // perform a single merge of the small archive files
    HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient);
    HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table);
    FileStatus[] fsStatuses = metaClient.getFs().globStatus(new Path(metaClient.getArchivePath() + "/.commits_.archive*"));
    List<String> candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList());
    archiver.reOpenWriter();
    archiver.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1");
    archiver.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList()));
    HoodieLogFormat.Writer writer = archiver.reOpenWriter();
    // verify that both the archived and active timelines load successfully
    HoodieActiveTimeline rawActiveTimeline = new HoodieActiveTimeline(metaClient, false);
    HoodieArchivedTimeline archivedTimeLine = metaClient.getArchivedTimeline().reload();
    assertEquals(7 * 3, rawActiveTimeline.countInstants() + archivedTimeLine.reload().countInstants());
    String s = "Dummy Content";
    // corrupt the current merged archive file by writing dummy content into it.
    FileIOUtils.createFileInPath(metaClient.getFs(), writer.getLogFile().getPath(), Option.of(s.getBytes()));
    // run further archive actions with small-archive-file merging enabled.
    for (int i = 1; i < 10; i++) {
        testTable.doWriteOperation("1000000" + i, WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
        archiveAndGetCommitsList(writeConfig);
    }
    // check the result:
    // loading the archived timeline should succeed, ignoring the parse failure from the damaged merged archive file.
    HoodieActiveTimeline rawActiveTimeline1 = new HoodieActiveTimeline(metaClient, false);
    HoodieArchivedTimeline archivedTimeLine1 = metaClient.getArchivedTimeline().reload();
    assertEquals(16 * 3, archivedTimeLine1.countInstants() + rawActiveTimeline1.countInstants());
    // if there is a damaged merged archive file plus another, ordinary damaged archive file,
    // Hudi should throw an exception while loading the archived timeline, since parsing the damaged file fails.
    Path damagedFile = new Path(metaClient.getArchivePath(), ".commits_.archive.300_1-0-1");
    FileIOUtils.createFileInPath(metaClient.getFs(), damagedFile, Option.of(s.getBytes()));
    assertThrows(HoodieException.class, () -> metaClient.getArchivedTimeline().reload());
}
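
One detail worth unpacking is the 7 * 3 and 16 * 3 arithmetic. Each doWriteOperation leaves three instant files on the timeline (requested, inflight, completed), and the test adds active and archived counts together, so 7 write operations account for 21 instants and 16 for 48. The sketch below restates that bookkeeping; the reading of the boolean constructor flag as disabling layout filtering (so requested and inflight instants stay in the count) is inferred from the Hudi source, not stated in the test:

// Each write operation produces three instant files: requested, inflight,
// completed. Counting the unfiltered active timeline together with the
// archived timeline therefore accounts for 3 instants per operation.
HoodieActiveTimeline rawActive = new HoodieActiveTimeline(metaClient, false);
long totalInstants = rawActive.countInstants()
    + metaClient.getArchivedTimeline().reload().countInstants();
assertEquals(7 * 3, totalInstants); // after the first 7 write operations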
Also used: Path(org.apache.hadoop.fs.Path) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWrapperFileSystem(org.apache.hudi.common.fs.HoodieWrapperFileSystem) Arrays(java.util.Arrays) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline) FileIOUtils(org.apache.hudi.common.util.FileIOUtils) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieException(org.apache.hudi.exception.HoodieException) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTimelineArchiver(org.apache.hudi.client.HoodieTimelineArchiver) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) HoodieLogFormat(org.apache.hudi.common.table.log.HoodieLogFormat) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) Collectors(java.util.stream.Collectors) Test(org.junit.jupiter.api.Test) List(java.util.List) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodieTestUtils.createCompactionCommitInMetadataTable(org.apache.hudi.common.testutils.HoodieTestUtils.createCompactionCommitInMetadataTable) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) IntStream(java.util.stream.IntStream) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) CsvSource(org.junit.jupiter.params.provider.CsvSource) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) MetadataConversionUtils(org.apache.hudi.client.utils.MetadataConversionUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieTableMetadata(org.apache.hudi.metadata.HoodieTableMetadata) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) LogManager(org.apache.log4j.LogManager) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Comparator(java.util.Comparator) Collections(java.util.Collections) Pair(org.apache.hudi.common.util.collection.Pair)

Aggregations

HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 327
Test (org.junit.jupiter.api.Test): 179
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 173
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 169
ArrayList (java.util.ArrayList): 136
List (java.util.List): 133
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 126
HoodieTable (org.apache.hudi.table.HoodieTable): 117
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 111
HashMap (java.util.HashMap): 93
Path (org.apache.hadoop.fs.Path): 92
WriteStatus (org.apache.hudi.client.WriteStatus): 86
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 84
Collectors (java.util.stream.Collectors): 81
Map (java.util.Map): 76
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 76
Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals): 74
Arrays (java.util.Arrays): 73
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 72
Option (org.apache.hudi.common.util.Option): 69