use of org.apache.hudi.common.util.BaseFileUtils in project hudi by apache.
the class BootstrapOperator method loadRecords.
/**
 * Loads all the indices of the given partition path into the backup state.
 *
 * @param partitionPath The partition path
 */
@SuppressWarnings("unchecked")
protected void loadRecords(String partitionPath) throws Exception {
  long start = System.currentTimeMillis();

  final int parallelism = getRuntimeContext().getNumberOfParallelSubtasks();
  final int maxParallelism = getRuntimeContext().getMaxNumberOfParallelSubtasks();
  final int taskID = getRuntimeContext().getIndexOfThisSubtask();

  HoodieTimeline commitsTimeline = this.hoodieTable.getMetaClient().getCommitsTimeline();
  if (!StringUtils.isNullOrEmpty(lastInstantTime)) {
    commitsTimeline = commitsTimeline.findInstantsAfter(lastInstantTime);
  }
  Option<HoodieInstant> latestCommitTime = commitsTimeline.filterCompletedInstants().lastInstant();

  if (latestCommitTime.isPresent()) {
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(this.hoodieTable.getBaseFileFormat());
    Schema schema = new TableSchemaResolver(this.hoodieTable.getMetaClient()).getTableAvroSchema();

    List<FileSlice> fileSlices = this.hoodieTable.getSliceView()
        .getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.get().getTimestamp(), true)
        .collect(toList());

    for (FileSlice fileSlice : fileSlices) {
      if (!shouldLoadFile(fileSlice.getFileId(), maxParallelism, parallelism, taskID)) {
        continue;
      }
      LOG.info("Load records from {}.", fileSlice);

      // load parquet records
      fileSlice.getBaseFile().ifPresent(baseFile -> {
        // filter out corrupted files
        if (!isValidFile(baseFile.getFileStatus())) {
          return;
        }
        try (ClosableIterator<HoodieKey> iterator =
                 fileUtils.getHoodieKeyIterator(this.hadoopConf, new Path(baseFile.getPath()))) {
          iterator.forEachRemaining(hoodieKey -> {
            output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(hoodieKey, fileSlice))));
          });
        }
      });

      // load avro log records
      List<String> logPaths = fileSlice.getLogFiles()
          .filter(logFile -> isValidFile(logFile.getFileStatus()))
          .map(logFile -> logFile.getPath().toString())
          .collect(toList());
      HoodieMergedLogRecordScanner scanner =
          FormatUtils.logScanner(logPaths, schema, latestCommitTime.get().getTimestamp(), writeConfig, hadoopConf);

      try {
        for (String recordKey : scanner.getRecords().keySet()) {
          output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(new HoodieKey(recordKey, partitionPath), fileSlice))));
        }
      } catch (Exception e) {
        throw new HoodieException(String.format("Error when loading record keys from files: %s", logPaths), e);
      } finally {
        scanner.close();
      }
    }
  }

  long cost = System.currentTimeMillis() - start;
  LOG.info("Task [{}:{}] finished loading the index under partition {} and sent it downstream, time cost: {} milliseconds.",
      this.getClass().getSimpleName(), taskID, partitionPath, cost);
}
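For context outside the Flink operator, here is a minimal standalone sketch (not from the Hudi codebase; the class name and file path are placeholders, and the ClosableIterator import package may differ across Hudi versions) of the same BaseFileUtils key-iteration call that loadRecords uses above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.util.BaseFileUtils;
import org.apache.hudi.common.util.ClosableIterator; // package may differ across Hudi versions

public class HoodieKeyIteratorSketch {

  public static void main(String[] args) {
    Configuration hadoopConf = new Configuration();
    // placeholder path to a Parquet base file of a Hudi table
    Path baseFilePath = new Path("/tmp/hudi_table/2021/09/11/some-base-file.parquet");

    // Resolve the Parquet implementation, as loadRecords does via the table's base file format
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(HoodieFileFormat.PARQUET);

    // Stream the HoodieKeys (record key + partition path) without materializing full records
    try (ClosableIterator<HoodieKey> iterator = fileUtils.getHoodieKeyIterator(hadoopConf, baseFilePath)) {
      iterator.forEachRemaining(key ->
          System.out.println(key.getRecordKey() + " @ " + key.getPartitionPath()));
    }
  }
}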
use of org.apache.hudi.common.util.BaseFileUtils in project hudi by apache.
the class HoodieKeyLocationFetchHandle method locations.
public Stream<Pair<HoodieKey, HoodieRecordLocation>> locations() {
  HoodieBaseFile baseFile = partitionPathBaseFilePair.getRight();
  BaseFileUtils baseFileUtils = BaseFileUtils.getInstance(baseFile.getPath());

  final List<HoodieKey> hoodieKeyList;
  if (keyGeneratorOpt.isPresent()) {
    hoodieKeyList = baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()), keyGeneratorOpt);
  } else {
    hoodieKeyList = baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()));
  }
  return hoodieKeyList.stream()
      .map(entry -> Pair.of(entry, new HoodieRecordLocation(baseFile.getCommitTime(), baseFile.getFileId())));
}
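A minimal standalone sketch of the same fetchHoodieKeys call, mapped to HoodieRecordLocation pairs the way locations() does above (the file path, commit time, and file id are placeholders, not values from the Hudi codebase):

import java.util.List;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.util.BaseFileUtils;
import org.apache.hudi.common.util.collection.Pair;

public class FetchHoodieKeysSketch {

  public static void main(String[] args) {
    Configuration hadoopConf = new Configuration();
    String baseFilePath = "/tmp/hudi_table/2016/09/26/some-base-file.parquet"; // placeholder
    String commitTime = "001";      // placeholder instant time
    String fileId = "some-file-id"; // placeholder file group id

    // getInstance(String) resolves the utils implementation from the file extension
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(baseFilePath);
    List<HoodieKey> keys = fileUtils.fetchHoodieKeys(hadoopConf, new Path(baseFilePath));

    // Pair every key in the base file with the location (instant time, file id) of that file
    Stream<Pair<HoodieKey, HoodieRecordLocation>> locations = keys.stream()
        .map(key -> Pair.of(key, new HoodieRecordLocation(commitTime, fileId)));
    locations.forEach(pair -> System.out.println(pair.getLeft() + " -> " + pair.getRight()));
  }
}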
use of org.apache.hudi.common.util.BaseFileUtils in project hudi by apache.
the class TestHoodieConcatHandle method testInsert.
@Test
public void testInsert() throws Exception {
  HoodieWriteConfig config = makeHoodieClientConfigBuilder().withMergeAllowDuplicateOnInserts(true).build();
  HoodieJavaWriteClient writeClient = getHoodieWriteClient(config);
  metaClient = HoodieTableMetaClient.reload(metaClient);
  BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient);

  // Get some records belonging to the same partition (2021/09/11)
  String insertRecordStr1 = "{\"_row_key\":\"1\"," + "\"time\":\"2021-09-11T16:16:41.415Z\",\"number\":1}";
  String insertRecordStr2 = "{\"_row_key\":\"2\"," + "\"time\":\"2021-09-11T16:16:41.415Z\",\"number\":2}";
  List<HoodieRecord> records1 = new ArrayList<>();
  RawTripTestPayload insertRow1 = new RawTripTestPayload(insertRecordStr1);
  RawTripTestPayload insertRow2 = new RawTripTestPayload(insertRecordStr2);
  records1.add(new HoodieAvroRecord(new HoodieKey(insertRow1.getRowKey(), insertRow1.getPartitionPath()), insertRow1));
  records1.add(new HoodieAvroRecord(new HoodieKey(insertRow2.getRowKey(), insertRow2.getPartitionPath()), insertRow2));

  int startInstant = 1;
  String firstCommitTime = makeNewCommitTime(startInstant++);

  // First insert
  writeClient.startCommitWithTime(firstCommitTime);
  writeClient.insert(records1, firstCommitTime);

  String partitionPath = "2021/09/11";
  FileStatus[] allFiles = getIncrementalFiles(partitionPath, "0", -1);
  assertEquals(1, allFiles.length);

  // Read back the bloom filter and make sure it can answer whether a record key exists
  Path filePath = allFiles[0].getPath();
  BloomFilter filter = fileUtils.readBloomFilterFromMetadata(hadoopConf, filePath);
  for (HoodieRecord record : records1) {
    assertTrue(filter.mightContain(record.getRecordKey()));
  }

  insertRecordStr1 = "{\"_row_key\":\"1\"," + "\"time\":\"2021-09-11T16:39:41.415Z\",\"number\":3}";
  insertRecordStr2 = "{\"_row_key\":\"2\"," + "\"time\":\"2021-09-11T16:39:41.415Z\",\"number\":4}";
  List<HoodieRecord> records2 = new ArrayList<>();
  insertRow1 = new RawTripTestPayload(insertRecordStr1);
  insertRow2 = new RawTripTestPayload(insertRecordStr2);
  // The record keys of records2 are the same as those of records1, but the other field values differ
  records2.add(new HoodieAvroRecord(new HoodieKey(insertRow1.getRowKey(), insertRow1.getPartitionPath()), insertRow1));
  records2.add(new HoodieAvroRecord(new HoodieKey(insertRow2.getRowKey(), insertRow2.getPartitionPath()), insertRow2));

  String newCommitTime = makeNewCommitTime(startInstant++);
  writeClient.startCommitWithTime(newCommitTime);
  // The second insert reuses the _row_key values of the first one, to test allowDuplicateInserts
  writeClient.insert(records2, newCommitTime);

  allFiles = getIncrementalFiles(partitionPath, firstCommitTime, -1);
  assertEquals(1, allFiles.length);
  // Verify the new incremental file group is the same as the previous one
  assertEquals(FSUtils.getFileId(filePath.getName()), FSUtils.getFileId(allFiles[0].getPath().getName()));
  filePath = allFiles[0].getPath();

  // The final result should be the union of records1 and records2
  records1.addAll(records2);

  // Read the base file and check the record content
  List<GenericRecord> fileRecords = fileUtils.readAvroRecords(hadoopConf, filePath);
  int index = 0;
  for (GenericRecord record : fileRecords) {
    assertEquals(records1.get(index).getRecordKey(), record.get("_row_key").toString());
    assertEquals(index + 1, record.get("number"));
    index++;
  }
}
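The two BaseFileUtils reads exercised by this test, the bloom filter lookup and the full Avro record read, in a minimal standalone sketch (the base path, file path, and record key are placeholders, not from the Hudi codebase):

import java.util.List;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.BaseFileUtils;

public class BaseFileReadSketch {

  public static void main(String[] args) {
    Configuration hadoopConf = new Configuration();
    Path baseFilePath = new Path("/tmp/hudi_table/2021/09/11/some-base-file.parquet"); // placeholder

    // Resolve the utils implementation from the table's meta client, as the test does
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(hadoopConf)
        .setBasePath("/tmp/hudi_table") // placeholder base path
        .build();
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient);

    // The bloom filter stored in the base file metadata answers "might this record key be present?"
    BloomFilter filter = fileUtils.readBloomFilterFromMetadata(hadoopConf, baseFilePath);
    System.out.println(filter.mightContain("1")); // "1" is a placeholder record key

    // Full read of the base file's rows as Avro GenericRecords
    List<GenericRecord> fileRecords = fileUtils.readAvroRecords(hadoopConf, baseFilePath);
    fileRecords.forEach(record -> System.out.println(record.get("_row_key")));
  }
}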
use of org.apache.hudi.common.util.BaseFileUtils in project hudi by apache.
the class TestHoodieConcatHandle method testInsertWithDataGenerator.
@ParameterizedTest
@ValueSource(booleans = {false, true})
public void testInsertWithDataGenerator(boolean mergeAllowDuplicateOnInsertsEnable) throws Exception {
  HoodieWriteConfig config = makeHoodieClientConfigBuilder(TRIP_EXAMPLE_SCHEMA)
      .withMergeAllowDuplicateOnInserts(mergeAllowDuplicateOnInsertsEnable).build();
  HoodieJavaWriteClient writeClient = getHoodieWriteClient(config);
  metaClient = HoodieTableMetaClient.reload(metaClient);
  BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient);

  String partitionPath = "2021/09/11";
  HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[] {partitionPath});

  int startInstant = 1;
  String firstCommitTime = makeNewCommitTime(startInstant++);
  List<HoodieRecord> records1 = dataGenerator.generateInserts(firstCommitTime, 100);

  // First insert
  writeClient.startCommitWithTime(firstCommitTime);
  writeClient.insert(records1, firstCommitTime);

  FileStatus[] allFiles = getIncrementalFiles(partitionPath, "0", -1);
  assertEquals(1, allFiles.length);

  // Read back the bloom filter and make sure it can answer whether a record key exists
  Path filePath = allFiles[0].getPath();
  BloomFilter filter = fileUtils.readBloomFilterFromMetadata(hadoopConf, filePath);
  for (HoodieRecord record : records1) {
    assertTrue(filter.mightContain(record.getRecordKey()));
  }

  String newCommitTime = makeNewCommitTime(startInstant++);
  List<HoodieRecord> records2 = dataGenerator.generateUpdates(newCommitTime, 100);
  writeClient.startCommitWithTime(newCommitTime);
  // The second insert reuses the _row_key values of the first one, to test allowDuplicateInserts
  writeClient.insert(records2, newCommitTime);

  allFiles = getIncrementalFiles(partitionPath, firstCommitTime, -1);
  assertEquals(1, allFiles.length);
  // Verify the new incremental file group is the same as the previous one
  assertEquals(FSUtils.getFileId(filePath.getName()), FSUtils.getFileId(allFiles[0].getPath().getName()));
  filePath = allFiles[0].getPath();

  // If mergeAllowDuplicateOnInsertsEnable is true, the final result should be the union of records1 and records2
  records1.addAll(records2);

  // Read the base file and check the record content
  List<GenericRecord> fileRecords = fileUtils.readAvroRecords(hadoopConf, filePath);
  assertEquals(fileRecords.size(), mergeAllowDuplicateOnInsertsEnable ? records1.size() : records2.size());
  int index = 0;
  for (GenericRecord record : fileRecords) {
    assertEquals(records1.get(index).getRecordKey(), record.get("_row_key").toString());
    index++;
  }
}
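A small sketch combining two BaseFileUtils read helpers that appear in the tests on this page, readRowKeys (used in the next example below) and readAvroRecords, to compare key and row counts in one base file (placeholder path; the result of readRowKeys is held as a plain Collection since its concrete return type may vary by Hudi version):

import java.util.Collection;
import java.util.List;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.util.BaseFileUtils;

public class RowCountSketch {

  public static void main(String[] args) {
    Configuration hadoopConf = new Configuration();
    Path baseFilePath = new Path("/tmp/hudi_table/2021/09/11/some-base-file.parquet"); // placeholder
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(baseFilePath.toString());

    // Record keys read from the base file
    Collection<String> rowKeys = fileUtils.readRowKeys(hadoopConf, baseFilePath);
    // Every stored row as an Avro GenericRecord
    List<GenericRecord> rows = fileUtils.readAvroRecords(hadoopConf, baseFilePath);

    System.out.printf("%d row keys across %d stored rows%n", rowKeys.size(), rows.size());
  }
}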
use of org.apache.hudi.common.util.BaseFileUtils in project hudi by apache.
the class TestHoodieClientOnCopyOnWriteStorage method testSmallInsertHandlingForInserts.
/**
* Test scenario of new file-group getting added during insert().
*/
@ParameterizedTest
@MethodSource("smallInsertHandlingParams")
public void testSmallInsertHandlingForInserts(boolean mergeAllowDuplicateInserts) throws Exception {
  final String testPartitionPath = "2016/09/26";
  final int insertSplitLimit = 100;
  // set up the small-file handling params: hold up to 200 records max
  HoodieWriteConfig config = getSmallInsertWriteConfig(insertSplitLimit, false, mergeAllowDuplicateInserts);
  dataGen = new HoodieTestDataGenerator(new String[] {testPartitionPath});
  SparkRDDWriteClient client = getHoodieWriteClient(config);
  BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient);

  // Inserts => will write file1
  String commitTime1 = "001";
  client.startCommitWithTime(commitTime1);
  // this writes ~500kb
  List<HoodieRecord> inserts1 = dataGen.generateInserts(commitTime1, insertSplitLimit);
  Set<String> keys1 = recordsToRecordKeySet(inserts1);
  JavaRDD<HoodieRecord> insertRecordsRDD1 = jsc.parallelize(inserts1, 1);
  List<WriteStatus> statuses = client.insert(insertRecordsRDD1, commitTime1).collect();
  assertNoWriteErrors(statuses);
  assertPartitionMetadata(new String[] {testPartitionPath}, fs);
  assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
  String file1 = statuses.get(0).getFileId();
  assertEquals(100, fileUtils.readRowKeys(hadoopConf, new Path(basePath, statuses.get(0).getStat().getPath())).size(),
      "file should contain 100 records");

  // Second set of inserts should just expand file1
  String commitTime2 = "002";
  client.startCommitWithTime(commitTime2);
  List<HoodieRecord> inserts2 = dataGen.generateInserts(commitTime2, 40);
  Set<String> keys2 = recordsToRecordKeySet(inserts2);
  JavaRDD<HoodieRecord> insertRecordsRDD2 = jsc.parallelize(inserts2, 1);
  statuses = client.insert(insertRecordsRDD2, commitTime2).collect();
  assertNoWriteErrors(statuses);
  assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
  assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
  assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");

  Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
  assertEquals(140, fileUtils.readRowKeys(hadoopConf, newFile).size(), "file should contain 140 records");
  List<GenericRecord> records = fileUtils.readAvroRecords(hadoopConf, newFile);
  for (GenericRecord record : records) {
    String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
    String recCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
    assertTrue(commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime),
        "Record expected to be part of commit 1 or commit 2");
    assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey),
        "key expected to be part of commit 1 or commit 2");
  }

  // Lots of inserts, so that file1 is updated and expanded and a new file2 is created
  String commitTime3 = "003";
  client.startCommitWithTime(commitTime3);
  List<HoodieRecord> inserts3 = dataGen.generateInserts(commitTime3, 200);
  JavaRDD<HoodieRecord> insertRecordsRDD3 = jsc.parallelize(inserts3, 1);
  statuses = client.insert(insertRecordsRDD3, commitTime3).collect();
  assertNoWriteErrors(statuses);
  assertEquals(2, statuses.size(), "2 files need to be committed.");
  assertEquals(340,
      fileUtils.readRowKeys(hadoopConf, new Path(basePath, statuses.get(0).getStat().getPath())).size()
          + fileUtils.readRowKeys(hadoopConf, new Path(basePath, statuses.get(1).getStat().getPath())).size(),
      "files should contain 340 records");

  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
  HoodieTable table = getHoodieTable(metaClient, config);
  List<HoodieBaseFile> files = table.getBaseFileOnlyView()
      .getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3)
      .collect(Collectors.toList());
  assertEquals(2, files.size(), "Total of 2 valid data files");

  int totalInserts = 0;
  for (HoodieBaseFile file : files) {
    assertEquals(commitTime3, file.getCommitTime(), "All files must be at commit 3");
    totalInserts += fileUtils.readAvroRecords(hadoopConf, new Path(file.getPath())).size();
  }
  assertEquals(totalInserts, inserts1.size() + inserts2.size() + inserts3.size(), "Total number of records must add up");
}
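A minimal standalone sketch (placeholder path, not from the Hudi codebase) of the metadata-column extraction done in the assertions above: read the base file with readAvroRecords and pull out the Hudi record key and commit time fields:

import java.util.List;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.BaseFileUtils;

public class MetadataColumnSketch {

  public static void main(String[] args) {
    Configuration hadoopConf = new Configuration();
    Path baseFilePath = new Path("/tmp/hudi_table/2016/09/26/some-base-file.parquet"); // placeholder
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(baseFilePath.toString());

    List<GenericRecord> records = fileUtils.readAvroRecords(hadoopConf, baseFilePath);
    for (GenericRecord record : records) {
      // Hudi writes its metadata columns (_hoodie_record_key, _hoodie_commit_time, ...) into every row
      String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
      String commitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
      System.out.println(recordKey + " written at " + commitTime);
    }
  }
}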