
Example 66 with HoodieTestDataGenerator

use of org.apache.hudi.common.testutils.HoodieTestDataGenerator in project hudi by apache.

the class TestHoodieBackedMetadata method testNonPartitioned.

@Test
public void testNonPartitioned() throws Exception {
    init(HoodieTableType.COPY_ON_WRITE, false);
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    HoodieTestDataGenerator nonPartitionedGenerator = new HoodieTestDataGenerator(new String[] { "" });
    try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, getWriteConfig(true, true))) {
        // Write 1 (Bulk insert)
        String newCommitTime = "0000001";
        List<HoodieRecord> records = nonPartitionedGenerator.generateInserts(newCommitTime, 10);
        client.startCommitWithTime(newCommitTime);
        List<WriteStatus> writeStatuses = client.bulkInsert(jsc.parallelize(records, 1), newCommitTime).collect();
        validateMetadata(client);
        List<String> metadataPartitions = metadata(client).getAllPartitionPaths();
        assertTrue(metadataPartitions.contains(""), "Must contain empty partition");
    }
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) MetadataMergeWriteStatus(org.apache.hudi.testutils.MetadataMergeWriteStatus) WriteStatus(org.apache.hudi.client.WriteStatus) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
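
The essential behavior in this example is that constructing HoodieTestDataGenerator with a single empty partition path makes every generated record non-partitioned, which is why the metadata table ends up containing the "" partition. Below is a minimal sketch of that behavior in isolation (no Spark client involved); it assumes HoodieRecord exposes getRecordKey() and getPartitionPath() accessors.

import java.util.List;

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;

public class NonPartitionedGeneratorSketch {
    public static void main(String[] args) {
        // A single empty partition path => every generated record is non-partitioned.
        HoodieTestDataGenerator generator = new HoodieTestDataGenerator(new String[] { "" });
        List<HoodieRecord> records = generator.generateInserts("0000001", 10);
        for (HoodieRecord record : records) {
            // Each record should report "" as its partition path.
            System.out.println(record.getRecordKey() + " -> '" + record.getPartitionPath() + "'");
        }
    }
}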

Example 67 with HoodieTestDataGenerator

use of org.apache.hudi.common.testutils.HoodieTestDataGenerator in project hudi by apache.

the class TestHoodieClientOnCopyOnWriteStorage method testSmallInsertHandlingForUpserts.

/**
 * Test scenario of new file-group getting added during upsert().
 */
@Test
public void testSmallInsertHandlingForUpserts() throws Exception {
    final String testPartitionPath = "2016/09/26";
    final int insertSplitLimit = 100;
    // set up the small file handling params
    // hold up to 200 records max
    HoodieWriteConfig config = getSmallInsertWriteConfig(insertSplitLimit, TRIP_EXAMPLE_SCHEMA, dataGen.getEstimatedFileSizeInBytes(150));
    dataGen = new HoodieTestDataGenerator(new String[] { testPartitionPath });
    SparkRDDWriteClient client = getHoodieWriteClient(config);
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient);
    // Inserts => will write file1
    String commitTime1 = "001";
    client.startCommitWithTime(commitTime1);
    // this writes ~500kb
    List<HoodieRecord> inserts1 = dataGen.generateInserts(commitTime1, insertSplitLimit);
    Set<String> keys1 = recordsToRecordKeySet(inserts1);
    JavaRDD<HoodieRecord> insertRecordsRDD1 = jsc.parallelize(inserts1, 1);
    List<WriteStatus> statuses = client.upsert(insertRecordsRDD1, commitTime1).collect();
    assertNoWriteErrors(statuses);
    assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
    String file1 = statuses.get(0).getFileId();
    assertEquals(100, fileUtils.readRowKeys(hadoopConf, new Path(basePath, statuses.get(0).getStat().getPath())).size(), "file should contain 100 records");
    // Update + Inserts such that they just expand file1
    String commitTime2 = "002";
    client.startCommitWithTime(commitTime2);
    List<HoodieRecord> inserts2 = dataGen.generateInserts(commitTime2, 40);
    Set<String> keys2 = recordsToRecordKeySet(inserts2);
    List<HoodieRecord> insertsAndUpdates2 = new ArrayList<>();
    insertsAndUpdates2.addAll(inserts2);
    insertsAndUpdates2.addAll(dataGen.generateUpdates(commitTime2, inserts1));
    JavaRDD<HoodieRecord> insertAndUpdatesRDD2 = jsc.parallelize(insertsAndUpdates2, 1);
    statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect();
    assertNoWriteErrors(statuses);
    assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
    assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
    assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
    Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
    assertEquals(140, fileUtils.readRowKeys(hadoopConf, newFile).size(), "file should contain 140 records");
    List<GenericRecord> records = fileUtils.readAvroRecords(hadoopConf, newFile);
    for (GenericRecord record : records) {
        String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
        assertEquals(commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(), "only expect commit2");
        assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey), "key expected to be part of commit2");
    }
    // update + inserts such that file1 is updated and expanded, a new file2 is created.
    String commitTime3 = "003";
    client.startCommitWithTime(commitTime3);
    List<HoodieRecord> insertsAndUpdates3 = dataGen.generateInserts(commitTime3, 200);
    Set<String> keys3 = recordsToRecordKeySet(insertsAndUpdates3);
    List<HoodieRecord> updates3 = dataGen.generateUpdates(commitTime3, inserts2);
    insertsAndUpdates3.addAll(updates3);
    JavaRDD<HoodieRecord> insertAndUpdatesRDD3 = jsc.parallelize(insertsAndUpdates3, 1);
    statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect();
    assertNoWriteErrors(statuses);
    assertEquals(2, statuses.size(), "2 files needs to be committed.");
    HoodieTableMetaClient metadata = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
    HoodieTable table = getHoodieTable(metadata, config);
    BaseFileOnlyView fileSystemView = table.getBaseFileOnlyView();
    List<HoodieBaseFile> files = fileSystemView.getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
    int numTotalInsertsInCommit3 = 0;
    int numTotalUpdatesInCommit3 = 0;
    for (HoodieBaseFile file : files) {
        if (file.getFileName().contains(file1)) {
            assertEquals(commitTime3, file.getCommitTime(), "Existing file should be expanded");
            records = fileUtils.readAvroRecords(hadoopConf, new Path(file.getPath()));
            for (GenericRecord record : records) {
                String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                String recordCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
                if (recordCommitTime.equals(commitTime3)) {
                    if (keys2.contains(recordKey)) {
                        keys2.remove(recordKey);
                        numTotalUpdatesInCommit3++;
                    } else {
                        numTotalInsertsInCommit3++;
                    }
                }
            }
            assertEquals(0, keys2.size(), "All keys added in commit 2 must be updated in commit3 correctly");
        } else {
            assertEquals(commitTime3, file.getCommitTime(), "New file must be written for commit 3");
            records = fileUtils.readAvroRecords(hadoopConf, new Path(file.getPath()));
            for (GenericRecord record : records) {
                String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                assertEquals(commitTime3, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(), "only expect commit3");
                assertTrue(keys3.contains(recordKey), "key expected to be part of commit3");
            }
            numTotalInsertsInCommit3 += records.size();
        }
    }
    assertEquals(numTotalUpdatesInCommit3, inserts2.size(), "Total updates in commit3 must add up");
    assertEquals(numTotalInsertsInCommit3, keys3.size(), "Total inserts in commit3 must add up");
}
Also used : Path(org.apache.hadoop.fs.Path) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTable(org.apache.hudi.table.HoodieTable) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) WriteStatus(org.apache.hudi.client.WriteStatus) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
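
getSmallInsertWriteConfig(...) is a helper defined elsewhere in this test class and not shown here. As a rough, hypothetical sketch of the kind of configuration such a helper would need to produce (a small-file size limit sized to hold up to 200 records and an insert split size of 100), assuming this Hudi version exposes compactionSmallFileSize and insertSplitSize on HoodieCompactionConfig.Builder; the real helper may differ:

// Hypothetical sketch only -- not the actual getSmallInsertWriteConfig(...) implementation.
private HoodieWriteConfig smallInsertWriteConfigSketch(int insertSplitSize, String schema, long smallFileLimitBytes) {
    return HoodieWriteConfig.newBuilder()
        .withPath(basePath)
        .withSchema(schema)
        .withCompactionConfig(HoodieCompactionConfig.newBuilder()
            // base files below this size are considered "small" and are padded by new inserts
            .compactionSmallFileSize(smallFileLimitBytes)
            // caps how many inserts are routed into a single new file group
            .insertSplitSize(insertSplitSize)
            .build())
        .build();
}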

Example 68 with HoodieTestDataGenerator

use of org.apache.hudi.common.testutils.HoodieTestDataGenerator in project hudi by apache.

the class TestHoodieClientOnCopyOnWriteStorage method testParallelInsertAndCleanPreviousFailedCommits.

@Test
public void testParallelInsertAndCleanPreviousFailedCommits() throws Exception {
    HoodieFailedWritesCleaningPolicy cleaningPolicy = HoodieFailedWritesCleaningPolicy.LAZY;
    ExecutorService service = Executors.newFixedThreadPool(2);
    HoodieTestUtils.init(hadoopConf, basePath);
    SparkRDDWriteClient client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, true));
    // Perform 1 successful write
    writeBatch(client, "100", "100", Option.of(Arrays.asList("100")), "100", 100, dataGen::generateInserts, SparkRDDWriteClient::bulkInsert, false, 100, 100, 0, true);
    // Perform 2 failed writes to the table
    writeBatch(client, "200", "100", Option.of(Arrays.asList("200")), "200", 100, dataGen::generateInserts, SparkRDDWriteClient::bulkInsert, false, 100, 100, 0, false);
    client.close();
    client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, true));
    writeBatch(client, "300", "200", Option.of(Arrays.asList("300")), "300", 100, dataGen::generateInserts, SparkRDDWriteClient::bulkInsert, false, 100, 100, 0, false);
    client.close();
    // Re-create the data generator so it no longer tracks records generated by the failed commits
    dataGen = new HoodieTestDataGenerator();
    // Create a successful commit
    Future<JavaRDD<WriteStatus>> commit3 = service.submit(() -> writeBatch(new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, true)), "400", "300", Option.of(Arrays.asList("400")), "300", 100, dataGen::generateInserts, SparkRDDWriteClient::bulkInsert, false, 100, 100, 0, true));
    commit3.get();
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
    assertTrue(metaClient.getActiveTimeline().getTimelineOfActions(CollectionUtils.createSet(ROLLBACK_ACTION)).countInstants() == 0);
    assertTrue(metaClient.getActiveTimeline().filterInflights().countInstants() == 2);
    assertTrue(metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 2);
    client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, true));
    // Wait until enough time has passed that the heartbeats of the first 2 failed commits have expired
    boolean conditionMet = false;
    while (!conditionMet) {
        conditionMet = client.getHeartbeatClient().isHeartbeatExpired("300");
        Thread.sleep(2000);
    }
    Future<JavaRDD<WriteStatus>> commit4 = service.submit(() -> writeBatch(new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, true)), "500", "400", Option.of(Arrays.asList("500")), "500", 100, dataGen::generateInserts, SparkRDDWriteClient::bulkInsert, false, 100, 100, 0, true));
    Future<HoodieCleanMetadata> clean1 = service.submit(() -> new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, true)).clean());
    commit4.get();
    clean1.get();
    HoodieActiveTimeline timeline = metaClient.getActiveTimeline().reload();
    assertTrue(timeline.getTimelineOfActions(CollectionUtils.createSet(ROLLBACK_ACTION)).countInstants() == 2);
    // Since we write rollbacks, not cleans, there should be no clean action on the timeline
    assertTrue(timeline.getTimelineOfActions(CollectionUtils.createSet(CLEAN_ACTION)).countInstants() == 0);
    assertTrue(timeline.getCommitsTimeline().filterCompletedInstants().countInstants() == 3);
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) ExecutorService(java.util.concurrent.ExecutorService) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
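
The heartbeat wait in this test loops indefinitely if the heartbeat never expires. A bounded variant is safer; the following is a hypothetical helper (not part of the Hudi test utilities), built only on the client.getHeartbeatClient().isHeartbeatExpired(...) call already used above.

// Hypothetical bounded version of the polling loop above.
private static void awaitHeartbeatExpiry(SparkRDDWriteClient client, String instantTime, long timeoutMs) throws Exception {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!client.getHeartbeatClient().isHeartbeatExpired(instantTime)) {
        if (System.currentTimeMillis() > deadline) {
            throw new IllegalStateException("Heartbeat for instant " + instantTime + " did not expire within " + timeoutMs + " ms");
        }
        Thread.sleep(2000);
    }
}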

Example 69 with HoodieTestDataGenerator

use of org.apache.hudi.common.testutils.HoodieTestDataGenerator in project hudi by apache.

the class TestHoodieClientOnCopyOnWriteStorage method verifyDeletePartitionsHandling.

/**
 *  1) Do write1 (upsert) with 'batch1RecordsCount' records for the first partition.
 *  2) Do write2 (upsert) with 'batch2RecordsCount' records for the second partition.
 *  3) Do write3 (upsert) with 'batch3RecordsCount' records for the third partition.
 *  4) Delete the first partition and check the result.
 *  5) Delete the second and third partitions and check the result.
 */
private void verifyDeletePartitionsHandling(int batch1RecordsCount, int batch2RecordsCount, int batch3RecordsCount, boolean populateMetaFields) throws Exception {
    HoodieWriteConfig config = getSmallInsertWriteConfig(2000, TRIP_EXAMPLE_SCHEMA, dataGen.getEstimatedFileSizeInBytes(150), populateMetaFields, populateMetaFields ? new Properties() : getPropertiesForKeyGen());
    SparkRDDWriteClient client = getHoodieWriteClient(config);
    dataGen = new HoodieTestDataGenerator();
    // Do Inserts for DEFAULT_FIRST_PARTITION_PATH
    String commitTime1 = "001";
    Set<String> batch1Buckets = this.insertPartitionRecordsWithCommit(client, batch1RecordsCount, commitTime1, DEFAULT_FIRST_PARTITION_PATH);
    // Do Inserts for DEFAULT_SECOND_PARTITION_PATH
    String commitTime2 = "002";
    Set<String> batch2Buckets = this.insertPartitionRecordsWithCommit(client, batch2RecordsCount, commitTime2, DEFAULT_SECOND_PARTITION_PATH);
    // Do Inserts for DEFAULT_THIRD_PARTITION_PATH
    String commitTime3 = "003";
    Set<String> batch3Buckets = this.insertPartitionRecordsWithCommit(client, batch3RecordsCount, commitTime3, DEFAULT_THIRD_PARTITION_PATH);
    // delete DEFAULT_FIRST_PARTITION_PATH
    String commitTime4 = "004";
    Set<String> deletePartitionReplaceFileIds1 = deletePartitionWithCommit(client, commitTime4, Arrays.asList(DEFAULT_FIRST_PARTITION_PATH));
    assertEquals(batch1Buckets, deletePartitionReplaceFileIds1);
    List<HoodieBaseFile> baseFiles = HoodieClientTestUtils.getLatestBaseFiles(basePath, fs, String.format("%s/%s/*", basePath, DEFAULT_FIRST_PARTITION_PATH));
    assertEquals(0, baseFiles.size());
    baseFiles = HoodieClientTestUtils.getLatestBaseFiles(basePath, fs, String.format("%s/%s/*", basePath, DEFAULT_SECOND_PARTITION_PATH));
    assertTrue(baseFiles.size() > 0);
    baseFiles = HoodieClientTestUtils.getLatestBaseFiles(basePath, fs, String.format("%s/%s/*", basePath, DEFAULT_THIRD_PARTITION_PATH));
    assertTrue(baseFiles.size() > 0);
    // delete DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH
    String commitTime5 = "005";
    Set<String> deletePartitionReplaceFileIds2 = deletePartitionWithCommit(client, commitTime5, Arrays.asList(DEFAULT_SECOND_PARTITION_PATH, DEFAULT_THIRD_PARTITION_PATH));
    Set<String> expectedFileId = new HashSet<>();
    expectedFileId.addAll(batch2Buckets);
    expectedFileId.addAll(batch3Buckets);
    assertEquals(expectedFileId, deletePartitionReplaceFileIds2);
    baseFiles = HoodieClientTestUtils.getLatestBaseFiles(basePath, fs, String.format("%s/%s/*", basePath, DEFAULT_FIRST_PARTITION_PATH), String.format("%s/%s/*", basePath, DEFAULT_SECOND_PARTITION_PATH), String.format("%s/%s/*", basePath, DEFAULT_THIRD_PARTITION_PATH));
    assertEquals(0, baseFiles.size());
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Properties(java.util.Properties) TypedProperties(org.apache.hudi.common.config.TypedProperties) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HashSet(java.util.HashSet)
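
insertPartitionRecordsWithCommit(...) and deletePartitionWithCommit(...) are helpers defined elsewhere in this test class. As a hedged illustration, the insert-side helper could look roughly like the sketch below, composed only of calls already used in these examples (a per-partition HoodieTestDataGenerator, client.upsert, and WriteStatus.getFileId()); the actual helper may differ.

// Hypothetical sketch: insert records into one partition and return the file IDs ("buckets") written.
private Set<String> insertPartitionRecordsWithCommitSketch(SparkRDDWriteClient client, int recordCount, String commitTime, String partitionPath) {
    HoodieTestDataGenerator generator = new HoodieTestDataGenerator(new String[] { partitionPath });
    List<HoodieRecord> inserts = generator.generateInserts(commitTime, recordCount);
    client.startCommitWithTime(commitTime);
    List<WriteStatus> statuses = client.upsert(jsc.parallelize(inserts, 1), commitTime).collect();
    assertNoWriteErrors(statuses);
    // These file IDs are what the test later compares against the replaced file IDs
    // reported by the delete-partition commit.
    return statuses.stream().map(WriteStatus::getFileId).collect(Collectors.toSet());
}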

Example 70 with HoodieTestDataGenerator

use of org.apache.hudi.common.testutils.HoodieTestDataGenerator in project hudi by apache.

the class TestHoodieClientOnCopyOnWriteStorage method testPendingClusteringRollback.

@Test
public void testPendingClusteringRollback() throws Exception {
    boolean populateMetaFields = true;
    // setup clustering config.
    HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder().withClusteringMaxNumGroups(10).withClusteringTargetPartitions(0).withInlineClusteringNumCommits(1).withInlineClustering(true).build();
    // start clustering, but don't commit
    List<HoodieRecord> allRecords = testInsertAndClustering(clusteringConfig, populateMetaFields, false);
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
    List<Pair<HoodieInstant, HoodieClusteringPlan>> pendingClusteringPlans = ClusteringUtils.getAllPendingClusteringPlans(metaClient).collect(Collectors.toList());
    assertEquals(1, pendingClusteringPlans.size());
    HoodieInstant pendingClusteringInstant = pendingClusteringPlans.get(0).getLeft();
    // complete another commit after pending clustering
    HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(EAGER);
    addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
    HoodieWriteConfig config = cfgBuilder.build();
    SparkRDDWriteClient client = getHoodieWriteClient(config);
    dataGen = new HoodieTestDataGenerator();
    String commitTime = HoodieActiveTimeline.createNewInstantTime();
    allRecords.addAll(dataGen.generateInserts(commitTime, 200));
    assertThrows(HoodieUpsertException.class, () -> writeAndVerifyBatch(client, allRecords, commitTime, populateMetaFields));
    // verify pending clustering can be rolled back (even though there is a completed commit greater than pending clustering)
    client.rollback(pendingClusteringInstant.getTimestamp());
    metaClient.reloadActiveTimeline();
    // verify there are no pending clustering instants
    assertEquals(0, ClusteringUtils.getAllPendingClusteringPlans(metaClient).count());
    // Delete the rollback.completed instant to mimic a failed rollback of clustering, then trigger the rollback of clustering again. The same rollback instant should be reused.
    HoodieInstant rollbackInstant = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get();
    FileCreateUtils.deleteRollbackCommit(metaClient.getBasePath(), rollbackInstant.getTimestamp());
    metaClient.reloadActiveTimeline();
    // create replace commit requested meta file so that rollback will not throw FileNotFoundException
    // create file slice with instantTime 001 and build clustering plan including this created 001 file slice.
    HoodieClusteringPlan clusteringPlan = ClusteringTestUtils.createClusteringPlan(metaClient, pendingClusteringInstant.getTimestamp(), "1");
    // create requested replace commit
    HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder().setClusteringPlan(clusteringPlan).setOperationType(WriteOperationType.CLUSTER.name()).build();
    FileCreateUtils.createRequestedReplaceCommit(metaClient.getBasePath(), pendingClusteringInstant.getTimestamp(), Option.of(requestedReplaceMetadata));
    // trigger clustering again. no new rollback instants should be generated.
    try {
        client.cluster(pendingClusteringInstant.getTimestamp(), false);
    // The replace commit metadata generated here is a fake one, so clustering will fail; the intent of the test is to check for duplicate rollback instants.
    } catch (Exception e) {
    // ignore.
    }
    metaClient.reloadActiveTimeline();
    // verify that there is no new rollback instant generated
    HoodieInstant newRollbackInstant = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get();
    assertEquals(rollbackInstant.getTimestamp(), newRollbackInstant.getTimestamp());
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) HoodieValidationException(org.apache.hudi.exception.HoodieValidationException) IOException(java.io.IOException) HoodieCorruptedDataException(org.apache.hudi.exception.HoodieCorruptedDataException) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) HoodieInsertException(org.apache.hudi.exception.HoodieInsertException) HoodieCommitException(org.apache.hudi.exception.HoodieCommitException) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) Pair(org.apache.hudi.common.util.collection.Pair) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
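
Stripped of the clustering-specific setup, the final assertions reduce to a before/after comparison of the rollback timeline. A condensed restatement of that check, using only the timeline calls already shown in the example:

// Capture the latest rollback instant before re-triggering the rollback/clustering path ...
String rollbackTimeBefore = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get().getTimestamp();
// ... re-run the path under test, then reload the timeline ...
metaClient.reloadActiveTimeline();
// ... and verify that no additional rollback instant was created.
String rollbackTimeAfter = metaClient.getActiveTimeline().getRollbackTimeline().lastInstant().get().getTimestamp();
assertEquals(rollbackTimeBefore, rollbackTimeAfter);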

Aggregations

HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 97
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 52
Test (org.junit.jupiter.api.Test): 51
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 44
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 38
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 31
TypedProperties (org.apache.hudi.common.config.TypedProperties): 29
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 26
GenericRecord (org.apache.avro.generic.GenericRecord): 25
JavaRDD (org.apache.spark.api.java.JavaRDD): 25
Path (org.apache.hadoop.fs.Path): 24
WriteStatus (org.apache.hudi.client.WriteStatus): 22
ArrayList (java.util.ArrayList): 21
Properties (java.util.Properties): 21
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 18
HoodieTable (org.apache.hudi.table.HoodieTable): 18
List (java.util.List): 17
ValueSource (org.junit.jupiter.params.provider.ValueSource): 17
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 16
IOException (java.io.IOException): 15