
Example 81 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testSimpleTagLocationAndUpdateWithRollback.

@Test
public void testSimpleTagLocationAndUpdateWithRollback() throws Exception {
    // Load to memory
    HoodieWriteConfig config = getConfigBuilder(100, false, false).withRollbackUsingMarkers(false).build();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
    final String newCommitTime = writeClient.startCommit();
    final int numRecords = 10;
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, numRecords);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    metaClient = HoodieTableMetaClient.reload(metaClient);
    // Insert 10 records
    JavaRDD<WriteStatus> writeStatues = writeClient.upsert(writeRecords, newCommitTime);
    assertNoWriteErrors(writeStatues.collect());
    // commit this upsert
    writeClient.commit(newCommitTime, writeStatues);
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    // Now tagLocation for these records, hbaseIndex should tag them
    List<HoodieRecord> records2 = tagLocation(index, writeRecords, hoodieTable).collect();
    assertEquals(numRecords, records2.stream().filter(HoodieRecord::isCurrentLocationKnown).count());
    // check tagged records are tagged with correct fileIds
    List<String> fileIds = writeStatues.map(WriteStatus::getFileId).collect();
    assertEquals(0, records2.stream().filter(record -> record.getCurrentLocation().getFileId() == null).count());
    List<String> taggedFileIds = records2.stream().map(record -> record.getCurrentLocation().getFileId()).distinct().collect(Collectors.toList());
    // both lists should match
    assertTrue(taggedFileIds.containsAll(fileIds) && fileIds.containsAll(taggedFileIds));
    // Rollback the last commit
    writeClient.rollback(newCommitTime);
    hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    // Now tagLocation for these records, hbaseIndex should not tag them since it was a rolled
    // back commit
    List<HoodieRecord> records3 = tagLocation(index, writeRecords, hoodieTable).collect();
    assertEquals(0, records3.stream().filter(HoodieRecord::isCurrentLocationKnown).count());
    assertEquals(0, records3.stream().filter(record -> record.getCurrentLocation() != null).count());
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteStatus(org.apache.hudi.client.WriteStatus) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
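The getConfigBuilder(...) helper used above is defined elsewhere in the test class and not shown on this page. A minimal sketch of what such a builder plausibly looks like, assuming the harness exposes basePath(), an hbaseConfig Configuration, and a TABLE_NAME constant (helper names and the available knobs vary by Hudi version):

private HoodieWriteConfig.Builder getConfigBuilder(int hbaseIndexBatchSize, boolean updatePartitionPath, boolean rollbackSync) {
    // Hypothetical reconstruction: wires an HBase-backed index into the write config
    return HoodieWriteConfig.newBuilder()
        .withPath(basePath())
        .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
        .withParallelism(1, 1)
        .withAutoCommit(false)
        .withIndexConfig(HoodieIndexConfig.newBuilder()
            .withIndexType(HoodieIndex.IndexType.HBASE)
            .withHBaseIndexConfig(HoodieHBaseIndexConfig.newBuilder()
                .hbaseZkQuorum(hbaseConfig.get("hbase.zookeeper.quorum"))
                .hbaseZkPort(Integer.parseInt(hbaseConfig.get("hbase.zookeeper.property.clientPort")))
                .hbaseTableName(TABLE_NAME)
                .hbaseIndexGetBatchSize(hbaseIndexBatchSize)
                .hbaseIndexUpdatePartitionPath(updatePartitionPath)
                .hbaseIndexRollbackSync(rollbackSync)
                .build())
            .build());
}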

Example 82 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testsWriteStatusPartitioner.

@Test
public void testsWriteStatusPartitioner() {
    HoodieWriteConfig config = getConfig();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    int parallelism = 4;
    final JavaRDD<WriteStatus> writeStatusRDD = jsc().parallelize(Arrays.asList(getSampleWriteStatusWithFileId(0, 2), getSampleWriteStatusWithFileId(2, 3), getSampleWriteStatusWithFileId(4, 3), getSampleWriteStatusWithFileId(0, 3), getSampleWriteStatusWithFileId(11, 0)), parallelism);
    final Map<String, Integer> fileIdPartitionMap = index.mapFileWithInsertsToUniquePartition(writeStatusRDD);
    int numWriteStatusWithInserts = (int) index.getHBasePutAccessParallelism(writeStatusRDD)._2;
    JavaRDD<WriteStatus> partitionedRDD = writeStatusRDD.mapToPair(w -> new Tuple2<>(w.getFileId(), w)).partitionBy(new SparkHoodieHBaseIndex.WriteStatusPartitioner(fileIdPartitionMap, numWriteStatusWithInserts)).map(w -> w._2());
    assertEquals(numWriteStatusWithInserts, partitionedRDD.getNumPartitions());
    int[] partitionIndexesBeforeRepartition = writeStatusRDD.partitions().stream().mapToInt(p -> p.index()).toArray();
    assertEquals(parallelism, partitionIndexesBeforeRepartition.length);
    int[] partitionIndexesAfterRepartition = partitionedRDD.partitions().stream().mapToInt(p -> p.index()).toArray();
    // there should be 3 partitions after repartition, because only 3 WriteStatus entries
    // have inserts (numWriteStatusWithInserts)
    assertEquals(numWriteStatusWithInserts, partitionIndexesAfterRepartition.length);
    List<WriteStatus>[] writeStatuses = partitionedRDD.collectPartitions(partitionIndexesAfterRepartition);
    for (List<WriteStatus> list : writeStatuses) {
        int count = 0;
        for (WriteStatus w : list) {
            if (w.getStat().getNumInserts() > 0) {
                count++;
            }
        }
        assertEquals(1, count);
    }
}
Also used : HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) LinkedList(java.util.LinkedList) WriteStatus(org.apache.hudi.client.WriteStatus) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
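WriteStatusPartitioner, exercised above, is an inner class of SparkHoodieHBaseIndex; its job is to satisfy Spark's Partitioner contract, mapping each fileId key to the unique partition computed by mapFileWithInsertsToUniquePartition. A simplified, hypothetical stand-in that illustrates the contract (not the real Hudi class):

import java.util.Map;
import org.apache.spark.Partitioner;

// Simplified stand-in for SparkHoodieHBaseIndex.WriteStatusPartitioner:
// every fileId with inserts gets its own partition.
public class FileIdPartitioner extends Partitioner {
    private final Map<String, Integer> fileIdPartitionMap; // fileId -> unique partition index
    private final int totalPartitions;                     // count of WriteStatus entries with inserts

    public FileIdPartitioner(Map<String, Integer> fileIdPartitionMap, int totalPartitions) {
        this.fileIdPartitionMap = fileIdPartitionMap;
        this.totalPartitions = totalPartitions;
    }

    @Override
    public int numPartitions() {
        return totalPartitions;
    }

    @Override
    public int getPartition(Object key) {
        // fileIds without inserts are absent from the map; route them to partition 0
        return fileIdPartitionMap.getOrDefault((String) key, 0);
    }
}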

Example 83 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testSmallBatchSize.

@Test
public void testSmallBatchSize() throws Exception {
    final String newCommitTime = "001";
    final int numRecords = 10;
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, numRecords);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    // Load to memory
    HoodieWriteConfig config = getConfig(2);
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        metaClient = HoodieTableMetaClient.reload(metaClient);
        HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        // Test tagLocation without any entries in index
        JavaRDD<HoodieRecord> records1 = tagLocation(index, writeRecords, hoodieTable);
        assertEquals(0, records1.filter(record -> record.isCurrentLocationKnown()).count());
        // Insert 10 records
        writeClient.startCommitWithTime(newCommitTime);
        JavaRDD<WriteStatus> writeStatues = writeClient.upsert(writeRecords, newCommitTime);
        assertNoWriteErrors(writeStatues.collect());
        // Now tagLocation for these records, hbaseIndex should not tag them since the
        // upsert has not been committed yet (commit() is only called below)
        JavaRDD<HoodieRecord> records2 = tagLocation(index, writeRecords, hoodieTable);
        assertEquals(0, records2.filter(record -> record.isCurrentLocationKnown()).count());
        // Now commit this & update location of records inserted and validate no errors
        writeClient.commit(newCommitTime, writeStatues);
        // Now tagLocation for these records, hbaseIndex should tag them correctly
        metaClient = HoodieTableMetaClient.reload(metaClient);
        hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        List<HoodieRecord> records3 = tagLocation(index, writeRecords, hoodieTable).collect();
        assertEquals(numRecords, records3.stream().filter(record -> record.isCurrentLocationKnown()).count());
        assertEquals(numRecords, records3.stream().map(record -> record.getKey().getRecordKey()).distinct().count());
        assertEquals(numRecords, records3.stream().filter(record -> (record.getCurrentLocation() != null && record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count());
    }
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieTable(org.apache.hudi.table.HoodieTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteStatus(org.apache.hudi.client.WriteStatus) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
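The tagLocation(...) helper seen throughout these tests also comes from the harness rather than this page. Against recent Hudi releases it is plausibly a thin wrapper over HoodieIndex.tagLocation; the HoodieJavaRDD bridge and the exact tagLocation signature differ across versions, so treat this as a sketch:

// Hypothetical reconstruction of the test helper: HoodieJavaRDD converts
// between Spark's JavaRDD and Hudi's engine-agnostic HoodieData.
private JavaRDD<HoodieRecord> tagLocation(HoodieIndex index, JavaRDD<HoodieRecord> records, HoodieTable table) {
    return HoodieJavaRDD.getJavaRDD(
        index.tagLocation(HoodieJavaRDD.of(records), context, table));
}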

Example 84 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

From the class TestSparkHoodieHBaseIndex, method testTotalPutsBatching.

@Test
public void testTotalPutsBatching() throws Exception {
    HoodieWriteConfig config = getConfig();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    SparkRDDWriteClient writeClient = getHoodieWriteClient(config);
    // start a commit and generate test data
    String newCommitTime = writeClient.startCommit();
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 250);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    metaClient = HoodieTableMetaClient.reload(metaClient);
    HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
    // Insert 250 records
    JavaRDD<WriteStatus> writeStatues = writeClient.upsert(writeRecords, newCommitTime);
    // commit this upsert
    writeClient.commit(newCommitTime, writeStatues);
    // Mock hbaseConnection and related entities
    Connection hbaseConnection = mock(Connection.class);
    HTable table = mock(HTable.class);
    when(hbaseConnection.getTable(TableName.valueOf(TABLE_NAME))).thenReturn(table);
    when(table.get((List<Get>) any())).thenReturn(new Result[0]);
    // For this test only, point the index at the mocked HBase connection
    index.setHbaseConnection(hbaseConnection);
    // Get all the files generated
    int numberOfDataFileIds = (int) writeStatues.map(status -> status.getFileId()).distinct().count();
    updateLocation(index, writeStatues, hoodieTable);
    // Puts are batched per fileId (batchSize = 100 here), and at most numberOfDataFileIds
    // file groups receive updates, so no more than numberOfDataFileIds put batches are expected
    verify(table, atMost(numberOfDataFileIds)).put((List<Put>) any());
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Connection(org.apache.hadoop.hbase.client.Connection) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HTable(org.apache.hadoop.hbase.client.HTable) Put(org.apache.hadoop.hbase.client.Put) HoodieTable(org.apache.hudi.table.HoodieTable) Get(org.apache.hadoop.hbase.client.Get) WriteStatus(org.apache.hudi.client.WriteStatus) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
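The test above only bounds the number of put batches with atMost. If the exact batching behaviour mattered, Mockito's ArgumentCaptor (import org.mockito.ArgumentCaptor) could capture each batched List<Put> for inspection; a sketch against the same mocked table, where the 100 is an assumed batch size from getConfig():

// Capture every batched put call and check no batch exceeds the configured size.
ArgumentCaptor<List<Put>> putBatches = ArgumentCaptor.forClass((Class) List.class);
verify(table, atMost(numberOfDataFileIds)).put(putBatches.capture());
for (List<Put> batch : putBatches.getAllValues()) {
    assertTrue(batch.size() <= 100); // assumed batch size from the write config
}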

Example 85 with HoodieWriteConfig

Use of org.apache.hudi.config.HoodieWriteConfig in project hudi by apache.

From the class DeltaSync, method reInitWriteClient.

private void reInitWriteClient(Schema sourceSchema, Schema targetSchema) throws IOException {
    LOG.info("Setting up new Hoodie Write Client");
    registerAvroSchemas(sourceSchema, targetSchema);
    HoodieWriteConfig hoodieCfg = getHoodieClientConfig(targetSchema);
    if (hoodieCfg.isEmbeddedTimelineServerEnabled()) {
        if (!embeddedTimelineService.isPresent()) {
            embeddedTimelineService = EmbeddedTimelineServerHelper.createEmbeddedTimelineService(new HoodieSparkEngineContext(jssc), hoodieCfg);
        } else {
            EmbeddedTimelineServerHelper.updateWriteConfigWithTimelineServer(embeddedTimelineService.get(), hoodieCfg);
        }
    }
    if (null != writeClient) {
        // Close Write client.
        writeClient.close();
    }
    writeClient = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jssc), hoodieCfg, embeddedTimelineService);
    onInitializingHoodieWriteClient.apply(writeClient);
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig)
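The branch above only runs when the write config reports isEmbeddedTimelineServerEnabled(). A minimal sketch of a config with that flag set, where the path and table name are placeholders:

HoodieWriteConfig hoodieCfg = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hoodie/sample-table")    // hypothetical base path
    .forTable("sample-table")                // hypothetical table name
    .withEmbeddedTimelineServerEnabled(true) // the flag reInitWriteClient checks
    .build();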

Aggregations

HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 327
Test (org.junit.jupiter.api.Test): 179
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 173
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 169
ArrayList (java.util.ArrayList): 136
List (java.util.List): 133
SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient): 126
HoodieTable (org.apache.hudi.table.HoodieTable): 117
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 111
HashMap (java.util.HashMap): 93
Path (org.apache.hadoop.fs.Path): 92
WriteStatus (org.apache.hudi.client.WriteStatus): 86
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 84
Collectors (java.util.stream.Collectors): 81
Map (java.util.Map): 76
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 76
Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals): 74
Arrays (java.util.Arrays): 73
HoodieSparkTable (org.apache.hudi.table.HoodieSparkTable): 72
Option (org.apache.hudi.common.util.Option): 69