Example 61 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class TestUpdateSchemaEvolution method prepareFirstRecordCommit.

private WriteStatus prepareFirstRecordCommit(List<String> recordsStrs) throws IOException {
    // Create a bunch of records with an old version of schema
    final HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.avsc");
    final HoodieSparkTable table = HoodieSparkTable.create(config, context);
    final List<WriteStatus> statuses = jsc.parallelize(Arrays.asList(1)).map(x -> {
        List<HoodieRecord> insertRecords = new ArrayList<>();
        for (String recordStr : recordsStrs) {
            RawTripTestPayload rowChange = new RawTripTestPayload(recordStr);
            insertRecords.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
        }
        Map<String, HoodieRecord> insertRecordMap = insertRecords.stream().collect(Collectors.toMap(r -> r.getRecordKey(), Function.identity()));
        HoodieCreateHandle<?, ?, ?, ?> createHandle = new HoodieCreateHandle(config, "100", table, insertRecords.get(0).getPartitionPath(), "f1-0", insertRecordMap, supplier);
        createHandle.write();
        return createHandle.close().get(0);
    }).collect();
    final Path commitFile = new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100"));
    FSUtils.getFs(basePath, HoodieTestUtils.getDefaultHadoopConf()).create(commitFile);
    return statuses.get(0);
}
Also used : Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) HoodieUpsertException(org.apache.hudi.exception.HoodieUpsertException) Option(org.apache.hudi.common.util.Option) Function(java.util.function.Function) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) ArrayList(java.util.ArrayList) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieMergeHandle(org.apache.hudi.io.HoodieMergeHandle) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) SchemaTestUtil.getSchemaFromResource(org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Schema(org.apache.avro.Schema) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) HoodieCreateHandle(org.apache.hudi.io.HoodieCreateHandle) InvalidRecordException(org.apache.parquet.io.InvalidRecordException) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) List(java.util.List) HoodieRecordLocation(org.apache.hudi.common.model.HoodieRecordLocation) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Executable(org.junit.jupiter.api.function.Executable) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) Assertions.assertDoesNotThrow(org.junit.jupiter.api.Assertions.assertDoesNotThrow) FSUtils(org.apache.hudi.common.fs.FSUtils)
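
The record-construction pattern inside the loop reduces to a few lines. A minimal sketch using the same test utilities shown above (the helper name toHoodieRecord is ours, not part of Hudi):

private static HoodieRecord toHoodieRecord(String json) throws IOException {
    // RawTripTestPayload parses the JSON string and derives the row key and
    // partition path that make up the HoodieKey
    RawTripTestPayload payload = new RawTripTestPayload(json);
    // A HoodieAvroRecord pairs the key with its payload
    return new HoodieAvroRecord(new HoodieKey(payload.getRowKey(), payload.getPartitionPath()), payload);
}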

Example 62 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class TestBucketIdentifier method testBucketIdWithSimpleRecordKey.

@Test
public void testBucketIdWithSimpleRecordKey() {
    String recordKeyField = "_row_key";
    String indexKeyField = "_row_key";
    GenericRecord record = KeyGeneratorTestUtilities.getRecord();
    HoodieRecord hoodieRecord = new HoodieAvroRecord(new HoodieKey(KeyGenUtils.getRecordKey(record, recordKeyField, false), ""), null);
    int bucketId = BucketIdentifier.getBucketId(hoodieRecord, indexKeyField, 8);
    assert bucketId == BucketIdentifier.getBucketId(Arrays.asList(record.get(indexKeyField).toString()), 8);
}
Also used : HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.jupiter.api.Test)
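
Conceptually, the bucket id is a stable hash of the index key fields taken modulo the bucket count, so equal keys always land in the same bucket. A minimal sketch of that idea (illustrative only; the exact hash used by BucketIdentifier may differ):

static int bucketIdSketch(List<String> hashKeyFields, int numBuckets) {
    // Mask the sign bit so the hash is non-negative, then wrap into [0, numBuckets)
    return (hashKeyFields.hashCode() & Integer.MAX_VALUE) % numBuckets;
}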

Example 63 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class TestHoodieBucketIndex method testTagLocation.

@Test
public void testTagLocation() throws Exception {
    String rowKey1 = UUID.randomUUID().toString();
    String rowKey2 = UUID.randomUUID().toString();
    String rowKey3 = UUID.randomUUID().toString();
    String recordStr1 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
    String recordStr2 = "{\"_row_key\":\"" + rowKey2 + "\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
    String recordStr3 = "{\"_row_key\":\"" + rowKey3 + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
    String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}";
    RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1);
    HoodieRecord record1 = new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1);
    RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2);
    HoodieRecord record2 = new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2);
    RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3);
    HoodieRecord record3 = new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3);
    RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4);
    HoodieRecord record4 = new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4);
    JavaRDD<HoodieRecord> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4));
    HoodieWriteConfig config = makeConfig();
    HoodieTable table = HoodieSparkTable.create(config, context, metaClient);
    HoodieBucketIndex bucketIndex = new HoodieBucketIndex(config);
    HoodieData<HoodieRecord> taggedRecordRDD = bucketIndex.tagLocation(HoodieJavaRDD.of(recordRDD), context, table);
    assertFalse(taggedRecordRDD.collectAsList().stream().anyMatch(r -> r.isCurrentLocationKnown()));
    HoodieSparkWriteableTestTable testTable = HoodieSparkWriteableTestTable.of(table, SCHEMA);
    testTable.addCommit("001").withInserts("2016/01/31", getRecordFileId(record1), record1);
    testTable.addCommit("002").withInserts("2016/01/31", getRecordFileId(record2), record2);
    testTable.addCommit("003").withInserts("2016/01/31", getRecordFileId(record3), record3);
    taggedRecordRDD = bucketIndex.tagLocation(HoodieJavaRDD.of(recordRDD), context, HoodieSparkTable.create(config, context, metaClient));
    assertTrue(taggedRecordRDD.collectAsList().stream().filter(HoodieRecord::isCurrentLocationKnown).allMatch(r -> BucketIdentifier.bucketIdFromFileId(r.getCurrentLocation().getFileId()) == getRecordBucketId(r)));
    assertTrue(taggedRecordRDD.collectAsList().stream().filter(r -> r.getPartitionPath().equals("2015/01/31") && !r.isCurrentLocationKnown()).count() == 1L);
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) HoodieClientTestHarness(org.apache.hudi.testutils.HoodieClientTestHarness) Logger(org.apache.log4j.Logger) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) JavaRDD(org.apache.spark.api.java.JavaRDD) SchemaTestUtil.getSchemaFromResource(org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieIndexException(org.apache.hudi.exception.HoodieIndexException) HoodieData(org.apache.hudi.common.data.HoodieData) Schema(org.apache.avro.Schema) Properties(java.util.Properties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) RawTripTestPayload(org.apache.hudi.common.testutils.RawTripTestPayload) KeyGeneratorOptions(org.apache.hudi.keygen.constant.KeyGeneratorOptions) UUID(java.util.UUID) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieIndex(org.apache.hudi.index.HoodieIndex) Test(org.junit.jupiter.api.Test) AfterEach(org.junit.jupiter.api.AfterEach) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieSparkWriteableTestTable(org.apache.hudi.testutils.HoodieSparkWriteableTestTable) LogManager(org.apache.log4j.LogManager)
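
Note how the second round of assertions recovers the bucket number from the file id alone: bucket-index file groups encode the bucket in the fileId itself, so nothing needs to be read from storage. A hedged sketch of that decoding, assuming the zero-padded numeric prefix convention (e.g. "00000002-..." decodes to bucket 2):

static int bucketIdFromFileIdSketch(String fileId) {
    // Assumption: the first 8 characters are the zero-padded bucket number
    return Integer.parseInt(fileId.substring(0, 8));
}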

Example 64 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class TestSparkHoodieHBaseIndex method testDelete.

@Test
public void testDelete() throws Exception {
    final String newCommitTime = "001";
    final int numRecords = 10;
    List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, numRecords);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    // Load to memory
    HoodieWriteConfig config = getConfig();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        metaClient = HoodieTableMetaClient.reload(metaClient);
        HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        // Test tagLocation without any entries in index
        JavaRDD<HoodieRecord> records1 = tagLocation(index, writeRecords, hoodieTable);
        assertEquals(0, records1.filter(record -> record.isCurrentLocationKnown()).count());
        // Insert records
        writeClient.startCommitWithTime(newCommitTime);
        JavaRDD<WriteStatus> writeStatuses = writeClient.upsert(writeRecords, newCommitTime);
        assertNoWriteErrors(writeStatuses.collect());
        writeClient.commit(newCommitTime, writeStatuses);
        // Now tagLocation for these records, hbaseIndex should tag them correctly
        metaClient = HoodieTableMetaClient.reload(metaClient);
        hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        List<HoodieRecord> records2 = tagLocation(index, writeRecords, hoodieTable).collect();
        assertEquals(numRecords, records2.stream().filter(record -> record.isCurrentLocationKnown()).count());
        assertEquals(numRecords, records2.stream().map(record -> record.getKey().getRecordKey()).distinct().count());
        assertEquals(numRecords, records2.stream().filter(record -> (record.getCurrentLocation() != null && record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count());
        // Delete all records. This has to be done directly as deleting index entries
        // is not implemented via HoodieWriteClient
        JavaRDD<WriteStatus> deleteWriteStatuses = writeStatuses.map(w -> {
            WriteStatus newWriteStatus = new WriteStatus(true, 1.0);
            w.getWrittenRecords().forEach(r -> newWriteStatus.markSuccess(new HoodieAvroRecord(r.getKey(), null), Option.empty()));
            assertEquals(w.getTotalRecords(), newWriteStatus.getTotalRecords());
            newWriteStatus.setStat(new HoodieWriteStat());
            return newWriteStatus;
        });
        // Cache to break the RDD lineage: without it, the earlier update would be
        // re-executed when the subsequent update forces this RDD to be recomputed.
        deleteWriteStatuses.cache();
        JavaRDD<WriteStatus> deleteStatus = updateLocation(index, deleteWriteStatuses, hoodieTable);
        assertEquals(deleteStatus.count(), deleteWriteStatuses.count());
        assertNoWriteErrors(deleteStatus.collect());
        // Ensure no records can be tagged
        List<HoodieRecord> records3 = tagLocation(index, writeRecords, hoodieTable).collect();
        assertEquals(0, records3.stream().filter(record -> record.isCurrentLocationKnown()).count());
        assertEquals(numRecords, records3.stream().map(record -> record.getKey().getRecordKey()).distinct().count());
        assertEquals(0, records3.stream().filter(record -> (record.getCurrentLocation() != null && record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count());
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) Result(org.apache.hadoop.hbase.client.Result) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) AfterAll(org.junit.jupiter.api.AfterAll) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) BeforeAll(org.junit.jupiter.api.BeforeAll) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) Path(org.apache.hadoop.fs.Path) HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) Tag(org.junit.jupiter.api.Tag) Get(org.apache.hadoop.hbase.client.Get) UUID(java.util.UUID) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) Test(org.junit.jupiter.api.Test) List(java.util.List) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) Mockito.atMost(org.mockito.Mockito.atMost) Mockito.mock(org.mockito.Mockito.mock) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Option(org.apache.hudi.common.util.Option) EnumSource(org.junit.jupiter.params.provider.EnumSource) HashMap(java.util.HashMap) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HTable(org.apache.hadoop.hbase.client.HTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) EmptyHoodieRecordPayload(org.apache.hudi.common.model.EmptyHoodieRecordPayload) LinkedList(java.util.LinkedList) JavaRDD(org.apache.spark.api.java.JavaRDD) Bytes(org.apache.hadoop.hbase.util.Bytes) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) TableName(org.apache.hadoop.hbase.TableName) TestMethodOrder(org.junit.jupiter.api.TestMethodOrder) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Put(org.apache.hadoop.hbase.client.Put) HoodieHBaseIndexConfig(org.apache.hudi.config.HoodieHBaseIndexConfig) IOException(java.io.IOException) Mockito.times(org.mockito.Mockito.times) Mockito.when(org.mockito.Mockito.when) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) MethodOrderer(org.junit.jupiter.api.MethodOrderer) Mockito.verify(org.mockito.Mockito.verify) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) AfterEach(org.junit.jupiter.api.AfterEach) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) SparkClientFunctionalTestHarness(org.apache.hudi.testutils.SparkClientFunctionalTestHarness) Connection(org.apache.hadoop.hbase.client.Connection) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils)
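
The delete step above relies on a convention rather than a public delete API: a record reported through a WriteStatus with a null payload and an empty new location is treated by the HBase index's updateLocation as an index-entry deletion. A sketch of that pattern, extracted from the test (indexDeleteStatus is our name):

static WriteStatus indexDeleteStatus(HoodieRecord previouslyWritten) {
    WriteStatus status = new WriteStatus(true, 1.0);
    // Null payload plus Option.empty() metadata marks the key for removal
    // from the index when updateLocation processes this status
    status.markSuccess(new HoodieAvroRecord(previouslyWritten.getKey(), null), Option.empty());
    status.setStat(new HoodieWriteStat());
    return status;
}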

Example 65 with HoodieAvroRecord

use of org.apache.hudi.common.model.HoodieAvroRecord in project hudi by apache.

the class TestSparkHoodieHBaseIndex method testTagLocationAndPartitionPathUpdateWithExplicitRollback.

@Test
public void testTagLocationAndPartitionPathUpdateWithExplicitRollback() throws Exception {
    final int numRecords = 10;
    final String oldPartitionPath = "1970/01/01";
    final String emptyHoodieRecordPayloadClassName = EmptyHoodieRecordPayload.class.getName();
    HoodieWriteConfig config = getConfigBuilder(100, true, true).withRollbackUsingMarkers(false).build();
    SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config);
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        final String firstCommitTime = writeClient.startCommit();
        List<HoodieRecord> newRecords = dataGen.generateInserts(firstCommitTime, numRecords);
        List<HoodieRecord> oldRecords = new LinkedList<>();
        for (HoodieRecord newRecord : newRecords) {
            HoodieKey key = new HoodieKey(newRecord.getRecordKey(), oldPartitionPath);
            HoodieRecord hoodieRecord = new HoodieAvroRecord(key, (HoodieRecordPayload) newRecord.getData());
            oldRecords.add(hoodieRecord);
        }
        JavaRDD<HoodieRecord> newWriteRecords = jsc().parallelize(newRecords, 1);
        JavaRDD<HoodieRecord> oldWriteRecords = jsc().parallelize(oldRecords, 1);
        // first commit old record
        metaClient = HoodieTableMetaClient.reload(metaClient);
        HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        List<HoodieRecord> beforeFirstTaggedRecords = tagLocation(index, oldWriteRecords, hoodieTable).collect();
        JavaRDD<WriteStatus> oldWriteStatuses = writeClient.upsert(oldWriteRecords, firstCommitTime);
        updateLocation(index, oldWriteStatuses, hoodieTable);
        writeClient.commit(firstCommitTime, oldWriteStatuses);
        List<HoodieRecord> afterFirstTaggedRecords = tagLocation(index, oldWriteRecords, hoodieTable).collect();
        metaClient = HoodieTableMetaClient.reload(metaClient);
        hoodieTable = HoodieSparkTable.create(config, context, metaClient);
        final String secondCommitTime = writeClient.startCommit();
        List<HoodieRecord> beforeSecondTaggedRecords = tagLocation(index, newWriteRecords, hoodieTable).collect();
        JavaRDD<WriteStatus> newWriteStatuses = writeClient.upsert(newWriteRecords, secondCommitTime);
        updateLocation(index, newWriteStatuses, hoodieTable);
        writeClient.commit(secondCommitTime, newWriteStatuses);
        List<HoodieRecord> afterSecondTaggedRecords = tagLocation(index, newWriteRecords, hoodieTable).collect();
        writeClient.rollback(secondCommitTime);
        List<HoodieRecord> afterRollback = tagLocation(index, newWriteRecords, hoodieTable).collect();
        // Verify the first commit
        assertEquals(numRecords, beforeFirstTaggedRecords.stream().filter(record -> record.getCurrentLocation() == null).count());
        assertEquals(numRecords, afterFirstTaggedRecords.stream().filter(HoodieRecord::isCurrentLocationKnown).count());
        // Verify the second commit
        assertEquals(numRecords, beforeSecondTaggedRecords.stream().filter(record -> record.getKey().getPartitionPath().equals(oldPartitionPath) && record.getData().getClass().getName().equals(emptyHoodieRecordPayloadClassName)).count());
        assertEquals(numRecords * 2, beforeSecondTaggedRecords.stream().count());
        assertEquals(numRecords, afterSecondTaggedRecords.stream().count());
        assertEquals(numRecords, afterSecondTaggedRecords.stream().filter(record -> !record.getKey().getPartitionPath().equals(oldPartitionPath)).count());
        // Verify the rollback
        // If a failure occurs after HBase has written the index entries and the index
        // itself is not rolled back, entries pointing at the rolled-back instant are
        // treated as invalid, so no currentLocation is returned for them.
        assertEquals(numRecords, afterRollback.stream().filter(record -> record.getKey().getPartitionPath().equals(oldPartitionPath) && record.getData().getClass().getName().equals(emptyHoodieRecordPayloadClassName)).count());
        assertEquals(numRecords * 2, afterRollback.stream().count());
        assertEquals(numRecords, afterRollback.stream().filter(HoodieRecord::isCurrentLocationKnown).filter(record -> record.getCurrentLocation().getInstantTime().equals(firstCommitTime)).count());
    }
}
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) LinkedList(java.util.LinkedList) HoodieAvroRecord(org.apache.hudi.common.model.HoodieAvroRecord) HoodieKey(org.apache.hudi.common.model.HoodieKey) HoodieTable(org.apache.hudi.table.HoodieTable) WriteStatus(org.apache.hudi.client.WriteStatus) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
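
The numRecords * 2 count before the second commit comes from the global index updating partition paths: each moved key is expanded into a delete against the old partition plus an untagged insert into the new one. A hedged sketch of the delete half of that pair (method and parameter names are ours):

static HoodieRecord deleteMarker(String recordKey, String oldPartitionPath) {
    // EmptyHoodieRecordPayload produces no merged value on upsert, so the row
    // is removed from the old partition's file group
    return new HoodieAvroRecord(new HoodieKey(recordKey, oldPartitionPath), new EmptyHoodieRecordPayload());
}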

Aggregations

HoodieAvroRecord (org.apache.hudi.common.model.HoodieAvroRecord): 84 usages
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 72 usages
HoodieKey (org.apache.hudi.common.model.HoodieKey): 68 usages
ArrayList (java.util.ArrayList): 38 usages
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 37 usages
RawTripTestPayload (org.apache.hudi.common.testutils.RawTripTestPayload): 31 usages
Test (org.junit.jupiter.api.Test): 30 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 29 usages
Path (org.apache.hadoop.fs.Path): 26 usages
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 25 usages
IOException (java.io.IOException): 24 usages
HoodieTable (org.apache.hudi.table.HoodieTable): 24 usages
List (java.util.List): 23 usages
Schema (org.apache.avro.Schema): 23 usages
HashMap (java.util.HashMap): 22 usages
Pair (org.apache.hudi.common.util.collection.Pair): 21 usages
Map (java.util.Map): 20 usages
Collectors (java.util.stream.Collectors): 20 usages
Arrays (java.util.Arrays): 17 usages
Option (org.apache.hudi.common.util.Option): 16 usages