Search in sources :

Example 21 with SparkRDDWriteClient

use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.

the class TestHoodieSparkMergeOnReadTableRollback method testMORTableRestore.

@ParameterizedTest
@ValueSource(booleans = { true, false })
void testMORTableRestore(boolean restoreAfterCompaction) throws Exception {
    HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false).withMarkersType(MarkerType.DIRECT.name());
    HoodieWriteConfig cfg = cfgBuilder.build();
    Properties properties = new Properties();
    properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().toString());
    HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties);
    try (final SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
        List<HoodieRecord> records = insertAndGetRecords("001", client, dataGen, 200);
        List<HoodieRecord> updates1 = updateAndGetRecords("002", client, dataGen, records);
        List<HoodieRecord> updates2 = updateAndGetRecords("003", client, dataGen, records);
        List<HoodieRecord> updates3 = updateAndGetRecords("004", client, dataGen, records);
        validateRecords(cfg, metaClient, updates3);
        if (!restoreAfterCompaction) {
            // restore to 002 and validate records.
            client.restoreToInstant("002");
            validateRecords(cfg, metaClient, updates1);
        } else {
            // trigger compaction and then trigger couple of upserts followed by restore.
            metaClient = HoodieTableMetaClient.reload(metaClient);
            String compactionInstantTime = "005";
            client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
            HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(compactionInstantTime);
            client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
            validateRecords(cfg, metaClient, updates3);
            List<HoodieRecord> updates4 = updateAndGetRecords("006", client, dataGen, records);
            List<HoodieRecord> updates5 = updateAndGetRecords("007", client, dataGen, records);
            validateRecords(cfg, metaClient, updates5);
            // restore to 003 and validate records.
            client.restoreToInstant("003");
            validateRecords(cfg, metaClient, updates2);
        }
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Properties(java.util.Properties) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) JavaRDD(org.apache.spark.api.java.JavaRDD) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 22 with SparkRDDWriteClient

use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.

the class TestHoodieSparkMergeOnReadTableRollback method updateRecords.

private List<HoodieRecord> updateRecords(SparkRDDWriteClient client, HoodieTestDataGenerator dataGen, String commitTime, List<HoodieRecord> records, HoodieTableMetaClient metaClient, HoodieWriteConfig cfg, boolean assertLogFiles) throws IOException {
    client.startCommitWithTime(commitTime);
    records = dataGen.generateUpdates(commitTime, records);
    JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
    List<WriteStatus> statuses = client.upsert(writeRecords, commitTime).collect();
    assertNoWriteErrors(statuses);
    if (assertLogFiles) {
        HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient);
        table.getHoodieView().sync();
        TableFileSystemView.SliceView tableRTFileSystemView = table.getSliceView();
        long numLogFiles = 0;
        for (String partitionPath : dataGen.getPartitionPaths()) {
            List<FileSlice> allSlices = tableRTFileSystemView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
            assertEquals(1, allSlices.stream().filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
            assertTrue(allSlices.stream().anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
            numLogFiles += allSlices.stream().filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
        }
        assertTrue(numLogFiles > 0);
    }
    return records;
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieMergeOnReadTestUtils(org.apache.hudi.testutils.HoodieMergeOnReadTestUtils) Arrays(java.util.Arrays) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Assertions.assertNotEquals(org.junit.jupiter.api.Assertions.assertNotEquals) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) Map(java.util.Map) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) Path(org.apache.hadoop.fs.Path) Assertions.assertAll(org.junit.jupiter.api.Assertions.assertAll) Tag(org.junit.jupiter.api.Tag) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) TRIP_EXAMPLE_SCHEMA(org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MarkerType(org.apache.hudi.common.table.marker.MarkerType) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) SyncableFileSystemView(org.apache.hudi.common.table.view.SyncableFileSystemView) ValueSource(org.junit.jupiter.params.provider.ValueSource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) Properties(java.util.Properties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Files(java.nio.file.Files) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) SparkClientFunctionalTestHarness(org.apache.hudi.testutils.SparkClientFunctionalTestHarness) HoodieIOException(org.apache.hudi.exception.HoodieIOException) Pair(org.apache.hudi.common.util.collection.Pair) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTable(org.apache.hudi.table.HoodieTable) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) WriteStatus(org.apache.hudi.client.WriteStatus)

Example 23 with SparkRDDWriteClient

use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.

the class TestMarkerBasedRollbackStrategy method testCopyOnWriteRollback.

@ParameterizedTest(name = TEST_NAME_WITH_PARAMS)
@MethodSource("configParams")
public void testCopyOnWriteRollback(boolean useFileListingMetadata) throws Exception {
    HoodieWriteConfig writeConfig = getConfigBuilder().withRollbackUsingMarkers(true).withAutoCommit(false).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(useFileListingMetadata).build()).withPath(basePath).build();
    HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
    try (SparkRDDWriteClient writeClient = new SparkRDDWriteClient(engineContext, writeConfig)) {
        // rollback 2nd commit and ensure stats reflect the info.
        List<HoodieRollbackStat> stats = testRun(useFileListingMetadata, writeConfig, writeClient);
        assertEquals(3, stats.size());
        for (HoodieRollbackStat stat : stats) {
            assertEquals(1, stat.getSuccessDeleteFiles().size());
            assertEquals(0, stat.getFailedDeleteFiles().size());
            assertEquals(0, stat.getCommandBlocksCount().size());
        }
    }
}
Also used : HoodieSparkEngineContext(org.apache.hudi.client.common.HoodieSparkEngineContext) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 24 with SparkRDDWriteClient

use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.

the class CompactionTestBase method executeCompactionWithReplacedFiles.

protected void executeCompactionWithReplacedFiles(String compactionInstantTime, SparkRDDWriteClient client, HoodieTable table, HoodieWriteConfig cfg, String[] partitions, Set<HoodieFileGroupId> replacedFileIds) throws IOException {
    client.compact(compactionInstantTime);
    List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table);
    assertTrue(fileSliceList.stream().findAny().isPresent(), "Ensure latest file-slices are not empty");
    assertFalse(fileSliceList.stream().anyMatch(fs -> replacedFileIds.contains(fs.getFileGroupId())), "Compacted files should not show up in latest slices");
    // verify that there is a commit
    table = getHoodieTable(HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).setLoadActiveTimelineOnLoad(true).build(), cfg);
    HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
    // verify compaction commit is visible in timeline
    assertTrue(timeline.filterCompletedInstants().getInstants().filter(instant -> compactionInstantTime.equals(instant.getTimestamp())).findFirst().isPresent());
    for (String partition : partitions) {
        table.getSliceView().getLatestFileSlicesBeforeOrOn(partition, compactionInstantTime, true).forEach(fs -> {
            // verify that all log files are merged
            assertEquals(0, fs.getLogFiles().count());
            assertTrue(fs.getBaseFile().isPresent());
        });
    }
}
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Option(org.apache.hudi.common.util.Option) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) HoodieStorageConfig(org.apache.hudi.config.HoodieStorageConfig) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) FileSystemViewStorageType(org.apache.hudi.common.table.view.FileSystemViewStorageType) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) WriteMarkersFactory(org.apache.hudi.table.marker.WriteMarkersFactory) TRIP_EXAMPLE_SCHEMA(org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) HoodieCompactionOperation(org.apache.hudi.avro.model.HoodieCompactionOperation) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieClientTestUtils(org.apache.hudi.testutils.HoodieClientTestUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline)

Example 25 with SparkRDDWriteClient

use of org.apache.hudi.client.SparkRDDWriteClient in project hudi by apache.

the class TestAsyncCompaction method testRollbackInflightIngestionWithPendingCompaction.

@Test
public void testRollbackInflightIngestionWithPendingCompaction() throws Exception {
    // Rollback inflight ingestion when there is pending compaction
    HoodieWriteConfig cfg = getConfig(false);
    String firstInstantTime = "001";
    String secondInstantTime = "004";
    String compactionInstantTime = "005";
    String inflightInstantTime = "006";
    String nextInflightInstantTime = "007";
    int numRecs = 2000;
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieReadClient readClient = getHoodieReadClient(cfg.getBasePath());
        List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
        records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true, new ArrayList<>());
        // Schedule compaction but do not run them
        scheduleCompaction(compactionInstantTime, client, cfg);
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        HoodieInstant pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
        HoodieInstant inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
        assertEquals(inflightInstantTime, inflightInstant.getTimestamp(), "inflight instant has expected instant time");
        // This should rollback
        client.startCommitWithTime(nextInflightInstantTime);
        // Validate
        metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build();
        inflightInstant = metaClient.getActiveTimeline().filterPendingExcludingCompaction().firstInstant().get();
        assertEquals(inflightInstant.getTimestamp(), nextInflightInstantTime, "inflight instant has expected instant time");
        assertEquals(1, metaClient.getActiveTimeline().filterPendingExcludingCompaction().getInstants().count(), "Expect only one inflight instant");
        // Expect pending Compaction to be present
        pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
        assertEquals(compactionInstantTime, pendingCompactionInstant.getTimestamp(), "Pending Compaction instant has expected instant time");
    }
}
Also used : HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Test(org.junit.jupiter.api.Test)

Aggregations

SparkRDDWriteClient (org.apache.hudi.client.SparkRDDWriteClient)143 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)127 HoodieRecord (org.apache.hudi.common.model.HoodieRecord)113 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)86 Test (org.junit.jupiter.api.Test)80 WriteStatus (org.apache.hudi.client.WriteStatus)76 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)74 HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator)61 List (java.util.List)59 ArrayList (java.util.ArrayList)51 HoodieTable (org.apache.hudi.table.HoodieTable)51 Path (org.apache.hadoop.fs.Path)47 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)47 JavaRDD (org.apache.spark.api.java.JavaRDD)47 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)44 Collectors (java.util.stream.Collectors)43 Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals)43 HoodieCompactionConfig (org.apache.hudi.config.HoodieCompactionConfig)42 HashMap (java.util.HashMap)41 Properties (java.util.Properties)41