Example 6 with BaseFileOnlyView

Use of org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView in the Apache Hudi project.

From the class TestHoodieClientOnCopyOnWriteStorage, method testSmallInsertHandlingForUpserts:

/**
 * Test scenario of a new file group getting added during upsert().
 */
@Test
public void testSmallInsertHandlingForUpserts() throws Exception {
    final String testPartitionPath = "2016/09/26";
    final int insertSplitLimit = 100;
    // set up the small file handling params
    // hold up to 200 records max
    HoodieWriteConfig config = getSmallInsertWriteConfig(insertSplitLimit, TRIP_EXAMPLE_SCHEMA, dataGen.getEstimatedFileSizeInBytes(150));
    dataGen = new HoodieTestDataGenerator(new String[] { testPartitionPath });
    SparkRDDWriteClient client = getHoodieWriteClient(config);
    BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient);
    // Inserts => will write file1
    String commitTime1 = "001";
    client.startCommitWithTime(commitTime1);
    // this writes ~500kb
    List<HoodieRecord> inserts1 = dataGen.generateInserts(commitTime1, insertSplitLimit);
    Set<String> keys1 = recordsToRecordKeySet(inserts1);
    JavaRDD<HoodieRecord> insertRecordsRDD1 = jsc.parallelize(inserts1, 1);
    List<WriteStatus> statuses = client.upsert(insertRecordsRDD1, commitTime1).collect();
    assertNoWriteErrors(statuses);
    assertEquals(1, statuses.size(), "Just 1 file needs to be added.");
    String file1 = statuses.get(0).getFileId();
    assertEquals(100, fileUtils.readRowKeys(hadoopConf, new Path(basePath, statuses.get(0).getStat().getPath())).size(), "file should contain 100 records");
    // Update + Inserts such that they just expand file1
    String commitTime2 = "002";
    client.startCommitWithTime(commitTime2);
    List<HoodieRecord> inserts2 = dataGen.generateInserts(commitTime2, 40);
    Set<String> keys2 = recordsToRecordKeySet(inserts2);
    List<HoodieRecord> insertsAndUpdates2 = new ArrayList<>();
    insertsAndUpdates2.addAll(inserts2);
    insertsAndUpdates2.addAll(dataGen.generateUpdates(commitTime2, inserts1));
    JavaRDD<HoodieRecord> insertAndUpdatesRDD2 = jsc.parallelize(insertsAndUpdates2, 1);
    statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect();
    assertNoWriteErrors(statuses);
    assertEquals(1, statuses.size(), "Just 1 file needs to be updated.");
    assertEquals(file1, statuses.get(0).getFileId(), "Existing file should be expanded");
    assertEquals(commitTime1, statuses.get(0).getStat().getPrevCommit(), "Existing file should be expanded");
    Path newFile = new Path(basePath, statuses.get(0).getStat().getPath());
    assertEquals(140, fileUtils.readRowKeys(hadoopConf, newFile).size(), "file should contain 140 records");
    List<GenericRecord> records = fileUtils.readAvroRecords(hadoopConf, newFile);
    for (GenericRecord record : records) {
        String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
        assertEquals(commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(), "only expect commit2");
        assertTrue(keys2.contains(recordKey) || keys1.contains(recordKey), "key expected to be part of commit2");
    }
    // Update + inserts such that file1 is updated and expanded, and a new file2 is created.
    String commitTime3 = "003";
    client.startCommitWithTime(commitTime3);
    List<HoodieRecord> insertsAndUpdates3 = dataGen.generateInserts(commitTime3, 200);
    Set<String> keys3 = recordsToRecordKeySet(insertsAndUpdates3);
    List<HoodieRecord> updates3 = dataGen.generateUpdates(commitTime3, inserts2);
    insertsAndUpdates3.addAll(updates3);
    JavaRDD<HoodieRecord> insertAndUpdatesRDD3 = jsc.parallelize(insertsAndUpdates3, 1);
    statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect();
    assertNoWriteErrors(statuses);
    assertEquals(2, statuses.size(), "2 files need to be committed.");
    HoodieTableMetaClient metadata = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
    HoodieTable table = getHoodieTable(metadata, config);
    BaseFileOnlyView fileSystemView = table.getBaseFileOnlyView();
    List<HoodieBaseFile> files = fileSystemView.getLatestBaseFilesBeforeOrOn(testPartitionPath, commitTime3).collect(Collectors.toList());
    int numTotalInsertsInCommit3 = 0;
    int numTotalUpdatesInCommit3 = 0;
    for (HoodieBaseFile file : files) {
        if (file.getFileName().contains(file1)) {
            assertEquals(commitTime3, file.getCommitTime(), "Existing file should be expanded");
            records = fileUtils.readAvroRecords(hadoopConf, new Path(file.getPath()));
            for (GenericRecord record : records) {
                String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                String recordCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString();
                if (recordCommitTime.equals(commitTime3)) {
                    if (keys2.contains(recordKey)) {
                        keys2.remove(recordKey);
                        numTotalUpdatesInCommit3++;
                    } else {
                        numTotalInsertsInCommit3++;
                    }
                }
            }
            assertEquals(0, keys2.size(), "All keys added in commit 2 must be updated in commit3 correctly");
        } else {
            assertEquals(commitTime3, file.getCommitTime(), "New file must be written for commit 3");
            records = fileUtils.readAvroRecords(hadoopConf, new Path(file.getPath()));
            for (GenericRecord record : records) {
                String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                assertEquals(commitTime3, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(), "only expect commit3");
                assertTrue(keys3.contains(recordKey), "key expected to be part of commit3");
            }
            numTotalInsertsInCommit3 += records.size();
        }
    }
    assertEquals(numTotalUpdatesInCommit3, inserts2.size(), "Total updates in commit3 must add up");
    assertEquals(numTotalInsertsInCommit3, keys3.size(), "Total inserts in commit3 must add up");
}
Also used : Path(org.apache.hadoop.fs.Path) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) ArrayList(java.util.ArrayList) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTable(org.apache.hudi.table.HoodieTable) BaseFileUtils(org.apache.hudi.common.util.BaseFileUtils) GenericRecord(org.apache.avro.generic.GenericRecord) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) WriteStatus(org.apache.hudi.client.WriteStatus) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Test(org.junit.jupiter.api.Test)
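Distilled from the test above, the following is a minimal sketch of the BaseFileOnlyView lookup pattern it exercises: obtain the view from a HoodieTable and collect the latest base files visible as of a given instant. The method name and the table, partitionPath and instantTime parameters are illustrative placeholders, not names taken from the test.

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;
import org.apache.hudi.table.HoodieTable;

class LatestBaseFilesSketch {

    // Returns the latest base file of each file group in the partition,
    // considering only base files committed at or before the given instant.
    static List<HoodieBaseFile> latestBaseFilesBeforeOrOn(HoodieTable table, String partitionPath, String instantTime) {
        BaseFileOnlyView view = table.getBaseFileOnlyView();
        return view.getLatestBaseFilesBeforeOrOn(partitionPath, instantTime)
                .collect(Collectors.toList());
    }
}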

Example 7 with BaseFileOnlyView

Use of org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView in the Apache Hudi project.

From the class TestHoodieMergeOnReadTable, method testHandleUpdateWithMultiplePartitions:

/**
 * Test to validate that invoking table.handleUpdate() with input records from multiple partitions will fail.
 */
@Test
public void testHandleUpdateWithMultiplePartitions() throws Exception {
    HoodieWriteConfig cfg = getConfig(true);
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        /**
         * Write 1 (only inserts, written as base file)
         */
        String newCommitTime = "001";
        client.startCommitWithTime(newCommitTime);
        List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 20);
        JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
        List<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime).collect();
        assertNoWriteErrors(statuses);
        HoodieSparkMergeOnReadTable hoodieTable = (HoodieSparkMergeOnReadTable) HoodieSparkTable.create(cfg, context(), metaClient);
        Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant();
        assertTrue(deltaCommit.isPresent());
        assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001");
        Option<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant();
        assertFalse(commit.isPresent());
        FileStatus[] allFiles = listAllBaseFilesInPath(hoodieTable);
        BaseFileOnlyView roView = getHoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
        Stream<HoodieBaseFile> dataFilesToRead = roView.getLatestBaseFiles();
        assertFalse(dataFilesToRead.findAny().isPresent());
        roView = getHoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
        dataFilesToRead = roView.getLatestBaseFiles();
        assertTrue(dataFilesToRead.findAny().isPresent(), "should list the base files we wrote in the delta commit");
        /**
         * Write 2 (only updates, written to .log file)
         */
        newCommitTime = "002";
        client.startCommitWithTime(newCommitTime);
        metaClient.reloadActiveTimeline();
        records = dataGen.generateUpdates(newCommitTime, records);
        writeRecords = jsc().parallelize(records, 1);
        statuses = client.upsert(writeRecords, newCommitTime).collect();
        assertNoWriteErrors(statuses);
        /**
         * Write 3 (only deletes, written to .log file)
         */
        final String newDeleteTime = "004";
        final String partitionPath = records.get(0).getPartitionPath();
        final String fileId = statuses.get(0).getFileId();
        client.startCommitWithTime(newDeleteTime);
        metaClient.reloadActiveTimeline();
        List<HoodieRecord> fewRecordsForDelete = dataGen.generateDeletesFromExistingRecords(records);
        JavaRDD<HoodieRecord> deleteRDD = jsc().parallelize(fewRecordsForDelete, 1);
        // initialize partitioner
        hoodieTable.getHoodieView().sync();
        BaseSparkDeltaCommitActionExecutor actionExecutor = new SparkDeleteDeltaCommitActionExecutor(context(), cfg, hoodieTable, newDeleteTime, HoodieJavaRDD.of(deleteRDD));
        actionExecutor.getUpsertPartitioner(new WorkloadProfile(buildProfile(deleteRDD)));
        final List<List<WriteStatus>> deleteStatus = jsc().parallelize(Arrays.asList(1)).map(x -> {
            return actionExecutor.handleUpdate(partitionPath, fileId, fewRecordsForDelete.iterator());
        }).map(Transformations::flatten).collect();
        // Verify there are errors because records are from multiple partitions (but handleUpdate is invoked for
        // a specific partition)
        WriteStatus status = deleteStatus.get(0).get(0);
        assertTrue(status.hasErrors());
        long numRecordsInPartition = fewRecordsForDelete.stream().filter(u -> u.getPartitionPath().equals(partitionPath)).count();
        assertEquals(fewRecordsForDelete.size() - numRecordsInPartition, status.getTotalErrorRecords());
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieClientTestHarness.buildProfile(org.apache.hudi.testutils.HoodieClientTestHarness.buildProfile) BeforeEach(org.junit.jupiter.api.BeforeEach) HoodieMergeOnReadTestUtils(org.apache.hudi.testutils.HoodieMergeOnReadTestUtils) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) StorageLevel(org.apache.spark.storage.StorageLevel) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) BaseSparkDeltaCommitActionExecutor(org.apache.hudi.table.action.deltacommit.BaseSparkDeltaCommitActionExecutor) Map(java.util.Map) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) Path(org.apache.hadoop.fs.Path) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) IndexType(org.apache.hudi.index.HoodieIndex.IndexType) Set(java.util.Set) Collectors(java.util.stream.Collectors) HoodieIndex(org.apache.hudi.index.HoodieIndex) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) SparkDeleteDeltaCommitActionExecutor(org.apache.hudi.table.action.deltacommit.SparkDeleteDeltaCommitActionExecutor) HoodieClientTestUtils(org.apache.hudi.testutils.HoodieClientTestUtils) MetadataMergeWriteStatus(org.apache.hudi.testutils.MetadataMergeWriteStatus) Dataset(org.apache.spark.sql.Dataset) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HoodieReadClient(org.apache.hudi.client.HoodieReadClient) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) ValueSource(org.junit.jupiter.params.provider.ValueSource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) Properties(java.util.Properties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) Row(org.apache.spark.sql.Row) JobConf(org.apache.hadoop.mapred.JobConf) WriteStatus(org.apache.hudi.client.WriteStatus) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) Transformations(org.apache.hudi.common.testutils.Transformations) SparkClientFunctionalTestHarness(org.apache.hudi.testutils.SparkClientFunctionalTestHarness) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) 
HoodieSparkWriteableTestTable(org.apache.hudi.testutils.HoodieSparkWriteableTestTable) HoodieClusteringConfig(org.apache.hudi.config.HoodieClusteringConfig)
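The BaseFileOnlyView usage in this example reduces to a read-optimized visibility check: build the view over a chosen timeline and ask whether it lists any base files. A minimal sketch, assuming a BaseFileOnlyView has already been constructed (in the test it comes from the getHoodieTableFileSystemView harness helper):

import java.util.stream.Stream;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;

class BaseFileVisibilitySketch {

    // True if the view lists at least one base file. In the test, the view built on
    // the completed commit timeline lists nothing after a pure delta commit, while
    // the view built on hoodieTable.getCompletedCommitsTimeline() does list the
    // base files that delta commit wrote.
    static boolean hasAnyBaseFile(BaseFileOnlyView roView) {
        Stream<HoodieBaseFile> latest = roView.getLatestBaseFiles();
        return latest.findAny().isPresent();
    }
}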

Example 8 with BaseFileOnlyView

Use of org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView in the Apache Hudi project.

From the class TestClientRollback, method testSavepointAndRollback:

/**
 * Test case for rollback-savepoint interaction.
 */
@Test
public void testSavepointAndRollback() throws Exception {
    HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1).build()).build();
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath);
        /**
         * Write 1 (only inserts)
         */
        String newCommitTime = "001";
        client.startCommitWithTime(newCommitTime);
        List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 200);
        JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
        List<WriteStatus> statuses = client.upsert(writeRecords, newCommitTime).collect();
        assertNoWriteErrors(statuses);
        /**
         * Write 2 (updates)
         */
        newCommitTime = "002";
        client.startCommitWithTime(newCommitTime);
        records = dataGen.generateUpdates(newCommitTime, records);
        statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
        // Verify there are no errors
        assertNoWriteErrors(statuses);
        client.savepoint("hoodie-unit-test", "test");
        /**
         * Write 3 (updates)
         */
        newCommitTime = "003";
        client.startCommitWithTime(newCommitTime);
        records = dataGen.generateUpdates(newCommitTime, records);
        statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
        // Verify there are no errors
        assertNoWriteErrors(statuses);
        HoodieWriteConfig config = getConfig();
        List<String> partitionPaths = FSUtils.getAllPartitionPaths(context, config.getMetadataConfig(), cfg.getBasePath());
        metaClient = HoodieTableMetaClient.reload(metaClient);
        HoodieSparkTable table = HoodieSparkTable.create(getConfig(), context, metaClient);
        final BaseFileOnlyView view1 = table.getBaseFileOnlyView();
        List<HoodieBaseFile> dataFiles = partitionPaths.stream().flatMap(s -> {
            return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"));
        }).collect(Collectors.toList());
        assertEquals(3, dataFiles.size(), "The data files for commit 003 should be present");
        dataFiles = partitionPaths.stream().flatMap(s -> {
            return view1.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"));
        }).collect(Collectors.toList());
        assertEquals(3, dataFiles.size(), "The data files for commit 002 should be present");
        /**
         * Write 4 (updates)
         */
        newCommitTime = "004";
        client.startCommitWithTime(newCommitTime);
        records = dataGen.generateUpdates(newCommitTime, records);
        statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
        // Verify there are no errors
        assertNoWriteErrors(statuses);
        metaClient = HoodieTableMetaClient.reload(metaClient);
        table = HoodieSparkTable.create(getConfig(), context, metaClient);
        final BaseFileOnlyView view2 = table.getBaseFileOnlyView();
        dataFiles = partitionPaths.stream().flatMap(s -> view2.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
        assertEquals(3, dataFiles.size(), "The data files for commit 004 should be present");
        // rolling back to a non-existent savepoint must not succeed
        assertThrows(HoodieRollbackException.class, () -> {
            client.restoreToSavepoint("001");
        }, "Rolling back to non-existent savepoint should not be allowed");
        // rollback to savepoint 002
        HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
        client.restoreToSavepoint(savepoint.getTimestamp());
        metaClient = HoodieTableMetaClient.reload(metaClient);
        table = HoodieSparkTable.create(getConfig(), context, metaClient);
        final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
        dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"))).collect(Collectors.toList());
        assertEquals(3, dataFiles.size(), "The data files for commit 002 be available");
        dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"))).collect(Collectors.toList());
        assertEquals(0, dataFiles.size(), "The data files for commit 003 should be rolled back");
        dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
        assertEquals(0, dataFiles.size(), "The data files for commit 004 should be rolled back");
    }
}
Also used : HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) Assertions.assertThrows(org.junit.jupiter.api.Assertions.assertThrows) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) Arrays(java.util.Arrays) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) HashMap(java.util.HashMap) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieRollbackException(org.apache.hudi.exception.HoodieRollbackException) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) BaseFileOnlyView(org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieRollbackPlan(org.apache.hudi.avro.model.HoodieRollbackPlan) Collectors(java.util.stream.Collectors) HoodieInstantInfo(org.apache.hudi.avro.model.HoodieInstantInfo) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieIndexConfig(org.apache.hudi.config.HoodieIndexConfig) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)
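The savepoint and rollback assertions above repeatedly count the base files that a given commit produced across all partitions. A minimal sketch of that helper pattern, assuming the view comes from HoodieSparkTable#getBaseFileOnlyView() and the partition paths from FSUtils.getAllPartitionPaths(...) as in the test:

import java.util.List;
import java.util.stream.Collectors;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView;

class BaseFilesPerCommitSketch {

    // Collects every base file whose commit time matches the given instant,
    // scanning all partitions through the BaseFileOnlyView.
    static List<HoodieBaseFile> baseFilesForCommit(BaseFileOnlyView view, List<String> partitionPaths, String commitTime) {
        return partitionPaths.stream()
                .flatMap(p -> view.getAllBaseFiles(p).filter(f -> f.getCommitTime().equals(commitTime)))
                .collect(Collectors.toList());
    }
}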

Aggregations

HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 8
BaseFileOnlyView (org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView): 8
Path (org.apache.hadoop.fs.Path): 7
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 7
ArrayList (java.util.ArrayList): 5
FileStatus (org.apache.hadoop.fs.FileStatus): 5
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 5
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 5
IOException (java.io.IOException): 4
List (java.util.List): 4
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 4
HoodieTestDataGenerator (org.apache.hudi.common.testutils.HoodieTestDataGenerator): 4
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 4
Arrays (java.util.Arrays): 3
HashMap (java.util.HashMap): 3
Map (java.util.Map): 3
Collectors (java.util.stream.Collectors): 3
Stream (java.util.stream.Stream): 3
GenericRecord (org.apache.avro.generic.GenericRecord): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3