Example 6 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestCleaner, method generateCommitMetadata.

protected static HoodieCommitMetadata generateCommitMetadata(String instantTime, Map<String, List<String>> partitionToFilePaths) {
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    partitionToFilePaths.forEach((partitionPath, fileList) -> fileList.forEach(f -> {
        // One write stat per file id, with a synthetic path and a one-byte size
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partitionPath);
        writeStat.setPath(partitionPath + "/" + getBaseFilename(instantTime, f));
        writeStat.setFileId(f);
        writeStat.setTotalWriteBytes(1);
        writeStat.setFileSizeInBytes(1);
        metadata.addWriteStat(partitionPath, writeStat);
    }));
    return metadata;
}
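For context, a minimal sketch (the instant time, partition path, and file id are hypothetical) of how this helper might be invoked to fabricate commit metadata for a single partition:

Map<String, List<String>> partitionToFilePaths = new HashMap<>();
partitionToFilePaths.put("2016/03/15", Arrays.asList(UUID.randomUUID().toString()));
// One HoodieWriteStat is produced per file id, with its path derived from the instant time
HoodieCommitMetadata commitMetadata = generateCommitMetadata("001", partitionToFilePaths);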
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Arrays(java.util.Arrays) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) CleanPlanner(org.apache.hudi.table.action.clean.CleanPlanner) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) Awaitility.await(org.awaitility.Awaitility.await) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) CleanPlanMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) Option(org.apache.hudi.common.util.Option) CleanPlanV1MigrationHandler(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV1MigrationHandler) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) StringUtils(org.apache.hudi.common.util.StringUtils) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTestCommitGenerator.getBaseFilename(org.apache.hudi.HoodieTestCommitGenerator.getBaseFilename) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) ConsistencyGuardConfig(org.apache.hudi.common.fs.ConsistencyGuardConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Files(java.nio.file.Files) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) IOException(java.io.IOException) File(java.io.File) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) HoodieTestTable.makeIncrementalCommitTimes(org.apache.hudi.common.testutils.HoodieTestTable.makeIncrementalCommitTimes) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) Path(org.apache.hadoop.fs.Path) MethodSource(org.junit.jupiter.params.provider.MethodSource) IOType(org.apache.hudi.common.model.IOType) Predicate(java.util.function.Predicate) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Tuple3(scala.Tuple3) HoodieClusteringStrategy(org.apache.hudi.avro.model.HoodieClusteringStrategy) Test(org.junit.jupiter.api.Test) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) CleanMetadataMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) DEFAULT_PARTITION_PATHS(org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) FileSlice(org.apache.hudi.common.model.FileSlice) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HashSet(java.util.HashSet) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) TestBootstrapIndex(org.apache.hudi.common.bootstrap.TestBootstrapIndex) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) TimeUnit(java.util.concurrent.TimeUnit) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieTestTable.makeNewCommitTime(org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) SparkHoodieIndexFactory(org.apache.hudi.index.SparkHoodieIndexFactory) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair)

Example 7 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestHoodieMergeOnReadTable, method testMetadataStatsOnCommit.

/**
 * Test to ensure metadata stats are correctly written to the metadata file.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testMetadataStatsOnCommit(Boolean rollbackUsingMarkers) throws Exception {
    HoodieWriteConfig cfg = getConfigBuilder(false, rollbackUsingMarkers, IndexType.INMEMORY).withAutoCommit(false).build();
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient);
        // Create a commit without metadata stats to test backwards compatibility
        HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
        String commitActionType = table.getMetaClient().getCommitActionType();
        HoodieInstant instant = new HoodieInstant(State.REQUESTED, commitActionType, "000");
        activeTimeline.createNewInstant(instant);
        activeTimeline.transitionRequestedToInflight(instant, Option.empty());
        instant = new HoodieInstant(State.INFLIGHT, commitActionType, "000");
        activeTimeline.saveAsComplete(instant, Option.empty());
        String instantTime = "001";
        client.startCommitWithTime(instantTime);
        List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
        JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
        JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
        assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
        // Read from commit file
        table = HoodieSparkTable.create(cfg, context());
        HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        int inserts = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : pstat.getValue()) {
                inserts += stat.getNumInserts();
            }
        }
        assertEquals(200, inserts);
        instantTime = "002";
        client.startCommitWithTime(instantTime);
        records = dataGen.generateUpdates(instantTime, records);
        writeRecords = jsc().parallelize(records, 1);
        statuses = client.upsert(writeRecords, instantTime);
        // Intentionally not committed here, so the upsert below can be rolled back
        inserts = 0;
        int upserts = 0;
        List<WriteStatus> writeStatusList = statuses.collect();
        for (WriteStatus ws : writeStatusList) {
            inserts += ws.getStat().getNumInserts();
            upserts += ws.getStat().getNumUpdateWrites();
        }
        // Verify stats from the uncommitted write statuses
        assertEquals(0, inserts);
        assertEquals(200, upserts);
        client.rollback(instantTime);
        // Read from commit file
        table = HoodieSparkTable.create(cfg, context());
        metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        inserts = 0;
        upserts = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : pstat.getValue()) {
                inserts += stat.getNumInserts();
                upserts += stat.getNumUpdateWrites();
            }
        }
        assertEquals(200, inserts);
        assertEquals(0, upserts);
    }
}
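The tallying loop above (walking getPartitionToWriteStats() and summing per-stat counters) recurs throughout these tests. A hedged, stream-based condensation of the same pattern; the helper name totalInserts is hypothetical, while the HoodieCommitMetadata and HoodieWriteStat calls are exactly those used above:

// Sums getNumInserts() across every partition's write stats in one commit
private static long totalInserts(HoodieCommitMetadata metadata) {
    return metadata.getPartitionToWriteStats().values().stream()
            .flatMap(List::stream)
            .mapToLong(HoodieWriteStat::getNumInserts)
            .sum();
}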
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) List(java.util.List) Map(java.util.Map) HashMap(java.util.HashMap) MetadataMergeWriteStatus(org.apache.hudi.testutils.MetadataMergeWriteStatus) WriteStatus(org.apache.hudi.client.WriteStatus) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 8 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestHoodieMergeOnReadTable, method testRollingStatsWithSmallFileHandling.

/**
 * Test to ensure rolling stats are correctly written to the metadata file, and that small files are identified and corrected on subsequent writes.
 */
@Test
public void testRollingStatsWithSmallFileHandling() throws Exception {
    HoodieWriteConfig cfg = getConfigBuilder(false, IndexType.INMEMORY).withAutoCommit(false).build();
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        Map<String, Long> fileIdToInsertsMap = new HashMap<>();
        Map<String, Long> fileIdToUpsertsMap = new HashMap<>();
        String instantTime = "000";
        client.startCommitWithTime(instantTime);
        List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
        JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
        JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
        assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
        // Read from commit file
        HoodieTable table = HoodieSparkTable.create(cfg, context());
        HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        int inserts = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : pstat.getValue()) {
                inserts += stat.getNumInserts();
                fileIdToInsertsMap.put(stat.getFileId(), stat.getNumInserts());
                fileIdToUpsertsMap.put(stat.getFileId(), stat.getNumUpdateWrites());
            }
        }
        assertEquals(200, inserts);
        instantTime = "001";
        client.startCommitWithTime(instantTime);
        // Generate updates + inserts; the inserts should be routed into existing small files
        records = dataGen.generateUpdates(instantTime, records);
        records.addAll(dataGen.generateInserts(instantTime, 200));
        writeRecords = jsc().parallelize(records, 1);
        statuses = client.upsert(writeRecords, instantTime);
        assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
        // Read from commit file
        table = HoodieSparkTable.create(cfg, context());
        metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        inserts = 0;
        int upserts = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : pstat.getValue()) {
                assertTrue(fileIdToInsertsMap.containsKey(stat.getFileId()));
                assertTrue(fileIdToUpsertsMap.containsKey(stat.getFileId()));
                inserts += stat.getNumInserts();
                upserts += stat.getNumUpdateWrites();
            }
        }
        assertEquals(200, inserts);
        assertEquals(200, upserts);
        // Test small file handling after compaction
        instantTime = "002";
        client.scheduleCompactionAtInstant(instantTime, Option.of(metadata.getExtraMetadata()));
        HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(instantTime);
        statuses = compactionMetadata.getWriteStatuses();
        client.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
        // Read from commit file
        table = HoodieSparkTable.create(cfg, context());
        HoodieCommitMetadata metadata1 = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getCommitsTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        // Ensure that the metadata stats from the extra metadata of delta commits are copied over to the compaction commit
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            assertTrue(metadata1.getPartitionToWriteStats().containsKey(pstat.getKey()));
            assertEquals(metadata1.getPartitionToWriteStats().get(pstat.getKey()).size(), pstat.getValue().size());
        }
        // Write inserts + updates
        instantTime = "003";
        client.startCommitWithTime(instantTime);
        // Generate updates + inserts; the inserts should be routed into existing small files
        records = dataGen.generateUpdates(instantTime, records);
        records.addAll(dataGen.generateInserts(instantTime, 200));
        writeRecords = jsc().parallelize(records, 1);
        statuses = client.upsert(writeRecords, instantTime);
        assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
        // Read from commit file
        table = HoodieSparkTable.create(cfg, context());
        metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        inserts = 0;
        upserts = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : pstat.getValue()) {
                assertTrue(fileIdToInsertsMap.containsKey(stat.getFileId()));
                inserts += stat.getNumInserts();
                upserts += stat.getNumUpdateWrites();
            }
        }
        assertEquals(200, inserts);
        assertEquals(400, upserts);
    }
}
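Both of the preceding tests repeatedly deserialize the latest delta commit from the active timeline. A hedged helper capturing that step (the helper name is hypothetical; the timeline and fromBytes calls match those used above):

// Reads and deserializes the commit metadata of the most recent delta commit
private static HoodieCommitMetadata lastDeltaCommitMetadata(HoodieTable table) throws IOException {
    HoodieInstant last = table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get();
    byte[] details = table.getActiveTimeline().getInstantDetails(last).get();
    return HoodieCommitMetadata.fromBytes(details, HoodieCommitMetadata.class);
}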
Also used : SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HashMap(java.util.HashMap) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieJavaRDD(org.apache.hudi.data.HoodieJavaRDD) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) List(java.util.List) Map(java.util.Map) MetadataMergeWriteStatus(org.apache.hudi.testutils.MetadataMergeWriteStatus) WriteStatus(org.apache.hudi.client.WriteStatus) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 9 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestHoodieSparkMergeOnReadTableCompaction, method writeData.

private List<WriteStatus> writeData(String instant, int numRecords, boolean doCommit) {
    metaClient = HoodieTableMetaClient.reload(metaClient);
    JavaRDD<HoodieRecord> records = jsc().parallelize(dataGen.generateInserts(instant, numRecords), 2);
    metaClient = HoodieTableMetaClient.reload(metaClient);
    client.startCommitWithTime(instant);
    List<WriteStatus> writeStatuses = client.upsert(records, instant).collect();
    org.apache.hudi.testutils.Assertions.assertNoWriteErrors(writeStatuses);
    if (doCommit) {
        List<HoodieWriteStat> writeStats = writeStatuses.stream().map(WriteStatus::getStat).collect(Collectors.toList());
        boolean committed = client.commitStats(instant, writeStats, Option.empty(), metaClient.getCommitActionType());
        Assertions.assertTrue(committed);
    }
    metaClient = HoodieTableMetaClient.reload(metaClient);
    return writeStatuses;
}
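Hypothetical call sites for this helper, illustrating the doCommit flag: the first batch is completed through commitStats, while the second write succeeds but its instant is left uncommitted:

List<WriteStatus> committed = writeData("001", 100, true);  // instant "001" is committed via commitStats
List<WriteStatus> pending = writeData("002", 100, false);   // instant "002" stays inflight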
Also used : HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) WriteStatus(org.apache.hudi.client.WriteStatus) JavaRDD(org.apache.spark.api.java.JavaRDD)

Example 10 with HoodieWriteStat

Use of org.apache.hudi.common.model.HoodieWriteStat in project hudi by apache.

From the class TestHoodieSparkMergeOnReadTableInsertUpdateDelete, method testSimpleInsertsGeneratedIntoLogFiles.

@Test
public void testSimpleInsertsGeneratedIntoLogFiles() throws Exception {
    // Insert 100 records
    // Set the index type to INMEMORY to simulate the behavior of a global index
    HoodieWriteConfig config = getConfigBuilder(false, HoodieIndex.IndexType.INMEMORY).build();
    Properties properties = new Properties();
    properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().toString());
    HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties);
    try (SparkRDDWriteClient writeClient = getHoodieWriteClient(config)) {
        String newCommitTime = "100";
        writeClient.startCommitWithTime(newCommitTime);
        HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator();
        List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 100);
        JavaRDD<HoodieRecord> recordsRDD = jsc().parallelize(records, 1);
        JavaRDD<WriteStatus> statuses = writeClient.insert(recordsRDD, newCommitTime);
        writeClient.commit(newCommitTime, statuses);
        HoodieTable table = HoodieSparkTable.create(config, context(), metaClient);
        table.getHoodieView().sync();
        TableFileSystemView.SliceView tableRTFileSystemView = table.getSliceView();
        long numLogFiles = 0;
        for (String partitionPath : dataGen.getPartitionPaths()) {
            List<FileSlice> allSlices = tableRTFileSystemView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
            assertEquals(0, allSlices.stream().filter(fileSlice -> fileSlice.getBaseFile().isPresent()).count());
            assertTrue(allSlices.stream().anyMatch(fileSlice -> fileSlice.getLogFiles().count() > 0));
            long logFileCount = allSlices.stream().filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count();
            if (logFileCount > 0) {
                // check the log versions start from the base version
                assertTrue(allSlices.stream().map(slice -> slice.getLogFiles().findFirst().get().getLogVersion()).allMatch(version -> version.equals(HoodieLogFile.LOGFILE_BASE_VERSION)));
            }
            numLogFiles += logFileCount;
        }
        assertTrue(numLogFiles > 0);
        // Do a compaction
        String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString();
        HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeClient.compact(instantTime);
        String extension = table.getBaseFileExtension();
        Collection<List<HoodieWriteStat>> stats = compactionMetadata.getCommitMetadata().get().getPartitionToWriteStats().values();
        assertEquals(numLogFiles, stats.stream().flatMap(Collection::stream).filter(state -> state.getPath().contains(extension)).count());
        assertEquals(numLogFiles, stats.stream().mapToLong(Collection::size).sum());
        writeClient.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty());
    }
}
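The two assertions on the compaction stats above express one invariant: compaction rewrites each log-file slice into exactly one base file. A condensed restatement of that check, using the same variables as the test:

// Every HoodieWriteStat produced by the compaction should point at a base file
long baseFilesWritten = stats.stream()
        .flatMap(Collection::stream)
        .filter(stat -> stat.getPath().contains(extension))
        .count();
assertEquals(numLogFiles, baseFilesWritten);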
Also used : HoodieTable(org.apache.hudi.table.HoodieTable) HoodieMergeOnReadTestUtils(org.apache.hudi.testutils.HoodieMergeOnReadTestUtils) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) HoodieTestDataGenerator(org.apache.hudi.common.testutils.HoodieTestDataGenerator) Option(org.apache.hudi.common.util.Option) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieSparkTable(org.apache.hudi.table.HoodieSparkTable) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) HoodieTableConfig(org.apache.hudi.common.table.HoodieTableConfig) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) Tag(org.junit.jupiter.api.Tag) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieWriteMetadata(org.apache.hudi.table.action.HoodieWriteMetadata) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) JavaRDD(org.apache.spark.api.java.JavaRDD) MethodSource(org.junit.jupiter.params.provider.MethodSource) ValueSource(org.junit.jupiter.params.provider.ValueSource) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) Properties(java.util.Properties) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Collection(java.util.Collection) Arguments(org.junit.jupiter.params.provider.Arguments) Collectors(java.util.stream.Collectors) FileCreateUtils(org.apache.hudi.common.testutils.FileCreateUtils) HoodieIndex(org.apache.hudi.index.HoodieIndex) HoodieTableFileSystemView(org.apache.hudi.common.table.view.HoodieTableFileSystemView) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.jupiter.api.Test) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieFileFormat(org.apache.hudi.common.model.HoodieFileFormat) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) List(java.util.List) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) Stream(java.util.stream.Stream) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) SparkClientFunctionalTestHarness(org.apache.hudi.testutils.SparkClientFunctionalTestHarness) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) HoodieClientTestUtils(org.apache.hudi.testutils.HoodieClientTestUtils)

Aggregations

HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat): 74 uses
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 42 uses
List (java.util.List): 38 uses
ArrayList (java.util.ArrayList): 33 uses
HashMap (java.util.HashMap): 32 uses
Map (java.util.Map): 32 uses
Path (org.apache.hadoop.fs.Path): 28 uses
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 24 uses
HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 23 uses
IOException (java.io.IOException): 22 uses
Option (org.apache.hudi.common.util.Option): 19 uses
Collectors (java.util.stream.Collectors): 18 uses
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 18 uses
WriteStatus (org.apache.hudi.client.WriteStatus): 17 uses
HoodieReplaceCommitMetadata (org.apache.hudi.common.model.HoodieReplaceCommitMetadata): 17 uses
LogManager (org.apache.log4j.LogManager): 16 uses
Logger (org.apache.log4j.Logger): 16 uses
HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig): 15 uses
FileSlice (org.apache.hudi.common.model.FileSlice): 14 uses
HoodieRecord (org.apache.hudi.common.model.HoodieRecord): 14 uses
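Taken together, the aggregation counts reflect one dominant pattern: build or read a HoodieCommitMetadata, then walk its per-partition HoodieWriteStat lists. A self-contained sketch of that pattern (the class name and the choice of getTotalWriteBytes are illustrative only; the getters are those exercised in the examples above):

import java.util.List;
import java.util.Map;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;

public class WriteStatTotals {
    // Sums the bytes reported by every write stat in a commit
    static long totalWriteBytes(HoodieCommitMetadata metadata) {
        long total = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> entry : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : entry.getValue()) {
                total += stat.getTotalWriteBytes();
            }
        }
        return total;
    }
}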