Search in sources :

Example 6 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TestSimpleConcurrentFileWritesConflictResolutionStrategy method testConcurrentWritesWithInterleavingSuccessfulReplace.

@Test
public void testConcurrentWritesWithInterleavingSuccessfulReplace() throws Exception {
    createCommit(HoodieActiveTimeline.createNewInstantTime());
    HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
    // consider commits before this are all successful
    Option<HoodieInstant> lastSuccessfulInstant = timeline.getCommitsTimeline().filterCompletedInstants().lastInstant();
    // writer 1 starts
    String currentWriterInstant = HoodieActiveTimeline.createNewInstantTime();
    createInflightCommit(currentWriterInstant);
    // replace 1 gets scheduled and finished
    String newInstantTime = HoodieActiveTimeline.createNewInstantTime();
    createReplace(newInstantTime, WriteOperationType.INSERT_OVERWRITE);
    Option<HoodieInstant> currentInstant = Option.of(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, currentWriterInstant));
    SimpleConcurrentFileWritesConflictResolutionStrategy strategy = new SimpleConcurrentFileWritesConflictResolutionStrategy();
    HoodieCommitMetadata currentMetadata = createCommitMetadata(currentWriterInstant);
    timeline = timeline.reload();
    List<HoodieInstant> candidateInstants = strategy.getCandidateInstants(timeline, currentInstant.get(), lastSuccessfulInstant).collect(Collectors.toList());
    // writer 1 conflicts with replace 1
    Assertions.assertTrue(candidateInstants.size() == 1);
    ConcurrentOperation thatCommitOperation = new ConcurrentOperation(candidateInstants.get(0), metaClient);
    ConcurrentOperation thisCommitOperation = new ConcurrentOperation(currentInstant.get(), currentMetadata);
    Assertions.assertTrue(strategy.hasConflict(thisCommitOperation, thatCommitOperation));
    try {
        strategy.resolveConflict(null, thisCommitOperation, thatCommitOperation);
        Assertions.fail("Cannot reach here, should have thrown a conflict");
    } catch (HoodieWriteConflictException e) {
    // expected
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieWriteConflictException(org.apache.hudi.exception.HoodieWriteConflictException) Test(org.junit.jupiter.api.Test)

Example 7 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TestSimpleConcurrentFileWritesConflictResolutionStrategy method testConcurrentWritesWithInterleavingSuccessfulCompaction.

@Test
public void testConcurrentWritesWithInterleavingSuccessfulCompaction() throws Exception {
    createCommit(HoodieActiveTimeline.createNewInstantTime());
    HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
    // consider commits before this are all successful
    Option<HoodieInstant> lastSuccessfulInstant = timeline.getCommitsTimeline().filterCompletedInstants().lastInstant();
    // writer 1 starts
    String currentWriterInstant = HoodieActiveTimeline.createNewInstantTime();
    createInflightCommit(currentWriterInstant);
    // compaction 1 gets scheduled and finishes
    String newInstantTime = HoodieActiveTimeline.createNewInstantTime();
    createCompaction(newInstantTime);
    Option<HoodieInstant> currentInstant = Option.of(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, currentWriterInstant));
    SimpleConcurrentFileWritesConflictResolutionStrategy strategy = new SimpleConcurrentFileWritesConflictResolutionStrategy();
    HoodieCommitMetadata currentMetadata = createCommitMetadata(currentWriterInstant);
    timeline = timeline.reload();
    List<HoodieInstant> candidateInstants = strategy.getCandidateInstants(timeline, currentInstant.get(), lastSuccessfulInstant).collect(Collectors.toList());
    // writer 1 conflicts with compaction 1
    Assertions.assertTrue(candidateInstants.size() == 1);
    ConcurrentOperation thatCommitOperation = new ConcurrentOperation(candidateInstants.get(0), metaClient);
    ConcurrentOperation thisCommitOperation = new ConcurrentOperation(currentInstant.get(), currentMetadata);
    Assertions.assertTrue(strategy.hasConflict(thisCommitOperation, thatCommitOperation));
    try {
        strategy.resolveConflict(null, thisCommitOperation, thatCommitOperation);
        Assertions.fail("Cannot reach here, should have thrown a conflict");
    } catch (HoodieWriteConflictException e) {
    // expected
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieWriteConflictException(org.apache.hudi.exception.HoodieWriteConflictException) Test(org.junit.jupiter.api.Test)

Example 8 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TestCleaner method testKeepLatestCommits.

/**
 * Test HoodieTable.clean() Cleaning by commit logic for COW table.
 */
@ParameterizedTest
@MethodSource("argumentsForTestKeepLatestCommits")
public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception {
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()).withCompactionConfig(HoodieCompactionConfig.newBuilder().withIncrementalCleaningMode(enableIncrementalClean).withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER).withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean).withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
    HoodieTestTable testTable = HoodieTestTable.of(metaClient);
    String p0 = "2020/01/01";
    String p1 = "2020/01/02";
    Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null;
    // make 1 commit, with 1 file per partition
    String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p0).get(0).getFileId() : UUID.randomUUID().toString();
    String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p1).get(0).getFileId() : UUID.randomUUID().toString();
    testTable.addInflightCommit("00000000000001").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
    HoodieCommitMetadata commitMetadata = generateCommitMetadata("00000000000001", Collections.unmodifiableMap(new HashMap<String, List<String>>() {

        {
            put(p0, CollectionUtils.createImmutableList(file1P0C0));
            put(p1, CollectionUtils.createImmutableList(file1P1C0));
        }
    }));
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    metaClient = HoodieTableMetaClient.reload(metaClient);
    List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
    assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files");
    assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
    assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
    // make next commit, with 1 insert & 1 update per partition
    Map<String, String> partitionAndFileId002 = testTable.addInflightCommit("00000000000002").getFileIdsWithBaseFilesInPartitions(p0, p1);
    String file2P0C1 = partitionAndFileId002.get(p0);
    String file2P1C1 = partitionAndFileId002.get(p1);
    testTable.forCommit("00000000000002").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
    commitMetadata = generateCommitMetadata("00000000000002", new HashMap<String, List<String>>() {

        {
            put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
            put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1));
        }
    });
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000002"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
    assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions and clean any files");
    assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
    assertTrue(testTable.baseFileExists(p1, "00000000000002", file2P1C1));
    assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
    assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
    // make next commit, with 2 updates to existing files, and 1 insert
    String file3P0C2 = testTable.addInflightCommit("00000000000003").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p0, file2P0C1).getFileIdsWithBaseFilesInPartitions(p0).get(p0);
    commitMetadata = generateCommitMetadata("00000000000003", CollectionUtils.createImmutableMap(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2)));
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000003"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, simulateFailureRetry);
    assertEquals(0, hoodieCleanStatsThree.size(), "Must not clean any file. We have to keep 1 version before the latest commit time to keep");
    assertTrue(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
    // make next commit, with 2 updates to existing files, and 1 insert
    String file4P0C3 = testTable.addInflightCommit("00000000000004").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p0, file2P0C1).getFileIdsWithBaseFilesInPartitions(p0).get(p0);
    commitMetadata = generateCommitMetadata("00000000000004", CollectionUtils.createImmutableMap(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3)));
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000004"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, simulateFailureRetry);
    // enableBootstrapSourceClean would delete the bootstrap base file as the same time
    HoodieCleanStat partitionCleanStat = getCleanStat(hoodieCleanStatsFour, p0);
    assertEquals(enableBootstrapSourceClean ? 2 : 1, partitionCleanStat.getSuccessDeleteFiles().size() + (partitionCleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0 : partitionCleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least one old file");
    assertFalse(testTable.baseFileExists(p0, "00000000000001", file1P0C0));
    assertTrue(testTable.baseFileExists(p0, "00000000000002", file1P0C0));
    assertTrue(testTable.baseFileExists(p0, "00000000000003", file1P0C0));
    assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
    assertTrue(testTable.baseFileExists(p0, "00000000000003", file2P0C1));
    assertTrue(testTable.baseFileExists(p0, "00000000000003", file3P0C2));
    assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3));
    if (enableBootstrapSourceClean) {
        assertFalse(Files.exists(Paths.get(bootstrapMapping.get(p0).get(0).getBootstrapFileStatus().getPath().getUri())));
    }
    // No cleaning on partially written file, with no commit.
    testTable.forCommit("00000000000005").withBaseFilesInPartition(p0, file3P0C2);
    commitMetadata = generateCommitMetadata("00000000000005", CollectionUtils.createImmutableMap(p0, CollectionUtils.createImmutableList(file3P0C2)));
    metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000005"));
    metaClient.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000005"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, simulateFailureRetry);
    HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsFive, p0);
    assertNull(cleanStat, "Must not clean any files");
    assertTrue(testTable.baseFileExists(p0, "00000000000002", file1P0C0));
    assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1));
    assertTrue(testTable.baseFileExists(p0, "00000000000005", file3P0C2));
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HashMap(java.util.HashMap) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) ArrayList(java.util.ArrayList) List(java.util.List) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 9 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TestCleaner method generateCommitMetadata.

protected static HoodieCommitMetadata generateCommitMetadata(String instantTime, Map<String, List<String>> partitionToFilePaths) {
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    partitionToFilePaths.forEach((partitionPath, fileList) -> fileList.forEach(f -> {
        HoodieWriteStat writeStat = new HoodieWriteStat();
        writeStat.setPartitionPath(partitionPath);
        writeStat.setPath(partitionPath + "/" + getBaseFilename(instantTime, f));
        writeStat.setFileId(f);
        writeStat.setTotalWriteBytes(1);
        writeStat.setFileSizeInBytes(1);
        metadata.addWriteStat(partitionPath, writeStat);
    }));
    return metadata;
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) Arrays(java.util.Arrays) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) CleanPlanner(org.apache.hudi.table.action.clean.CleanPlanner) HoodieFileStatus(org.apache.hudi.avro.model.HoodieFileStatus) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) Awaitility.await(org.awaitility.Awaitility.await) HoodieCleanerPlan(org.apache.hudi.avro.model.HoodieCleanerPlan) HoodieClusteringPlan(org.apache.hudi.avro.model.HoodieClusteringPlan) Set(java.util.Set) Arguments(org.junit.jupiter.params.provider.Arguments) HoodieIndex(org.apache.hudi.index.HoodieIndex) StandardCharsets(java.nio.charset.StandardCharsets) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) Stream(java.util.stream.Stream) FileSystemViewStorageConfig(org.apache.hudi.common.table.view.FileSystemViewStorageConfig) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) TableFileSystemView(org.apache.hudi.common.table.view.TableFileSystemView) HoodieClientTestBase(org.apache.hudi.testutils.HoodieClientTestBase) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) CleanPlanMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) Option(org.apache.hudi.common.util.Option) CleanPlanV1MigrationHandler(org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV1MigrationHandler) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) StringUtils(org.apache.hudi.common.util.StringUtils) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTestCommitGenerator.getBaseFilename(org.apache.hudi.HoodieTestCommitGenerator.getBaseFilename) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) JavaRDD(org.apache.spark.api.java.JavaRDD) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) HoodieMetadataConfig(org.apache.hudi.common.config.HoodieMetadataConfig) ValueSource(org.junit.jupiter.params.provider.ValueSource) ConsistencyGuardConfig(org.apache.hudi.common.fs.ConsistencyGuardConfig) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) Assertions.assertNoWriteErrors(org.apache.hudi.testutils.Assertions.assertNoWriteErrors) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) Files(java.nio.file.Files) HoodieTestTable(org.apache.hudi.common.testutils.HoodieTestTable) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieCompactionConfig(org.apache.hudi.config.HoodieCompactionConfig) HoodieClusteringGroup(org.apache.hudi.avro.model.HoodieClusteringGroup) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieTableMetadataWriter(org.apache.hudi.metadata.HoodieTableMetadataWriter) HoodieIOException(org.apache.hudi.exception.HoodieIOException) HoodieCleanPartitionMetadata(org.apache.hudi.avro.model.HoodieCleanPartitionMetadata) HoodieTestUtils(org.apache.hudi.common.testutils.HoodieTestUtils) HoodieFailedWritesCleaningPolicy(org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) HoodieTestTable.makeIncrementalCommitTimes(org.apache.hudi.common.testutils.HoodieTestTable.makeIncrementalCommitTimes) HoodieMetadataTestTable(org.apache.hudi.common.testutils.HoodieMetadataTestTable) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) SparkHoodieBackedTableMetadataWriter(org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter) Path(org.apache.hadoop.fs.Path) MethodSource(org.junit.jupiter.params.provider.MethodSource) IOType(org.apache.hudi.common.model.IOType) Predicate(java.util.function.Predicate) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) Collectors(java.util.stream.Collectors) Tuple3(scala.Tuple3) HoodieClusteringStrategy(org.apache.hudi.avro.model.HoodieClusteringStrategy) Test(org.junit.jupiter.api.Test) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) CleanMetadataMigrator(org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) DEFAULT_PARTITION_PATHS(org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) FileSlice(org.apache.hudi.common.model.FileSlice) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) HashSet(java.util.HashSet) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) TestBootstrapIndex(org.apache.hudi.common.bootstrap.TestBootstrapIndex) HoodieActionInstant(org.apache.hudi.avro.model.HoodieActionInstant) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) TimeUnit(java.util.concurrent.TimeUnit) WriteStatus(org.apache.hudi.client.WriteStatus) HoodieTestTable.makeNewCommitTime(org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) SparkHoodieIndexFactory(org.apache.hudi.index.SparkHoodieIndexFactory) HoodieSliceInfo(org.apache.hudi.avro.model.HoodieSliceInfo) LogManager(org.apache.log4j.LogManager) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat)

Example 10 with HoodieCommitMetadata

use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.

the class TestHoodieMergeOnReadTable method testMetadataStatsOnCommit.

/**
 * Test to ensure metadata stats are correctly written to metadata file.
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testMetadataStatsOnCommit(Boolean rollbackUsingMarkers) throws Exception {
    HoodieWriteConfig cfg = getConfigBuilder(false, rollbackUsingMarkers, IndexType.INMEMORY).withAutoCommit(false).build();
    try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) {
        HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient);
        // Create a commit without metadata stats in metadata to test backwards compatibility
        HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
        String commitActionType = table.getMetaClient().getCommitActionType();
        HoodieInstant instant = new HoodieInstant(State.REQUESTED, commitActionType, "000");
        activeTimeline.createNewInstant(instant);
        activeTimeline.transitionRequestedToInflight(instant, Option.empty());
        instant = new HoodieInstant(State.INFLIGHT, commitActionType, "000");
        activeTimeline.saveAsComplete(instant, Option.empty());
        String instantTime = "001";
        client.startCommitWithTime(instantTime);
        List<HoodieRecord> records = dataGen.generateInserts(instantTime, 200);
        JavaRDD<HoodieRecord> writeRecords = jsc().parallelize(records, 1);
        JavaRDD<WriteStatus> statuses = client.insert(writeRecords, instantTime);
        assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
        // Read from commit file
        table = HoodieSparkTable.create(cfg, context());
        HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        int inserts = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : pstat.getValue()) {
                inserts += stat.getNumInserts();
            }
        }
        assertEquals(200, inserts);
        instantTime = "002";
        client.startCommitWithTime(instantTime);
        records = dataGen.generateUpdates(instantTime, records);
        writeRecords = jsc().parallelize(records, 1);
        statuses = client.upsert(writeRecords, instantTime);
        // assertTrue(client.commit(instantTime, statuses), "Commit should succeed");
        inserts = 0;
        int upserts = 0;
        List<WriteStatus> writeStatusList = statuses.collect();
        for (WriteStatus ws : writeStatusList) {
            inserts += ws.getStat().getNumInserts();
            upserts += ws.getStat().getNumUpdateWrites();
        }
        // Read from commit file
        assertEquals(0, inserts);
        assertEquals(200, upserts);
        client.rollback(instantTime);
        // Read from commit file
        table = HoodieSparkTable.create(cfg, context());
        metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table.getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class);
        inserts = 0;
        upserts = 0;
        for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
            for (HoodieWriteStat stat : pstat.getValue()) {
                inserts += stat.getNumInserts();
                upserts += stat.getNumUpdateWrites();
            }
        }
        assertEquals(200, inserts);
        assertEquals(0, upserts);
    }
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) SparkRDDWriteClient(org.apache.hudi.client.SparkRDDWriteClient) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieRecord(org.apache.hudi.common.model.HoodieRecord) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) List(java.util.List) Map(java.util.Map) HashMap(java.util.HashMap) MetadataMergeWriteStatus(org.apache.hudi.testutils.MetadataMergeWriteStatus) WriteStatus(org.apache.hudi.client.WriteStatus) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)139 HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)64 ArrayList (java.util.ArrayList)54 HashMap (java.util.HashMap)49 List (java.util.List)48 HoodieWriteStat (org.apache.hudi.common.model.HoodieWriteStat)44 IOException (java.io.IOException)42 Test (org.junit.jupiter.api.Test)41 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)40 Map (java.util.Map)38 Path (org.apache.hadoop.fs.Path)36 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)34 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)34 File (java.io.File)26 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)26 Option (org.apache.hudi.common.util.Option)25 Schema (org.apache.avro.Schema)22 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)21 Collectors (java.util.stream.Collectors)20 HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile)20