Search in sources :

Example 91 with HoodieInstant

use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

the class TestIncrementalFSViewSync method testRestore.

private void testRestore(SyncableFileSystemView view, List<String> newRestoreInstants, Map<String, List<String>> instantsToFiles, List<HoodieInstant> rolledBackInstants, String emptyRestoreInstant, boolean isRestore, int totalReplacedFileSlicesPerPartition, int totalFilesAddedPerPartitionPerInstant) {
    assertEquals(newRestoreInstants.size(), rolledBackInstants.size());
    long initialFileSlices = partitions.stream().mapToLong(p -> view.getAllFileSlices(p).count()).findAny().getAsLong();
    final int numFileSlicesAddedPerInstant = (totalFilesAddedPerPartitionPerInstant - totalReplacedFileSlicesPerPartition);
    final long expectedLatestFileSlices = fileIdsPerPartition.size() + (rolledBackInstants.size()) * numFileSlicesAddedPerInstant;
    IntStream.range(0, newRestoreInstants.size()).forEach(idx -> {
        HoodieInstant instant = rolledBackInstants.get(idx);
        try {
            boolean isDeltaCommit = HoodieTimeline.DELTA_COMMIT_ACTION.equalsIgnoreCase(instant.getAction());
            performRestore(instant, instantsToFiles.get(instant.getTimestamp()), newRestoreInstants.get(idx), isRestore);
            final long expTotalFileSlicesPerPartition = isDeltaCommit ? initialFileSlices : initialFileSlices - ((idx + 1) * (fileIdsPerPartition.size() - totalReplacedFileSlicesPerPartition));
            view.sync();
            assertTrue(view.getLastInstant().isPresent());
            LOG.info("Last Instant is :" + view.getLastInstant().get());
            if (isRestore) {
                assertEquals(newRestoreInstants.get(idx), view.getLastInstant().get().getTimestamp());
                assertEquals(HoodieTimeline.RESTORE_ACTION, view.getLastInstant().get().getAction());
            }
            assertEquals(State.COMPLETED, view.getLastInstant().get().getState());
            if (HoodieTimeline.compareTimestamps(newRestoreInstants.get(idx), HoodieTimeline.GREATER_THAN_OR_EQUALS, emptyRestoreInstant)) {
                partitions.forEach(p -> assertEquals(0, view.getLatestFileSlices(p).count()));
            } else {
                partitions.forEach(p -> assertEquals(expectedLatestFileSlices - (idx + 1) * numFileSlicesAddedPerInstant, view.getLatestFileSlices(p).count()));
            }
            partitions.forEach(p -> assertEquals(expTotalFileSlicesPerPartition, view.getAllFileSlices(p).count()));
            metaClient.reloadActiveTimeline();
            SyncableFileSystemView newView = getFileSystemView(metaClient);
            areViewsConsistent(view, newView, expTotalFileSlicesPerPartition * partitions.size());
        } catch (IOException e) {
            throw new HoodieException(e);
        }
    });
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) IOException(java.io.IOException) HoodieIOException(org.apache.hudi.exception.HoodieIOException)

Example 92 with HoodieInstant

use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

the class TestIncrementalFSViewSync method addInstant.

private List<String> addInstant(HoodieTableMetaClient metaClient, String instant, boolean deltaCommit, String baseInstant) throws IOException {
    List<Pair<String, HoodieWriteStat>> writeStats = generateDataForInstant(baseInstant, instant, deltaCommit);
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    writeStats.forEach(e -> metadata.addWriteStat(e.getKey(), e.getValue()));
    HoodieInstant inflightInstant = new HoodieInstant(true, deltaCommit ? HoodieTimeline.DELTA_COMMIT_ACTION : HoodieTimeline.COMMIT_ACTION, instant);
    metaClient.getActiveTimeline().createNewInstant(inflightInstant);
    metaClient.getActiveTimeline().saveAsComplete(inflightInstant, Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    /*
    // Delete pending compaction if present
    metaClient.getFs().delete(new Path(metaClient.getMetaPath(),
        new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instant).getFileName()));
     */
    return writeStats.stream().map(e -> e.getValue().getPath()).collect(Collectors.toList());
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) BeforeEach(org.junit.jupiter.api.BeforeEach) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieException(org.apache.hudi.exception.HoodieException) CollectionUtils(org.apache.hudi.common.util.CollectionUtils) COMPACTION_ACTION(org.apache.hudi.common.table.timeline.HoodieTimeline.COMPACTION_ACTION) Logger(org.apache.log4j.Logger) HoodieTableType(org.apache.hudi.common.model.HoodieTableType) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) Map(java.util.Map) HoodieRollbackMetadata(org.apache.hudi.avro.model.HoodieRollbackMetadata) Path(org.apache.hadoop.fs.Path) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) Set(java.util.Set) TimelineMetadataUtils(org.apache.hudi.common.table.timeline.TimelineMetadataUtils) UUID(java.util.UUID) HoodieCommonTestHarness(org.apache.hudi.common.testutils.HoodieCommonTestHarness) Collectors(java.util.stream.Collectors) StandardCharsets(java.nio.charset.StandardCharsets) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) Test(org.junit.jupiter.api.Test) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) List(java.util.List) HoodieWriteStat(org.apache.hudi.common.model.HoodieWriteStat) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) HoodieCompactionPlan(org.apache.hudi.avro.model.HoodieCompactionPlan) HoodieRestoreMetadata(org.apache.hudi.avro.model.HoodieRestoreMetadata) WriteOperationType(org.apache.hudi.common.model.WriteOperationType) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) IntStream(java.util.stream.IntStream) HoodieCleaningPolicy(org.apache.hudi.common.model.HoodieCleaningPolicy) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) HashMap(java.util.HashMap) State(org.apache.hudi.common.table.timeline.HoodieInstant.State) ArrayList(java.util.ArrayList) HoodieRequestedReplaceMetadata(org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata) CleanerUtils(org.apache.hudi.common.util.CleanerUtils) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieCleanStat(org.apache.hudi.common.HoodieCleanStat) Files(java.nio.file.Files) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) IOException(java.io.IOException) File(java.io.File) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieCleanMetadata(org.apache.hudi.avro.model.HoodieCleanMetadata) Paths(java.nio.file.Paths) HoodieIOException(org.apache.hudi.exception.HoodieIOException) LogManager(org.apache.log4j.LogManager) HoodieRollbackStat(org.apache.hudi.common.HoodieRollbackStat) Comparator(java.util.Comparator) Collections(java.util.Collections) FSUtils(org.apache.hudi.common.fs.FSUtils) Pair(org.apache.hudi.common.util.collection.Pair) Pair(org.apache.hudi.common.util.collection.Pair)

Example 93 with HoodieInstant

use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

the class TestIncrementalFSViewSync method testIngestion.

@Test
public void testIngestion() throws IOException {
    SyncableFileSystemView view = getFileSystemView(metaClient);
    // Add an empty ingestion
    String firstEmptyInstantTs = "11";
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs));
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    view.sync();
    assertTrue(view.getLastInstant().isPresent());
    assertEquals("11", view.getLastInstant().get().getTimestamp());
    assertEquals(State.COMPLETED, view.getLastInstant().get().getState());
    assertEquals(HoodieTimeline.COMMIT_ACTION, view.getLastInstant().get().getAction());
    partitions.forEach(p -> assertEquals(0, view.getLatestFileSlices(p).count()));
    metaClient.reloadActiveTimeline();
    SyncableFileSystemView newView = getFileSystemView(metaClient);
    areViewsConsistent(view, newView, 0L);
    // Add 3 non-empty ingestions to COW table
    Map<String, List<String>> instantsToFiles = testMultipleWriteSteps(view, Arrays.asList("12", "13", "14"));
    // restore instants in reverse order till we rollback all
    testRestore(view, Arrays.asList("15", "16", "17"), instantsToFiles, Arrays.asList(getHoodieCommitInstant("14", false), getHoodieCommitInstant("13", false), getHoodieCommitInstant("12", false)), "17", true);
    // Add 5 non-empty ingestions back-to-back
    instantsToFiles = testMultipleWriteSteps(view, Arrays.asList("18", "19", "20"));
    // Clean instants.
    testCleans(view, Arrays.asList("21", "22"), instantsToFiles, Arrays.asList("18", "19"), 0, 0);
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.jupiter.api.Test)

Example 94 with HoodieInstant

use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

the class TestIncrementalFSViewSync method testReplaceCommits.

@Test
public void testReplaceCommits() throws IOException {
    SyncableFileSystemView view = getFileSystemView(metaClient);
    // Add an empty ingestion
    String firstEmptyInstantTs = "11";
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs));
    metaClient.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, firstEmptyInstantTs), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    view.sync();
    assertTrue(view.getLastInstant().isPresent());
    assertEquals("11", view.getLastInstant().get().getTimestamp());
    assertEquals(State.COMPLETED, view.getLastInstant().get().getState());
    assertEquals(HoodieTimeline.COMMIT_ACTION, view.getLastInstant().get().getAction());
    partitions.forEach(p -> assertEquals(0, view.getLatestFileSlices(p).count()));
    metaClient.reloadActiveTimeline();
    SyncableFileSystemView newView = getFileSystemView(metaClient);
    areViewsConsistent(view, newView, 0L);
    // Add 1 non-empty ingestions to COW table
    Map<String, List<String>> instantsToFiles = testMultipleWriteSteps(view, Arrays.asList("12"));
    // ADD replace instants
    testMultipleReplaceSteps(instantsToFiles, view, Arrays.asList("13", "14"), NUM_FILE_IDS_PER_PARTITION);
    // restore instants in reverse order till we rollback all replace instants
    testRestore(view, Arrays.asList("15", "16"), instantsToFiles, Arrays.asList(getHoodieReplaceInstant("14"), getHoodieReplaceInstant("13")), "17", true, 1, fileIdsPerPartition.size());
    // clear files from inmemory view for replaced instants
    instantsToFiles.remove("14");
    instantsToFiles.remove("13");
    // add few more replace instants
    testMultipleReplaceSteps(instantsToFiles, view, Arrays.asList("18", "19", "20"), NUM_FILE_IDS_PER_PARTITION);
    // Clean instants.
    testCleans(view, Arrays.asList("21", "22"), instantsToFiles, Arrays.asList("18", "19"), NUM_FILE_IDS_PER_PARTITION, 1);
}
Also used : HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.jupiter.api.Test)

Example 95 with HoodieInstant

use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.

the class TestTimelineUtils method testGetPartitions.

@Test
public void testGetPartitions() throws IOException {
    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline();
    assertTrue(activeCommitTimeline.empty());
    // older partitions that is modified by all cleans
    String olderPartition = "0";
    for (int i = 1; i <= 5; i++) {
        String ts = i + "";
        HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, ts);
        activeTimeline.createNewInstant(instant);
        activeTimeline.saveAsComplete(instant, Option.of(getCommitMetadata(basePath, ts, ts, 2, Collections.emptyMap())));
        HoodieInstant cleanInstant = new HoodieInstant(true, HoodieTimeline.CLEAN_ACTION, ts);
        activeTimeline.createNewInstant(cleanInstant);
        activeTimeline.saveAsComplete(cleanInstant, getCleanMetadata(olderPartition, ts));
    }
    metaClient.reloadActiveTimeline();
    // verify modified partitions included cleaned data
    List<String> partitions = TimelineUtils.getAffectedPartitions(metaClient.getActiveTimeline().findInstantsAfter("1", 10));
    assertEquals(5, partitions.size());
    assertEquals(partitions, Arrays.asList(new String[] { "0", "2", "3", "4", "5" }));
    partitions = TimelineUtils.getAffectedPartitions(metaClient.getActiveTimeline().findInstantsInRange("1", "4"));
    assertEquals(4, partitions.size());
    assertEquals(partitions, Arrays.asList(new String[] { "0", "2", "3", "4" }));
    // verify only commit actions
    partitions = TimelineUtils.getPartitionsWritten(metaClient.getActiveTimeline().findInstantsAfter("1", 10));
    assertEquals(4, partitions.size());
    assertEquals(partitions, Arrays.asList(new String[] { "2", "3", "4", "5" }));
    partitions = TimelineUtils.getPartitionsWritten(metaClient.getActiveTimeline().findInstantsInRange("1", "4"));
    assertEquals(3, partitions.size());
    assertEquals(partitions, Arrays.asList(new String[] { "2", "3", "4" }));
}
Also used : HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieActiveTimeline(org.apache.hudi.common.table.timeline.HoodieActiveTimeline) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Test(org.junit.jupiter.api.Test)

Aggregations

HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant)323 HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline)129 ArrayList (java.util.ArrayList)118 List (java.util.List)116 IOException (java.io.IOException)112 HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient)104 Test (org.junit.jupiter.api.Test)97 HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata)96 HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline)89 Map (java.util.Map)84 Option (org.apache.hudi.common.util.Option)84 HoodieWriteConfig (org.apache.hudi.config.HoodieWriteConfig)84 Collectors (java.util.stream.Collectors)83 HashMap (java.util.HashMap)81 Path (org.apache.hadoop.fs.Path)78 Pair (org.apache.hudi.common.util.collection.Pair)71 Logger (org.apache.log4j.Logger)67 LogManager (org.apache.log4j.LogManager)66 HoodieIOException (org.apache.hudi.exception.HoodieIOException)65 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)61