Example 6 with HoodieTimeline

Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class TestHoodieTimelineArchiver, method testPendingClusteringWillBlockArchival.

@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testPendingClusteringWillBlockArchival(boolean enableMetadata) throws Exception {
    HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 5, 2);
    HoodieTestDataGenerator.createPendingReplaceFile(basePath, "00000000", wrapperFs.getConf());
    for (int i = 1; i < 8; i++) {
        testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 2);
        // run archival after each commit; nothing should be archived while the pending clustering instant exists
        Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
        List<HoodieInstant> originalCommits = commitsList.getKey();
        List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
        assertEquals(originalCommits, commitsAfterArchival);
    }
    HoodieTimeline timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
    assertEquals(7, timeline.countInstants(), "Since we have a pending clustering instant at 00000000, we should never archive any commit after 00000000");
}
Also used: HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieWriteConfig(org.apache.hudi.config.HoodieWriteConfig) List(java.util.List) ArrayList(java.util.ArrayList) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)
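
The archiver treats the earliest pending clustering (replacecommit) instant as a barrier, which is what the assertion above verifies. As a minimal sketch, assuming an initialized HoodieTableMetaClient (variable names are illustrative, not from the test), the same condition can be checked directly on the active timeline:

// Sketch: any pending replacecommit on the active timeline blocks archival of later commits.
HoodieTimeline pendingReplace = metaClient.getActiveTimeline().filterPendingReplaceTimeline();
if (!pendingReplace.empty()) {
    // the earliest pending instant is the archival barrier
    System.out.println("Archival barrier at instant " + pendingReplace.firstInstant().get().getTimestamp());
}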

Example 7 with HoodieTimeline

Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class TestHoodieTimelineArchiver, method verifyArchival.

private void verifyArchival(List<HoodieInstant> expectedArchivedInstants, List<HoodieInstant> expectedActiveInstants, List<HoodieInstant> commitsAfterArchival) {
    Collections.sort(expectedActiveInstants, Comparator.comparing(HoodieInstant::getTimestamp));
    Collections.sort(commitsAfterArchival, Comparator.comparing(HoodieInstant::getTimestamp));
    assertEquals(expectedActiveInstants, commitsAfterArchival);
    expectedArchivedInstants.forEach(entry -> assertFalse(commitsAfterArchival.contains(entry)));
    HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
    List<HoodieInstant> actualArchivedInstants = archivedTimeline.getInstants().collect(Collectors.toList());
    Collections.sort(actualArchivedInstants, Comparator.comparing(HoodieInstant::getTimestamp));
    Collections.sort(expectedArchivedInstants, Comparator.comparing(HoodieInstant::getTimestamp));
    assertEquals(expectedArchivedInstants, actualArchivedInstants);
    HoodieTimeline timeline = metaClient.getActiveTimeline();
    expectedArchivedInstants.forEach(entry -> {
        // safety check: archived instants (other than rollbacks) must be at or
        // before the start of the active timeline, so readers never miss them
        if (!HoodieTimeline.ROLLBACK_ACTION.equals(entry.getAction())) {
            assertTrue(timeline.containsOrBeforeTimelineStarts(entry.getTimestamp()), "Archived commits should always be safe");
        }
    });
}
Also used: HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieArchivedTimeline(org.apache.hudi.common.table.timeline.HoodieArchivedTimeline)
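
Reading the archived side back, as verifyArchival does, follows a simple pattern; a short sketch using the same calls, assuming the same metaClient:

// Sketch: collect all archived instant timestamps in sorted order.
HoodieArchivedTimeline archived = new HoodieArchivedTimeline(metaClient);
List<String> archivedTimes = archived.getInstants()
        .map(HoodieInstant::getTimestamp)
        .sorted()
        .collect(Collectors.toList());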

Example 8 with HoodieTimeline

Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class HoodieClusteringJob, method doScheduleAndCluster.

private int doScheduleAndCluster(JavaSparkContext jsc) throws Exception {
    LOG.info("Step 1: Do schedule");
    String schemaStr = getSchemaFromLatestInstant();
    try (SparkRDDWriteClient<HoodieRecordPayload> client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) {
        Option<String> instantTime = Option.empty();
        if (cfg.retryLastFailedClusteringJob) {
            HoodieSparkTable<HoodieRecordPayload> table = HoodieSparkTable.create(client.getConfig(), client.getEngineContext());
            HoodieTimeline inflightHoodieTimeline = table.getActiveTimeline().filterPendingReplaceTimeline().filterInflights();
            if (!inflightHoodieTimeline.empty()) {
                HoodieInstant inflightClusteringInstant = inflightHoodieTimeline.lastInstant().get();
                Date clusteringStartTime = HoodieActiveTimeline.parseDateFromInstantTime(inflightClusteringInstant.getTimestamp());
                if (clusteringStartTime.getTime() + cfg.maxProcessingTimeMs < System.currentTimeMillis()) {
                    // If a previous clustering attempt failed, reuse its instant time so the next clustering action rolls it back and retries.
                    LOG.info("Found failed clustering instant at: " + inflightClusteringInstant + "; will rollback the failed clustering and re-trigger.");
                    instantTime = Option.of(inflightClusteringInstant.getTimestamp());
                } else {
                    LOG.info(inflightClusteringInstant + " might still be in progress, will trigger a new clustering job.");
                }
            }
        }
        instantTime = instantTime.isPresent() ? instantTime : doSchedule(client);
        if (!instantTime.isPresent()) {
            LOG.info("Couldn't generate cluster plan");
            return -1;
        }
        LOG.info("The schedule instant time is " + instantTime.get());
        LOG.info("Step 2: Do cluster");
        Option<HoodieCommitMetadata> metadata = client.cluster(instantTime.get(), true).getCommitMetadata();
        return UtilHelpers.handleErrors(metadata.get(), instantTime.get());
    }
}
Also used: HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) HoodieCommitMetadata(org.apache.hudi.common.model.HoodieCommitMetadata) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) HoodieRecordPayload(org.apache.hudi.common.model.HoodieRecordPayload) Date(java.util.Date)
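
Stripped of the retry handling, the method is a schedule-then-execute pattern. A hedged sketch of the happy path, assuming a configured SparkRDDWriteClient; scheduleClustering is the write-client call that doSchedule above presumably wraps:

// Sketch: schedule a clustering plan, then execute and commit it.
Option<String> instant = client.scheduleClustering(Option.empty());
if (instant.isPresent()) {
    // shouldComplete = true commits the replacecommit once clustering finishes
    client.cluster(instant.get(), true);
}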

Example 9 with HoodieTimeline

Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class BaseHoodieWriteClient, method getInflightTimelineExcludeCompactionAndClustering.

/**
 * Get the inflight timeline, excluding compaction and clustering instants.
 *
 * @param metaClient meta client for the Hudi table
 * @return the filtered inflight timeline
 */
private HoodieTimeline getInflightTimelineExcludeCompactionAndClustering(HoodieTableMetaClient metaClient) {
    HoodieTimeline inflightTimelineWithReplaceCommit = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
    HoodieTimeline inflightTimelineExcludeClusteringCommit = inflightTimelineWithReplaceCommit.filter(instant -> {
        if (instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) {
            Option<Pair<HoodieInstant, HoodieClusteringPlan>> instantPlan = ClusteringUtils.getClusteringPlan(metaClient, instant);
            return !instantPlan.isPresent();
        } else {
            return true;
        }
    });
    return inflightTimelineExcludeClusteringCommit;
}
Also used: HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) Pair(org.apache.hudi.common.util.collection.Pair)
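
The branch above collapses into a single predicate (De Morgan applied to the replacecommit case); a sketch using the same ClusteringUtils check:

// Sketch: keep inflight instants that are neither compactions nor
// replacecommits backed by a clustering plan.
HoodieTimeline nonTableServices = metaClient.getCommitsTimeline()
        .filterPendingExcludingCompaction()
        .filter(instant -> !instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)
                || !ClusteringUtils.getClusteringPlan(metaClient, instant).isPresent());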

Example 10 with HoodieTimeline

Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.

The class AbstractTableFileSystemView, method buildFileGroups.

protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieBaseFile> baseFileStream, Stream<HoodieLogFile> logFileStream, HoodieTimeline timeline, boolean addPendingCompactionFileSlice) {
    Map<Pair<String, String>, List<HoodieBaseFile>> baseFiles = baseFileStream.collect(Collectors.groupingBy((baseFile) -> {
        String partitionPathStr = getPartitionPathFromFilePath(baseFile.getPath());
        return Pair.of(partitionPathStr, baseFile.getFileId());
    }));
    Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream.collect(Collectors.groupingBy((logFile) -> {
        String partitionPathStr = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), logFile.getPath().getParent());
        return Pair.of(partitionPathStr, logFile.getFileId());
    }));
    Set<Pair<String, String>> fileIdSet = new HashSet<>(baseFiles.keySet());
    fileIdSet.addAll(logFiles.keySet());
    List<HoodieFileGroup> fileGroups = new ArrayList<>();
    fileIdSet.forEach(pair -> {
        String fileId = pair.getValue();
        HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, timeline);
        if (baseFiles.containsKey(pair)) {
            baseFiles.get(pair).forEach(group::addBaseFile);
        }
        if (logFiles.containsKey(pair)) {
            logFiles.get(pair).forEach(group::addLogFile);
        }
        if (addPendingCompactionFileSlice) {
            Option<Pair<String, CompactionOperation>> pendingCompaction = getPendingCompactionOperationWithInstant(group.getFileGroupId());
            if (pendingCompaction.isPresent()) {
                // If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
                // so that any new ingestion uses the correct base-instant
                group.addNewFileSliceAtInstant(pendingCompaction.get().getKey());
            }
        }
        fileGroups.add(group);
    });
    return fileGroups;
}
Also used: BootstrapBaseFileMapping(org.apache.hudi.common.model.BootstrapBaseFileMapping) Arrays(java.util.Arrays) HoodieInstant(org.apache.hudi.common.table.timeline.HoodieInstant) FileSlice(org.apache.hudi.common.model.FileSlice) Option(org.apache.hudi.common.util.Option) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ReadLock(java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock) FileStatus(org.apache.hadoop.fs.FileStatus) HoodieTimer(org.apache.hudi.common.util.HoodieTimer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Logger(org.apache.log4j.Logger) HoodieFileGroup(org.apache.hudi.common.model.HoodieFileGroup) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) HoodieLogFile(org.apache.hudi.common.model.HoodieLogFile) HoodieFileGroupId(org.apache.hudi.common.model.HoodieFileGroupId) HoodieTimeline(org.apache.hudi.common.table.timeline.HoodieTimeline) BootstrapFileMapping(org.apache.hudi.common.model.BootstrapFileMapping) ValidationUtils(org.apache.hudi.common.util.ValidationUtils) BootstrapIndex(org.apache.hudi.common.bootstrap.index.BootstrapIndex) WriteLock(java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock) Predicate(java.util.function.Predicate) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) Serializable(java.io.Serializable) CompactionOperation(org.apache.hudi.common.model.CompactionOperation) HoodieReplaceCommitMetadata(org.apache.hudi.common.model.HoodieReplaceCommitMetadata) HoodieBaseFile(org.apache.hudi.common.model.HoodieBaseFile) AbstractMap(java.util.AbstractMap) List(java.util.List) GREATER_THAN_OR_EQUALS(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS) Stream(java.util.stream.Stream) ClusteringUtils(org.apache.hudi.common.util.ClusteringUtils) HoodieIOException(org.apache.hudi.exception.HoodieIOException) METADATA_BOOTSTRAP_INSTANT_TS(org.apache.hudi.common.table.timeline.HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS) LogManager(org.apache.log4j.LogManager) Comparator(java.util.Comparator) GREATER_THAN(org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN) FSUtils(org.apache.hudi.common.fs.FSUtils) CompactionUtils(org.apache.hudi.common.util.CompactionUtils) Pair(org.apache.hudi.common.util.collection.Pair)
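
Consuming the result is straightforward; a small sketch, assuming fileGroups is the list returned by buildFileGroups above:

// Sketch: print each file group's slices with their base instant times.
for (HoodieFileGroup group : fileGroups) {
    group.getAllFileSlices().forEach(slice ->
            System.out.println(group.getFileGroupId() + " -> slice at " + slice.getBaseInstantTime()));
}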

Aggregations

HoodieTimeline (org.apache.hudi.common.table.timeline.HoodieTimeline): 118 uses
HoodieInstant (org.apache.hudi.common.table.timeline.HoodieInstant): 74 uses
HoodieTableMetaClient (org.apache.hudi.common.table.HoodieTableMetaClient): 39 uses
List (java.util.List): 36 uses
IOException (java.io.IOException): 34 uses
HoodieCommitMetadata (org.apache.hudi.common.model.HoodieCommitMetadata): 34 uses
ArrayList (java.util.ArrayList): 32 uses
Option (org.apache.hudi.common.util.Option): 30 uses
Collectors (java.util.stream.Collectors): 29 uses
HoodieActiveTimeline (org.apache.hudi.common.table.timeline.HoodieActiveTimeline): 29 uses
HoodieException (org.apache.hudi.exception.HoodieException): 26 uses
Map (java.util.Map): 25 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 24 uses
Path (org.apache.hadoop.fs.Path): 24 uses
Set (java.util.Set): 22 uses
HoodieBaseFile (org.apache.hudi.common.model.HoodieBaseFile): 22 uses
FileSlice (org.apache.hudi.common.model.FileSlice): 21 uses
HoodieLogFile (org.apache.hudi.common.model.HoodieLogFile): 21 uses
Pair (org.apache.hudi.common.util.collection.Pair): 21 uses
FSUtils (org.apache.hudi.common.fs.FSUtils): 20 uses