Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class TestHoodieTimelineArchiver, method testPendingClusteringWillBlockArchival.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testPendingClusteringWillBlockArchival(boolean enableMetadata) throws Exception {
  HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 5, 2);
  HoodieTestDataGenerator.createPendingReplaceFile(basePath, "00000000", wrapperFs.getConf());
  for (int i = 1; i < 8; i++) {
    testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 2);
    // archival
    Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
    List<HoodieInstant> originalCommits = commitsList.getKey();
    List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
    assertEquals(originalCommits, commitsAfterArchival);
  }
  HoodieTimeline timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
  assertEquals(7, timeline.countInstants(), "Since we have a pending clustering instant at 00000000, we should never archive any commit after 00000000");
}
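For context, a minimal sketch of how a caller could locate the pending replacecommit that blocks archival in the test above; the helper name is hypothetical, and metaClient is assumed to be an initialized HoodieTableMetaClient, as in the test.

// Hypothetical helper (not part of Hudi): returns the oldest pending replacecommit, if any.
// This is the instant at which archival stops in the test above.
private Option<HoodieInstant> oldestPendingReplaceCommit(HoodieTableMetaClient metaClient) {
  // firstInstant() returns Option.empty() when no replacecommit is pending.
  return metaClient.getActiveTimeline().filterPendingReplaceTimeline().firstInstant();
}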
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class TestHoodieTimelineArchiver, method verifyArchival.
private void verifyArchival(List<HoodieInstant> expectedArchivedInstants, List<HoodieInstant> expectedActiveInstants, List<HoodieInstant> commitsAfterArchival) {
  Collections.sort(expectedActiveInstants, Comparator.comparing(HoodieInstant::getTimestamp));
  Collections.sort(commitsAfterArchival, Comparator.comparing(HoodieInstant::getTimestamp));
  assertEquals(expectedActiveInstants, commitsAfterArchival);
  expectedArchivedInstants.forEach(entry -> assertFalse(commitsAfterArchival.contains(entry)));
  HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
  List<HoodieInstant> actualArchivedInstants = archivedTimeline.getInstants().collect(Collectors.toList());
  Collections.sort(actualArchivedInstants, Comparator.comparing(HoodieInstant::getTimestamp));
  Collections.sort(expectedArchivedInstants, Comparator.comparing(HoodieInstant::getTimestamp));
  assertEquals(actualArchivedInstants, expectedArchivedInstants);
  HoodieTimeline timeline = metaClient.getActiveTimeline();
  expectedArchivedInstants.forEach(entry -> {
    // check safety: archived instants (other than rollbacks) must still resolve against the active timeline
    if (!entry.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)) {
      assertTrue(timeline.containsOrBeforeTimelineStarts(entry.getTimestamp()), "Archived commits should always be safe");
    }
  });
}
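As a small companion to verifyArchival, the sketch below reads the archived timeline through the same HoodieArchivedTimeline access pattern and prints its instants in timestamp order; the helper name is hypothetical.

// Hypothetical helper: list archived instants in timestamp order, reusing the
// HoodieArchivedTimeline access pattern from verifyArchival above.
private void printArchivedInstants(HoodieTableMetaClient metaClient) {
  HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
  archivedTimeline.getInstants()
      .sorted(Comparator.comparing(HoodieInstant::getTimestamp))
      .forEach(instant -> System.out.println(instant.getTimestamp() + " " + instant.getAction()));
}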
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class HoodieClusteringJob, method doScheduleAndCluster.
private int doScheduleAndCluster(JavaSparkContext jsc) throws Exception {
  LOG.info("Step 1: Do schedule");
  String schemaStr = getSchemaFromLatestInstant();
  try (SparkRDDWriteClient<HoodieRecordPayload> client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) {
    Option<String> instantTime = Option.empty();
    if (cfg.retryLastFailedClusteringJob) {
      HoodieSparkTable<HoodieRecordPayload> table = HoodieSparkTable.create(client.getConfig(), client.getEngineContext());
      HoodieTimeline inflightHoodieTimeline = table.getActiveTimeline().filterPendingReplaceTimeline().filterInflights();
      if (!inflightHoodieTimeline.empty()) {
        HoodieInstant inflightClusteringInstant = inflightHoodieTimeline.lastInstant().get();
        Date clusteringStartTime = HoodieActiveTimeline.parseDateFromInstantTime(inflightClusteringInstant.getTimestamp());
        if (clusteringStartTime.getTime() + cfg.maxProcessingTimeMs < System.currentTimeMillis()) {
          // If there is a failed clustering, reuse its instant time to trigger the next clustering action,
          // which will roll back the failed attempt and re-run the clustering.
          LOG.info("Found failed clustering instant at : " + inflightClusteringInstant + "; Will rollback the failed clustering and re-trigger again.");
          instantTime = Option.of(inflightHoodieTimeline.lastInstant().get().getTimestamp());
        } else {
          LOG.info(inflightClusteringInstant + " might still be in progress, will trigger a new clustering job.");
        }
      }
    }
    instantTime = instantTime.isPresent() ? instantTime : doSchedule(client);
    if (!instantTime.isPresent()) {
      LOG.info("Couldn't generate cluster plan");
      return -1;
    }
    LOG.info("The schedule instant time is " + instantTime.get());
    LOG.info("Step 2: Do cluster");
    Option<HoodieCommitMetadata> metadata = client.cluster(instantTime.get(), true).getCommitMetadata();
    return UtilHelpers.handleErrors(metadata.get(), instantTime.get());
  }
}
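The retry branch above hinges on a staleness check against cfg.maxProcessingTimeMs. A minimal sketch of that check, pulled out as a hypothetical helper:

// Hypothetical helper: true when an inflight clustering instant has been running longer than
// the allowed processing time, mirroring the retry check in doScheduleAndCluster above.
private boolean isClusteringTimedOut(HoodieInstant inflightClusteringInstant, long maxProcessingTimeMs) throws Exception {
  Date clusteringStartTime = HoodieActiveTimeline.parseDateFromInstantTime(inflightClusteringInstant.getTimestamp());
  return clusteringStartTime.getTime() + maxProcessingTimeMs < System.currentTimeMillis();
}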
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class BaseHoodieWriteClient, method getInflightTimelineExcludeCompactionAndClustering.
/**
 * Get the inflight timeline, excluding compaction and clustering instants.
 *
 * @param metaClient the table meta client
 * @return the inflight timeline with compaction and clustering instants filtered out
 */
private HoodieTimeline getInflightTimelineExcludeCompactionAndClustering(HoodieTableMetaClient metaClient) {
  HoodieTimeline inflightTimelineWithReplaceCommit = metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
  HoodieTimeline inflightTimelineExcludeClusteringCommit = inflightTimelineWithReplaceCommit.filter(instant -> {
    if (instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)) {
      // Keep only pending replacecommits that are not backed by a clustering plan.
      Option<Pair<HoodieInstant, HoodieClusteringPlan>> instantPlan = ClusteringUtils.getClusteringPlan(metaClient, instant);
      return !instantPlan.isPresent();
    } else {
      return true;
    }
  });
  return inflightTimelineExcludeClusteringCommit;
}
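The filter above keys on whether a pending replacecommit carries a clustering plan. A minimal sketch of that check as a standalone, hypothetical helper:

// Hypothetical helper: true only for replacecommits that are backed by a clustering plan,
// i.e. exactly the instants getInflightTimelineExcludeCompactionAndClustering filters out.
private boolean isPendingClustering(HoodieTableMetaClient metaClient, HoodieInstant instant) {
  return instant.getAction().equals(HoodieTimeline.REPLACE_COMMIT_ACTION)
      && ClusteringUtils.getClusteringPlan(metaClient, instant).isPresent();
}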
Use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
The class AbstractTableFileSystemView, method buildFileGroups.
protected List<HoodieFileGroup> buildFileGroups(Stream<HoodieBaseFile> baseFileStream, Stream<HoodieLogFile> logFileStream, HoodieTimeline timeline, boolean addPendingCompactionFileSlice) {
  Map<Pair<String, String>, List<HoodieBaseFile>> baseFiles = baseFileStream.collect(Collectors.groupingBy((baseFile) -> {
    String partitionPathStr = getPartitionPathFromFilePath(baseFile.getPath());
    return Pair.of(partitionPathStr, baseFile.getFileId());
  }));
  Map<Pair<String, String>, List<HoodieLogFile>> logFiles = logFileStream.collect(Collectors.groupingBy((logFile) -> {
    String partitionPathStr = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), logFile.getPath().getParent());
    return Pair.of(partitionPathStr, logFile.getFileId());
  }));
  Set<Pair<String, String>> fileIdSet = new HashSet<>(baseFiles.keySet());
  fileIdSet.addAll(logFiles.keySet());
  List<HoodieFileGroup> fileGroups = new ArrayList<>();
  fileIdSet.forEach(pair -> {
    String fileId = pair.getValue();
    HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, timeline);
    if (baseFiles.containsKey(pair)) {
      baseFiles.get(pair).forEach(group::addBaseFile);
    }
    if (logFiles.containsKey(pair)) {
      logFiles.get(pair).forEach(group::addLogFile);
    }
    if (addPendingCompactionFileSlice) {
      Option<Pair<String, CompactionOperation>> pendingCompaction = getPendingCompactionOperationWithInstant(group.getFileGroupId());
      if (pendingCompaction.isPresent()) {
        // If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
        // so that any new ingestion uses the correct base-instant
        group.addNewFileSliceAtInstant(pendingCompaction.get().getKey());
      }
    }
    fileGroups.add(group);
  });
  return fileGroups;
}
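A minimal usage sketch of buildFileGroups, assuming baseFileStream and logFileStream have been populated elsewhere (for example from a partition listing):

// Sketch: build file groups against the completed commits timeline and inspect the result.
// `baseFileStream` and `logFileStream` are assumed inputs, not defined here.
HoodieTimeline completedTimeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
List<HoodieFileGroup> fileGroups = buildFileGroups(baseFileStream, logFileStream, completedTimeline, true);
fileGroups.forEach(group -> System.out.println("Built file group: " + group.getFileGroupId()));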