use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class TestHoodieTimelineArchiver method testArchiveCompletedRollbackAndClean.
@ParameterizedTest
@CsvSource({ "true,true", "true,false", "false,true", "false,false" })
public void testArchiveCompletedRollbackAndClean(boolean isEmpty, boolean enableMetadataTable) throws Exception {
  init();
  int minInstantsToKeep = 2;
  int maxInstantsToKeep = 10;
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)
      .withParallelism(2, 2)
      .forTable("test-trip-table")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .retainCommits(1)
          .archiveCommitsWith(minInstantsToKeep, maxInstantsToKeep)
          .build())
      .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
          .withRemoteServerPort(timelineServicePort)
          .build())
      .withMetadataConfig(HoodieMetadataConfig.newBuilder()
          .enable(enableMetadataTable)
          .build())
      .build();
  metaClient = HoodieTableMetaClient.reload(metaClient);
  int startInstant = 1;
  for (int i = 0; i < maxInstantsToKeep + 1; i++, startInstant++) {
    createCleanMetadata(startInstant + "", false, isEmpty || i % 2 == 0);
  }
  for (int i = 0; i < maxInstantsToKeep + 1; i++, startInstant += 2) {
    createCommitAndRollbackFile(startInstant + 1 + "", startInstant + "", false, isEmpty || i % 2 == 0);
  }
  if (enableMetadataTable) {
    // Simulate a compaction commit in metadata table timeline
    // so the archival in data table can happen
    createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, Integer.toString(99));
  }
  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table);
  archiver.archiveIfRequired(context);
  Stream<HoodieInstant> currentInstants = metaClient.getActiveTimeline().reload().getInstants();
  Map<Object, List<HoodieInstant>> actionInstantMap = currentInstants.collect(Collectors.groupingBy(HoodieInstant::getAction));
  assertTrue(actionInstantMap.containsKey("clean"), "Clean Action key must be present");
  assertEquals(minInstantsToKeep, actionInstantMap.get("clean").size(), "Should retain exactly the minimum number of clean instants");
  assertTrue(actionInstantMap.containsKey("rollback"), "Rollback Action key must be present");
  assertEquals(minInstantsToKeep, actionInstantMap.get("rollback").size(), "Should retain exactly the minimum number of rollback instants");
}
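For reference, here is a minimal standalone sketch (not part of the test above) of the same grouping the assertions rely on: counting an existing table's active-timeline instants by action. The base path is an assumed placeholder, and getInstants() is assumed to return a Stream&lt;HoodieInstant&gt; as it does in the snippet above.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;

public class ActiveTimelineByAction {
  public static void main(String[] args) {
    // Assumed location of an existing Hudi table; adjust to a real path.
    String basePath = "file:///tmp/hudi/test-trip-table";
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath(basePath)
        .build();
    // Group active instants by action (commit, deltacommit, clean, rollback, ...), as the test asserts above.
    Map<String, List<HoodieInstant>> byAction = metaClient.getActiveTimeline().getInstants()
        .collect(Collectors.groupingBy(HoodieInstant::getAction));
    byAction.forEach((action, instants) ->
        System.out.println(action + " -> " + instants.size() + " instant(s)"));
  }
}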
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class TestHoodieTimelineArchiver method verifyArchival.
private void verifyArchival(List<HoodieInstant> expectedArchivedInstants, List<HoodieInstant> expectedActiveInstants, List<HoodieInstant> commitsAfterArchival) {
  Collections.sort(expectedActiveInstants, Comparator.comparing(HoodieInstant::getTimestamp));
  Collections.sort(commitsAfterArchival, Comparator.comparing(HoodieInstant::getTimestamp));
  assertEquals(expectedActiveInstants, commitsAfterArchival);
  expectedArchivedInstants.forEach(entry -> assertFalse(commitsAfterArchival.contains(entry)));
  HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
  List<HoodieInstant> actualArchivedInstants = archivedTimeline.getInstants().collect(Collectors.toList());
  Collections.sort(actualArchivedInstants, Comparator.comparing(HoodieInstant::getTimestamp));
  Collections.sort(expectedArchivedInstants, Comparator.comparing(HoodieInstant::getTimestamp));
  assertEquals(actualArchivedInstants, expectedArchivedInstants);
  HoodieTimeline timeline = metaClient.getActiveTimeline();
  expectedArchivedInstants.forEach(entry -> {
    // check safety: a non-rollback archived instant must be contained in, or precede the start of, the active timeline
    if (!HoodieTimeline.ROLLBACK_ACTION.equals(entry.getAction())) {
      assertTrue(timeline.containsOrBeforeTimelineStarts(entry.getTimestamp()), "Archived commits should always be safe");
    }
  });
}
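For reference, here is a minimal standalone sketch (not part of the test) that loads a table's archived timeline and prints its instants in timestamp order, the same ordering the verification above uses; the base path is an assumed placeholder.

import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;

public class ListArchivedInstants {
  public static void main(String[] args) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("file:///tmp/hudi/test-trip-table") // assumed placeholder path
        .build();
    // Same constructor the test uses; loads instants that have been moved out of the active timeline.
    HoodieArchivedTimeline archivedTimeline = new HoodieArchivedTimeline(metaClient);
    List<HoodieInstant> archived = archivedTimeline.getInstants()
        .sorted(Comparator.comparing(HoodieInstant::getTimestamp))
        .collect(Collectors.toList());
    archived.forEach(instant ->
        System.out.println(instant.getTimestamp() + " " + instant.getAction() + " " + instant.getState()));
  }
}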
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class SparkClientFunctionalTestHarness method updateRecordsInMORTable.
protected void updateRecordsInMORTable(HoodieTableMetaClient metaClient, List<HoodieRecord> records, SparkRDDWriteClient client, HoodieWriteConfig cfg, String commitTime, boolean doExplicitCommit) throws IOException {
  HoodieTableMetaClient reloadedMetaClient = HoodieTableMetaClient.reload(metaClient);
  Map<HoodieKey, HoodieRecord> recordsMap = new HashMap<>();
  for (HoodieRecord rec : records) {
    if (!recordsMap.containsKey(rec.getKey())) {
      recordsMap.put(rec.getKey(), rec);
    }
  }
  JavaRDD<WriteStatus> statusesRdd = client.upsert(jsc().parallelize(records, 1), commitTime);
  List<WriteStatus> statuses = statusesRdd.collect();
  // Verify there are no errors
  assertNoWriteErrors(statuses);
  if (doExplicitCommit) {
    client.commit(commitTime, statusesRdd);
  }
  assertFileSizesEqual(statuses, status -> FSUtils.getFileSize(reloadedMetaClient.getFs(), new Path(reloadedMetaClient.getBasePath(), status.getStat().getPath())));
  // An upsert on a MOR table produces a delta commit; no commit instant should exist until compaction runs.
  Option<HoodieInstant> deltaCommit = reloadedMetaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
  assertTrue(deltaCommit.isPresent());
  assertEquals(commitTime, deltaCommit.get().getTimestamp(), "Latest delta commit should match the specified commit time");
  Option<HoodieInstant> commit = reloadedMetaClient.getActiveTimeline().getCommitTimeline().firstInstant();
  assertFalse(commit.isPresent());
}
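For reference, here is a minimal standalone sketch of the delta-commit lookup asserted above, run against an existing merge-on-read table; the base path is an assumed placeholder.

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;

public class LatestDeltaCommit {
  public static void main(String[] args) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("file:///tmp/hudi/mor-table") // assumed placeholder path
        .build();
    // Upserts on a MOR table land as delta commits; mirror the lastInstant() lookup asserted above.
    Option<HoodieInstant> deltaCommit =
        metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant();
    if (deltaCommit.isPresent()) {
      System.out.println("Latest delta commit: " + deltaCommit.get().getTimestamp());
    } else {
      System.out.println("No delta commits yet");
    }
  }
}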
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class DeltaSync method getCheckpointToResume.
/**
 * Process previous commit metadata and checkpoint configs set by the user to determine the checkpoint to resume from.
 *
 * @param commitTimelineOpt commit timeline of interest.
 * @return the checkpoint to resume from, if applicable.
 * @throws IOException if the commit metadata cannot be read from the timeline.
 */
private Option<String> getCheckpointToResume(Option<HoodieTimeline> commitTimelineOpt) throws IOException {
  Option<String> resumeCheckpointStr = Option.empty();
  Option<HoodieInstant> lastCommit = commitTimelineOpt.get().lastInstant();
  if (lastCommit.isPresent()) {
    // if previous commit metadata did not have the checkpoint key, try traversing previous commits until we find one.
    Option<HoodieCommitMetadata> commitMetadataOption = getLatestCommitMetadataWithValidCheckpointInfo(commitTimelineOpt.get());
    if (commitMetadataOption.isPresent()) {
      HoodieCommitMetadata commitMetadata = commitMetadataOption.get();
      LOG.debug("Checkpoint reset from metadata: " + commitMetadata.getMetadata(CHECKPOINT_RESET_KEY));
      if (cfg.checkpoint != null
          && (StringUtils.isNullOrEmpty(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY))
              || !cfg.checkpoint.equals(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY)))) {
        resumeCheckpointStr = Option.of(cfg.checkpoint);
      } else if (!StringUtils.isNullOrEmpty(commitMetadata.getMetadata(CHECKPOINT_KEY))) {
        // if the previous checkpoint is an empty string, this branch is skipped and the checkpoint stays Option.empty()
        resumeCheckpointStr = Option.of(commitMetadata.getMetadata(CHECKPOINT_KEY));
      } else if (HoodieTimeline.compareTimestamps(HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS, HoodieTimeline.LESSER_THAN, lastCommit.get().getTimestamp())) {
        throw new HoodieDeltaStreamerException("Unable to find previous checkpoint. Please double check if this table "
            + "was indeed built via delta streamer. Last Commit :" + lastCommit
            + ", Instants :" + commitTimelineOpt.get().getInstants().collect(Collectors.toList())
            + ", CommitMetadata=" + commitMetadata.toJsonString());
      }
      // KAFKA_CHECKPOINT_TYPE will be honored only for the first batch.
      if (!StringUtils.isNullOrEmpty(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY))) {
        props.remove(KafkaOffsetGen.Config.KAFKA_CHECKPOINT_TYPE.key());
      }
    } else if (cfg.checkpoint != null) {
      // getLatestCommitMetadataWithValidCheckpointInfo(commitTimelineOpt.get()) will never return commit metadata w/o any checkpoint key set.
      resumeCheckpointStr = Option.of(cfg.checkpoint);
    }
  }
  return resumeCheckpointStr;
}
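For reference, here is a minimal standalone sketch of the core idea behind getCheckpointToResume: read the last completed commit's metadata and look up a checkpoint value in its extra metadata. The base path and the literal "deltastreamer.checkpoint.key" key are assumptions for illustration, not taken from the code above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;

public class ReadLastCheckpoint {
  public static void main(String[] args) throws IOException {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("file:///tmp/hudi/deltastreamer-table") // assumed placeholder path
        .build();
    HoodieTimeline completedCommits = metaClient.getCommitsTimeline().filterCompletedInstants();
    Option<HoodieInstant> lastCommit = completedCommits.lastInstant();
    if (lastCommit.isPresent()) {
      // Deserialize the commit metadata stored for the last completed instant.
      HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
          completedCommits.getInstantDetails(lastCommit.get()).get(), HoodieCommitMetadata.class);
      // Extra metadata is a plain key/value map; the key below is an assumed example and
      // getMetadata returns null when the key is absent.
      String checkpoint = commitMetadata.getMetadata("deltastreamer.checkpoint.key");
      System.out.println("Checkpoint from last commit: " + checkpoint);
    }
  }
}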
use of org.apache.hudi.common.table.timeline.HoodieInstant in project hudi by apache.
the class HoodieClusteringJob method doScheduleAndCluster.
private int doScheduleAndCluster(JavaSparkContext jsc) throws Exception {
LOG.info("Step 1: Do schedule");
String schemaStr = getSchemaFromLatestInstant();
try (SparkRDDWriteClient<HoodieRecordPayload> client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) {
Option<String> instantTime = Option.empty();
if (cfg.retryLastFailedClusteringJob) {
HoodieSparkTable<HoodieRecordPayload> table = HoodieSparkTable.create(client.getConfig(), client.getEngineContext());
HoodieTimeline inflightHoodieTimeline = table.getActiveTimeline().filterPendingReplaceTimeline().filterInflights();
if (!inflightHoodieTimeline.empty()) {
HoodieInstant inflightClusteringInstant = inflightHoodieTimeline.lastInstant().get();
Date clusteringStartTime = HoodieActiveTimeline.parseDateFromInstantTime(inflightClusteringInstant.getTimestamp());
if (clusteringStartTime.getTime() + cfg.maxProcessingTimeMs < System.currentTimeMillis()) {
          // if the last clustering attempt exceeded the max processing time, reuse its instant time so the next
          // clustering action rolls back the failed attempt and re-runs the clustering.
LOG.info("Found failed clustering instant at : " + inflightClusteringInstant + "; Will rollback the failed clustering and re-trigger again.");
instantTime = Option.of(inflightHoodieTimeline.lastInstant().get().getTimestamp());
} else {
LOG.info(inflightClusteringInstant + " might still be in progress, will trigger a new clustering job.");
}
}
}
instantTime = instantTime.isPresent() ? instantTime : doSchedule(client);
if (!instantTime.isPresent()) {
LOG.info("Couldn't generate cluster plan");
return -1;
}
LOG.info("The schedule instant time is " + instantTime.get());
LOG.info("Step 2: Do cluster");
Option<HoodieCommitMetadata> metadata = client.cluster(instantTime.get(), true).getCommitMetadata();
return UtilHelpers.handleErrors(metadata.get(), instantTime.get());
}
}
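For reference, here is a minimal standalone sketch of the pending-clustering check used above: find the latest inflight replacecommit instant on the active timeline and report how long ago it started; the base path is an assumed placeholder.

import java.text.ParseException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;

public class PendingClusteringCheck {
  public static void main(String[] args) throws ParseException {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("file:///tmp/hudi/clustered-table") // assumed placeholder path
        .build();
    // Pending clustering shows up as inflight replacecommit instants, as in the retry check above.
    HoodieTimeline inflightClustering =
        metaClient.getActiveTimeline().filterPendingReplaceTimeline().filterInflights();
    if (!inflightClustering.empty()) {
      HoodieInstant instant = inflightClustering.lastInstant().get();
      Date startTime = HoodieActiveTimeline.parseDateFromInstantTime(instant.getTimestamp());
      long ageMs = System.currentTimeMillis() - startTime.getTime();
      System.out.println("Inflight clustering " + instant.getTimestamp() + " started " + ageMs + " ms ago");
    } else {
      System.out.println("No inflight clustering instants");
    }
  }
}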