Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
The class DeltaSync, method getCheckpointToResume.
/**
 * Process the previous commit metadata and any user-set checkpoint configs to determine the checkpoint to resume from.
 *
 * @param commitTimelineOpt commit timeline of interest.
 * @return the checkpoint to resume from, if applicable.
 * @throws IOException if the commit metadata cannot be read.
 */
private Option<String> getCheckpointToResume(Option<HoodieTimeline> commitTimelineOpt) throws IOException {
  Option<String> resumeCheckpointStr = Option.empty();
  Option<HoodieInstant> lastCommit = commitTimelineOpt.get().lastInstant();
  if (lastCommit.isPresent()) {
    // If the previous commit metadata did not have the checkpoint key, traverse earlier commits until we find one.
    Option<HoodieCommitMetadata> commitMetadataOption = getLatestCommitMetadataWithValidCheckpointInfo(commitTimelineOpt.get());
    if (commitMetadataOption.isPresent()) {
      HoodieCommitMetadata commitMetadata = commitMetadataOption.get();
      LOG.debug("Checkpoint reset from metadata: " + commitMetadata.getMetadata(CHECKPOINT_RESET_KEY));
      if (cfg.checkpoint != null && (StringUtils.isNullOrEmpty(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY))
          || !cfg.checkpoint.equals(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY)))) {
        // A user-supplied checkpoint that does not match the recorded reset key takes precedence.
        resumeCheckpointStr = Option.of(cfg.checkpoint);
      } else if (!StringUtils.isNullOrEmpty(commitMetadata.getMetadata(CHECKPOINT_KEY))) {
        // If the previous checkpoint were an empty string, this branch is skipped and Option.empty() is kept.
        resumeCheckpointStr = Option.of(commitMetadata.getMetadata(CHECKPOINT_KEY));
      } else if (HoodieTimeline.compareTimestamps(HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS, HoodieTimeline.LESSER_THAN, lastCommit.get().getTimestamp())) {
        throw new HoodieDeltaStreamerException("Unable to find previous checkpoint. Please double check if this table "
            + "was indeed built via delta streamer. Last Commit :" + lastCommit
            + ", Instants :" + commitTimelineOpt.get().getInstants().collect(Collectors.toList())
            + ", CommitMetadata=" + commitMetadata.toJsonString());
      }
      // KAFKA_CHECKPOINT_TYPE will be honored only for the first batch.
      if (!StringUtils.isNullOrEmpty(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY))) {
        props.remove(KafkaOffsetGen.Config.KAFKA_CHECKPOINT_TYPE.key());
      }
    } else if (cfg.checkpoint != null) {
      // getLatestCommitMetadataWithValidCheckpointInfo never returns commit metadata without a checkpoint key set.
      resumeCheckpointStr = Option.of(cfg.checkpoint);
    }
  }
  return resumeCheckpointStr;
}
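Since the precedence between the user-supplied checkpoint, the reset key, and the stored checkpoint is easy to misread, here is a minimal plain-Java sketch of the same decision order. It substitutes java.util.Optional and a plain Map for Hudi's Option and commit metadata; the key names are hypothetical stand-ins, and the bootstrap/exception branch is omitted:

import java.util.Map;
import java.util.Optional;

public class CheckpointPrecedenceSketch {

  // Hypothetical stand-ins for Hudi's CHECKPOINT_RESET_KEY / CHECKPOINT_KEY metadata keys.
  static final String RESET_KEY = "deltastreamer.checkpoint.reset_key";
  static final String CKPT_KEY = "deltastreamer.checkpoint.key";

  // Mirrors the decision order above: a user-supplied checkpoint wins unless it matches the
  // reset key already recorded in the last commit; otherwise fall back to the checkpoint
  // stored in commit metadata, if any.
  static Optional<String> checkpointToResume(String userCheckpoint, Map<String, String> lastCommitMetadata) {
    String resetKey = lastCommitMetadata.get(RESET_KEY);
    String storedCheckpoint = lastCommitMetadata.get(CKPT_KEY);
    if (userCheckpoint != null && (resetKey == null || resetKey.isEmpty() || !userCheckpoint.equals(resetKey))) {
      return Optional.of(userCheckpoint); // new user-requested reset, honor it
    }
    if (storedCheckpoint != null && !storedCheckpoint.isEmpty()) {
      return Optional.of(storedCheckpoint); // resume from the last committed checkpoint
    }
    return Optional.empty(); // e.g. the previous checkpoint was an empty string
  }

  public static void main(String[] args) {
    System.out.println(checkpointToResume("topic,0:100", Map.of(CKPT_KEY, "topic,0:50"))); // user reset wins
    System.out.println(checkpointToResume(null, Map.of(CKPT_KEY, "topic,0:50")));          // resume stored checkpoint
    System.out.println(checkpointToResume(null, Map.of()));                                // nothing to resume
  }
}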
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
The class HoodieClusteringJob, method doScheduleAndCluster.
private int doScheduleAndCluster(JavaSparkContext jsc) throws Exception {
  LOG.info("Step 1: Do schedule");
  String schemaStr = getSchemaFromLatestInstant();
  try (SparkRDDWriteClient<HoodieRecordPayload> client = UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) {
    Option<String> instantTime = Option.empty();
    if (cfg.retryLastFailedClusteringJob) {
      HoodieSparkTable<HoodieRecordPayload> table = HoodieSparkTable.create(client.getConfig(), client.getEngineContext());
      HoodieTimeline inflightHoodieTimeline = table.getActiveTimeline().filterPendingReplaceTimeline().filterInflights();
      if (!inflightHoodieTimeline.empty()) {
        HoodieInstant inflightClusteringInstant = inflightHoodieTimeline.lastInstant().get();
        Date clusteringStartTime = HoodieActiveTimeline.parseDateFromInstantTime(inflightClusteringInstant.getTimestamp());
        if (clusteringStartTime.getTime() + cfg.maxProcessingTimeMs < System.currentTimeMillis()) {
          // A previous clustering attempt failed; reuse its instant time so the next clustering action rolls it back and retries.
          LOG.info("Found failed clustering instant at : " + inflightClusteringInstant + "; Will rollback the failed clustering and re-trigger again.");
          instantTime = Option.of(inflightClusteringInstant.getTimestamp());
        } else {
          LOG.info(inflightClusteringInstant + " might still be in progress, will trigger a new clustering job.");
        }
      }
    }
    instantTime = instantTime.isPresent() ? instantTime : doSchedule(client);
    if (!instantTime.isPresent()) {
      LOG.info("Couldn't generate cluster plan");
      return -1;
    }
    LOG.info("The schedule instant time is " + instantTime.get());
    LOG.info("Step 2: Do cluster");
    Option<HoodieCommitMetadata> metadata = client.cluster(instantTime.get(), true).getCommitMetadata();
    return UtilHelpers.handleErrors(metadata.get(), instantTime.get());
  }
}
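The retry decision reduces to comparing the inflight instant's encoded start time against a processing-time budget. Below is a self-contained sketch of that check, assuming the classic yyyyMMddHHmmss instant-time format; Hudi's own parseDateFromInstantTime handles more variants (newer releases append milliseconds), so treat this as illustrative:

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class ClusteringRetrySketch {

  // Returns true if an inflight clustering instant has been running longer than
  // maxProcessingTimeMs and should therefore be treated as failed and retried.
  // Assumes the classic Hudi instant-time format yyyyMMddHHmmss.
  static boolean shouldRetry(String inflightInstantTime, long maxProcessingTimeMs, long nowMs) throws ParseException {
    Date startTime = new SimpleDateFormat("yyyyMMddHHmmss").parse(inflightInstantTime);
    return startTime.getTime() + maxProcessingTimeMs < nowMs;
  }

  public static void main(String[] args) throws ParseException {
    long now = System.currentTimeMillis();
    // An instant from far in the past has certainly exceeded any reasonable budget.
    System.out.println(shouldRetry("20220101120000", 30 * 60 * 1000L, now)); // true
  }
}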
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
The class HoodieTestCommitMetadataGenerator, method generateCommitMetadata.
/**
 * Generate commit metadata: one write stat per file path in each partition, plus any extra metadata entries.
 */
private static HoodieCommitMetadata generateCommitMetadata(Map<String, List<String>> partitionToFilePaths, Option<Integer> writes, Option<Integer> updates, Map<String, String> extraMetadata) {
  HoodieCommitMetadata metadata = new HoodieCommitMetadata();
  for (Map.Entry<String, String> entry : extraMetadata.entrySet()) {
    metadata.addMetadata(entry.getKey(), entry.getValue());
  }
  partitionToFilePaths.forEach((key, value) -> value.forEach(f -> {
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setPartitionPath(key);
    writeStat.setPath(DEFAULT_PATH);
    writeStat.setFileId(DEFAULT_FILEID);
    writeStat.setTotalWriteBytes(DEFAULT_TOTAL_WRITE_BYTES);
    writeStat.setPrevCommit(DEFAULT_PRE_COMMIT);
    writeStat.setNumWrites(writes.orElse(DEFAULT_NUM_WRITES));
    writeStat.setNumUpdateWrites(updates.orElse(DEFAULT_NUM_UPDATE_WRITES));
    writeStat.setTotalLogBlocks(DEFAULT_TOTAL_LOG_BLOCKS);
    writeStat.setTotalLogRecords(DEFAULT_TOTAL_LOG_RECORDS);
    metadata.addWriteStat(key, writeStat);
  }));
  return metadata;
}
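As a usage illustration, the same HoodieCommitMetadata and HoodieWriteStat calls can assemble metadata by hand. The sketch below uses made-up paths and ids, and only methods that appear in the generator above:

import java.io.IOException;

import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;

public class CommitMetadataExample {

  public static void main(String[] args) throws IOException {
    HoodieCommitMetadata metadata = new HoodieCommitMetadata();
    metadata.addMetadata("test.key", "test.value"); // illustrative extra metadata entry

    // One write stat per file written in a partition, mirroring the generator above.
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setPartitionPath("2022/01/01");
    writeStat.setPath("2022/01/01/file1.parquet"); // hypothetical file path
    writeStat.setFileId("file-id-1");              // hypothetical file group id
    writeStat.setNumWrites(100);
    writeStat.setNumUpdateWrites(10);
    writeStat.setTotalWriteBytes(1024L);
    metadata.addWriteStat("2022/01/01", writeStat);

    System.out.println(metadata.toJsonString());
  }
}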
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
The class TransactionUtils, method resolveWriteConflictIfAny.
/**
 * Resolve any write conflicts when committing data.
 *
 * @param table the hoodie table being written to.
 * @param currentTxnOwnerInstant the instant owned by the current transaction.
 * @param thisCommitMetadata commit metadata produced by the current transaction.
 * @param config the write config, which supplies the conflict resolution strategy.
 * @param lastCompletedTxnOwnerInstant the instant owned by the last completed transaction, if any.
 * @param reloadActiveTimeline whether to reload the active timeline before gathering candidate instants.
 * @return the (possibly updated) commit metadata of the current transaction.
 * @throws HoodieWriteConflictException if a conflict is detected and cannot be resolved.
 */
public static Option<HoodieCommitMetadata> resolveWriteConflictIfAny(final HoodieTable table, final Option<HoodieInstant> currentTxnOwnerInstant, final Option<HoodieCommitMetadata> thisCommitMetadata, final HoodieWriteConfig config, Option<HoodieInstant> lastCompletedTxnOwnerInstant, boolean reloadActiveTimeline) throws HoodieWriteConflictException {
  if (config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()) {
    ConflictResolutionStrategy resolutionStrategy = config.getWriteConflictResolutionStrategy();
    Stream<HoodieInstant> instantStream = resolutionStrategy.getCandidateInstants(reloadActiveTimeline ? table.getMetaClient().reloadActiveTimeline() : table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant);
    final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElse(new HoodieCommitMetadata()));
    instantStream.forEach(instant -> {
      try {
        ConcurrentOperation otherOperation = new ConcurrentOperation(instant, table.getMetaClient());
        if (resolutionStrategy.hasConflict(thisOperation, otherOperation)) {
          LOG.info("Conflict encountered between current instant = " + thisOperation + " and instant = " + otherOperation + ", attempting to resolve it...");
          resolutionStrategy.resolveConflict(table, thisOperation, otherOperation);
        }
      } catch (IOException io) {
        throw new HoodieWriteConflictException("Unable to resolve conflict, if present", io);
      }
    });
    LOG.info("Successfully resolved conflicts, if any");
    return thisOperation.getCommitMetadataOption();
  }
  return thisCommitMetadata;
}
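What hasConflict reports depends on the configured strategy, but the common case is two concurrent commits writing to overlapping file groups. A simplified, self-contained illustration of that idea follows; Hudi's real strategies derive the file-id sets from commit metadata, whereas here they are passed in directly:

import java.util.HashSet;
import java.util.Set;

public class ConflictCheckSketch {

  // Simplified illustration of what a conflict resolution strategy checks:
  // two concurrent commits conflict if they wrote to overlapping file groups.
  static boolean hasConflict(Set<String> thisCommitFileIds, Set<String> otherCommitFileIds) {
    Set<String> overlap = new HashSet<>(thisCommitFileIds);
    overlap.retainAll(otherCommitFileIds);
    return !overlap.isEmpty();
  }

  public static void main(String[] args) {
    System.out.println(hasConflict(Set.of("fg-1", "fg-2"), Set.of("fg-2"))); // true: fg-2 touched by both
    System.out.println(hasConflict(Set.of("fg-1"), Set.of("fg-3")));         // false: disjoint file groups
  }
}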
Use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
The class CommitUtils, method buildMetadata.
public static HoodieCommitMetadata buildMetadata(List<HoodieWriteStat> writeStats, Map<String, List<String>> partitionToReplaceFileIds, Option<Map<String, String>> extraMetadata, WriteOperationType operationType, String schemaToStoreInCommit, String commitActionType) {
  HoodieCommitMetadata commitMetadata = buildMetadataFromStats(writeStats, partitionToReplaceFileIds, commitActionType, operationType);
  // Add in any extra metadata supplied by the caller.
  if (extraMetadata.isPresent()) {
    extraMetadata.get().forEach(commitMetadata::addMetadata);
  }
  commitMetadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, (schemaToStoreInCommit == null || schemaToStoreInCommit.equals(NULL_SCHEMA_STR)) ? "" : schemaToStoreInCommit);
  commitMetadata.setOperationType(operationType);
  return commitMetadata;
}
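A hedged usage sketch of buildMetadata follows. It assumes CommitUtils lives in org.apache.hudi.common.util as in recent Hudi releases; the extra-metadata key, file id, and schema string are illustrative values:

import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.util.CommitUtils;
import org.apache.hudi.common.util.Option;

public class BuildMetadataExample {

  public static void main(String[] args) {
    HoodieWriteStat stat = new HoodieWriteStat();
    stat.setPartitionPath("2022/01/01");
    stat.setFileId("file-id-1"); // hypothetical file group id
    List<HoodieWriteStat> writeStats = Collections.singletonList(stat);

    HoodieCommitMetadata metadata = CommitUtils.buildMetadata(
        writeStats,
        Collections.emptyMap(),                         // no replaced file ids (not a replacecommit)
        Option.of(Map.of("pipeline.run.id", "run-42")), // extra metadata; key is illustrative
        WriteOperationType.UPSERT,
        "{\"type\":\"record\",\"name\":\"rec\",\"fields\":[]}", // schema to store in the commit
        "commit");                                      // commitActionType
    System.out.println(metadata.getOperationType());
  }
}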