use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
the class TestHoodieDeltaStreamerWithMultiWriter method testUpsertsContinuousModeWithMultipleWritersWithoutConflicts.
@ParameterizedTest
@EnumSource(HoodieTableType.class)
void testUpsertsContinuousModeWithMultipleWritersWithoutConflicts(HoodieTableType tableType) throws Exception {
// NOTE : Overriding the LockProvider to InProcessLockProvider since Zookeeper locks work in unit test but fail on Jenkins with connection timeouts
basePath = Paths.get(URI.create(basePath().replaceAll("/$", ""))).toString();
propsFilePath = basePath + "/" + PROPS_FILENAME_TEST_MULTI_WRITER;
tableBasePath = basePath + "/testtable_" + tableType;
prepareInitialConfigs(fs(), basePath, "foo");
TypedProperties props = prepareMultiWriterProps(fs(), basePath, propsFilePath);
props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider");
props.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000");
UtilitiesTestBase.Helpers.savePropsToDFS(props, fs(), propsFilePath);
// Keep it higher than batch-size to test continuous mode
int totalRecords = 3000;
HoodieDeltaStreamer.Config prepJobConfig = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TripsWithDistanceTransformer.class.getName()));
prepJobConfig.continuousMode = true;
prepJobConfig.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
prepJobConfig.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key()));
HoodieDeltaStreamer prepJob = new HoodieDeltaStreamer(prepJobConfig, jsc());
// Prepare base dataset with some commits
deltaStreamerTestRunner(prepJob, prepJobConfig, (r) -> {
if (tableType.equals(HoodieTableType.MERGE_ON_READ)) {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNDeltaCommits(3, tableBasePath, fs());
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(1, tableBasePath, fs());
} else {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs());
}
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
return true;
});
// create new ingestion & backfill job config to generate only INSERTS to avoid conflict
props = prepareMultiWriterProps(fs(), basePath, propsFilePath);
props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider");
props.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000");
props.setProperty("hoodie.test.source.generate.inserts", "true");
UtilitiesTestBase.Helpers.savePropsToDFS(props, fs(), basePath + "/" + PROPS_FILENAME_TEST_MULTI_WRITER);
HoodieDeltaStreamer.Config cfgBackfillJob2 = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.INSERT, propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TestIdentityTransformer.class.getName()));
cfgBackfillJob2.continuousMode = false;
HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(hadoopConf()).setBasePath(tableBasePath).build();
HoodieTimeline timeline = meta.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(timeline.firstInstant().get()).get(), HoodieCommitMetadata.class);
cfgBackfillJob2.checkpoint = commitMetadata.getMetadata(CHECKPOINT_KEY);
cfgBackfillJob2.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
cfgBackfillJob2.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key()));
HoodieDeltaStreamer.Config cfgIngestionJob2 = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TestIdentityTransformer.class.getName()));
cfgIngestionJob2.continuousMode = true;
cfgIngestionJob2.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
cfgIngestionJob2.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key()));
// re-init ingestion job
HoodieDeltaStreamer ingestionJob3 = new HoodieDeltaStreamer(cfgIngestionJob2, jsc());
// re-init backfill job
HoodieDeltaStreamer backfillJob2 = new HoodieDeltaStreamer(cfgBackfillJob2, jsc());
// run ingestion & backfill in parallel, avoid conflict and succeed both
runJobsInParallel(tableBasePath, tableType, totalRecords, ingestionJob3, cfgIngestionJob2, backfillJob2, cfgBackfillJob2, false, "batch2");
}
use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
the class TestHoodieDeltaStreamerWithMultiWriter method testLatestCheckpointCarryOverWithMultipleWriters.
@Disabled
@ParameterizedTest
@EnumSource(value = HoodieTableType.class, names = { "COPY_ON_WRITE" })
void testLatestCheckpointCarryOverWithMultipleWriters(HoodieTableType tableType) throws Exception {
// NOTE : Overriding the LockProvider to InProcessLockProvider since Zookeeper locks work in unit test but fail on Jenkins with connection timeouts
basePath = Paths.get(URI.create(basePath().replaceAll("/$", ""))).toString();
propsFilePath = basePath + "/" + PROPS_FILENAME_TEST_MULTI_WRITER;
tableBasePath = basePath + "/testtable_" + tableType;
prepareInitialConfigs(fs(), basePath, "foo");
TypedProperties props = prepareMultiWriterProps(fs(), basePath, propsFilePath);
props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider");
props.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000");
UtilitiesTestBase.Helpers.savePropsToDFS(props, fs(), propsFilePath);
// Keep it higher than batch-size to test continuous mode
int totalRecords = 3000;
HoodieDeltaStreamer.Config prepJobConfig = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TripsWithDistanceTransformer.class.getName()));
prepJobConfig.continuousMode = true;
prepJobConfig.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
prepJobConfig.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key()));
HoodieDeltaStreamer prepJob = new HoodieDeltaStreamer(prepJobConfig, jsc());
// Prepare base dataset with some commits
deltaStreamerTestRunner(prepJob, prepJobConfig, (r) -> {
if (tableType.equals(HoodieTableType.MERGE_ON_READ)) {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNDeltaCommits(3, tableBasePath, fs());
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(1, tableBasePath, fs());
} else {
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs());
}
TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext());
return true;
});
// create a backfill job with checkpoint from the first instant
HoodieDeltaStreamer.Config cfgBackfillJob = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TripsWithDistanceTransformer.class.getName()));
cfgBackfillJob.continuousMode = false;
HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(hadoopConf()).setBasePath(tableBasePath).build();
HoodieTimeline timeline = meta.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
HoodieCommitMetadata commitMetadataForFirstInstant = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(timeline.firstInstant().get()).get(), HoodieCommitMetadata.class);
// run the backfill job
props = prepareMultiWriterProps(fs(), basePath, propsFilePath);
props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider");
props.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000");
UtilitiesTestBase.Helpers.savePropsToDFS(props, fs(), propsFilePath);
// get current checkpoint after preparing base dataset with some commits
HoodieCommitMetadata commitMetadataForLastInstant = getLatestMetadata(meta);
// Set checkpoint to the last successful position
cfgBackfillJob.checkpoint = commitMetadataForLastInstant.getMetadata(CHECKPOINT_KEY);
cfgBackfillJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
cfgBackfillJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key()));
HoodieDeltaStreamer backfillJob = new HoodieDeltaStreamer(cfgBackfillJob, jsc());
backfillJob.sync();
meta.reloadActiveTimeline();
int totalCommits = meta.getCommitsTimeline().filterCompletedInstants().countInstants();
// add a new commit to timeline which may not have the checkpoint in extra metadata
addCommitToTimeline(meta);
meta.reloadActiveTimeline();
verifyCommitMetadataCheckpoint(meta, null);
cfgBackfillJob.checkpoint = null;
// if deltastreamer checkpoint fetch does not walk back to older commits, this sync will fail
new HoodieDeltaStreamer(cfgBackfillJob, jsc()).sync();
meta.reloadActiveTimeline();
Assertions.assertEquals(totalCommits + 2, meta.getCommitsTimeline().filterCompletedInstants().countInstants());
verifyCommitMetadataCheckpoint(meta, "00008");
}
use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
the class BaseRollbackActionExecutor method validateRollbackCommitSequence.
private void validateRollbackCommitSequence() {
// writer mode.
if (config.getFailedWritesCleanPolicy().isEager()) {
final String instantTimeToRollback = instantToRollback.getTimestamp();
HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
HoodieTimeline inflightAndRequestedCommitTimeline = table.getPendingCommitTimeline();
// If there is a commit in-between or after that is not rolled back, then abort
if ((instantTimeToRollback != null) && !commitTimeline.empty() && !commitTimeline.findInstantsAfter(instantTimeToRollback, Integer.MAX_VALUE).empty()) {
// check if remnants are from a previous LAZY rollback config, if yes, let out of order rollback continue
try {
if (!HoodieHeartbeatClient.heartbeatExists(table.getMetaClient().getFs(), config.getBasePath(), instantTimeToRollback)) {
throw new HoodieRollbackException("Found commits after time :" + instantTimeToRollback + ", please rollback greater commits first");
}
} catch (IOException io) {
throw new HoodieRollbackException("Unable to rollback commits ", io);
}
}
List<String> inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
if ((instantTimeToRollback != null) && !inflights.isEmpty() && (inflights.indexOf(instantTimeToRollback) != inflights.size() - 1)) {
throw new HoodieRollbackException("Found in-flight commits after time :" + instantTimeToRollback + ", please rollback greater commits first");
}
}
}
use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
the class OneToZeroDowngradeHandler method downgrade.
@Override
public Map<ConfigProperty, String> downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
HoodieTable table = upgradeDowngradeHelper.getTable(config, context);
// fetch pending commit info
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
for (HoodieInstant inflightInstant : commits) {
// delete existing markers
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, inflightInstant.getTimestamp());
writeMarkers.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
}
return Collections.EMPTY_MAP;
}
use of org.apache.hudi.common.table.timeline.HoodieTimeline in project hudi by apache.
the class ZeroToOneUpgradeHandler method upgrade.
@Override
public Map<ConfigProperty, String> upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) {
// fetch pending commit info
HoodieTable table = upgradeDowngradeHelper.getTable(config, context);
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
if (commits.size() > 0 && instantTime != null) {
// ignore the latest inflight commit since a new commit would have been started and we need to fix any pending commits from previous launch
commits.remove(instantTime);
}
for (String commit : commits) {
// for every pending commit, delete old markers and re-create markers in new format
recreateMarkers(commit, table, context, config.getMarkersDeleteParallelism());
}
return Collections.EMPTY_MAP;
}
Aggregations