use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class TestHoodieClientOnCopyOnWriteStorage method testUpsertsInternal.
/**
 * Test one of the HoodieWriteClient upsert(Prepped) APIs.
 *
 * @param config Write config
 * @param writeFn One of the Hoodie write function APIs
 * @param isPrepped Whether to exercise the prepped variant of the write API
 * @throws Exception in case of error
 */
private void testUpsertsInternal(HoodieWriteConfig config,
    Function3<JavaRDD<WriteStatus>, SparkRDDWriteClient, JavaRDD<HoodieRecord>, String> writeFn,
    boolean isPrepped) throws Exception {
  // Force using older timeline layout
  HoodieWriteConfig hoodieWriteConfig = getConfigBuilder(HoodieFailedWritesCleaningPolicy.LAZY)
      .withRollbackUsingMarkers(true)
      .withProps(config.getProps())
      .withTimelineLayoutVersion(VERSION_0)
      .build();
  HoodieTableMetaClient.withPropertyBuilder()
      .fromMetaClient(metaClient)
      .setTimelineLayoutVersion(VERSION_0)
      .setPopulateMetaFields(config.populateMetaFields())
      .initTable(metaClient.getHadoopConf(), metaClient.getBasePath());
  SparkRDDWriteClient client = getHoodieWriteClient(hoodieWriteConfig);

  // Write 1 (only inserts)
  String newCommitTime = "001";
  String initCommitTime = "000";
  int numRecords = 200;
  insertFirstBatch(hoodieWriteConfig, client, newCommitTime, initCommitTime, numRecords,
      SparkRDDWriteClient::insert, isPrepped, true, numRecords, config.populateMetaFields());

  // Write 2 (updates)
  String prevCommitTime = newCommitTime;
  newCommitTime = "004";
  numRecords = 100;
  String commitTimeBetweenPrevAndNew = "002";
  updateBatch(hoodieWriteConfig, client, newCommitTime, prevCommitTime,
      Option.of(Arrays.asList(commitTimeBetweenPrevAndNew)), initCommitTime, numRecords, writeFn,
      isPrepped, true, numRecords, 200, 2, config.populateMetaFields());

  // Delete 1
  prevCommitTime = newCommitTime;
  newCommitTime = "005";
  numRecords = 50;
  deleteBatch(hoodieWriteConfig, client, newCommitTime, prevCommitTime, initCommitTime, numRecords,
      SparkRDDWriteClient::delete, isPrepped, true, 0, 150, config.populateMetaFields());

  // Now simulate an upgrade and perform a restore operation
  HoodieWriteConfig newConfig = getConfigBuilder()
      .withProps(config.getProps())
      .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION)
      .build();
  client = getHoodieWriteClient(newConfig);
  client.savepoint("004", "user1", "comment1");
  client.restoreToInstant("004");
  assertFalse(metaClient.reloadActiveTimeline().getRollbackTimeline().lastInstant().isPresent());

  // Check the entire dataset has all records still
  String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length];
  for (int i = 0; i < fullPartitionPaths.length; i++) {
    fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]);
  }
  assertEquals(200, HoodieClientTestUtils.read(jsc, basePath, sqlContext, fs, fullPartitionPaths).count(),
      "Must contain " + 200 + " records");

  // Perform Delete again on upgraded dataset.
  prevCommitTime = newCommitTime;
  newCommitTime = "006";
  numRecords = 50;
  deleteBatch(newConfig, client, newCommitTime, prevCommitTime, initCommitTime, numRecords,
      SparkRDDWriteClient::delete, isPrepped, true, 0, 150);

  HoodieActiveTimeline activeTimeline = new HoodieActiveTimeline(metaClient, false);
  List<HoodieInstant> instants = activeTimeline.getCommitTimeline().getInstants().collect(Collectors.toList());
  assertEquals(5, instants.size());
  assertEquals(new HoodieInstant(COMPLETED, COMMIT_ACTION, "001"), instants.get(0));
  assertEquals(new HoodieInstant(COMPLETED, COMMIT_ACTION, "004"), instants.get(1));
  // New Format should have all states of instants
  assertEquals(new HoodieInstant(REQUESTED, COMMIT_ACTION, "006"), instants.get(2));
  assertEquals(new HoodieInstant(INFLIGHT, COMMIT_ACTION, "006"), instants.get(3));
  assertEquals(new HoodieInstant(COMPLETED, COMMIT_ACTION, "006"), instants.get(4));

  final HoodieWriteConfig cfg = hoodieWriteConfig;
  final String instantTime = "007";
  HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
      .setConf(jsc.hadoopConfiguration())
      .setBasePath(basePath)
      .build();
  String basePathStr = basePath;
  HoodieTable table = getHoodieTable(metaClient, cfg);
  String extension = metaClient.getTableConfig().getBaseFileFormat().getFileExtension();
  jsc.parallelize(Arrays.asList(1)).map(e -> {
    HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
        metaClient.getActiveTimeline().getInstantDetails(
            metaClient.getCommitsTimeline().filterCompletedInstants().lastInstant().get()).get(),
        HoodieCommitMetadata.class);
    String filePath = commitMetadata.getPartitionToWriteStats().values().stream()
        .flatMap(w -> w.stream())
        .filter(s -> s.getPath().endsWith(extension))
        .findAny().map(ee -> ee.getPath()).orElse(null);
    String partitionPath = commitMetadata.getPartitionToWriteStats().values().stream()
        .flatMap(w -> w.stream())
        .filter(s -> s.getPath().endsWith(extension))
        .findAny().map(ee -> ee.getPartitionPath()).orElse(null);
    Path baseFilePath = new Path(basePathStr, filePath);
    HoodieBaseFile baseFile = new HoodieBaseFile(baseFilePath.toString());
    try {
      HoodieMergeHandle handle = new HoodieMergeHandle(cfg, instantTime, table, new HashMap<>(),
          partitionPath, FSUtils.getFileId(baseFilePath.getName()), baseFile, new SparkTaskContextSupplier(),
          config.populateMetaFields() ? Option.empty()
              : Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(new TypedProperties(config.getProps()))));
      WriteStatus writeStatus = new WriteStatus(false, 0.0);
      writeStatus.setStat(new HoodieWriteStat());
      writeStatus.getStat().setNumWrites(0);
      handle.performMergeDataValidationCheck(writeStatus);
    } catch (HoodieCorruptedDataException e1) {
      fail("Exception not expected because merge validation check is disabled");
    }
    try {
      final String newInstantTime = "006";
      cfg.getProps().setProperty("hoodie.merge.data.validation.enabled", "true");
      HoodieWriteConfig cfg2 = HoodieWriteConfig.newBuilder().withProps(cfg.getProps()).build();
      HoodieMergeHandle handle = new HoodieMergeHandle(cfg2, newInstantTime, table, new HashMap<>(),
          partitionPath, FSUtils.getFileId(baseFilePath.getName()), baseFile, new SparkTaskContextSupplier(),
          config.populateMetaFields() ? Option.empty()
              : Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(new TypedProperties(config.getProps()))));
      WriteStatus writeStatus = new WriteStatus(false, 0.0);
      writeStatus.setStat(new HoodieWriteStat());
      writeStatus.getStat().setNumWrites(0);
      handle.performMergeDataValidationCheck(writeStatus);
      fail("The above line should have thrown an exception");
    } catch (HoodieCorruptedDataException e2) {
      // expected
    }
    return true;
  }).collect();
}
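The Spark map function above folds the whole HoodieCommitMetadata lookup into a single expression. A minimal sketch of the same read pattern, pulled out into a helper for readability (the helper name latestCommitFilePaths is illustrative, not part of the test, and the sketch assumes the same imports as the test class):

private static List<String> latestCommitFilePaths(HoodieTableMetaClient metaClient) throws IOException {
  // Latest completed commit on the timeline.
  HoodieInstant lastCommit = metaClient.getCommitsTimeline().filterCompletedInstants().lastInstant().get();
  // Deserialize its commit metadata and collect the relative paths of every file it wrote.
  byte[] details = metaClient.getActiveTimeline().getInstantDetails(lastCommit).get();
  HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(details, HoodieCommitMetadata.class);
  return commitMetadata.getPartitionToWriteStats().values().stream()
      .flatMap(List::stream)
      .map(HoodieWriteStat::getPath)
      .collect(Collectors.toList());
}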
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class TestHoodieClientOnCopyOnWriteStorage method testMetadataStatsOnCommit.
/**
 * Test to ensure commit metadata points to valid files.
 */
@ParameterizedTest
@MethodSource("populateMetaFieldsParams")
public void testMetadataStatsOnCommit(boolean populateMetaFields) throws Exception {
  HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder().withAutoCommit(false);
  addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
  HoodieWriteConfig cfg = cfgBuilder.build();
  SparkRDDWriteClient client = getHoodieWriteClient(cfg);

  String instantTime0 = "000";
  client.startCommitWithTime(instantTime0);
  List<HoodieRecord> records0 = dataGen.generateInserts(instantTime0, 200);
  JavaRDD<HoodieRecord> writeRecords0 = jsc.parallelize(records0, 1);
  JavaRDD<WriteStatus> result0 = client.bulkInsert(writeRecords0, instantTime0);
  assertTrue(client.commit(instantTime0, result0), "Commit should succeed");
  assertTrue(testTable.commitExists(instantTime0), "After explicit commit, commit file should be created");

  // Read from commit file
  try (FSDataInputStream inputStream = fs.open(testTable.getCommitFilePath(instantTime0))) {
    String everything = FileIOUtils.readAsUTFString(inputStream);
    HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class);
    int inserts = 0;
    for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
      for (HoodieWriteStat stat : pstat.getValue()) {
        inserts += stat.getNumInserts();
      }
    }
    assertEquals(200, inserts);
  }

  // Update + Inserts such that they just expand file1
  String instantTime1 = "001";
  client.startCommitWithTime(instantTime1);
  List<HoodieRecord> records1 = dataGen.generateUpdates(instantTime1, records0);
  JavaRDD<HoodieRecord> writeRecords1 = jsc.parallelize(records1, 1);
  JavaRDD<WriteStatus> result1 = client.upsert(writeRecords1, instantTime1);
  assertTrue(client.commit(instantTime1, result1), "Commit should succeed");
  assertTrue(testTable.commitExists(instantTime1), "After explicit commit, commit file should be created");

  // Read from commit file
  try (FSDataInputStream inputStream = fs.open(testTable.getCommitFilePath(instantTime1))) {
    String everything = FileIOUtils.readAsUTFString(inputStream);
    HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything, HoodieCommitMetadata.class);
    int inserts = 0;
    int upserts = 0;
    for (Map.Entry<String, List<HoodieWriteStat>> pstat : metadata.getPartitionToWriteStats().entrySet()) {
      for (HoodieWriteStat stat : pstat.getValue()) {
        inserts += stat.getNumInserts();
        upserts += stat.getNumUpdateWrites();
      }
    }
    assertEquals(0, inserts);
    assertEquals(200, upserts);
  }
}
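The two try-with-resources blocks above repeat the same parse-and-aggregate steps against a commit file. A condensed sketch of that pattern as a standalone helper (the helper name totalInserts is illustrative; it assumes the same imports as the test class):

private static long totalInserts(FileSystem fs, Path commitFilePath) throws Exception {
  try (FSDataInputStream in = fs.open(commitFilePath)) {
    // Commit files are plain JSON, so HoodieCommitMetadata can be rebuilt from the raw string.
    String json = FileIOUtils.readAsUTFString(in);
    HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(json, HoodieCommitMetadata.class);
    long inserts = 0;
    for (List<HoodieWriteStat> stats : metadata.getPartitionToWriteStats().values()) {
      for (HoodieWriteStat stat : stats) {
        inserts += stat.getNumInserts();
      }
    }
    return inserts;
  }
}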
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class TestClientRollback method testFailedRollbackCommit.
/**
 * Test cases for the effects of rolling back completed/inflight commits.
 */
@Test
public void testFailedRollbackCommit() throws Exception {
  // Let's create some commit files and base files
  final String p1 = "2016/05/01";
  final String p2 = "2016/05/02";
  final String p3 = "2016/05/06";
  final String commitTime1 = "20160501010101";
  final String commitTime2 = "20160502020601";
  final String commitTime3 = "20160506030611";
  Map<String, String> partitionAndFileId1 = new HashMap<String, String>() {
    {
      put(p1, "id11");
      put(p2, "id12");
      put(p3, "id13");
    }
  };
  Map<String, String> partitionAndFileId2 = new HashMap<String, String>() {
    {
      put(p1, "id21");
      put(p2, "id22");
      put(p3, "id23");
    }
  };
  Map<String, String> partitionAndFileId3 = new HashMap<String, String>() {
    {
      put(p1, "id31");
      put(p2, "id32");
      put(p3, "id33");
    }
  };
  HoodieTestTable testTable = HoodieTestTable.of(metaClient)
      .withPartitionMetaFiles(p1, p2, p3)
      .addCommit(commitTime1)
      .withBaseFilesInPartitions(partitionAndFileId1)
      .addCommit(commitTime2)
      .withBaseFilesInPartitions(partitionAndFileId2)
      .addInflightCommit(commitTime3)
      .withBaseFilesInPartitions(partitionAndFileId3);
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
      .withPath(basePath)
      .withRollbackUsingMarkers(false)
      .withCompactionConfig(HoodieCompactionConfig.newBuilder()
          .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).build())
      .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build())
      .build();

  try (SparkRDDWriteClient client = getHoodieWriteClient(config)) {
    // Rollback commit3
    client.rollback(commitTime3);
    assertFalse(testTable.inflightCommitExists(commitTime3));
    assertFalse(testTable.baseFilesExist(partitionAndFileId3, commitTime3));
    assertTrue(testTable.baseFilesExist(partitionAndFileId2, commitTime2));
    metaClient.reloadActiveTimeline();
    List<HoodieInstant> rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList());
    assertEquals(rollbackInstants.size(), 1);
    HoodieInstant rollbackInstant = rollbackInstants.get(0);

    // delete the completed rollback meta file and retry the rollback.
    FileCreateUtils.deleteRollbackCommit(basePath, rollbackInstant.getTimestamp());
    // recreate the actual commit files so that we can retry the rollback
    testTable.addInflightCommit(commitTime3).withBaseFilesInPartitions(partitionAndFileId3);
    // retry rolling back the commit again.
    client.rollback(commitTime3);
    // verify there are no extra rollback instants
    metaClient.reloadActiveTimeline();
    rollbackInstants = metaClient.getActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList());
    assertEquals(rollbackInstants.size(), 1);
    assertEquals(rollbackInstants.get(0), rollbackInstant);

    final String commitTime4 = "20160507040601";
    final String commitTime5 = "20160507050611";
    // add an inflight compaction, then roll it back
    testTable.addInflightCompaction(commitTime4, new HoodieCommitMetadata());
    HoodieRollbackPlan rollbackPlan = new HoodieRollbackPlan();
    rollbackPlan.setRollbackRequests(Collections.emptyList());
    rollbackPlan.setInstantToRollback(new HoodieInstantInfo(commitTime4, HoodieTimeline.COMPACTION_ACTION));
    testTable.addRequestedRollback(commitTime5, rollbackPlan);
    // the compaction instants should be excluded
    metaClient.reloadActiveTimeline();
    assertEquals(0, client.getPendingRollbackInfos(metaClient).size());
    // verify there are no extra rollback instants
    client.rollback(commitTime4);
    metaClient.reloadActiveTimeline();
    rollbackInstants = metaClient.reloadActiveTimeline().getRollbackTimeline().getInstants().collect(Collectors.toList());
    assertEquals(2, rollbackInstants.size());
  }
}
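For the pending compaction above, HoodieCommitMetadata acts only as a placeholder payload on the test timeline. A short sketch of that step with an explicitly set operation type (illustrative only; the test itself passes an empty instance, and WriteOperationType is assumed to be imported):

HoodieCommitMetadata compactionMetadata = new HoodieCommitMetadata();
// Optional: tag the placeholder metadata with the operation that would have produced it.
compactionMetadata.setOperationType(WriteOperationType.COMPACT);
testTable.addInflightCompaction(commitTime4, compactionMetadata);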
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class SparkBootstrapCommitActionExecutor method commit.
protected void commit(Option<Map<String, String>> extraMetadata, HoodieWriteMetadata<HoodieData<WriteStatus>> result, List<HoodieWriteStat> stats) {
  String actionType = table.getMetaClient().getCommitActionType();
  LOG.info("Committing " + instantTime + ", action Type " + actionType);
  // Create a Hoodie table which encapsulates the commits and files visible
  HoodieSparkTable table = HoodieSparkTable.create(config, context);
  HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
  HoodieCommitMetadata metadata = new HoodieCommitMetadata();
  result.setCommitted(true);
  stats.forEach(stat -> metadata.addWriteStat(stat.getPartitionPath(), stat));
  result.setWriteStats(stats);
  // Finalize write
  finalizeWrite(instantTime, stats, result);
  // add in extra metadata
  if (extraMetadata.isPresent()) {
    extraMetadata.get().forEach(metadata::addMetadata);
  }
  metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, getSchemaToStoreInCommit());
  metadata.setOperationType(operationType);
  writeTableMetadata(metadata, actionType);
  try {
    activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, instantTime),
        Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    LOG.info("Committed " + instantTime);
  } catch (IOException e) {
    throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime, e);
  }
  result.setCommitMetadata(Option.of(metadata));
}
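The metadata-building steps in this commit() boil down to: collect per-partition write stats, attach extra key/values such as the schema, and serialize the result as the commit payload. A minimal sketch of that core (the helper name buildCommitPayload is illustrative, and the schema argument stands in for getSchemaToStoreInCommit()):

private static byte[] buildCommitPayload(List<HoodieWriteStat> stats, String schema) throws IOException {
  HoodieCommitMetadata metadata = new HoodieCommitMetadata();
  // Write stats are grouped by partition path inside the metadata.
  stats.forEach(stat -> metadata.addWriteStat(stat.getPartitionPath(), stat));
  metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, schema);
  // saveAsComplete(...) expects the serialized JSON bytes of the metadata.
  return metadata.toJsonString().getBytes(StandardCharsets.UTF_8);
}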
use of org.apache.hudi.common.model.HoodieCommitMetadata in project hudi by apache.
the class BaseSparkCommitActionExecutor method commit.
protected void commit(Option<Map<String, String>> extraMetadata, HoodieWriteMetadata<HoodieData<WriteStatus>> result, List<HoodieWriteStat> writeStats) {
  String actionType = getCommitActionType();
  LOG.info("Committing " + instantTime + ", action Type " + actionType + ", operation Type " + operationType);
  result.setCommitted(true);
  result.setWriteStats(writeStats);
  // Finalize write
  finalizeWrite(instantTime, writeStats, result);
  try {
    HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
    HoodieCommitMetadata metadata = result.getCommitMetadata().get();
    writeTableMetadata(metadata, actionType);
    activeTimeline.saveAsComplete(new HoodieInstant(true, getCommitActionType(), instantTime),
        Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
    LOG.info("Committed " + instantTime);
    result.setCommitMetadata(Option.of(metadata));
  } catch (IOException e) {
    throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + instantTime, e);
  }
}
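Unlike the bootstrap executor, this variant reuses the HoodieCommitMetadata already attached to the write result and only serializes it for saveAsComplete. As a sanity check, the bytes it writes can be parsed back with the same fromBytes helper used on the read path (an illustrative round-trip sketch, assuming the same imports as the executor):

private static HoodieCommitMetadata roundTrip(HoodieCommitMetadata metadata) throws IOException {
  // Serialize exactly like the payload handed to saveAsComplete(...), then parse it back.
  byte[] payload = metadata.toJsonString().getBytes(StandardCharsets.UTF_8);
  return HoodieCommitMetadata.fromBytes(payload, HoodieCommitMetadata.class);
}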