use of org.apache.hudi.common.util.Option in project hudi by apache.
the class BaseRestoreActionExecutor method getInstantsToRollback.
/**
 * Collects the instants named in the restore plan that are still present on the active write timeline.
 *
 * <p>A plan entry is matched against the write timeline by both its commit time and its action.
 * Entries without a match are skipped with a warning: an earlier restore attempt that crashed
 * mid-way may already have rolled them back completely.
 *
 * @param restoreInstant the restore instant whose plan is read
 * @return the matching timeline instants, in the order the plan lists them
 * @throws IOException propagated from the calls made while loading the restore plan
 */
private List<HoodieInstant> getInstantsToRollback(HoodieInstant restoreInstant) throws IOException {
  HoodieRestorePlan plan = RestoreUtils.getRestorePlan(table.getMetaClient(), restoreInstant);
  List<HoodieInstant> pending = new ArrayList<>();
  for (HoodieInstantInfo info : plan.getInstantsToRollback()) {
    Option<HoodieInstant> match = table.getActiveTimeline()
        .getWriteTimeline()
        .filter(candidate -> candidate.getTimestamp().equals(info.getCommitTime())
            && candidate.getAction().equals(info.getAction()))
        .firstInstant();
    if (!match.isPresent()) {
      // Nothing left on the timeline for this entry, so there is nothing to roll back again.
      LOG.warn("Ignoring already rolledback instant " + info.toString());
      continue;
    }
    pending.add(match.get());
  }
  return pending;
}
use of org.apache.hudi.common.util.Option in project hudi by apache.
the class BaseRestoreActionExecutor method execute.
/**
 * Runs the pending restore whose timestamp equals {@code instantTime}.
 *
 * <p>The restore instant is looked up among the requested/inflight instants of the restore timeline.
 * The instants that still need rolling back are resolved from its plan, a requested restore is
 * moved to inflight, every instant is rolled back via {@code rollbackInstant}, and the collected
 * rollback metadata is handed to {@code finishRestore} together with the elapsed time.
 *
 * @return the restore metadata produced by {@code finishRestore}
 * @throws HoodieRollbackException if no pending restore instant matches {@code instantTime}
 * @throws HoodieRestoreException if an {@link IOException} is raised while restoring
 */
@Override
public HoodieRestoreMetadata execute() {
  HoodieTimer timer = new HoodieTimer();
  timer.startTimer();

  Option<HoodieInstant> pendingRestoreOpt = table.getRestoreTimeline()
      .filterInflightsAndRequested()
      .filter(candidate -> candidate.getTimestamp().equals(instantTime))
      .firstInstant();
  if (!pendingRestoreOpt.isPresent()) {
    throw new HoodieRollbackException("No pending restore instants found to execute restore");
  }

  HoodieInstant pendingRestore = pendingRestoreOpt.get();
  try {
    List<HoodieInstant> toRollback = getInstantsToRollback(pendingRestore);

    HoodieInstant.State state = pendingRestore.getState();
    ValidationUtils.checkArgument(state.equals(HoodieInstant.State.REQUESTED) || state.equals(HoodieInstant.State.INFLIGHT));

    if (pendingRestore.isRequested()) {
      table.getActiveTimeline().transitionRestoreRequestedToInflight(pendingRestore);
    }

    // Keyed by instant timestamp; each instant contributes exactly one rollback metadata entry.
    Map<String, List<HoodieRollbackMetadata>> metadataByInstant = new HashMap<>();
    for (HoodieInstant instant : toRollback) {
      metadataByInstant.put(instant.getTimestamp(), Collections.singletonList(rollbackInstant(instant)));
      LOG.info("Deleted instant " + instant);
    }
    return finishRestore(metadataByInstant, toRollback, timer.endTimer());
  } catch (IOException io) {
    throw new HoodieRestoreException("unable to Restore instant " + pendingRestore, io);
  }
}
use of org.apache.hudi.common.util.Option in project hudi by apache.
the class BaseRollbackActionExecutor method execute.
/**
 * Runs the pending rollback whose timestamp equals {@code instantTime}.
 *
 * <p>The active timeline is reloaded first. The rollback instant is then looked up among the
 * requested/inflight instants of the rollback timeline, its plan is loaded, and both are passed
 * to {@code runRollback}.
 *
 * @return the rollback metadata returned by {@code runRollback}
 * @throws HoodieRollbackException if no pending rollback instant matches {@code instantTime}
 * @throws HoodieIOException if an {@link IOException} is raised inside the plan-fetch/rollback step
 */
@Override
public HoodieRollbackMetadata execute() {
  table.getMetaClient().reloadActiveTimeline();

  Option<HoodieInstant> pendingRollbackOpt = table.getRollbackTimeline()
      .filterInflightsAndRequested()
      .filter(candidate -> candidate.getTimestamp().equals(instantTime))
      .firstInstant();

  if (pendingRollbackOpt.isPresent()) {
    HoodieInstant pendingRollback = pendingRollbackOpt.get();
    try {
      HoodieRollbackPlan plan = RollbackUtils.getRollbackPlan(table.getMetaClient(), pendingRollback);
      return runRollback(table, pendingRollback, plan);
    } catch (IOException e) {
      throw new HoodieIOException("Failed to fetch rollback plan for commit " + instantTime, e);
    }
  }

  throw new HoodieRollbackException("No pending rollback instants found to execute rollback");
}
use of org.apache.hudi.common.util.Option in project hudi by apache.
the class PartitionAwareClusteringPlanStrategy method generateClusteringPlan.
/**
 * Builds a clustering plan from the table's partitions.
 *
 * <p>All partition paths are listed, narrowed by {@code getMatchedPartitions} and then by
 * {@code filterPartitionPaths}. For each remaining partition the eligible file slices are turned
 * into clustering groups. The number of groups is capped by the configured maximum both per
 * partition and across all partitions.
 *
 * @return the clustering plan, or an empty {@link Option} when no partition is selected or no
 *         clustering group is produced
 */
@Override
public Option<HoodieClusteringPlan> generateClusteringPlan() {
  HoodieTableMetaClient metaClient = getHoodieTable().getMetaClient();
  LOG.info("Scheduling clustering for " + metaClient.getBasePath());
  HoodieWriteConfig config = getWriteConfig();

  List<String> allPartitions =
      FSUtils.getAllPartitionPaths(getEngineContext(), config.getMetadataConfig(), metaClient.getBasePath());
  // Keep only the partitions matched by the configuration, if any matching is set.
  List<String> matchedPartitions = getMatchedPartitions(config, allPartitions);
  // Narrow further if needed, to reduce list-status calls.
  List<String> candidatePartitions = filterPartitionPaths(matchedPartitions);

  if (candidatePartitions.isEmpty()) {
    // No partition could be picked, so there is no clustering plan.
    return Option.empty();
  }

  List<HoodieClusteringGroup> groupsAcrossPartitions = getEngineContext().flatMap(
      candidatePartitions,
      partitionPath -> {
        List<FileSlice> eligibleSlices =
            getFileSlicesEligibleForClustering(partitionPath).collect(Collectors.toList());
        return buildClusteringGroupsForPartition(partitionPath, eligibleSlices)
            .limit(getWriteConfig().getClusteringMaxNumGroups());
      },
      candidatePartitions.size());

  List<HoodieClusteringGroup> clusteringGroups = groupsAcrossPartitions.stream()
      .limit(getWriteConfig().getClusteringMaxNumGroups())
      .collect(Collectors.toList());

  if (clusteringGroups.isEmpty()) {
    LOG.info("No data available to cluster");
    return Option.empty();
  }

  HoodieClusteringStrategy strategy = HoodieClusteringStrategy.newBuilder()
      .setStrategyClassName(getWriteConfig().getClusteringExecutionStrategyClass())
      .setStrategyParams(getStrategyParams())
      .build();

  HoodieClusteringPlan plan = HoodieClusteringPlan.newBuilder()
      .setStrategy(strategy)
      .setInputGroups(clusteringGroups)
      .setExtraMetadata(getExtraMetadata())
      .setVersion(getPlanVersion())
      .setPreserveHoodieMetadata(getWriteConfig().isPreserveHoodieCommitMetadataForClustering())
      .build();
  return Option.of(plan);
}
use of org.apache.hudi.common.util.Option in project hudi by apache.
the class TestHoodieIndex method testSimpleTagLocationAndUpdate.
/**
 * Verifies index tagging across the life cycle of a single commit.
 *
 * <p>Steps checked:
 * <ol>
 *   <li>Before anything is written, no record has a known location.</li>
 *   <li>After the upsert but before the commit, still no record has a known location.</li>
 *   <li>After the commit, every record is tagged with the commit time, record keys are unique,
 *       partition paths are preserved, and {@code getRecordLocations} returns one entry per key.</li>
 * </ol>
 *
 * @param indexType the index type under test
 * @param populateMetaFields forwarded to {@code setUp}
 * @param enableMetadataIndex forwarded to {@code setUp}
 * @throws Exception if set-up or any write/index operation fails
 */
@ParameterizedTest
@MethodSource("indexTypeParams")
public void testSimpleTagLocationAndUpdate(IndexType indexType, boolean populateMetaFields, boolean enableMetadataIndex) throws Exception {
  setUp(indexType, populateMetaFields, enableMetadataIndex);
  String newCommitTime = "001";
  int totalRecords = 10 + random.nextInt(20);
  List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, totalRecords);
  JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1);
  metaClient = HoodieTableMetaClient.reload(metaClient);
  HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient);

  // Test tagLocation without any entries in index.
  // Uses assertEquals rather than the `assert` statement so the check also runs without -ea.
  JavaRDD<HoodieRecord> javaRDD = tagLocation(index, writeRecords, hoodieTable);
  assertEquals(0, javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size());

  // Insert totalRecords records
  writeClient.startCommitWithTime(newCommitTime);
  JavaRDD<WriteStatus> writeStatues = writeClient.upsert(writeRecords, newCommitTime);
  Assertions.assertNoWriteErrors(writeStatues.collect());

  // Now tagLocation for these records; the index should not tag them because the commit
  // has not been completed yet (writeClient.commit is only called below).
  javaRDD = tagLocation(index, writeRecords, hoodieTable);
  assertEquals(0, javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size());

  // Now commit this & update location of records inserted and validate no errors
  writeClient.commit(newCommitTime, writeStatues);

  // Now tagLocation for these records, index should tag them correctly
  metaClient = HoodieTableMetaClient.reload(metaClient);
  hoodieTable = HoodieSparkTable.create(config, context, metaClient);
  javaRDD = tagLocation(index, writeRecords, hoodieTable);
  Map<String, String> recordKeyToPartitionPathMap = new HashMap<>();
  List<HoodieRecord> hoodieRecords = writeRecords.collect();
  hoodieRecords.forEach(entry -> recordKeyToPartitionPathMap.put(entry.getRecordKey(), entry.getPartitionPath()));

  assertEquals(totalRecords, javaRDD.filter(record -> record.isCurrentLocationKnown()).collect().size());
  assertEquals(totalRecords, javaRDD.map(record -> record.getKey().getRecordKey()).distinct().count());
  assertEquals(totalRecords, javaRDD.filter(record -> (record.getCurrentLocation() != null && record.getCurrentLocation().getInstantTime().equals(newCommitTime))).distinct().count());
  javaRDD.foreach(entry -> assertEquals(recordKeyToPartitionPathMap.get(entry.getRecordKey()), entry.getPartitionPath(), "PartitionPath mismatch"));

  // Cross-check the key -> location lookup against the same set of keys.
  JavaRDD<HoodieKey> hoodieKeyJavaRDD = writeRecords.map(entry -> entry.getKey());
  JavaPairRDD<HoodieKey, Option<Pair<String, String>>> recordLocations = getRecordLocations(hoodieKeyJavaRDD, hoodieTable);
  List<HoodieKey> hoodieKeys = hoodieKeyJavaRDD.collect();
  assertEquals(totalRecords, recordLocations.collect().size());
  assertEquals(totalRecords, recordLocations.map(record -> record._1).distinct().count());
  recordLocations.foreach(entry -> assertTrue(hoodieKeys.contains(entry._1), "Missing HoodieKey"));
  recordLocations.foreach(entry -> assertEquals(recordKeyToPartitionPathMap.get(entry._1.getRecordKey()), entry._1.getPartitionPath(), "PartitionPath mismatch"));
}
Aggregations