Use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
Class AbstractTableFileSystemView, method getBaseFileOn.
@Override
public final Option<HoodieBaseFile> getBaseFileOn(String partitionStr, String instantTime, String fileId) {
  try {
    readLock.lock();
    String partitionPath = formatPartitionKey(partitionStr);
    ensurePartitionLoadedCorrectly(partitionPath);
    if (isFileGroupReplacedBeforeOrOn(new HoodieFileGroupId(partitionPath, fileId), instantTime)) {
      return Option.empty();
    } else {
      return fetchHoodieFileGroup(partitionPath, fileId)
          .map(fileGroup -> fileGroup.getAllBaseFiles()
              .filter(baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), HoodieTimeline.EQUALS, instantTime))
              .filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df))
              .findFirst()
              .orElse(null))
          .map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, fileId), df));
    }
  } finally {
    readLock.unlock();
  }
}
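As a minimal usage sketch (not part of the Hudi sources), the method above could be exercised through a HoodieTableFileSystemView built over a table's completed commit timeline. The base path, partition, instant time, and file id below are placeholders, and the builder-style HoodieTableMetaClient construction assumes a reasonably recent Hudi release:

import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;

public class BaseFileLookupExample {
  public static void main(String[] args) {
    // Placeholder base path; replace with the table's actual location.
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("/tmp/hudi_table")
        .build();
    HoodieTimeline completedCommits = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, completedCommits);

    // getBaseFileOn returns the base file written exactly at the given instant, unless the
    // file group was replaced (e.g. by clustering) on or before that instant.
    Option<HoodieBaseFile> baseFile = fsView.getBaseFileOn("2021/03/01", "20210301120000", "file-id-1");
    if (baseFile.isPresent()) {
      System.out.println("Base file path: " + baseFile.get().getPath());
    }
  }
}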
Use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
Class AbstractTableFileSystemView, method isBaseFileDueToPendingCompaction.
/**
 * With async compaction, it is possible to see partial or complete base files produced by inflight compactions;
 * such base files must be ignored.
 *
 * @param baseFile base file to check
 * @return true if the base file was written by a pending compaction
 */
protected boolean isBaseFileDueToPendingCompaction(HoodieBaseFile baseFile) {
  final String partitionPath = getPartitionPathFromFilePath(baseFile.getPath());
  Option<Pair<String, CompactionOperation>> compactionWithInstantTime =
      getPendingCompactionOperationWithInstant(new HoodieFileGroupId(partitionPath, baseFile.getFileId()));
  return compactionWithInstantTime.isPresent()
      && null != compactionWithInstantTime.get().getKey()
      && baseFile.getCommitTime().equals(compactionWithInstantTime.get().getKey());
}
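The lookup above works because HoodieFileGroupId is a plain value key over (partition path, file id), so two ids built from the same pair compare equal and can index the pending-compaction map. A small sketch with placeholder values:

import org.apache.hudi.common.model.HoodieFileGroupId;

public class FileGroupIdKeyExample {
  public static void main(String[] args) {
    // Same partition path and file id -> equal keys, which is what makes
    // HoodieFileGroupId usable as the lookup key for pending compaction/clustering maps.
    HoodieFileGroupId a = new HoodieFileGroupId("2021/03/01", "file-id-1");
    HoodieFileGroupId b = new HoodieFileGroupId("2021/03/01", "file-id-1");
    System.out.println(a.equals(b));          // true
    System.out.println(a.getPartitionPath()); // 2021/03/01
    System.out.println(a.getFileId());        // file-id-1
  }
}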
Use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
Class ClusteringUtils, method getAllFileGroupsInPendingClusteringPlans.
/**
 * Get the mapping from file group id to pending clustering instant across all pending clustering plans.
 * This includes clustering operations in both 'requested' and 'inflight' states.
 */
public static Map<HoodieFileGroupId, HoodieInstant> getAllFileGroupsInPendingClusteringPlans(HoodieTableMetaClient metaClient) {
  Stream<Pair<HoodieInstant, HoodieClusteringPlan>> pendingClusteringPlans = getAllPendingClusteringPlans(metaClient);
  Stream<Map.Entry<HoodieFileGroupId, HoodieInstant>> resultStream = pendingClusteringPlans
      .flatMap(clusteringPlan -> getFileGroupEntriesInClusteringPlan(clusteringPlan.getLeft(), clusteringPlan.getRight()));
  Map<HoodieFileGroupId, HoodieInstant> resultMap;
  try {
    resultMap = resultStream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  } catch (Exception e) {
    if (e instanceof IllegalStateException && e.getMessage().contains("Duplicate key")) {
      throw new HoodieException("Found duplicate file groups pending clustering. If you're running deltastreamer in continuous mode, "
          + "consider adding delay using --min-sync-interval-seconds. Or consider setting write concurrency mode to optimistic_concurrency_control.", e);
    }
    throw new HoodieException("Error getting all file groups in pending clustering", e);
  }
  LOG.info("Found " + resultMap.size() + " files in pending clustering operations");
  return resultMap;
}
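A usage sketch (not from the Hudi sources) of consuming this map to ask whether a specific file group is already targeted by a pending clustering plan; the base path and ids are placeholders, and the builder-style meta client construction assumes a recent Hudi release:

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.ClusteringUtils;

public class PendingClusteringLookupExample {
  public static void main(String[] args) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("/tmp/hudi_table")   // placeholder base path
        .build();

    // One map lookup answers "is this file group being clustered?" for both
    // requested and inflight clustering plans.
    Map<HoodieFileGroupId, HoodieInstant> pending =
        ClusteringUtils.getAllFileGroupsInPendingClusteringPlans(metaClient);
    HoodieFileGroupId fgId = new HoodieFileGroupId("2021/03/01", "file-id-1");  // placeholder ids
    if (pending.containsKey(fgId)) {
      System.out.println("File group is pending clustering at instant " + pending.get(fgId).getTimestamp());
    }
  }
}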
Use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
Class CompactionUtils, method getAllPendingCompactionOperations.
/**
 * Get all partition-path + file-id pairs with pending compaction operations, along with their target compaction instant time.
 *
 * @param metaClient Hoodie table meta client
 * @return mapping from file group id to (target compaction instant time, compaction operation)
 */
public static Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> getAllPendingCompactionOperations(HoodieTableMetaClient metaClient) {
  List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingCompactionPlanWithInstants = getAllPendingCompactionPlans(metaClient);
  Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> fgIdToPendingCompactionWithInstantMap = new HashMap<>();
  pendingCompactionPlanWithInstants.stream()
      .flatMap(instantPlanPair -> getPendingCompactionOperations(instantPlanPair.getKey(), instantPlanPair.getValue()))
      .forEach(pair -> {
        // A file group must have at most one distinct pending compaction; identical duplicates
        // can show up (e.g. from non-atomic renames on some DFSs) and are tolerated.
        if (fgIdToPendingCompactionWithInstantMap.containsKey(pair.getKey())) {
          HoodieCompactionOperation operation = pair.getValue().getValue();
          HoodieCompactionOperation anotherOperation = fgIdToPendingCompactionWithInstantMap.get(pair.getKey()).getValue();
          if (!operation.equals(anotherOperation)) {
            String msg = "Hudi File Id (" + pair.getKey() + ") has more than 1 pending compactions. Instants: "
                + pair.getValue() + ", " + fgIdToPendingCompactionWithInstantMap.get(pair.getKey());
            throw new IllegalStateException(msg);
          }
        }
        fgIdToPendingCompactionWithInstantMap.put(pair.getKey(), pair.getValue());
      });
  return fgIdToPendingCompactionWithInstantMap;
}
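Similarly, a hedged sketch of consuming the returned map; each value pairs the target compaction instant time with its HoodieCompactionOperation, and the base path below is a placeholder:

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.avro.model.HoodieCompactionOperation;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.collection.Pair;

public class PendingCompactionLookupExample {
  public static void main(String[] args) {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("/tmp/hudi_table")   // placeholder base path
        .build();

    Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> pending =
        CompactionUtils.getAllPendingCompactionOperations(metaClient);
    // Key = (partition path, file id); value = (target compaction instant, operation).
    pending.forEach((fgId, instantAndOp) ->
        System.out.println(fgId.getPartitionPath() + "/" + fgId.getFileId()
            + " -> compaction instant " + instantAndOp.getKey()));
  }
}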
Use of org.apache.hudi.common.model.HoodieFileGroupId in project hudi by apache.
Class TestHoodieClientOnCopyOnWriteStorage, method performClustering.
private HoodieWriteMetadata<JavaRDD<WriteStatus>> performClustering(HoodieClusteringConfig clusteringConfig,
    boolean populateMetaFields, boolean completeClustering, String validatorClasses,
    String sqlQueryForEqualityValidation, String sqlQueryForSingleResultValidation,
    Pair<List<HoodieRecord>, List<String>> allRecords) throws IOException {
  HoodiePreCommitValidatorConfig validatorConfig = HoodiePreCommitValidatorConfig.newBuilder()
      .withPreCommitValidator(StringUtils.nullToEmpty(validatorClasses))
      .withPrecommitValidatorEqualitySqlQueries(sqlQueryForEqualityValidation)
      .withPrecommitValidatorSingleResultSqlQueries(sqlQueryForSingleResultValidation)
      .build();
  HoodieWriteConfig config = getConfigBuilder()
      .withAutoCommit(false)
      .withPreCommitValidatorConfig(validatorConfig)
      .withProps(populateMetaFields ? new Properties() : getPropertiesForKeyGen())
      .withClusteringConfig(clusteringConfig)
      .build();
  // Create a client with the new config.
  SparkRDDWriteClient client = getHoodieWriteClient(config);
  String clusteringCommitTime = client.scheduleClustering(Option.empty()).get().toString();
  HoodieWriteMetadata<JavaRDD<WriteStatus>> clusterMetadata = client.cluster(clusteringCommitTime, completeClustering);
  if (config.isPreserveHoodieCommitMetadataForClustering() && config.populateMetaFields()) {
    verifyRecordsWrittenWithPreservedMetadata(new HashSet<>(allRecords.getRight()), allRecords.getLeft(), clusterMetadata.getWriteStatuses().collect());
  } else {
    verifyRecordsWritten(clusteringCommitTime, populateMetaFields, allRecords.getLeft(), clusterMetadata.getWriteStatuses().collect(), config);
  }
  Set<HoodieFileGroupId> replacedFileIds = new HashSet<>();
  clusterMetadata.getPartitionToReplaceFileIds().entrySet().forEach(partitionFiles ->
      partitionFiles.getValue().stream().forEach(file ->
          replacedFileIds.add(new HoodieFileGroupId(partitionFiles.getKey(), file))));
  return clusterMetadata;
}
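The last statement flattens the partition-to-replaced-file-ids map into a set of HoodieFileGroupId keys. The same transformation, written as a standalone, hypothetical helper over plain Java collections with placeholder data:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hudi.common.model.HoodieFileGroupId;

public class ReplacedFileGroupIds {
  // Hypothetical helper: turn a partition -> replaced-file-ids map, shaped like the one
  // returned by HoodieWriteMetadata#getPartitionToReplaceFileIds(), into file group ids.
  static Set<HoodieFileGroupId> toFileGroupIds(Map<String, List<String>> partitionToReplaceFileIds) {
    return partitionToReplaceFileIds.entrySet().stream()
        .flatMap(e -> e.getValue().stream().map(fileId -> new HoodieFileGroupId(e.getKey(), fileId)))
        .collect(Collectors.toSet());
  }

  public static void main(String[] args) {
    // Placeholder partition and file ids.
    Map<String, List<String>> replaced =
        Collections.singletonMap("2021/03/01", Arrays.asList("file-id-1", "file-id-2"));
    System.out.println(toFileGroupIds(replaced).size()); // prints 2
  }
}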