Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.
The class HoodieMetadataMetrics, method getStats.
public Map<String, String> getStats(boolean detailed, HoodieTableMetaClient metaClient, HoodieTableMetadata metadata) {
  try {
    metaClient.reloadActiveTimeline();
    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline());
    return getStats(fsView, detailed, metadata);
  } catch (IOException ioe) {
    throw new HoodieIOException("Unable to get metadata stats.", ioe);
  }
}
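For orientation, a minimal usage sketch follows; hadoopConf, basePath, metrics, and metadata are hypothetical names for objects the caller would already hold, and the builder chain mirrors the one used in the test snippets below.

// Minimal usage sketch. Assumed in scope: hadoopConf (a Hadoop Configuration), basePath,
// metrics (a HoodieMetadataMetrics instance) and metadata (a HoodieTableMetadata instance).
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
    .setConf(hadoopConf)
    .setBasePath(basePath)
    .setLoadActiveTimelineOnLoad(true)
    .build();
Map<String, String> stats = metrics.getStats(true, metaClient, metadata);
stats.forEach((key, value) -> System.out.println(key + " = " + value));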
Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.
The class TestCompactionAdminClient, method validateUnSchedulePlan.
/**
 * Validate unschedule operations.
 */
private List<Pair<HoodieLogFile, HoodieLogFile>> validateUnSchedulePlan(CompactionAdminClient client, String ingestionInstant,
    String compactionInstant, int numEntriesPerInstant, int expNumRenames, boolean skipUnSchedule) throws Exception {
  ensureValidCompactionPlan(compactionInstant);
  // Check the suggested rename operations
  List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles =
      client.getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, 1, Option.empty(), false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  // Log files belonging to file-slices created by the compaction request must be renamed
  Set<HoodieLogFile> gotLogFilesToBeRenamed = renameFiles.stream().map(Pair::getLeft).collect(Collectors.toSet());
  final HoodieTableFileSystemView fsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  Set<HoodieLogFile> expLogFilesToBeRenamed = fsView.getLatestFileSlices(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .flatMap(FileSlice::getLogFiles)
      .collect(Collectors.toSet());
  assertEquals(expLogFilesToBeRenamed, gotLogFilesToBeRenamed,
      "Log files belonging to file-slices created because of compaction request must be renamed");
  if (skipUnSchedule) {
    // Do the renaming only but do not touch the compaction plan - needed for the repair tests
    renameFiles.forEach(lfPair -> {
      try {
        renameLogFile(metaClient, lfPair.getLeft(), lfPair.getRight());
      } catch (IOException e) {
        throw new HoodieIOException(e.getMessage(), e);
      }
    });
  } else {
    validateRenameFiles(renameFiles, ingestionInstant, compactionInstant, fsView);
  }
  Map<String, Long> fileIdToCountsBeforeRenaming =
      fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  // Call the main unschedule API
  client.unscheduleCompactionPlan(compactionInstant, false, 1, false);
  metaClient = HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(basePath)
      .setLoadActiveTimelineOnLoad(true).build();
  final HoodieTableFileSystemView newFsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  // Expect all file-slices whose base-commit is the same as the compaction commit to contain no new log files
  newFsView.getLatestFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], compactionInstant, true)
      .filter(fs -> fs.getBaseInstantTime().equals(compactionInstant))
      .forEach(fs -> {
        assertFalse(fs.getBaseFile().isPresent(), "No Data file must be present");
        assertEquals(0, fs.getLogFiles().count(), "No Log Files");
      });
  // Ensure the same number of log files per fileId before and after renaming
  Map<String, Long> fileIdToCountsAfterRenaming =
      newFsView.getAllFileGroups(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0])
          .flatMap(HoodieFileGroup::getAllFileSlices)
          .filter(fs -> fs.getBaseInstantTime().equals(ingestionInstant))
          .map(fs -> Pair.of(fs.getFileId(), fs.getLogFiles().count()))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
  assertEquals(fileIdToCountsBeforeRenaming, fileIdToCountsAfterRenaming, "Each File Id has same number of log-files");
  assertEquals(numEntriesPerInstant, fileIdToCountsAfterRenaming.size(), "Not Empty");
  assertEquals(expNumRenames, renameFiles.size(), "Expected number of renames");
  return renameFiles;
}
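The test above repeatedly snapshots per-file-group log-file counts from a freshly built HoodieTableFileSystemView; the following condensed sketch isolates that pattern, using only calls that appear in the snippet (partitionPath is a placeholder).

// Sketch: count log files per fileId from the latest file slices of one partition.
HoodieTableFileSystemView view =
    new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
Map<String, Long> logFileCountsPerFileId = view.getLatestFileSlices(partitionPath)
    .collect(Collectors.toMap(FileSlice::getFileId, fs -> fs.getLogFiles().count()));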
Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.
The class TestCompactionAdminClient, method validateRenameFiles.
private void validateRenameFiles(List<Pair<HoodieLogFile, HoodieLogFile>> renameFiles, String ingestionInstant,
    String compactionInstant, HoodieTableFileSystemView fsView) {
  // Ensure new names of log-files are on expected lines
  Set<HoodieLogFile> uniqNewLogFiles = new HashSet<>();
  Set<HoodieLogFile> uniqOldLogFiles = new HashSet<>();
  renameFiles.forEach(lfPair -> {
    assertFalse(uniqOldLogFiles.contains(lfPair.getKey()), "Old Log File Names do not collide");
    assertFalse(uniqNewLogFiles.contains(lfPair.getValue()), "New Log File Names do not collide");
    uniqOldLogFiles.add(lfPair.getKey());
    uniqNewLogFiles.add(lfPair.getValue());
  });
  renameFiles.forEach(lfPair -> {
    HoodieLogFile oldLogFile = lfPair.getLeft();
    HoodieLogFile newLogFile = lfPair.getValue();
    assertEquals(ingestionInstant, newLogFile.getBaseCommitTime(), "Base Commit time is expected");
    assertEquals(compactionInstant, oldLogFile.getBaseCommitTime(), "Base Commit time is expected");
    assertEquals(oldLogFile.getFileId(), newLogFile.getFileId(), "File Id is expected");
    HoodieLogFile lastLogFileBeforeCompaction =
        fsView.getLatestMergedFileSlicesBeforeOrOn(HoodieTestUtils.DEFAULT_PARTITION_PATHS[0], ingestionInstant)
            .filter(fs -> fs.getFileId().equals(oldLogFile.getFileId()))
            .map(fs -> fs.getLogFiles().findFirst().get())
            .findFirst().get();
    assertEquals(lastLogFileBeforeCompaction.getLogVersion() + oldLogFile.getLogVersion(), newLogFile.getLogVersion(),
        "Log Version expected");
    assertTrue(newLogFile.getLogVersion() > lastLogFileBeforeCompaction.getLogVersion(), "Log version does not collide");
  });
}
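The version assertion above encodes a simple offset rule: the renamed file's version is the last pre-compaction log version plus the version of the log file being renamed. A worked sketch of that arithmetic, with illustrative values only:

// Illustrative values only, not taken from any test fixture.
int lastVersionBeforeCompaction = 2; // latest pre-compaction log file version for this fileId
int oldLogVersion = 1;               // version of the log file written against the compaction base
int expectedNewVersion = lastVersionBeforeCompaction + oldLogVersion; // 3
assert expectedNewVersion > lastVersionBeforeCompaction; // mirrors the non-collision assertion above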
Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.
The class HoodieBackedTableMetadataWriter, method prepRecords.
/**
 * Tag each record with its location in the given partition.
 * The record is tagged with the respective file slice's location based on its record key.
 */
protected HoodieData<HoodieRecord> prepRecords(Map<MetadataPartitionType, HoodieData<HoodieRecord>> partitionRecordsMap) {
  // The result set
  HoodieData<HoodieRecord> allPartitionRecords = engineContext.emptyHoodieData();
  HoodieTableFileSystemView fsView = HoodieTableMetadataUtil.getFileSystemView(metadataMetaClient);
  for (Map.Entry<MetadataPartitionType, HoodieData<HoodieRecord>> entry : partitionRecordsMap.entrySet()) {
    final String partitionName = entry.getKey().getPartitionPath();
    final int fileGroupCount = entry.getKey().getFileGroupCount();
    HoodieData<HoodieRecord> records = entry.getValue();
    List<FileSlice> fileSlices =
        HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, Option.ofNullable(fsView), partitionName);
    ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount,
        String.format("Invalid number of file groups for partition:%s, found=%d, required=%d",
            partitionName, fileSlices.size(), fileGroupCount));
    HoodieData<HoodieRecord> rddSinglePartitionRecords = records.map(r -> {
      FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), fileGroupCount));
      r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId()));
      return r;
    });
    allPartitionRecords = allPartitionRecords.union(rddSinglePartitionRecords);
  }
  return allPartitionRecords;
}
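The key-to-file-group assignment is delegated to HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex. As a rough illustration of the idea (explicitly not Hudi's exact hash), any stable hash taken modulo the file-group count yields the same kind of deterministic assignment:

// Rough illustration only - NOT Hudi's actual hash function; it just shows the
// deterministic hash-then-modulo pattern a key-to-file-group mapping needs.
static int toFileGroupIndexSketch(String recordKey, int fileGroupCount) {
  int h = recordKey.hashCode();
  return (h & Integer.MAX_VALUE) % fileGroupCount; // mask the sign bit so the index is non-negative
}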
Use of org.apache.hudi.common.table.view.HoodieTableFileSystemView in project hudi by apache.
The class CompactionAdminClient, method validateCompactionPlan.
/**
 * Validate all compaction operations in a compaction plan. Verifies that the file-slices are consistent with the
 * corresponding compaction operations.
 *
 * @param metaClient Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 */
public List<ValidationOpResult> validateCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant,
    int parallelism) throws IOException {
  HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
  HoodieTableFileSystemView fsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  if (plan.getOperations() != null) {
    List<CompactionOperation> ops = plan.getOperations().stream()
        .map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
    context.setJobStatus(this.getClass().getSimpleName(), "Validate compaction operations");
    return context.map(ops, op -> {
      try {
        return validateCompactionOperation(metaClient, compactionInstant, op, Option.of(fsView));
      } catch (IOException e) {
        throw new HoodieIOException(e.getMessage(), e);
      }
    }, parallelism);
  }
  return new ArrayList<>();
}
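A hedged usage sketch, assuming a CompactionAdminClient named adminClient and a metaClient already in scope; the isSuccess() accessor on ValidationOpResult is an assumption here (based on how the Hudi CLI consumes these results), not something confirmed by the snippet above:

// Usage sketch. Assumed in scope: adminClient (CompactionAdminClient) and metaClient.
// isSuccess() on ValidationOpResult is an assumption; verify against the actual class.
List<ValidationOpResult> results =
    adminClient.validateCompactionPlan(metaClient, compactionInstant, 2 /* parallelism */);
long failed = results.stream().filter(r -> !r.isSuccess()).count();
System.out.println(failed + " of " + results.size() + " compaction operations failed validation");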