use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
the class HoodieBackedTableMetadata method readFromBaseAndMergeWithLogRecords.
/**
 * Looks up {@code keys} in the base file (when a reader is supplied) and combines every base
 * record with the log record stored under the same key.
 *
 * <p>With a base file reader, one result pair is produced per requested key, in the order of
 * {@code keys}:
 * <ul>
 *   <li>base record and a present log record: the log payload is pre-combined with the base payload;</li>
 *   <li>base record only: the base record is returned as is;</li>
 *   <li>no base record: the log record entry is returned, or an empty {@code Option} when the
 *       key is not in {@code logRecords} either (never a raw {@code null}).</li>
 * </ul>
 * Without a base file reader, the entries of {@code logRecords} are returned unchanged.
 *
 * @param baseFileReader reader for the base file, or {@code null} when the slice has no base file
 * @param keys           record keys to look up
 * @param logRecords     log records by key
 * @param timings        receives one entry: the elapsed time reported by the method-level timer
 * @param partitionName  metadata partition name, forwarded to {@code getRecord}
 * @return list of (key, optional record) pairs
 * @throws IOException declared for the base file lookup
 */
private List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> readFromBaseAndMergeWithLogRecords(HoodieFileReader baseFileReader, List<String> keys, Map<String, Option<HoodieRecord<HoodieMetadataPayload>>> logRecords, List<Long> timings, String partitionName) throws IOException {
  List<Pair<String, Option<HoodieRecord<HoodieMetadataPayload>>>> result = new ArrayList<>();
  // Start the overall timer exactly once; the original started it twice in a row.
  HoodieTimer timer = new HoodieTimer().startTimer();

  if (baseFileReader == null) {
    // no base file at all: hand back the log records untouched
    timings.add(timer.endTimer());
    for (Map.Entry<String, Option<HoodieRecord<HoodieMetadataPayload>>> entry : logRecords.entrySet()) {
      result.add(Pair.of(entry.getKey(), entry.getValue()));
    }
    return result;
  }

  // Retrieve records from base file and merge with log records
  HoodieTimer readTimer = new HoodieTimer();
  Map<String, GenericRecord> baseFileRecords = baseFileReader.getRecordsByKeys(keys);
  for (String key : keys) {
    readTimer.startTimer();
    if (baseFileRecords.containsKey(key)) {
      HoodieRecord<HoodieMetadataPayload> hoodieRecord = getRecord(Option.of(baseFileRecords.get(key)), partitionName);
      metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer()));
      // Single lookup of the log record; a missing key and an empty Option are treated alike.
      Option<HoodieRecord<HoodieMetadataPayload>> logRecord = logRecords.get(key);
      if (logRecord != null && logRecord.isPresent()) {
        // merge base file record w/ log record
        HoodieRecordPayload mergedPayload = logRecord.get().getData().preCombine(hoodieRecord.getData());
        result.add(Pair.of(key, Option.of(new HoodieAvroRecord(hoodieRecord.getKey(), mergedPayload))));
      } else {
        // only base record
        result.add(Pair.of(key, Option.of(hoodieRecord)));
      }
    } else {
      // only log record; fall back to an empty Option so callers never see a null value
      Option<HoodieRecord<HoodieMetadataPayload>> logRecord = logRecords.get(key);
      if (logRecord == null) {
        logRecord = Option.empty();
      }
      result.add(Pair.of(key, logRecord));
    }
  }
  timings.add(timer.endTimer());
  return result;
}
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
the class HoodieBackedTableMetadata method openReadersIfNeeded.
/**
 * Returns the readers registered for the given partition and file slice, opening and
 * registering them first when no entry exists yet.
 *
 * <p>Readers are keyed by (partition name, file id). When they have to be opened, the sum of
 * the base file open time and the log scanner open time is reported to the metrics, if
 * metrics are present.
 *
 * @param partitionName - Partition name
 * @param slice - The file slice to open readers for
 * @return File reader and the record scanner pair for the requested file slice
 */
private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordReader> openReadersIfNeeded(String partitionName, FileSlice slice) {
  return partitionReaders.computeIfAbsent(Pair.of(partitionName, slice.getFileId()), readerKey -> {
    try {
      HoodieTimer openTimer = new HoodieTimer().startTimer();

      // Base file side
      Pair<HoodieFileReader, Long> baseReaderAndOpenTime = getBaseFileReader(slice, openTimer);
      HoodieFileReader baseReader = baseReaderAndOpenTime.getKey();
      final long baseOpenMs = baseReaderAndOpenTime.getValue();

      // Log side: scan the log files of this file slice
      List<HoodieLogFile> sliceLogFiles = slice.getLogFiles().collect(Collectors.toList());
      Pair<HoodieMetadataMergedLogRecordReader, Long> scannerAndOpenTime = getLogRecordScanner(sliceLogFiles, partitionName);
      HoodieMetadataMergedLogRecordReader logScanner = scannerAndOpenTime.getKey();
      final long scannerOpenMs = scannerAndOpenTime.getValue();

      final long totalOpenMs = baseOpenMs + scannerOpenMs;
      metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.SCAN_STR, totalOpenMs));
      return Pair.of(baseReader, logScanner);
    } catch (IOException e) {
      throw new HoodieIOException("Error opening readers for metadata table partition " + partitionName, e);
    }
  });
}
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
the class MetadataCommand method listFiles.
/**
 * CLI command that prints every file the metadata table lists for one partition,
 * ordered by file name in descending order.
 *
 * @param partition name of the partition (relative to the table base path) to list
 * @return the rendered single-column table, or an error message when the metadata table
 *         is not enabled
 * @throws IOException declared for the metadata listing
 */
@CliCommand(value = "metadata list-files", help = "Print a list of all files in a partition from the metadata")
public String listFiles(@CliOption(key = { "partition" }, help = "Name of the partition to list files", mandatory = true) final String partition) throws IOException {
  HoodieCLI.getTableMetaClient();
  HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build();
  HoodieBackedTableMetadata metadata = new HoodieBackedTableMetadata(new HoodieLocalEngineContext(HoodieCLI.conf), metadataConfig, HoodieCLI.basePath, "/tmp");
  if (!metadata.enabled()) {
    return "[ERROR] Metadata Table not enabled/initialized\n\n";
  }

  HoodieTimer listingTimer = new HoodieTimer().startTimer();
  FileStatus[] statuses = metadata.getAllFilesInPartition(new Path(HoodieCLI.basePath, partition));
  LOG.debug("Took " + listingTimer.endTimer() + " ms");

  // Sort a copy so the array returned by the metadata reader is left untouched.
  FileStatus[] byNameDescending = statuses.clone();
  Arrays.sort(byNameDescending, (left, right) -> right.getPath().getName().compareTo(left.getPath().getName()));

  final List<Comparable[]> rows = new ArrayList<>();
  for (FileStatus status : byNameDescending) {
    rows.add(new Comparable[] {status});
  }

  TableHeader header = new TableHeader().addTableHeaderField("file path");
  return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
the class MetadataCommand method init.
/**
 * CLI command that initializes the metadata table, or only checks that it can be opened
 * when {@code readOnly} is set.
 *
 * <p>The metadata directory must already exist. Unless {@code readOnly} is set, a Spark
 * context is started and a {@code SparkHoodieBackedTableMetadataWriter} is created.
 *
 * @param master   Spark master to use when a Spark context has to be started
 * @param readOnly when {@code true}, no writer is created
 * @return a one-line summary containing the metadata path and the elapsed time in seconds
 * @throws RuntimeException if the metadata directory does not exist; the original
 *                          {@link FileNotFoundException} is attached as the cause
 * @throws Exception        declared for failures of the underlying file system or writer calls
 */
@CliCommand(value = "metadata init", help = "Update the metadata table from commits since the creation")
public String init(@CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master, @CliOption(key = { "readonly" }, unspecifiedDefaultValue = "false", help = "Open in read-only mode") final boolean readOnly) throws Exception {
  HoodieCLI.getTableMetaClient();
  Path metadataPath = new Path(getMetadataTableBasePath(HoodieCLI.basePath));
  try {
    // Listing is only used as an existence check; the result is not needed.
    HoodieCLI.fs.listStatus(metadataPath);
  } catch (FileNotFoundException e) {
    // Metadata directory does not exist; keep the cause so the stack trace is not lost
    throw new RuntimeException("Metadata directory (" + metadataPath.toString() + ") does not exist.", e);
  }
  HoodieTimer timer = new HoodieTimer().startTimer();
  if (!readOnly) {
    HoodieWriteConfig writeConfig = getWriteConfig();
    initJavaSparkContext(Option.of(master));
    SparkHoodieBackedTableMetadataWriter.create(HoodieCLI.conf, writeConfig, new HoodieSparkEngineContext(jsc));
  }
  String action = readOnly ? "Opened" : "Initialized";
  return String.format(action + " Metadata Table in %s (duration=%.2fsec)", metadataPath, (timer.endTimer()) / 1000.0);
}
use of org.apache.hudi.common.util.HoodieTimer in project hudi by apache.
the class MetadataCommand method validateFiles.
/**
 * CLI command that compares, partition by partition, the file listing obtained with the
 * metadata table enabled against the listing obtained with it disabled.
 *
 * <p>Differences in partition lists, file counts, file presence and file sizes are logged
 * as errors. The returned table has one row per file: every file when {@code verbose} is
 * set, otherwise only files whose presence or length differs between the two listings.
 *
 * @param verbose when {@code true}, include matching files in the output as well
 * @return the rendered comparison table, or an error message when the metadata table is
 *         not enabled
 * @throws IOException declared for the partition and file listings
 */
@CliCommand(value = "metadata validate-files", help = "Validate all files in all partitions from the metadata")
public String validateFiles(@CliOption(key = { "verbose" }, help = "Print all file details", unspecifiedDefaultValue = "false") final boolean verbose) throws IOException {
  HoodieCLI.getTableMetaClient();
  HoodieMetadataConfig config = HoodieMetadataConfig.newBuilder().enable(true).build();
  HoodieBackedTableMetadata metadataReader = new HoodieBackedTableMetadata(new HoodieLocalEngineContext(HoodieCLI.conf), config, HoodieCLI.basePath, "/tmp");
  if (!metadataReader.enabled()) {
    return "[ERROR] Metadata Table not enabled/initialized\n\n";
  }
  // Second reader built with the metadata config disabled; it provides the "FS" side of the comparison.
  HoodieMetadataConfig fsConfig = HoodieMetadataConfig.newBuilder().enable(false).build();
  HoodieBackedTableMetadata fsMetaReader = new HoodieBackedTableMetadata(new HoodieLocalEngineContext(HoodieCLI.conf), fsConfig, HoodieCLI.basePath, "/tmp");

  HoodieTimer timer = new HoodieTimer().startTimer();
  List<String> metadataPartitions = metadataReader.getAllPartitionPaths();
  LOG.debug("Listing partitions Took " + timer.endTimer() + " ms");
  List<String> fsPartitions = fsMetaReader.getAllPartitionPaths();
  // Sort both lists so the equality check below does not depend on listing order.
  Collections.sort(fsPartitions);
  Collections.sort(metadataPartitions);
  Set<String> allPartitions = new HashSet<>();
  allPartitions.addAll(fsPartitions);
  allPartitions.addAll(metadataPartitions);
  if (!fsPartitions.equals(metadataPartitions)) {
    LOG.error("FS partition listing is not matching with metadata partition listing!");
    LOG.error("All FS partitions: " + Arrays.toString(fsPartitions.toArray()));
    LOG.error("All Metadata partitions: " + Arrays.toString(metadataPartitions.toArray()));
  }

  final List<Comparable[]> rows = new ArrayList<>();
  for (String partition : allPartitions) {
    // File name -> status, one map per listing source.
    Map<String, FileStatus> fileStatusMap = new HashMap<>();
    Map<String, FileStatus> metadataFileStatusMap = new HashMap<>();
    FileStatus[] metadataStatuses = metadataReader.getAllFilesInPartition(new Path(HoodieCLI.basePath, partition));
    Arrays.stream(metadataStatuses).forEach(entry -> metadataFileStatusMap.put(entry.getPath().getName(), entry));
    FileStatus[] fsStatuses = fsMetaReader.getAllFilesInPartition(new Path(HoodieCLI.basePath, partition));
    Arrays.stream(fsStatuses).forEach(entry -> fileStatusMap.put(entry.getPath().getName(), entry));

    Set<String> allFiles = new HashSet<>();
    allFiles.addAll(fileStatusMap.keySet());
    allFiles.addAll(metadataFileStatusMap.keySet());
    for (String file : allFiles) {
      Comparable[] row = new Comparable[6];
      row[0] = partition;
      FileStatus fsFileStatus = fileStatusMap.get(file);
      FileStatus metaFileStatus = metadataFileStatusMap.get(file);
      boolean doesFsFileExists = fsFileStatus != null;
      boolean doesMetadataFileExists = metaFileStatus != null;
      // A file missing on one side is reported with length 0 for that side.
      long fsFileLength = doesFsFileExists ? fsFileStatus.getLen() : 0;
      long metadataFileLength = doesMetadataFileExists ? metaFileStatus.getLen() : 0;
      row[1] = file;
      row[2] = doesFsFileExists;
      row[3] = doesMetadataFileExists;
      row[4] = fsFileLength;
      row[5] = metadataFileLength;
      if (verbose) {
        // if verbose print all files
        rows.add(row);
      } else if ((doesFsFileExists != doesMetadataFileExists) || (fsFileLength != metadataFileLength)) {
        // if non verbose, print only non matching files
        rows.add(row);
      }
    }

    if (metadataStatuses.length != fsStatuses.length) {
      LOG.error(" FS and metadata files count not matching for " + partition + ". FS files count " + fsStatuses.length + ", metadata base files count " + metadataStatuses.length);
    }

    // FS -> metadata direction: files missing from metadata, or present with another size.
    for (Map.Entry<String, FileStatus> entry : fileStatusMap.entrySet()) {
      if (!metadataFileStatusMap.containsKey(entry.getKey())) {
        LOG.error("FS file not found in metadata " + entry.getKey());
      } else {
        if (entry.getValue().getLen() != metadataFileStatusMap.get(entry.getKey()).getLen()) {
          LOG.error(" FS file size mismatch " + entry.getKey() + ", size equality " + (entry.getValue().getLen() == metadataFileStatusMap.get(entry.getKey()).getLen()) + ". FS size " + entry.getValue().getLen() + ", metadata size " + metadataFileStatusMap.get(entry.getKey()).getLen());
        }
      }
    }

    // Metadata -> FS direction: files missing from FS, or present with another size.
    for (Map.Entry<String, FileStatus> entry : metadataFileStatusMap.entrySet()) {
      if (!fileStatusMap.containsKey(entry.getKey())) {
        LOG.error("Metadata file not found in FS " + entry.getKey());
      } else {
        if (entry.getValue().getLen() != fileStatusMap.get(entry.getKey()).getLen()) {
          // "FS size" must come from the FS listing; the entry itself is the metadata side.
          LOG.error(" Metadata file size mismatch " + entry.getKey() + ", size equality " + (entry.getValue().getLen() == fileStatusMap.get(entry.getKey()).getLen()) + ". Metadata size " + entry.getValue().getLen() + ", FS size " + fileStatusMap.get(entry.getKey()).getLen());
        }
      }
    }
  }

  TableHeader header = new TableHeader().addTableHeaderField("Partition").addTableHeaderField("File Name").addTableHeaderField(" Is Present in FS ").addTableHeaderField(" Is Present in Metadata").addTableHeaderField(" FS size").addTableHeaderField(" Metadata size");
  return HoodiePrintHelper.print(header, new HashMap<>(), "", false, Integer.MAX_VALUE, false, rows);
}
Aggregations