use of org.apache.hudi.exception.MetadataNotFoundException in project hudi by apache.
the class OrcUtils method readFooter.
/**
 * Reads the requested user-metadata entries from the footer of an ORC file.
 *
 * @param conf        configuration used to build the ORC reader options
 * @param required    when {@code true}, a requested key that is absent from the footer is an error
 * @param orcFilePath path of the ORC file whose footer is read
 * @param footerNames footer keys to look up
 * @return map from each requested key that was present to its value decoded as UTF-8
 * @throws MetadataNotFoundException if {@code required} is set and a requested key is missing
 * @throws HoodieIOException         if an {@link IOException} occurs while reading the file
 */
@Override
public Map<String, String> readFooter(Configuration conf, boolean required, Path orcFilePath, String... footerNames) {
  try (Reader orcReader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))) {
    // Index every user-metadata item of the footer by name, decoding the value as UTF-8.
    Map<String, String> userMetadata = orcReader.getFileTail().getFooter().getMetadataList().stream()
        .collect(Collectors.toMap(UserMetadataItem::getName, item -> item.getValue().toStringUtf8()));

    Map<String, String> requestedValues = new HashMap<>();
    for (String key : footerNames) {
      if (!userMetadata.containsKey(key)) {
        if (required) {
          throw new MetadataNotFoundException("Could not find index in ORC footer. Looked for key " + key + " in " + orcFilePath);
        }
        // Optional key that is not present: leave it out of the result.
        continue;
      }
      requestedValues.put(key, userMetadata.get(key));
    }
    return requestedValues;
  } catch (IOException io) {
    throw new HoodieIOException("Unable to read footer for ORC file:" + orcFilePath, io);
  }
}
use of org.apache.hudi.exception.MetadataNotFoundException in project hudi by apache.
the class HoodieBloomIndex method loadColumnRangesFromMetaIndex.
/**
 * Builds a {@link BloomIndexFileInfo} (file id plus min/max value of the record key column) for the
 * latest base files of the given partitions, using the column stats served by the metadata table.
 *
 * @param partitions  partitions whose column stats are looked up
 * @param context     engine context used to run the per-partition lookup
 * @param hoodieTable table providing the meta client, the latest base files and the metadata table
 * @return pairs of partition and file column range info
 * @throws HoodieMetadataException if the column stats lookup raises {@link MetadataNotFoundException}
 */
protected List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromMetaIndex(List<String> partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) {
  context.setJobStatus(this.getClass().getName(), "Load meta index key ranges for file slices");
  final String keyField = hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp();
  final int parallelism = Math.max(partitions.size(), 1);

  return context.flatMap(partitions, partitionName -> {
    // Sorted (partition, base file name) pairs for the latest base files of this partition.
    List<Pair<String, String>> partitionAndFileNames = HoodieIndexUtils.getLatestBaseFilesForPartition(partitionName, hoodieTable)
        .stream()
        .map(baseFile -> Pair.of(partitionName, baseFile.getFileName()))
        .sorted()
        .collect(toList());
    if (partitionAndFileNames.isEmpty()) {
      return Stream.empty();
    }

    try {
      Map<Pair<String, String>, HoodieMetadataColumnStats> columnStatsByFile =
          hoodieTable.getMetadataTable().getColumnStats(partitionAndFileNames, keyField);
      // Materialize the result inside the try block before handing back a stream over it.
      List<Pair<String, BloomIndexFileInfo>> fileRanges = columnStatsByFile.entrySet().stream()
          .map(statsEntry -> Pair.of(
              statsEntry.getKey().getLeft(),
              new BloomIndexFileInfo(
                  FSUtils.getFileId(statsEntry.getKey().getRight()),
                  statsEntry.getValue().getMinValue(),
                  statsEntry.getValue().getMaxValue())))
          .collect(toList());
      return fileRanges.stream();
    } catch (MetadataNotFoundException me) {
      throw new HoodieMetadataException("Unable to find column range metadata for partition:" + partitionName, me);
    }
  }, parallelism);
}
use of org.apache.hudi.exception.MetadataNotFoundException in project hudi by apache.
the class HoodieBloomIndex method loadColumnRangesFromFiles.
/**
 * Builds a {@link BloomIndexFileInfo} for the latest base file of every given partition by reading
 * the min/max keys through a {@link HoodieRangeInfoHandle}.
 *
 * <p>When the handle raises {@link MetadataNotFoundException}, a warning is logged and the file is
 * still returned, with a {@link BloomIndexFileInfo} that carries only the file id.
 *
 * @param partitions  partitions whose latest base files are inspected
 * @param context     engine context used to run the per-file lookup
 * @param hoodieTable table the base files belong to
 * @return pairs of partition path and file range info
 */
List<Pair<String, BloomIndexFileInfo>> loadColumnRangesFromFiles(List<String> partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) {
  // Reduce the latest base files of all partitions to (partition path, file id) pairs.
  List<Pair<String, String>> partitionPathFileIDList = getLatestBaseFilesForAllPartitions(partitions, context, hoodieTable)
      .stream()
      .map(partitionAndBaseFile -> Pair.of(partitionAndBaseFile.getKey(), partitionAndBaseFile.getValue().getFileId()))
      .collect(toList());

  context.setJobStatus(this.getClass().getName(), "Obtain key ranges for file slices (range pruning=on)");
  final int parallelism = Math.max(partitionPathFileIDList.size(), 1);

  return context.map(partitionPathFileIDList, partitionAndFileId -> {
    BloomIndexFileInfo fileInfo;
    try {
      String[] keyRange = new HoodieRangeInfoHandle(config, hoodieTable, partitionAndFileId).getMinMaxKeys();
      fileInfo = new BloomIndexFileInfo(partitionAndFileId.getValue(), keyRange[0], keyRange[1]);
    } catch (MetadataNotFoundException me) {
      LOG.warn("Unable to find range metadata in file :" + partitionAndFileId);
      // Fall back to an entry that has no key range.
      fileInfo = new BloomIndexFileInfo(partitionAndFileId.getValue());
    }
    return Pair.of(partitionAndFileId.getKey(), fileInfo);
  }, parallelism);
}
use of org.apache.hudi.exception.MetadataNotFoundException in project hudi by apache.
the class ParquetUtils method readFooter.
/**
 * Reads the requested key/value metadata entries from the footer of a Parquet file.
 *
 * @param configuration   configuration passed to {@code readMetadata}
 * @param required        when {@code true}, a requested key that is absent from the footer is an error
 * @param parquetFilePath path of the Parquet file whose footer is read
 * @param footerNames     footer keys to look up
 * @return map from each requested key that was present to its footer value
 * @throws MetadataNotFoundException if {@code required} is set and a requested key is missing
 */
@Override
public Map<String, String> readFooter(Configuration configuration, boolean required, Path parquetFilePath, String... footerNames) {
  Map<String, String> keyValueMetadata = readMetadata(configuration, parquetFilePath).getFileMetaData().getKeyValueMetaData();
  Map<String, String> requestedValues = new HashMap<>();
  for (String key : footerNames) {
    if (!keyValueMetadata.containsKey(key)) {
      if (required) {
        throw new MetadataNotFoundException("Could not find index in Parquet footer. Looked for key " + key + " in " + parquetFilePath);
      }
      // Optional key that is not present: leave it out of the result.
      continue;
    }
    requestedValues.put(key, keyValueMetadata.get(key));
  }
  return requestedValues;
}
Aggregations