Use of org.apache.drill.exec.store.parquet.metadata.Metadata in the Apache Drill project.
Class ParquetTableMetadataProviderImpl, method initInternal.
/**
 * Populates {@code parquetTableMetadata}, preferring metadata cache files when they can be used
 * and otherwise obtaining the metadata through {@code Metadata.getParquetTableMetadata}.
 *
 * <p>All metadata reads go through a file system created for the process user; it is closed when
 * this method returns. {@code usedMetadataCache} is set to {@code true} only when table metadata
 * is available from the cache path.
 *
 * @throws IOException if a file system call or a metadata read fails
 */
@Override
protected void initInternal() throws IOException {
  final String processUser = ImpersonationUtil.getProcessUserName();
  try (FileSystem processUserFs = ImpersonationUtil.createFileSystem(processUser, fs.getConf())) {
    final boolean singleEntryWithoutMetadata = entries.size() == 1 && parquetTableMetadata == null;

    if (singleEntryWithoutMetadata) {
      final Path entryPath = Path.getPathWithoutSchemeAndAuthority(entries.get(0).getPath());

      // The metadata file is only worth consulting when a directory is queried; for a single
      // file the metadata can be looked up directly from that file.
      // Depending on the metadata version there may be more than one cache file path.
      final List<Path> cacheFiles = fs.isDirectory(entryPath)
          ? populateMetaPaths(entryPath, fs)
          : new ArrayList<>();

      if (!metaContext.isMetadataCacheCorrupted() && !cacheFiles.isEmpty()) {
        parquetTableMetadata =
            Metadata.readBlockMeta(processUserFs, cacheFiles, metaContext, readerConfig);
        if (parquetTableMetadata != null) {
          usedMetadataCache = true;
        }
      }

      if (!usedMetadataCache) {
        // No usable cache: obtain the metadata for the entry path itself.
        parquetTableMetadata =
            Metadata.getParquetTableMetadata(processUserFs, entryPath, readerConfig);
      }
    } else {
      final Path rootPath = Path.getPathWithoutSchemeAndAuthority(selectionRoot);
      // Depending on the metadata version there may be more than one cache file path.
      final List<Path> cacheFiles = populateMetaPaths(rootPath, fs);

      final boolean cacheUsable = !metaContext.isMetadataCacheCorrupted()
          && fs.isDirectory(selectionRoot)
          && !cacheFiles.isEmpty();

      if (cacheUsable) {
        // Metadata that is already present is kept; the cache is read only when it is missing.
        if (parquetTableMetadata == null) {
          parquetTableMetadata =
              Metadata.readBlockMeta(processUserFs, cacheFiles, metaContext, readerConfig);
        }
        if (parquetTableMetadata != null) {
          usedMetadataCache = true;
          if (fileSet != null) {
            parquetTableMetadata = removeUnneededRowGroups(parquetTableMetadata);
          }
        }
      }

      if (!usedMetadataCache) {
        // No usable cache: list the files under every entry (recursive flag set) and obtain
        // the metadata for them.
        final List<FileStatus> listedStatuses = new ArrayList<>();
        for (ReadEntryWithPath readEntry : entries) {
          final Path entryPath = Path.getPathWithoutSchemeAndAuthority(readEntry.getPath());
          listedStatuses.addAll(DrillFileSystemUtil.listFiles(fs, entryPath, true));
        }

        // Every status is mapped to the process-user file system; the LinkedHashMap keeps
        // the listing order.
        final Map<FileStatus, FileSystem> fileSystemsByStatus = listedStatuses.stream()
            .collect(Collectors.toMap(
                Function.identity(),
                s -> processUserFs,
                (oldFs, newFs) -> newFs,
                LinkedHashMap::new));

        parquetTableMetadata = Metadata.getParquetTableMetadata(fileSystemsByStatus, readerConfig);
      }
    }
  }
}
Use of org.apache.drill.exec.store.parquet.metadata.Metadata in the Apache Drill project.
Class ParquetTableMetadataProviderImpl, method populateMetaPaths.
/**
 * Returns the list of metadata cache files found in the given directory.
 *
 * <p>The current-version file names ({@code Metadata.CURRENT_METADATA_FILENAMES}) are probed
 * first, as one group. If {@code fileExists} does not accept that group, each older file name
 * ({@code Metadata.OLD_METADATA_FILENAMES}) is probed on its own, in declaration order, and the
 * first one accepted is returned.
 *
 * @param p directory path of the cache file
 * @param fs filesystem object
 * @return list of cache files found in the given directory path; an empty list if {@code p} is
 *         not a directory or no candidate is accepted by {@code fileExists}
 * @throws IOException if a file system call fails
 */
public List<Path> populateMetaPaths(Path p, DrillFileSystem fs) throws IOException {
  if (!fs.isDirectory(p)) {
    return Collections.emptyList();
  }

  // Build the candidate in an explicit ArrayList: Collectors.toList() gives no guarantee that
  // the list it returns is mutable, so it must not be cleared and refilled in place.
  List<Path> currentVersionPaths = new ArrayList<>(Metadata.CURRENT_METADATA_FILENAMES.length);
  for (String filename : Metadata.CURRENT_METADATA_FILENAMES) {
    currentVersionPaths.add(new Path(p, filename));
  }
  if (fileExists(fs, currentVersionPaths)) {
    return currentVersionPaths;
  }

  // Read the older version of the metadata file if the current version of the metadata cache
  // files does not exist.
  for (String filename : Metadata.OLD_METADATA_FILENAMES) {
    List<Path> oldVersionPath = new ArrayList<>(1);
    oldVersionPath.add(new Path(p, filename));
    if (fileExists(fs, oldVersionPath)) {
      return oldVersionPath;
    }
  }

  return Collections.emptyList();
}
Aggregations