Use of org.apache.hudi.common.model.HoodieBaseFile in project hudi by apache.
The class HoodieROTablePathFilter, method accept.
/**
 * Decides whether the given path passes this filter.
 *
 * <p>The decision is made per parent folder and memoized:
 * <ul>
 *   <li>folders (or base directories) recorded in {@code nonHoodiePathCache} accept every path;</li>
 *   <li>folders recorded in {@code hoodiePathCache} accept only the paths stored for them;</li>
 *   <li>any path inside (or ending with) the {@code HoodieTableMetaClient.METAFOLDER_NAME}
 *       folder is rejected;</li>
 *   <li>otherwise the base directory is resolved, the latest base files of the folder's
 *       partition are looked up through an in-memory file system view, cached under the
 *       folder, and the path is accepted only if it is one of them.</li>
 * </ul>
 * A {@code TableNotFoundException} during the lookup marks both the folder and the base
 * directory as non-hoodie and accepts the path.
 *
 * @param path the path to check; it is treated as a file whose parent is the folder
 * @return {@code true} if the path is accepted, {@code false} otherwise
 * @throws HoodieException wrapping any other exception raised while checking the path
 */
@Override
public boolean accept(Path path) {
    if (engineContext == null) {
        this.engineContext = new HoodieLocalEngineContext(this.conf.get());
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Checking acceptance for path " + path);
    }
    // Declared outside the try so the error message below can report it.
    Path folder = null;
    try {
        if (fs == null) {
            fs = path.getFileSystem(conf.get());
        }
        // The path is assumed to be a file, so its immediate parent is the containing folder.
        folder = path.getParent();
        final String folderKey = folder.toString();

        // Fast path: answer from the caches when this folder has been seen before.
        if (nonHoodiePathCache.contains(folderKey)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Accepting non-hoodie path from cache: " + path);
            }
            return true;
        }
        if (hoodiePathCache.containsKey(folderKey)) {
            final boolean cachedVerdict = hoodiePathCache.get(folderKey).contains(path);
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("%s Hoodie path checked against cache, accept => %s \n", path, cachedVerdict));
            }
            return cachedVerdict;
        }

        // Anything located under the hoodie meta folder is never accepted.
        final String filePath = path.toString();
        final String metaFolderSuffix = "/" + HoodieTableMetaClient.METAFOLDER_NAME;
        if (filePath.contains(metaFolderSuffix + "/") || filePath.endsWith(metaFolderSuffix)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("Skipping Hoodie Metadata file %s \n", filePath));
            }
            return false;
        }

        // Slow path: work out the candidate base directory of the table.
        final Path baseDir;
        if (HoodiePartitionMetadata.hasPartitionMetadata(fs, folder)) {
            HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, folder);
            partitionMetadata.readFromFS();
            baseDir = HoodieHiveUtils.getNthParent(folder, partitionMetadata.getPartitionDepth());
        } else {
            baseDir = safeGetParentsParent(folder);
        }

        if (baseDir == null) {
            // File is at < 3 level depth in the FS tree, so it can't be a hoodie dataset.
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("(2) Caching non-hoodie path under %s \n", folderKey));
            }
            nonHoodiePathCache.add(folderKey);
            return true;
        }

        final String baseDirKey = baseDir.toString();
        // The base directory itself may already be known as non-hoodie.
        if (nonHoodiePathCache.contains(baseDirKey)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Accepting non-hoodie path from cache: " + path);
            }
            return true;
        }

        HoodieTableFileSystemView fsView = null;
        try {
            // Reuse the meta client for this base directory when one was built earlier.
            HoodieTableMetaClient metaClient = metaClientCache.get(baseDirKey);
            if (metaClient == null) {
                metaClient = HoodieTableMetaClient.builder()
                    .setConf(fs.getConf())
                    .setBasePath(baseDirKey)
                    .setLoadActiveTimelineOnLoad(true)
                    .build();
                metaClientCache.put(baseDirKey, metaClient);
            }
            fsView = FileSystemViewManager.createInMemoryFileSystemView(
                engineContext, metaClient, HoodieInputFormatUtils.buildMetadataConfig(getConf()));
            String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder);
            List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList());

            // Record the latest base files of this folder in the cache.
            if (!hoodiePathCache.containsKey(folderKey)) {
                hoodiePathCache.put(folderKey, new HashSet<>());
            }
            LOG.info("Based on hoodie metadata from base path: " + baseDirKey + ", caching " + latestFiles.size() + " files under " + folder);
            latestFiles.forEach(baseFile -> hoodiePathCache.get(folderKey).add(new Path(baseFile.getPath())));

            // The path is accepted only when it is one of the latest files.
            final boolean accepted = hoodiePathCache.get(folderKey).contains(path);
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("%s checked after cache population, accept => %s \n", path, accepted));
            }
            return accepted;
        } catch (TableNotFoundException e) {
            // No table at the base directory: remember both locations as non-hoodie and accept.
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("(1) Caching non-hoodie path under %s with basePath %s \n", folderKey, baseDirKey));
            }
            nonHoodiePathCache.add(folderKey);
            nonHoodiePathCache.add(baseDirKey);
            return true;
        } finally {
            if (fsView != null) {
                fsView.close();
            }
        }
    } catch (Exception e) {
        String msg = "Error checking path :" + path + ", under folder: " + folder;
        LOG.error(msg, e);
        throw new HoodieException(msg, e);
    }
}
Aggregations