use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
the class RepairsCommand method addPartitionMeta.
/**
 * Adds a partition metadata file to every partition of the currently connected table that does
 * not have one yet, and reports the state of each partition.
 *
 * <p>Partitions are discovered with {@code FSUtils.getAllPartitionFoldersThreeLevelsDown}. For
 * each partition lacking metadata, a {@link HoodiePartitionMetadata} stamped with the timestamp
 * of the last instant on the commit timeline is saved, unless {@code dryRun} is set.
 *
 * @param dryRun when {@code true} (the CLI default) nothing is written; the output only shows
 *               which partitions are missing metadata
 * @return the rendered table with one row per partition: partition path, whether metadata was
 *         already present ("Yes"/"No"), and the action taken ("None"/"Repaired")
 * @throws IOException declared by the original signature for the file system calls made here
 * @throws IllegalStateException if the commit timeline has no instant to stamp the metadata with
 */
@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a table, if not present")
public String addPartitionMeta(@CliOption(key = { "dryrun" }, help = "Should we actually add or just print what would be done", unspecifiedDefaultValue = "true") final boolean dryRun) throws IOException {
  final HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();

  // Resolve the latest commit without an unchecked Option.get(): on a table with no commits the
  // bare get() fails with an exception that gives the CLI user no hint about the cause.
  final String latestCommit = client.getActiveTimeline().getCommitTimeline().lastInstant()
      .map(instant -> instant.getTimestamp())
      .orElse(null);
  if (latestCommit == null) {
    throw new IllegalStateException("Cannot add partition metadata: no commits found on the timeline of table "
        + client.getBasePath());
  }

  final List<String> partitionPaths = FSUtils.getAllPartitionFoldersThreeLevelsDown(HoodieCLI.fs, client.getBasePath());
  final Path basePath = new Path(client.getBasePath());
  final String[][] rows = new String[partitionPaths.size()][];

  int ind = 0;
  for (String partition : partitionPaths) {
    final Path partitionPath = FSUtils.getPartitionPath(basePath, partition);
    final boolean metadataPresent = HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.fs, partitionPath);

    String action = "None";
    if (!metadataPresent && !dryRun) {
      final HoodiePartitionMetadata partitionMetadata =
          new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath, partitionPath);
      // NOTE(review): the 0 is presumably a task/partition id used while saving - confirm against trySave.
      partitionMetadata.trySave(0);
      action = "Repaired";
    }

    rows[ind++] = new String[] { partition, metadataPresent ? "Yes" : "No", action };
  }

  return HoodiePrintHelper.print(new String[] { HoodieTableHeaderFields.HEADER_PARTITION_PATH, HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_ACTION }, rows);
}
use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
the class TablePathUtils method getTablePathFromPartitionPath.
/**
 * Resolves the table base path for a given partition path.
 *
 * <p>When the partition carries partition metadata, the stored partition depth is read and the
 * table path is the ancestor that many levels above {@code partitionPath}. Otherwise the
 * directory tree is walked upwards, starting at {@code partitionPath} itself, until
 * {@code hasTableMetadataFolder} reports a match.
 *
 * @param fs            file system used for all lookups
 * @param partitionPath path of the partition to start from
 * @return the table path, or an empty {@code Option} if the upward walk reaches the root
 *         without finding a table metadata folder
 * @throws HoodieException wrapping any {@link IOException} raised while reading
 */
private static Option<Path> getTablePathFromPartitionPath(FileSystem fs, Path partitionPath) {
  try {
    if (!HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) {
      // No partition metadata: climb the directory structure looking for the .hoodie folder.
      for (Path candidate = partitionPath; candidate != null; candidate = candidate.getParent()) {
        if (hasTableMetadataFolder(fs, candidate)) {
          return Option.of(candidate);
        }
      }
      return Option.empty();
    }

    // Partition metadata records how deep the partition sits below the table path.
    HoodiePartitionMetadata partitionMeta = new HoodiePartitionMetadata(fs, partitionPath);
    partitionMeta.readFromFS();
    int depth = partitionMeta.getPartitionDepth();
    return Option.of(getNthParent(partitionPath, depth));
  } catch (IOException ioe) {
    throw new HoodieException("Error reading partition metadata for " + partitionPath, ioe);
  }
}
use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
the class InputFormatTestUtil method setupPartition.
/**
 * Creates the partition directory and writes partition metadata into it so the directory is set
 * up as a partition of the table rooted at {@code basePath}.
 *
 * @param basePath      local path of the table root
 * @param partitionPath local path of the partition directory to create
 * @throws IOException if creating the directory fails, or from the metadata setup calls
 */
private static void setupPartition(java.nio.file.Path basePath, java.nio.file.Path partitionPath) throws IOException {
  Files.createDirectories(partitionPath);

  // Build a Hadoop local file system backed by a raw local file system with the default test conf.
  RawLocalFileSystem rawLocalFs = new RawLocalFileSystem();
  rawLocalFs.setConf(HoodieTestUtils.getDefaultHadoopConf());
  LocalFileSystem localFs = new LocalFileSystem(rawLocalFs);

  // Translate the java.nio paths into absolute Hadoop paths.
  Path hadoopBasePath = new Path(basePath.toAbsolutePath().toString());
  Path hadoopPartitionPath = new Path(partitionPath.toAbsolutePath().toString());

  // Partition metadata is stamped with commit time "0".
  HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(localFs, "0", hadoopBasePath, hadoopPartitionPath);

  // Pseudo-random int in [0, 1000); NOTE(review): presumably a task/partition id for trySave - confirm.
  int randomId = (int) (Math.random() * 1000);
  metadata.trySave(randomId);
}
use of org.apache.hudi.common.model.HoodiePartitionMetadata in project hudi by apache.
the class HoodieROTablePathFilter method accept.
/**
 * Decides whether {@code path} should be accepted by this filter.
 *
 * <p>The parent of {@code path} is treated as the candidate partition folder. The decision is:
 * <ul>
 *   <li>{@code true} if the folder (or its derived base directory) is cached as non-hoodie;</li>
 *   <li>the cached answer if the folder's latest files are already cached;</li>
 *   <li>{@code false} if the path lies inside, or is, the table meta folder;</li>
 *   <li>otherwise a file system view is built for the base directory, the folder's latest base
 *       files are cached, and the path is accepted only if it is one of them;</li>
 *   <li>{@code true} if no base directory can be derived, or the base directory is not a table
 *       ({@code TableNotFoundException}); the folder is then cached as non-hoodie.</li>
 * </ul>
 *
 * @param path the path to test; assumed to be a file
 * @return whether the path is accepted
 * @throws HoodieException wrapping any exception raised while checking the path
 */
@Override
public boolean accept(Path path) {
  // Lazily create the engine context on first use.
  if (engineContext == null) {
    this.engineContext = new HoodieLocalEngineContext(this.conf.get());
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Checking acceptance for path " + path);
  }

  Path folder = null;
  try {
    // Lazily resolve the file system from the first path seen.
    if (fs == null) {
      fs = path.getFileSystem(conf.get());
    }

    // The path is assumed to be a file, so its immediate parent is the folder to inspect.
    folder = path.getParent();
    final String folderKey = folder.toString();

    // Cache hit: folder already known not to belong to a hoodie table.
    if (nonHoodiePathCache.contains(folderKey)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Accepting non-hoodie path from cache: " + path);
      }
      return true;
    }

    // Cache hit: latest files of this folder are already known.
    if (hoodiePathCache.containsKey(folderKey)) {
      final boolean cachedVerdict = hoodiePathCache.get(folderKey).contains(path);
      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("%s Hoodie path checked against cache, accept => %s \n", path, cachedVerdict));
      }
      return cachedVerdict;
    }

    // Reject anything that is the meta folder itself or lives underneath it.
    final String filePath = path.toString();
    final String metaFolderSuffix = "/" + HoodieTableMetaClient.METAFOLDER_NAME;
    final boolean underMetaFolder = filePath.contains(metaFolderSuffix + "/") || filePath.endsWith(metaFolderSuffix);
    if (underMetaFolder) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("Skipping Hoodie Metadata file %s \n", filePath));
      }
      return false;
    }

    // No cache answer: work out the candidate table base directory.
    final Path baseDir;
    if (!HoodiePartitionMetadata.hasPartitionMetadata(fs, folder)) {
      baseDir = safeGetParentsParent(folder);
    } else {
      // Partition metadata stores the partition depth, which locates the base directory.
      HoodiePartitionMetadata partitionMeta = new HoodiePartitionMetadata(fs, folder);
      partitionMeta.readFromFS();
      baseDir = HoodieHiveUtils.getNthParent(folder, partitionMeta.getPartitionDepth());
    }

    if (baseDir == null) {
      // Original author's note: file is at < 3 level depth in the FS tree, can't be a hoodie dataset.
      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("(2) Caching non-hoodie path under %s \n", folderKey));
      }
      nonHoodiePathCache.add(folderKey);
      return true;
    }

    final String baseDirKey = baseDir.toString();

    // The base directory itself may already be known as non-hoodie.
    if (nonHoodiePathCache.contains(baseDirKey)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Accepting non-hoodie path from cache: " + path);
      }
      return true;
    }

    HoodieTableFileSystemView fsView = null;
    try {
      // Reuse a cached meta client for this base directory, building one on first sight.
      HoodieTableMetaClient metaClient = metaClientCache.get(baseDirKey);
      if (metaClient == null) {
        metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDirKey).setLoadActiveTimelineOnLoad(true).build();
        metaClientCache.put(baseDirKey, metaClient);
      }

      fsView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, metaClient, HoodieInputFormatUtils.buildMetadataConfig(getConf()));
      String partition = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), folder);
      List<HoodieBaseFile> latestFiles = fsView.getLatestBaseFiles(partition).collect(Collectors.toList());

      // Populate the per-folder cache with the latest base files.
      if (!hoodiePathCache.containsKey(folderKey)) {
        hoodiePathCache.put(folderKey, new HashSet<>());
      }
      LOG.info("Based on hoodie metadata from base path: " + baseDirKey + ", caching " + latestFiles.size() + " files under " + folder);
      latestFiles.forEach(baseFile -> hoodiePathCache.get(folderKey).add(new Path(baseFile.getPath())));

      // Accept the path only if it is among the latest files.
      final boolean accepted = hoodiePathCache.get(folderKey).contains(path);
      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("%s checked after cache population, accept => %s \n", path, accepted));
      }
      return accepted;
    } catch (TableNotFoundException e) {
      // Not a hoodie table: remember both folder and base directory, and accept the path.
      if (LOG.isDebugEnabled()) {
        LOG.debug(String.format("(1) Caching non-hoodie path under %s with basePath %s \n", folderKey, baseDirKey));
      }
      nonHoodiePathCache.add(folderKey);
      nonHoodiePathCache.add(baseDirKey);
      return true;
    } finally {
      if (fsView != null) {
        fsView.close();
      }
    }
  } catch (Exception e) {
    String msg = "Error checking path :" + path + ", under folder: " + folder;
    LOG.error(msg, e);
    throw new HoodieException(msg, e);
  }
}
Aggregations