Use of org.apache.hadoop.fs.LocatedFileStatus in project flink by apache.
The class YarnApplicationFileUploader, method registerMultipleLocalResources.
/**
 * Recursively uploads (and registers) any (user and system) files in <tt>shipFiles</tt> except
 * for files matching "<tt>flink-dist*.jar</tt>", which should be uploaded separately. If a file
 * is already remote, the upload is skipped and the file is only registered.
 *
 * @param shipFiles local or remote files to register as Yarn local resources
 * @param localResourcesDirectory the directory the local resources are uploaded to
 * @param resourceType type of the resource, which can be one of FILE, PATTERN, or ARCHIVE
 * @return list of class paths with the proper resource keys from the registration
 */
List<String> registerMultipleLocalResources(
        final Collection<Path> shipFiles,
        final String localResourcesDirectory,
        final LocalResourceType resourceType)
        throws IOException {
    final List<Path> localPaths = new ArrayList<>();
    final List<Path> relativePaths = new ArrayList<>();
    for (Path shipFile : shipFiles) {
        if (Utils.isRemotePath(shipFile.toString())) {
            if (fileSystem.isDirectory(shipFile)) {
                // Remote directory: walk it recursively and keep each file's path
                // relative to the directory's parent, so the directory name survives.
                final URI parentURI = shipFile.getParent().toUri();
                final RemoteIterator<LocatedFileStatus> iterable = fileSystem.listFiles(shipFile, true);
                while (iterable.hasNext()) {
                    final Path current = iterable.next().getPath();
                    localPaths.add(current);
                    relativePaths.add(new Path(localResourcesDirectory, parentURI.relativize(current.toUri()).getPath()));
                }
                continue;
            }
        } else {
            final File file = new File(shipFile.toUri().getPath());
            if (file.isDirectory()) {
                // Local directory: resolve symlinks, then list all files beneath it.
                final java.nio.file.Path shipPath = file.toPath().toRealPath();
                final java.nio.file.Path parentPath = shipPath.getParent();
                Collection<java.nio.file.Path> paths = FileUtils.listFilesInDirectory(shipPath, path -> true);
                for (java.nio.file.Path javaPath : paths) {
                    localPaths.add(new Path(javaPath.toUri()));
                    relativePaths.add(new Path(localResourcesDirectory, parentPath.relativize(javaPath).toString()));
                }
                continue;
            }
        }
        // Single file (local or remote): ship it directly under the target directory.
        localPaths.add(shipFile);
        relativePaths.add(new Path(localResourcesDirectory, shipFile.getName()));
    }

    final Set<String> archives = new HashSet<>();
    final Set<String> resources = new HashSet<>();
    for (int i = 0; i < localPaths.size(); i++) {
        final Path localPath = localPaths.get(i);
        final Path relativePath = relativePaths.get(i);
        // flink-dist*.jar is excluded here; it is uploaded and registered separately.
        if (!isFlinkDistJar(relativePath.getName())) {
            final String key = relativePath.toString();
            final YarnLocalResourceDescriptor resourceDescriptor =
                    registerSingleLocalResource(key, localPath, relativePath.getParent().toString(), resourceType, true, true);
            if (!resourceDescriptor.alreadyRegisteredAsLocalResource()) {
                if (key.endsWith("jar")) {
                    archives.add(relativePath.toString());
                } else {
                    resources.add(relativePath.getParent().toString());
                }
            }
        }
    }

    // Construct the classpath: resource directories always go first, and both
    // resources and archives are sorted to keep the classpath deterministic.
    final ArrayList<String> classPaths = new ArrayList<>();
    resources.stream().sorted().forEach(classPaths::add);
    archives.stream().sorted().forEach(classPaths::add);
    return classPaths;
}
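Every usage on this page leans on the same enumeration primitive: FileSystem.listFiles(path, true) returns a RemoteIterator<LocatedFileStatus> that recursively yields all files under a directory, and relativizing each child URI against a parent preserves the directory layout. A minimal, self-contained sketch of that pattern; the class name and the hdfs:///tmp/ship path are illustrative, not taken from the Flink code:

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative directory; any Hadoop-supported filesystem works here.
        final Path dir = new Path("hdfs:///tmp/ship");
        final FileSystem fs = dir.getFileSystem(new Configuration());

        final URI parent = dir.getParent().toUri();
        // The second argument requests a recursive listing.
        final RemoteIterator<LocatedFileStatus> files = fs.listFiles(dir, true);
        while (files.hasNext()) {
            final LocatedFileStatus status = files.next();
            // Keep the directory name in the relative path, as the Flink code does.
            final String relative = parent.relativize(status.getPath().toUri()).getPath();
            System.out.println(relative + " (" + status.getLen() + " bytes)");
        }
    }
}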
Use of org.apache.hadoop.fs.LocatedFileStatus in project flink by apache.
The class YarnApplicationFileUploader, method getAllFilesInProvidedLibDirs.
private Map<String, FileStatus> getAllFilesInProvidedLibDirs(final List<Path> providedLibDirs) {
    final Map<String, FileStatus> allFiles = new HashMap<>();
    checkNotNull(providedLibDirs)
            .forEach(FunctionUtils.uncheckedConsumer(path -> {
                if (!fileSystem.exists(path) || !fileSystem.isDirectory(path)) {
                    LOG.warn("Provided lib dir {} does not exist or is not a directory. Ignoring.", path);
                } else {
                    final RemoteIterator<LocatedFileStatus> iterable = fileSystem.listFiles(path, true);
                    while (iterable.hasNext()) {
                        final LocatedFileStatus locatedFileStatus = iterable.next();
                        // Relativize against the parent so the lib directory's own name
                        // stays in the key.
                        final String name = path.getParent().toUri()
                                .relativize(locatedFileStatus.getPath().toUri())
                                .toString();
                        final FileStatus prevMapping = allFiles.put(name, locatedFileStatus);
                        if (prevMapping != null) {
                            throw new IOException("Two files with the same filename exist in the shared libs: "
                                    + prevMapping.getPath() + " - " + locatedFileStatus.getPath()
                                    + ". Please deduplicate.");
                        }
                    }
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("The following files were found in the shared lib dir: {}",
                                allFiles.values().stream()
                                        .map(fileStatus -> fileStatus.getPath().toString())
                                        .collect(Collectors.joining(", ")));
                    }
                }
            }));
    return Collections.unmodifiableMap(allFiles);
}
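The map key is produced by relativizing each file's URI against the lib directory's parent, so the directory's own name stays in the key, and duplicate filenames are caught because Map.put returns the previous mapping. A tiny sketch of just that URI arithmetic, with illustrative paths (not taken from the Flink code):

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class RelativizeSketch {
    public static void main(String[] args) {
        // Illustrative paths; the real code obtains the child from fs.listFiles().
        final Path libDir = new Path("hdfs:///flink/lib");
        final Path child = new Path("hdfs:///flink/lib/plugins/a.jar");
        final URI name = libDir.getParent().toUri().relativize(child.toUri());
        System.out.println(name); // prints lib/plugins/a.jar
    }
}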
Use of org.apache.hadoop.fs.LocatedFileStatus in project flink by apache.
The class YarnFileStageTest, method verifyDirectoryRecursive.
/**
 * Verifies that the names and contents of the files in the directory {@code workDir} match
 * {@code expectedFiles}.
 *
 * @param targetFileSystem the filesystem that {@code workDir} resides on
 * @param workDir the directory to verify
 * @param expectedFiles the expected names and contents of the files
 * @throws IOException if an error occurs while visiting {@code workDir}
 * @throws InterruptedException if the sleep is interrupted
 */
private static void verifyDirectoryRecursive(
        FileSystem targetFileSystem, Path workDir, Map<String, String> expectedFiles)
        throws IOException, InterruptedException {
    // relative path -> file contents
    final HashMap<String, String> targetFiles = new HashMap<>();
    final RemoteIterator<LocatedFileStatus> targetFilesIterator =
            targetFileSystem.listFiles(workDir, true);
    // one more for the concluding "/"
    final int workDirPrefixLength = workDir.toString().length() + 1;
    while (targetFilesIterator.hasNext()) {
        final LocatedFileStatus targetFile = targetFilesIterator.next();

        int retries = 5;
        do {
            try (FSDataInputStream in = targetFileSystem.open(targetFile.getPath())) {
                String absolutePathString = targetFile.getPath().toString();
                String relativePath = absolutePathString.substring(workDirPrefixLength);
                targetFiles.put(relativePath, in.readUTF());

                assertEquals("extraneous data in file " + relativePath, -1, in.read());
                break;
            } catch (FileNotFoundException e) {
                // For S3, read-after-write may be eventually consistent, i.e. when trying
                // to access the object before writing it; see
                // https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
                // -> try again a bit later
                Thread.sleep(50);
            }
        } while ((retries--) > 0);
    }

    assertThat(targetFiles, equalTo(expectedFiles));
}
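The do/while with a FileNotFoundException catch is a retry guard for object stores with eventually consistent read-after-write, where a freshly written object can briefly be invisible to readers. The same idea can be factored into a generic helper; the sketch below is an assumption of this page, not Flink API, and the IoAction type, helper name, retry count, and delay are all arbitrary choices:

import java.io.FileNotFoundException;
import java.io.IOException;

public final class ReadAfterWriteRetry {

    /** An I/O action that may fail with an IOException (hypothetical helper type). */
    @FunctionalInterface
    public interface IoAction<T> {
        T run() throws IOException;
    }

    /** Runs the action, retrying on FileNotFoundException to ride out read-after-write lag. */
    public static <T> T withRetries(IoAction<T> action, int retries, long delayMillis)
            throws IOException, InterruptedException {
        while (true) {
            try {
                return action.run();
            } catch (FileNotFoundException e) {
                if (retries-- <= 0) {
                    throw e; // out of retries: surface the original failure
                }
                Thread.sleep(delayMillis);
            }
        }
    }
}

With such a helper, the loop body above would shrink to something like withRetries(() -> readFileContents(targetFileSystem, targetFile.getPath()), 5, 50L), where readFileContents is a hypothetical extraction of the try-with-resources block.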
Use of org.apache.hadoop.fs.LocatedFileStatus in project hbase by apache.
The class BackupUtils, method loadBackupInfo.
public static BackupInfo loadBackupInfo(Path backupRootPath, String backupId, FileSystem fs)
        throws IOException {
    Path backupPath = new Path(backupRootPath, backupId);

    RemoteIterator<LocatedFileStatus> it = fs.listFiles(backupPath, true);
    while (it.hasNext()) {
        LocatedFileStatus lfs = it.next();
        if (lfs.getPath().getName().equals(BackupManifest.MANIFEST_FILE_NAME)) {
            // Load BackupManifest
            BackupManifest manifest = new BackupManifest(fs, lfs.getPath().getParent());
            BackupInfo info = manifest.toBackupInfo();
            return info;
        }
    }
    return null;
}
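Because loadBackupInfo is a public static helper, calling it needs nothing beyond a FileSystem bound to the backup root. A hedged usage sketch; the root path and backup id are illustrative placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.util.BackupUtils;

public class LoadBackupInfoExample {
    public static void main(String[] args) throws Exception {
        final Configuration conf = HBaseConfiguration.create();
        final Path backupRoot = new Path("hdfs:///backup"); // illustrative root
        final FileSystem fs = backupRoot.getFileSystem(conf);

        // HBase backup ids look like "backup_<timestamp>"; this one is made up.
        final BackupInfo info = BackupUtils.loadBackupInfo(backupRoot, "backup_1700000000000", fs);
        if (info == null) {
            System.out.println("No manifest found for the given backup id.");
        } else {
            System.out.println("Loaded backup: " + info.getBackupId());
        }
    }
}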
Use of org.apache.hadoop.fs.LocatedFileStatus in project hbase by apache.
The class TestBackupBase, method dumpBackupDir.
protected void dumpBackupDir() throws IOException {
    // Dump Backup Dir
    FileSystem fs = FileSystem.get(conf1);
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(BACKUP_ROOT_DIR), true);
    while (it.hasNext()) {
        LOG.debug(Objects.toString(it.next().getPath()));
    }
}