Example usage of org.apache.hadoop.fs.LocatedFileStatus in the Apache apex-core project, from the listFiles method of class FSAgent.
/**
 * Lists the names (not full paths) of the entries directly inside the given directory.
 *
 * @param dir path of the directory to list
 * @return the simple names of the files found in {@code dir}
 * @throws FileNotFoundException if {@code dir} does not resolve to a directory
 * @throws IOException if the file system cannot be accessed
 */
public List<String> listFiles(String dir) throws IOException {
    Path path = new Path(dir);
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (!fileStatus.isDirectory()) {
        throw new FileNotFoundException("Cannot read directory " + dir);
    }
    List<String> names = new ArrayList<>();
    // Non-recursive listing: only immediate children of the directory.
    for (RemoteIterator<LocatedFileStatus> entries = fileSystem.listFiles(path, false); entries.hasNext(); ) {
        names.add(entries.next().getPath().getName());
    }
    return names;
}
Example usage of org.apache.hadoop.fs.LocatedFileStatus in the prestodb Presto project, from the testListPrefixModes method of class TestPrestoS3FileSystem.
@Test
public void testListPrefixModes() throws Exception {
// Fixture: one object at the bucket root and one nested under the "prefix/" key.
// Both need a storage class and last-modified time so status conversion succeeds.
S3ObjectSummary rootObject = new S3ObjectSummary();
rootObject.setStorageClass(StorageClass.Standard.toString());
rootObject.setKey("standard-object-at-root.txt");
rootObject.setLastModified(new Date());
S3ObjectSummary childObject = new S3ObjectSummary();
childObject.setStorageClass(StorageClass.Standard.toString());
childObject.setKey("prefix/child-object.txt");
childObject.setLastModified(new Date());
try (PrestoS3FileSystem fs = new PrestoS3FileSystem()) {
// Mock S3 client: the delimiter distinguishes a shallow listing (delimiter "/",
// returns common prefixes + root objects) from a recursive one (no delimiter,
// returns every object key).
MockAmazonS3 s3 = new MockAmazonS3() {
@Override
public ObjectListing listObjects(ListObjectsRequest listObjectsRequest) {
ObjectListing listing = new ObjectListing();
// Shallow listing
if ("/".equals(listObjectsRequest.getDelimiter())) {
listing.getCommonPrefixes().add("prefix");
listing.getObjectSummaries().add(rootObject);
return listing;
}
// Recursive listing of object keys only
listing.getObjectSummaries().addAll(Arrays.asList(childObject, rootObject));
return listing;
}
};
Path rootPath = new Path("s3n://test-bucket/");
fs.initialize(rootPath.toUri(), new Configuration());
fs.setS3Client(s3);
// listLocatedStatus: shallow mode — expects the "prefix" directory entry first,
// then the root file. NOTE(review): ordering appears to rely on prefixes being
// emitted before objects — confirm against PrestoS3FileSystem's listing code.
List<LocatedFileStatus> shallowAll = remoteIteratorToList(fs.listLocatedStatus(rootPath));
assertEquals(shallowAll.size(), 2);
assertTrue(shallowAll.get(0).isDirectory());
assertFalse(shallowAll.get(1).isDirectory());
assertEquals(shallowAll.get(0).getPath(), new Path(rootPath, "prefix"));
assertEquals(shallowAll.get(1).getPath(), new Path(rootPath, rootObject.getKey()));
// listFiles non-recursive: directory entries are filtered out, leaving only the root file.
List<LocatedFileStatus> shallowFiles = remoteIteratorToList(fs.listFiles(rootPath, false));
assertEquals(shallowFiles.size(), 1);
assertFalse(shallowFiles.get(0).isDirectory());
assertEquals(shallowFiles.get(0).getPath(), new Path(rootPath, rootObject.getKey()));
// listFiles recursive: both objects returned as files, none as directories.
List<LocatedFileStatus> recursiveFiles = remoteIteratorToList(fs.listFiles(rootPath, true));
assertEquals(recursiveFiles.size(), 2);
assertFalse(recursiveFiles.get(0).isDirectory());
assertFalse(recursiveFiles.get(1).isDirectory());
assertEquals(recursiveFiles.get(0).getPath(), new Path(rootPath, childObject.getKey()));
assertEquals(recursiveFiles.get(1).getPath(), new Path(rootPath, rootObject.getKey()));
}
}
Example usage of org.apache.hadoop.fs.LocatedFileStatus in the prestodb Presto project, from the statusFromPrefixes method of class PrestoS3FileSystem.
/**
 * Converts S3 listing prefixes into directory-style {@link LocatedFileStatus} entries.
 * A prefix has no backing S3 object, so each one is synthesized as a zero-length
 * directory status at its fully qualified path.
 */
private Iterator<LocatedFileStatus> statusFromPrefixes(List<String> prefixes) {
    List<LocatedFileStatus> statuses = new ArrayList<>(prefixes.size());
    for (String prefix : prefixes) {
        Path prefixPath = qualifiedPath(new Path(PATH_SEPARATOR + prefix));
        // length 0, isdir true, replication 1, blocksize 0, mtime 0
        statuses.add(createLocatedFileStatus(new FileStatus(0, true, 1, 0, 0, prefixPath)));
    }
    return statuses.iterator();
}
Example usage of org.apache.hadoop.fs.LocatedFileStatus in the prestodb Presto project, from the listPrefix method of class PrestoS3FileSystem.
// Lazily lists the S3 keys under the given path, paging through truncated listings.
// In shallow modes a "/" delimiter makes S3 group children into common prefixes;
// in RECURSIVE_FILES_ONLY mode no delimiter is sent, so every descendant key is returned.
private Iterator<LocatedFileStatus> listPrefix(Path path, OptionalInt initialMaxKeys, ListingMode mode) {
String key = keyFromPath(path);
// Ensure the prefix ends with the separator so sibling keys sharing the same
// leading characters (e.g. "foo" vs "foobar") are not matched.
if (!key.isEmpty()) {
key += PATH_SEPARATOR;
}
ListObjectsRequest request = new ListObjectsRequest().withBucketName(getBucketName(uri)).withPrefix(key).withDelimiter(mode == ListingMode.RECURSIVE_FILES_ONLY ? null : PATH_SEPARATOR).withMaxKeys(initialMaxKeys.isPresent() ? initialMaxKeys.getAsInt() : null);
STATS.newListObjectsCall();
// Lazy pagination: each page is fetched only when the previous one is consumed.
Iterator<ObjectListing> listings = new AbstractSequentialIterator<ObjectListing>(s3.listObjects(request)) {
@Override
protected ObjectListing computeNext(ObjectListing previous) {
// A non-truncated listing is the last page; returning null ends the iteration.
if (!previous.isTruncated()) {
return null;
}
// Clear any max keys set for the initial request before submitting subsequent requests. Values < 0
// are not sent in the request and the default limit is used
previous.setMaxKeys(-1);
return s3.listNextBatchOfObjects(previous);
}
};
// Flatten each page into per-entry statuses; the whole pipeline stays lazy.
Iterator<LocatedFileStatus> result = Iterators.concat(Iterators.transform(listings, this::statusFromListing));
if (mode.isFilesOnly()) {
// Even recursive listing can still contain empty "directory" objects, must filter them out
result = Iterators.filter(result, LocatedFileStatus::isFile);
}
return result;
}
Example usage of org.apache.hadoop.fs.LocatedFileStatus in the Apache Storm project, from the listFilesByModificationTime method of class HdfsUtils.
/**
 * Lists the files directly under {@code directory} (non-recursive), sorted by
 * modification time, keeping only those not modified after {@code olderThan}.
 * If {@code olderThan} is {@code <= 0} the age filter is disabled and every
 * file is returned.
 *
 * @param fs file system to query
 * @param directory directory whose immediate files are listed
 * @param olderThan modification-time cutoff (epoch millis); {@code <= 0} disables filtering
 * @return paths of the matching files, ordered by modification time
 * @throws IOException if the listing fails
 */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan) throws IOException {
    ArrayList<LocatedFileStatus> fstats = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(directory, false);
    while (itr.hasNext()) {
        LocatedFileStatus fileStatus = itr.next();
        // Single guard replaces the original nested if/else: keep everything when
        // the filter is disabled, otherwise keep files at or before the cutoff.
        if (olderThan <= 0 || fileStatus.getModificationTime() <= olderThan) {
            fstats.add(fileStatus);
        }
    }
    fstats.sort(new ModifTimeComparator());
    ArrayList<Path> result = new ArrayList<>(fstats.size());
    for (LocatedFileStatus fstat : fstats) {
        result.add(fstat.getPath());
    }
    return result;
}
Aggregations