Search in sources :

Example 36 with LocatedFileStatus

use of org.apache.hadoop.fs.LocatedFileStatus in project apex-core by apache.

the class FSAgent method listFiles.

/**
 * Returns the names of the entries listed directly under {@code dir}.
 *
 * <p>The status of {@code dir} is checked first; the entries then come from
 * {@code fileSystem.listFiles(path, false)}, and only the last component of each
 * entry's path is kept.
 *
 * @param dir path of the directory to list
 * @return the final path component of every listed entry, in iteration order
 * @throws FileNotFoundException if {@code dir} is not a directory
 * @throws IOException if one of the underlying file system calls fails
 */
public List<String> listFiles(String dir) throws IOException {
    Path directory = new Path(dir);
    // Refuse anything that is not a directory before attempting to list it
    if (!fileSystem.getFileStatus(directory).isDirectory()) {
        throw new FileNotFoundException("Cannot read directory " + dir);
    }
    List<String> names = new ArrayList<>();
    for (RemoteIterator<LocatedFileStatus> entries = fileSystem.listFiles(directory, false); entries.hasNext(); ) {
        names.add(entries.next().getPath().getName());
    }
    return names;
}
Also used : Path(org.apache.hadoop.fs.Path) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException)

Example 37 with LocatedFileStatus

use of org.apache.hadoop.fs.LocatedFileStatus in project presto by prestodb.

the class TestPrestoS3FileSystem method testListPrefixModes.

/**
 * Checks the results of {@code listLocatedStatus}, non-recursive {@code listFiles} and
 * recursive {@code listFiles} on a {@link PrestoS3FileSystem} backed by a mock S3 client.
 *
 * <p>The mock answers requests that use "/" as delimiter with one common prefix plus the
 * root-level object, and every other request with both object summaries (child first).
 */
@Test
public void testListPrefixModes() throws Exception {
    String standardStorage = StorageClass.Standard.toString();

    S3ObjectSummary topLevelObject = new S3ObjectSummary();
    topLevelObject.setStorageClass(standardStorage);
    topLevelObject.setKey("standard-object-at-root.txt");
    topLevelObject.setLastModified(new Date());

    S3ObjectSummary nestedObject = new S3ObjectSummary();
    nestedObject.setStorageClass(standardStorage);
    nestedObject.setKey("prefix/child-object.txt");
    nestedObject.setLastModified(new Date());

    try (PrestoS3FileSystem fileSystem = new PrestoS3FileSystem()) {
        MockAmazonS3 mockClient = new MockAmazonS3() {

            @Override
            public ObjectListing listObjects(ListObjectsRequest listObjectsRequest) {
                ObjectListing response = new ObjectListing();
                boolean shallow = "/".equals(listObjectsRequest.getDelimiter());
                if (shallow) {
                    // Shallow listing: one common prefix and the object stored at the root
                    response.getCommonPrefixes().add("prefix");
                    response.getObjectSummaries().add(topLevelObject);
                } else {
                    // Recursive listing of object keys only, nested object first
                    response.getObjectSummaries().add(nestedObject);
                    response.getObjectSummaries().add(topLevelObject);
                }
                return response;
            }
        };

        Path bucketRoot = new Path("s3n://test-bucket/");
        fileSystem.initialize(bucketRoot.toUri(), new Configuration());
        fileSystem.setS3Client(mockClient);

        // Paths the listings are expected to report
        Path prefixPath = new Path(bucketRoot, "prefix");
        Path topLevelPath = new Path(bucketRoot, topLevelObject.getKey());
        Path nestedPath = new Path(bucketRoot, nestedObject.getKey());

        // listLocatedStatus: the prefix as a directory, then the root-level file
        List<LocatedFileStatus> locatedEntries = remoteIteratorToList(fileSystem.listLocatedStatus(bucketRoot));
        assertEquals(locatedEntries.size(), 2);
        assertTrue(locatedEntries.get(0).isDirectory());
        assertFalse(locatedEntries.get(1).isDirectory());
        assertEquals(locatedEntries.get(0).getPath(), prefixPath);
        assertEquals(locatedEntries.get(1).getPath(), topLevelPath);

        // listFiles(recursive = false): only the root-level file
        List<LocatedFileStatus> directFiles = remoteIteratorToList(fileSystem.listFiles(bucketRoot, false));
        assertEquals(directFiles.size(), 1);
        assertFalse(directFiles.get(0).isDirectory());
        assertEquals(directFiles.get(0).getPath(), topLevelPath);

        // listFiles(recursive = true): both files, in the order the mock returned them
        List<LocatedFileStatus> allFiles = remoteIteratorToList(fileSystem.listFiles(bucketRoot, true));
        assertEquals(allFiles.size(), 2);
        assertFalse(allFiles.get(0).isDirectory());
        assertFalse(allFiles.get(1).isDirectory());
        assertEquals(allFiles.get(0).getPath(), nestedPath);
        assertEquals(allFiles.get(1).getPath(), topLevelPath);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) Configuration(org.apache.hadoop.conf.Configuration) ClientConfiguration(com.amazonaws.ClientConfiguration) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) Date(java.util.Date) Test(org.testng.annotations.Test)

Example 38 with LocatedFileStatus

use of org.apache.hadoop.fs.LocatedFileStatus in project presto by prestodb.

the class PrestoS3FileSystem method statusFromPrefixes.

/**
 * Builds one directory-flagged status entry per prefix.
 *
 * <p>Each prefix is turned into a qualified path rooted at {@code PATH_SEPARATOR} and
 * wrapped in a {@link FileStatus} whose directory flag is {@code true}.
 *
 * @param prefixes prefixes to convert
 * @return an iterator over the created entries, in the order of {@code prefixes}
 */
private Iterator<LocatedFileStatus> statusFromPrefixes(List<String> prefixes) {
    List<LocatedFileStatus> directories = new ArrayList<>(prefixes.size());
    prefixes.forEach(commonPrefix -> {
        Path directoryPath = qualifiedPath(new Path(PATH_SEPARATOR + commonPrefix));
        // FileStatus(length, isdir, replication, blocksize, modificationTime, path)
        directories.add(createLocatedFileStatus(new FileStatus(0, true, 1, 0, 0, directoryPath)));
    });
    return directories.iterator();
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ArrayList(java.util.ArrayList)

Example 39 with LocatedFileStatus

use of org.apache.hadoop.fs.LocatedFileStatus in project presto by prestodb.

the class PrestoS3FileSystem method listPrefix.

/**
 * Lists the objects stored under {@code path} as a lazily paged iterator of statuses.
 *
 * <p>The first page is requested immediately; further pages are requested only while the
 * previous page reports that it was truncated.
 *
 * @param path location whose key (plus a trailing separator when non-empty) is the prefix
 * @param initialMaxKeys optional key limit applied to the first request only
 * @param mode listing mode; selects the delimiter and whether non-file entries are dropped
 * @return iterator over the statuses produced by {@code statusFromListing} for every page
 */
private Iterator<LocatedFileStatus> listPrefix(Path path, OptionalInt initialMaxKeys, ListingMode mode) {
    String prefix = keyFromPath(path);
    if (!prefix.isEmpty()) {
        prefix = prefix + PATH_SEPARATOR;
    }

    // RECURSIVE_FILES_ONLY sends no delimiter; every other mode delimits on PATH_SEPARATOR
    String delimiter = null;
    if (mode != ListingMode.RECURSIVE_FILES_ONLY) {
        delimiter = PATH_SEPARATOR;
    }

    // Left as null when the caller supplied no initial limit
    Integer maxKeys = null;
    if (initialMaxKeys.isPresent()) {
        maxKeys = initialMaxKeys.getAsInt();
    }

    ListObjectsRequest request = new ListObjectsRequest()
            .withBucketName(getBucketName(uri))
            .withPrefix(prefix)
            .withDelimiter(delimiter)
            .withMaxKeys(maxKeys);

    STATS.newListObjectsCall();
    ObjectListing firstPage = s3.listObjects(request);
    Iterator<ObjectListing> pages = new AbstractSequentialIterator<ObjectListing>(firstPage) {

        @Override
        protected ObjectListing computeNext(ObjectListing previous) {
            if (previous.isTruncated()) {
                // Clear any max keys set for the initial request before submitting subsequent requests.
                // Values < 0 are not sent in the request and the default limit is used
                previous.setMaxKeys(-1);
                return s3.listNextBatchOfObjects(previous);
            }
            // A page that is not truncated is the last one
            return null;
        }
    };

    Iterator<LocatedFileStatus> statuses = Iterators.concat(Iterators.transform(pages, this::statusFromListing));
    if (!mode.isFilesOnly()) {
        return statuses;
    }
    // Even recursive listing can still contain empty "directory" objects, must filter them out
    return Iterators.filter(statuses, LocatedFileStatus::isFile);
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator)

Example 40 with LocatedFileStatus

use of org.apache.hadoop.fs.LocatedFileStatus in project storm by apache.

the class HdfsUtils method listFilesByModificationTime.

/**
 * Lists the entries returned by {@code fs.listFiles(directory, false)}, ordered with
 * {@code ModifTimeComparator}.
 *
 * <p>When {@code olderThan} is greater than zero, only entries whose modification time is
 * less than or equal to {@code olderThan} are kept. When it is zero or negative, no
 * filtering is applied.
 *
 * @param fs file system to query
 * @param directory directory whose entries are listed
 * @param olderThan modification-time cutoff; a value {@code <= 0} disables the filter
 * @return the paths of the retained entries, in sorted order
 * @throws IOException if listing the directory fails
 */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan) throws IOException {
    boolean filterByAge = olderThan > 0;
    ArrayList<LocatedFileStatus> candidates = new ArrayList<>();
    for (RemoteIterator<LocatedFileStatus> entries = fs.listFiles(directory, false); entries.hasNext(); ) {
        LocatedFileStatus entry = entries.next();
        // The modification time is consulted only when filtering is enabled
        if (!filterByAge || entry.getModificationTime() <= olderThan) {
            candidates.add(entry);
        }
    }
    Collections.sort(candidates, new ModifTimeComparator());

    ArrayList<Path> sortedPaths = new ArrayList<>(candidates.size());
    for (LocatedFileStatus sorted : candidates) {
        sortedPaths.add(sorted.getPath());
    }
    return sortedPaths;
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus)

Aggregations

LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 139, Path (org.apache.hadoop.fs.Path): 104, FileSystem (org.apache.hadoop.fs.FileSystem): 55, ArrayList (java.util.ArrayList): 43, Test (org.junit.Test): 33, FileStatus (org.apache.hadoop.fs.FileStatus): 29, IOException (java.io.IOException): 27, Configuration (org.apache.hadoop.conf.Configuration): 20, File (java.io.File): 13, FileNotFoundException (java.io.FileNotFoundException): 11, HashSet (java.util.HashSet): 11, BlockLocation (org.apache.hadoop.fs.BlockLocation): 9, RemoteIterator (org.apache.hadoop.fs.RemoteIterator): 7, DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 7, StocatorPath (com.ibm.stocator.fs.common.StocatorPath): 6, HashMap (java.util.HashMap): 6, FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 6, Map (java.util.Map): 5, Matcher (java.util.regex.Matcher): 5, BufferedReader (java.io.BufferedReader): 4