Search in sources :

Example 6 with ListObjectsRequest

use of software.amazon.awssdk.services.s3.model.ListObjectsRequest in project presto by prestodb.

the class PrestoS3FileSystem method listPrefix.

private Iterator<LocatedFileStatus> listPrefix(Path path) {
    String key = keyFromPath(path);
    if (!key.isEmpty()) {
        key += PATH_SEPARATOR;
    }
    ListObjectsRequest request = new ListObjectsRequest().withBucketName(uri.getHost()).withPrefix(key).withDelimiter(PATH_SEPARATOR);
    STATS.newListObjectsCall();
    Iterator<ObjectListing> listings = new AbstractSequentialIterator<ObjectListing>(s3.listObjects(request)) {

        @Override
        protected ObjectListing computeNext(ObjectListing previous) {
            if (!previous.isTruncated()) {
                return null;
            }
            return s3.listNextBatchOfObjects(previous);
        }
    };
    return Iterators.concat(Iterators.transform(listings, this::statusFromListing));
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator)

Example 7 with ListObjectsRequest

use of software.amazon.awssdk.services.s3.model.ListObjectsRequest in project nifi by apache.

the class TestListS3 method testListIgnoreByMinAge.

@Test
public void testListIgnoreByMinAge() throws IOException {
    runner.setProperty(ListS3.REGION, "eu-west-1");
    runner.setProperty(ListS3.BUCKET, "test-bucket");
    runner.setProperty(ListS3.MIN_AGE, "30 sec");
    Date lastModifiedNow = new Date();
    Date lastModifiedMinus1Hour = DateUtils.addHours(lastModifiedNow, -1);
    Date lastModifiedMinus3Hour = DateUtils.addHours(lastModifiedNow, -3);
    ObjectListing objectListing = new ObjectListing();
    S3ObjectSummary objectSummary1 = new S3ObjectSummary();
    objectSummary1.setBucketName("test-bucket");
    objectSummary1.setKey("minus-3hour");
    objectSummary1.setLastModified(lastModifiedMinus3Hour);
    objectListing.getObjectSummaries().add(objectSummary1);
    S3ObjectSummary objectSummary2 = new S3ObjectSummary();
    objectSummary2.setBucketName("test-bucket");
    objectSummary2.setKey("minus-1hour");
    objectSummary2.setLastModified(lastModifiedMinus1Hour);
    objectListing.getObjectSummaries().add(objectSummary2);
    S3ObjectSummary objectSummary3 = new S3ObjectSummary();
    objectSummary3.setBucketName("test-bucket");
    objectSummary3.setKey("now");
    objectSummary3.setLastModified(lastModifiedNow);
    objectListing.getObjectSummaries().add(objectSummary3);
    Mockito.when(mockS3Client.listObjects(Mockito.any(ListObjectsRequest.class))).thenReturn(objectListing);
    Map<String, String> stateMap = new HashMap<>();
    String previousTimestamp = String.valueOf(lastModifiedMinus3Hour.getTime());
    stateMap.put(ListS3.CURRENT_TIMESTAMP, previousTimestamp);
    stateMap.put(ListS3.CURRENT_KEY_PREFIX + "0", "minus-3hour");
    runner.getStateManager().setState(stateMap, Scope.CLUSTER);
    runner.run();
    ArgumentCaptor<ListObjectsRequest> captureRequest = ArgumentCaptor.forClass(ListObjectsRequest.class);
    Mockito.verify(mockS3Client, Mockito.times(1)).listObjects(captureRequest.capture());
    ListObjectsRequest request = captureRequest.getValue();
    assertEquals("test-bucket", request.getBucketName());
    Mockito.verify(mockS3Client, Mockito.never()).listVersions(Mockito.any());
    runner.assertAllFlowFilesTransferred(ListS3.REL_SUCCESS, 1);
    List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(ListS3.REL_SUCCESS);
    MockFlowFile ff0 = flowFiles.get(0);
    ff0.assertAttributeEquals("filename", "minus-1hour");
    ff0.assertAttributeEquals("s3.bucket", "test-bucket");
    String lastModifiedTimestamp = String.valueOf(lastModifiedMinus1Hour.getTime());
    ff0.assertAttributeEquals("s3.lastModified", lastModifiedTimestamp);
    runner.getStateManager().assertStateEquals(ListS3.CURRENT_TIMESTAMP, lastModifiedTimestamp, Scope.CLUSTER);
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) HashMap(java.util.HashMap) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) Date(java.util.Date) Test(org.junit.Test)

Example 8 with ListObjectsRequest

use of software.amazon.awssdk.services.s3.model.ListObjectsRequest in project nifi by apache.

the class TestListS3 method testListObjectsNothingNew.

@Test
public void testListObjectsNothingNew() throws IOException {
    runner.setProperty(ListS3.REGION, "eu-west-1");
    runner.setProperty(ListS3.BUCKET, "test-bucket");
    Calendar calendar = Calendar.getInstance();
    calendar.set(2017, 5, 2);
    Date objectLastModified = calendar.getTime();
    long stateCurrentTimestamp = objectLastModified.getTime();
    Map<String, String> state = new HashMap<>();
    state.put(ListS3.CURRENT_TIMESTAMP, String.valueOf(stateCurrentTimestamp));
    state.put(ListS3.CURRENT_KEY_PREFIX + "0", "test-key");
    MockStateManager mockStateManager = runner.getStateManager();
    mockStateManager.setState(state, Scope.CLUSTER);
    ObjectListing objectListing = new ObjectListing();
    S3ObjectSummary objectSummary1 = new S3ObjectSummary();
    objectSummary1.setBucketName("test-bucket");
    objectSummary1.setKey("test-key");
    objectSummary1.setLastModified(objectLastModified);
    objectListing.getObjectSummaries().add(objectSummary1);
    Mockito.when(mockS3Client.listObjects(Mockito.any(ListObjectsRequest.class))).thenReturn(objectListing);
    runner.run();
    ArgumentCaptor<ListObjectsRequest> captureRequest = ArgumentCaptor.forClass(ListObjectsRequest.class);
    Mockito.verify(mockS3Client, Mockito.times(1)).listObjects(captureRequest.capture());
    ListObjectsRequest request = captureRequest.getValue();
    assertEquals("test-bucket", request.getBucketName());
    Mockito.verify(mockS3Client, Mockito.never()).listVersions(Mockito.any());
    runner.assertAllFlowFilesTransferred(ListS3.REL_SUCCESS, 0);
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) HashMap(java.util.HashMap) MockStateManager(org.apache.nifi.state.MockStateManager) Calendar(java.util.Calendar) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) Date(java.util.Date) Test(org.junit.Test)

Example 9 with ListObjectsRequest

use of software.amazon.awssdk.services.s3.model.ListObjectsRequest in project stocator by SparkTC.

the class COSAPIClient method list.

@Override
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter) throws FileNotFoundException, IOException {
    LOG.debug("Native direct list status for {}", path);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    String key = pathToKey(path);
    if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/") && !path.toString().equals(hostName)) {
        key = key + "/";
        LOG.debug("listNativeDirect modify key to {}", key);
    }
    Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>();
    ListObjectsRequest request = new ListObjectsRequest();
    request.setBucketName(mBucket);
    request.setMaxKeys(5000);
    request.setPrefix(key);
    if (!flatListing) {
        request.setDelimiter("/");
    }
    ObjectListing objectList = mClient.listObjects(request);
    List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
    List<String> commonPrefixes = objectList.getCommonPrefixes();
    boolean objectScanContinue = true;
    S3ObjectSummary prevObj = null;
    // start FTA logic
    boolean stocatorOrigin = isSparkOrigin(key, path.toString());
    if (stocatorOrigin) {
        LOG.debug("Stocator origin is true for {}", key);
        if (!isJobSuccessful(key)) {
            LOG.debug("{} created by failed Spark job. Skipped", key);
            if (fModeAutomaticDelete) {
                delete(hostName, new Path(key), true);
            }
            return new FileStatus[0];
        }
    }
    while (objectScanContinue) {
        for (S3ObjectSummary obj : objectSummaries) {
            if (prevObj == null) {
                prevObj = obj;
                prevObj.setKey(correctPlusSign(key, prevObj.getKey()));
                continue;
            }
            obj.setKey(correctPlusSign(key, obj.getKey()));
            String objKey = obj.getKey();
            String unifiedObjectName = extractUnifiedObjectName(objKey);
            LOG.trace("list candidate {}, unified name {}", objKey, unifiedObjectName);
            if (stocatorOrigin && !fullListing) {
                LOG.trace("{} created by Spark", unifiedObjectName);
                // we need to make sure there are no failed attempts
                if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
                    // found failed that was not aborted.
                    LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey);
                    if (prevObj.getSize() < obj.getSize()) {
                        LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
                        prevObj = obj;
                    }
                    continue;
                }
            }
            FileStatus fs = createFileStatus(prevObj, hostName, path);
            if (fs.getLen() > 0 || fullListing) {
                LOG.trace("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
                if (filter == null) {
                    tmpResult.add(fs);
                } else if (filter != null && filter.accept(fs.getPath())) {
                    tmpResult.add(fs);
                } else {
                    LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
                }
            } else {
                emptyObjects.put(fs.getPath().toString(), fs);
            }
            prevObj = obj;
        }
        boolean isTruncated = objectList.isTruncated();
        if (isTruncated) {
            objectList = mClient.listNextBatchOfObjects(objectList);
            objectSummaries = objectList.getObjectSummaries();
        } else {
            objectScanContinue = false;
        }
    }
    if (prevObj != null) {
        FileStatus fs = createFileStatus(prevObj, hostName, path);
        LOG.trace("Adding the last object from the list {}", fs.getPath());
        if (fs.getLen() > 0 || fullListing) {
            LOG.trace("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
            if (filter == null) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else if (filter != null && filter.accept(fs.getPath())) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else {
                LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
            }
        } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) {
            emptyObjects.put(fs.getPath().toString(), fs);
        }
    }
    // get common prefixes
    for (String comPrefix : commonPrefixes) {
        LOG.trace("Common prefix is {}", comPrefix);
        if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString()) || emptyObjects.isEmpty()) {
            FileStatus status = new COSFileStatus(true, false, keyToQualifiedPath(hostName, comPrefix));
            LOG.trace("Match between common prefix and empty object {}. Adding to result", comPrefix);
            if (filter == null) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else if (filter != null && filter.accept(status.getPath())) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else {
                LOG.trace("Common prefix {} rejected by path filter during list. Filter {}", status.getPath(), filter);
            }
        }
    }
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
Also used : StocatorPath(com.ibm.stocator.fs.common.StocatorPath) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest)

Example 10 with ListObjectsRequest

use of software.amazon.awssdk.services.s3.model.ListObjectsRequest in project athenz by yahoo.

the class S3ChangeLogStore method listObjects.

/**
 * list the objects in the zts bucket. If the mod time is specified as 0
 * then we want to list all objects otherwise, we only list objects
 * that are newer than the specified timestamp
 * @param s3 AWS S3 client object
 * @param domains collection to be updated to include domain names
 * @param modTime only include domains newer than this timestamp
 */
void listObjects(AmazonS3 s3, Collection<String> domains, long modTime) {
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("listObjects: Retrieving domains from {} with mod time > {}", s3BucketName, modTime);
    }
    ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(s3BucketName));
    String objectName = null;
    while (objectListing != null) {
        // process each entry in our result set and add the domain
        // name to our return list
        final List<S3ObjectSummary> objectSummaries = objectListing.getObjectSummaries();
        boolean listTruncated = objectListing.isTruncated();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("listObjects: retrieved {} objects, more objects available - {}", objectSummaries.size(), listTruncated);
        }
        for (S3ObjectSummary objectSummary : objectSummaries) {
            if (modTime > 0 && objectSummary.getLastModified().getTime() <= modTime) {
                continue;
            }
            // for now skip any folders/objects that start with '.'
            objectName = objectSummary.getKey();
            if (objectName.charAt(0) == '.') {
                continue;
            }
            domains.add(objectName);
        }
        if (!listTruncated) {
            break;
        }
        objectListing = s3.listNextBatchOfObjects(objectListing);
    }
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary)

Aggregations

ListObjectsRequest (com.amazonaws.services.s3.model.ListObjectsRequest)48 ObjectListing (com.amazonaws.services.s3.model.ObjectListing)46 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)32 ArrayList (java.util.ArrayList)23 AmazonClientException (com.amazonaws.AmazonClientException)11 IOException (java.io.IOException)9 Path (org.apache.hadoop.fs.Path)9 HashMap (java.util.HashMap)8 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)8 DeleteObjectsRequest (com.amazonaws.services.s3.model.DeleteObjectsRequest)6 FileStatus (org.apache.hadoop.fs.FileStatus)6 ListObjectsRequest (software.amazon.awssdk.services.s3.model.ListObjectsRequest)6 ListObjectsResponse (software.amazon.awssdk.services.s3.model.ListObjectsResponse)6 Date (java.util.Date)5 Test (org.junit.Test)5 S3Object (software.amazon.awssdk.services.s3.model.S3Object)5 StocatorPath (com.ibm.stocator.fs.common.StocatorPath)4 FileNotFoundException (java.io.FileNotFoundException)4 S3Exception (software.amazon.awssdk.services.s3.model.S3Exception)4 AmazonServiceException (com.amazonaws.AmazonServiceException)3