Search in sources :

Example 11 with ListObjectsRequest

use of com.amazonaws.services.s3.model.ListObjectsRequest in project herd by FINRAOS.

the class S3DaoTest method testListDirectoryAssertIgnoreDirectories.

/**
 * Checks {@code s3Dao.listDirectory} with the "ignore zero-byte directory markers" flag set.
 *
 * <p>The mocked listing returns four entries; the test expects only the zero-byte key ending in
 * "/" to be dropped, while a zero-byte plain key and a non-empty key ending in "/" are kept in
 * listing order.
 */
@Test
public void testListDirectoryAssertIgnoreDirectories() {
    // Swap the DAO's S3Operations for a mock; the real one is put back in the finally block.
    S3Operations realS3Operations = (S3Operations) ReflectionTestUtils.getField(s3Dao, "s3Operations");
    S3Operations stubbedS3Operations = mock(S3Operations.class);
    ReflectionTestUtils.setField(s3Dao, "s3Operations", stubbedS3Operations);
    try {
        String bucket = "s3BucketName";
        String prefix = "s3KeyPrefix";
        S3FileTransferRequestParamsDto params = new S3FileTransferRequestParamsDto();
        params.setS3BucketName(bucket);
        params.setS3KeyPrefix(prefix);

        // Fixture: key/size pairs returned by the mocked listing, in order.
        // The last entry (directory with a non-zero size) is impossible in practice, but there is
        // a conditional branch to cover.
        String[] listedKeys = { "valid/object/key", "empty/file", "directory/path/", "another/directory/path/" };
        long[] listedSizes = { 1024L, 0L, 0L, 1024L };

        when(stubbedS3Operations.listObjects(any(), any())).then(invocation -> {
            ListObjectsRequest listObjectsRequest = invocation.getArgument(0);
            assertEquals(bucket, listObjectsRequest.getBucketName());
            assertEquals(prefix, listObjectsRequest.getPrefix());
            ObjectListing listing = new ObjectListing();
            for (int i = 0; i < listedKeys.length; i++) {
                S3ObjectSummary summary = new S3ObjectSummary();
                summary.setBucketName(bucket);
                summary.setKey(listedKeys[i]);
                summary.setSize(listedSizes[i]);
                listing.getObjectSummaries().add(summary);
            }
            return listing;
        });

        List<S3ObjectSummary> actual = s3Dao.listDirectory(params, true);

        // Everything except the zero-byte "directory/path/" marker must survive.
        String[] expectedKeys = { "valid/object/key", "empty/file", "another/directory/path/" };
        long[] expectedSizes = { 1024L, 0L, 1024L };
        assertEquals(3, actual.size());
        for (int i = 0; i < expectedKeys.length; i++) {
            assertEquals(expectedKeys[i], actual.get(i).getKey());
            assertEquals(expectedSizes[i], actual.get(i).getSize());
        }
    } finally {
        ReflectionTestUtils.setField(s3Dao, "s3Operations", realS3Operations);
    }
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) S3FileTransferRequestParamsDto(org.finra.herd.model.dto.S3FileTransferRequestParamsDto) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) Test(org.junit.Test)

Example 12 with ListObjectsRequest

use of com.amazonaws.services.s3.model.ListObjectsRequest in project spring-integration-aws by spring-projects.

the class S3Session method list.

/**
 * Lists every object summary under the given path.
 *
 * <p>The path is split by {@code splitPathToBucketAndKey}; the first element is used as the
 * bucket name and, when present, the second as the key prefix.
 *
 * @param path the path to list
 * @return all object summaries collected across every page of the listing
 * @throws IOException declared by the overridden method
 */
@Override
public S3ObjectSummary[] list(String path) throws IOException {
    String[] bucketAndPrefix = splitPathToBucketAndKey(path, false);
    ListObjectsRequest request = new ListObjectsRequest().withBucketName(bucketAndPrefix[0]);
    if (bucketAndPrefix.length > 1) {
        request.setPrefix(bucketAndPrefix[1]);
    }

    // Amazon S3 returns at most 1,000 keys per listing response and truncates the rest,
    // so keep requesting pages (advancing the marker) until the response is not truncated.
    List<S3ObjectSummary> collected = new ArrayList<>();
    boolean morePages = true;
    while (morePages) {
        ObjectListing page = this.amazonS3.listObjects(request);
        collected.addAll(page.getObjectSummaries());
        request.setMarker(page.getNextMarker());
        morePages = page.isTruncated();
    }
    return collected.toArray(new S3ObjectSummary[collected.size()]);
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary)

Example 13 with ListObjectsRequest

use of com.amazonaws.services.s3.model.ListObjectsRequest in project dataverse by IQSS.

the class S3AccessIO method listAuxObjects.

/**
 * Lists the names of the auxiliary objects stored under this object's destination-key prefix.
 *
 * <p>All pages of the S3 listing are fetched. For each listed key, the returned name is the part
 * of the key after its last {@code "."}.
 *
 * @return the auxiliary object names, in listing order; empty if none are found
 * @throws IOException if any S3 listing call (first page or a later page) fails; the underlying
 *         {@link AmazonClientException} is attached as the cause
 */
@Override
public List<String> listAuxObjects() throws IOException {
    if (!this.canWrite()) {
        open();
    }
    // presumably an empty aux tag yields the common prefix of all aux objects — verify against getDestinationKey
    String prefix = getDestinationKey("");
    List<String> ret = new ArrayList<>();
    ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix);
    // Collect into our own list rather than appending to the list owned by the ObjectListing.
    List<S3ObjectSummary> storedAuxFilesSummary = new ArrayList<>();
    try {
        // The first page is fetched inside the try so that its failures are translated to
        // IOException exactly like failures on subsequent pages.
        ObjectListing storedAuxFilesList = s3.listObjects(req);
        storedAuxFilesSummary.addAll(storedAuxFilesList.getObjectSummaries());
        while (storedAuxFilesList.isTruncated()) {
            logger.fine("S3 listAuxObjects: going to next page of list");
            storedAuxFilesList = s3.listNextBatchOfObjects(storedAuxFilesList);
            storedAuxFilesSummary.addAll(storedAuxFilesList.getObjectSummaries());
        }
    } catch (AmazonClientException ase) {
        logger.warning("Caught an AmazonServiceException in S3AccessIO.listAuxObjects():    " + ase.getMessage());
        // Keep the original exception as the cause so the S3 error details are not lost.
        throw new IOException("S3AccessIO: Failed to get aux objects for listing.", ase);
    }
    for (S3ObjectSummary item : storedAuxFilesSummary) {
        String destinationKey = item.getKey();
        // Everything after the last "." in the key is treated as the aux object's name.
        String fileName = destinationKey.substring(destinationKey.lastIndexOf(".") + 1);
        logger.fine("S3 cached aux object fileName: " + fileName);
        ret.add(fileName);
    }
    return ret;
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) AmazonClientException(com.amazonaws.AmazonClientException) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IOException(java.io.IOException)

Example 14 with ListObjectsRequest

use of com.amazonaws.services.s3.model.ListObjectsRequest in project stocator by CODAIT.

the class COSAPIClient method getFileStatus.

/**
 * Resolves the {@link FileStatus} for a path, consulting the in-memory cache first and then
 * trying progressively weaker lookups against the object store.
 *
 * <p>Lookup order:
 * <ol>
 *   <li>Return the cached status from {@code memoryCache} if one exists.</li>
 *   <li>If the path denotes the container itself, return a directory status.</li>
 *   <li>If the path contains {@code HADOOP_TEMPORARY}, report it as not found.</li>
 *   <li>Look up the key as-is via {@code getFileStatusKeyBased}.</li>
 *   <li>If the key does not end with "/", look up {@code key + "/"}; if that also fails, list a
 *       single entry under the key to detect a pseudo directory.</li>
 * </ol>
 * Every status that is returned from steps 2, 4 and 5 is stored in {@code memoryCache}.
 *
 * @param hostName the host name; compared against the path to detect the container itself
 * @param path the path to resolve
 * @param msg not referenced by this method
 * @return the resolved file status
 * @throws FileNotFoundException if the path is a temporary object or none of the lookups finds it
 * @throws IOException if a key-based lookup fails with an S3 status code other than 404
 */
@Override
public FileStatus getFileStatus(String hostName, Path path, String msg) throws IOException, FileNotFoundException {
    FileStatus res = null;
    // Fast path: serve from the in-memory cache.
    FileStatus cached = memoryCache.getFileStatus(path.toString());
    if (cached != null) {
        return cached;
    }
    LOG.trace("getFileStatus(start) for {}, hostname: {}", path, hostName);
    /*
     * The requested path is equal to hostName. HostName is equal to
     * hostNameScheme, thus the container. Therefore we have no object to look
     * for and we return the FileStatus as a directory. Containers have no
     * lastModified.
     */
    // The length comparison presumably covers a path that equals hostName minus its trailing "/" — TODO confirm.
    if (path.toString().equals(hostName) || (path.toString().length() + 1 == hostName.length())) {
        LOG.trace("getFileStatus(completed) {}", path);
        res = new FileStatus(0L, true, 1, mBlockSize, 0L, path);
        memoryCache.putFileStatus(path.toString(), res);
        return res;
    }
    // Temporary objects are always reported as missing, without querying the store.
    if (path.toString().contains(HADOOP_TEMPORARY)) {
        LOG.debug("getFileStatus on temp object {}. Return not found", path.toString());
        throw new FileNotFoundException("Not found " + path.toString());
    }
    String key = pathToKey(path);
    LOG.debug("getFileStatus: on original key {}", key);
    FileStatus fileStatus = null;
    try {
        fileStatus = getFileStatusKeyBased(key, path);
    } catch (AmazonS3Exception e) {
        LOG.warn("file status {} returned {}", key, e.getStatusCode());
        // A 404 is swallowed here so the fallback lookups below can run; anything else is fatal.
        if (e.getStatusCode() != 404) {
            LOG.warn("Throw IOException for {}. Most likely authentication failed", key);
            throw new IOException(e);
        }
    }
    if (fileStatus != null) {
        LOG.trace("getFileStatus(completed) {}", path);
        memoryCache.putFileStatus(path.toString(), fileStatus);
        return fileStatus;
    }
    // probably not needed this call
    // Note: when the key already ends with "/", this whole fallback (including the listing) is
    // skipped and the method goes straight to the FileNotFoundException below.
    if (!key.endsWith("/")) {
        String newKey = key + "/";
        try {
            LOG.debug("getFileStatus: original key not found. Alternative key {}", newKey);
            fileStatus = getFileStatusKeyBased(newKey, path);
        } catch (AmazonS3Exception e) {
            // Same policy as above: only a 404 is tolerated.
            if (e.getStatusCode() != 404) {
                throw new IOException(e);
            }
        }
        if (fileStatus != null) {
            LOG.trace("getFileStatus(completed) {}", path);
            memoryCache.putFileStatus(path.toString(), fileStatus);
            return fileStatus;
        } else {
            // if here: both key and key/ returned not found.
            // trying to see if pseudo directory of the form
            // a/b/key/d/e (a/b/key/ doesn't exists by itself)
            // perform listing on the key
            LOG.debug("getFileStatus: Modifined key {} not found. Trying to list", key);
            key = maybeAddTrailingSlash(key);
            ListObjectsRequest request = new ListObjectsRequest();
            request.setBucketName(mBucket);
            request.setPrefix(key);
            request.setDelimiter("/");
            // One entry is enough: we only need to know whether anything exists under the prefix.
            request.setMaxKeys(1);
            ObjectListing objects = mClient.listObjects(request);
            if (!objects.getCommonPrefixes().isEmpty() || !objects.getObjectSummaries().isEmpty()) {
                LOG.debug("getFileStatus(completed) {}", path);
                res = new FileStatus(0, true, 1, 0, 0, path);
                memoryCache.putFileStatus(path.toString(), res);
                return res;
            } else if (key.isEmpty()) {
                // An empty key is treated as the root directory even when the listing is empty.
                LOG.trace("Found root directory");
                LOG.debug("getFileStatus(completed) {}", path);
                res = new FileStatus(0, true, 1, 0, 0, path);
                memoryCache.putFileStatus(path.toString(), res);
                return res;
            }
        }
    }
    LOG.debug("Not found {}. Throw FNF exception", path.toString());
    throw new FileNotFoundException("Not found " + path.toString());
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) FileNotFoundException(java.io.FileNotFoundException) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) AmazonS3Exception(com.amazonaws.services.s3.model.AmazonS3Exception) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException)

Example 15 with ListObjectsRequest

use of com.amazonaws.services.s3.model.ListObjectsRequest in project stocator by CODAIT.

the class COSAPIClient method list.

/**
 * Lists the objects under a path directly from the object store and converts them to
 * {@link FileStatus} entries.
 *
 * <p>The listing uses {@code mBucket}, the key derived from {@code path} as prefix, and "/" as
 * delimiter unless {@code flatListing} is set. Objects are processed with a one-element
 * look-behind ({@code prevObj}): an object is only emitted once the next object has been seen,
 * which lets colliding names from repeated task attempts be collapsed first. Zero-length objects
 * are not returned unless {@code fullListing} is set; they are remembered in a local map that is
 * later used to decide which common prefixes are reported as directories.
 *
 * @param hostName the host name used to build qualified paths
 * @param path the path to list
 * @param fullListing when true, zero-length objects are returned and collision handling is skipped
 * @param prefixBased not referenced by this method
 * @param isDirectory when non-null and true, a trailing "/" is appended to the key if missing
 *        (unless the path equals {@code hostName})
 * @param flatListing when true, no delimiter is set on the listing request
 * @param filter optional path filter; entries it rejects are left out of the result
 * @return the matching entries; an empty array when the key was written by a job that
 *         {@code isJobSuccessful} reports as failed
 * @throws FileNotFoundException declared by the overridden method
 * @throws IOException declared by the overridden method
 */
@Override
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter) throws FileNotFoundException, IOException {
    LOG.debug("Native direct list status for {}", path);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    String key = pathToKey(path);
    if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/") && !path.toString().equals(hostName)) {
        key = key + "/";
        LOG.debug("listNativeDirect modify key to {}", key);
    }
    // Zero-length objects seen during the scan, keyed by their path string.
    Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>();
    ListObjectsRequest request = new ListObjectsRequest();
    request.setBucketName(mBucket);
    request.setMaxKeys(5000);
    request.setPrefix(key);
    if (!flatListing) {
        request.setDelimiter("/");
    }
    ObjectListing objectList = mClient.listObjects(request);
    List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
    // NOTE(review): common prefixes are read from the first page only; the pagination below
    // refreshes objectSummaries but never commonPrefixes — confirm this is intended.
    List<String> commonPrefixes = objectList.getCommonPrefixes();
    boolean objectScanContinue = true;
    // Look-behind candidate: the object that will be emitted once its successor is known.
    S3ObjectSummary prevObj = null;
    // start FTA logic
    boolean stocatorOrigin = isSparkOrigin(key, path.toString());
    if (stocatorOrigin) {
        LOG.debug("Stocator origin is true for {}", key);
        if (!isJobSuccessful(key)) {
            LOG.debug("{} created by failed Spark job. Skipped", key);
            if (fModeAutomaticDelete) {
                delete(hostName, new Path(key), true);
            }
            return new FileStatus[0];
        }
    }
    while (objectScanContinue) {
        for (S3ObjectSummary obj : objectSummaries) {
            // The very first object only seeds the look-behind; nothing is emitted yet.
            if (prevObj == null) {
                prevObj = obj;
                prevObj.setKey(correctPlusSign(key, prevObj.getKey()));
                continue;
            }
            obj.setKey(correctPlusSign(key, obj.getKey()));
            String objKey = obj.getKey();
            String unifiedObjectName = extractUnifiedObjectName(objKey);
            LOG.trace("list candidate {}, unified name {}", objKey, unifiedObjectName);
            if (stocatorOrigin && !fullListing) {
                LOG.trace("{} created by Spark", unifiedObjectName);
                // we need to make sure there are no failed attempts
                if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
                    // found failed that was not aborted.
                    LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey);
                    // Of two colliding objects, keep the larger one as the candidate.
                    if (prevObj.getSize() < obj.getSize()) {
                        LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
                        prevObj = obj;
                    }
                    continue;
                }
            }
            // No collision with the current object, so the previous candidate is final: emit it.
            FileStatus fs = createFileStatus(prevObj, hostName, path);
            if (fs.getLen() > 0 || fullListing) {
                LOG.trace("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
                // NOTE(review): unlike the last-object handling after the loop, entries added
                // here are not written to memoryCache — confirm whether that is deliberate.
                if (filter == null) {
                    tmpResult.add(fs);
                } else if (filter != null && filter.accept(fs.getPath())) {
                    tmpResult.add(fs);
                } else {
                    LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
                }
            } else {
                // NOTE(review): the HADOOP_SUCCESS exclusion applied to the last object below
                // is not applied here — confirm whether that is deliberate.
                emptyObjects.put(fs.getPath().toString(), fs);
            }
            prevObj = obj;
        }
        // Fetch the next page if the listing was truncated; otherwise stop scanning.
        boolean isTruncated = objectList.isTruncated();
        if (isTruncated) {
            objectList = mClient.listNextBatchOfObjects(objectList);
            objectSummaries = objectList.getObjectSummaries();
        } else {
            objectScanContinue = false;
        }
    }
    // The look-behind leaves the final candidate un-emitted; handle it here.
    if (prevObj != null) {
        FileStatus fs = createFileStatus(prevObj, hostName, path);
        LOG.trace("Adding the last object from the list {}", fs.getPath());
        if (fs.getLen() > 0 || fullListing) {
            LOG.trace("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
            if (filter == null) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else if (filter != null && filter.accept(fs.getPath())) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else {
                LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
            }
        } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) {
            emptyObjects.put(fs.getPath().toString(), fs);
        }
    }
    // get common prefixes
    for (String comPrefix : commonPrefixes) {
        LOG.trace("Common prefix is {}", comPrefix);
        // A common prefix is reported as a directory when a zero-length object with the same
        // qualified path was seen, or when no zero-length objects were seen at all.
        if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString()) || emptyObjects.isEmpty()) {
            FileStatus status = new COSFileStatus(true, false, keyToQualifiedPath(hostName, comPrefix));
            LOG.trace("Match between common prefix and empty object {}. Adding to result", comPrefix);
            if (filter == null) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else if (filter != null && filter.accept(status.getPath())) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else {
                LOG.trace("Common prefix {} rejected by path filter during list. Filter {}", status.getPath(), filter);
            }
        }
    }
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
Also used : StocatorPath(com.ibm.stocator.fs.common.StocatorPath) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest)

Aggregations

ListObjectsRequest (com.amazonaws.services.s3.model.ListObjectsRequest)48 ObjectListing (com.amazonaws.services.s3.model.ObjectListing)46 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)32 ArrayList (java.util.ArrayList)23 AmazonClientException (com.amazonaws.AmazonClientException)11 IOException (java.io.IOException)9 Path (org.apache.hadoop.fs.Path)9 HashMap (java.util.HashMap)8 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)8 DeleteObjectsRequest (com.amazonaws.services.s3.model.DeleteObjectsRequest)6 FileStatus (org.apache.hadoop.fs.FileStatus)6 ListObjectsRequest (software.amazon.awssdk.services.s3.model.ListObjectsRequest)6 ListObjectsResponse (software.amazon.awssdk.services.s3.model.ListObjectsResponse)6 Date (java.util.Date)5 Test (org.junit.Test)5 S3Object (software.amazon.awssdk.services.s3.model.S3Object)5 StocatorPath (com.ibm.stocator.fs.common.StocatorPath)4 FileNotFoundException (java.io.FileNotFoundException)4 S3Exception (software.amazon.awssdk.services.s3.model.S3Exception)4 AmazonServiceException (com.amazonaws.AmazonServiceException)3