Search in sources :

Example 46 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project stocator by SparkTC.

the class COSAPIClient method internalList.

/**
 * Lists the objects stored under {@code path} and converts them to {@link FileStatus} entries.
 *
 * <p>The listing is requested from bucket {@code mBucket} with URL encoding and up to 5000 keys
 * per page; further pages are fetched while the listing is reported as truncated. Object
 * summaries are processed with a one-element lag: the entry emitted in each iteration is the
 * previously seen object ({@code prevObj}), which allows two consecutive keys to be compared
 * before either of them is reported. The final pending object is emitted after the paging loop.
 *
 * @param hostName host name, passed to the helpers that build paths and file statuses; when
 *                 {@code path} equals it, no trailing "/" is appended to the listing key
 * @param path path to list
 * @param fullListing when true, zero-length objects are reported as well and the
 *                    Stocator-specific skip/collision handling is bypassed
 * @param prefixBased only written to the debug log in this method
 * @param isDirectory when non-null and true, the listing key is forced to end with "/"
 * @param flatListing when false, "/" is used as the listing delimiter
 * @param filter optional path filter; when non-null only accepted paths are returned
 * @param cleanup when true, the object that loses a name collision is deleted
 * @return the collected file statuses (objects and common prefixes)
 * @throws FileNotFoundException declared; not thrown directly by the visible body
 * @throws IOException declared; presumably raised by the helpers called here - TODO confirm
 */
private FileStatus[] internalList(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter, boolean cleanup) throws FileNotFoundException, IOException {
    LOG.debug("list:(start) {}. full listing {}, prefix based {}, flat list {}", path, fullListing, prefixBased, flatListing);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    String targetListKey = pathToKey(path);
    // A known directory is listed with a trailing "/" so that only its children match the prefix.
    if (isDirectory != null && isDirectory.booleanValue() && !targetListKey.endsWith("/") && !path.toString().equals(hostName)) {
        targetListKey = targetListKey + "/";
        LOG.debug("list:(mid) {}, modify key to {}", path, targetListKey);
    }
    // NOTE(review): emptyObjects is only written to in this method; nothing here reads it back.
    Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>();
    ListObjectsRequest request = new ListObjectsRequest();
    request.setBucketName(mBucket);
    request.setMaxKeys(5000);
    request.setPrefix(targetListKey);
    request.withEncodingType("url");
    if (!flatListing) {
        LOG.trace("list:(mid) {}, set delimiter", path);
        request.setDelimiter("/");
    }
    ObjectListing objectList = mClient.listObjects(request);
    // Encoding reported by the first page; reused to decode keys and prefixes of every page.
    String encoding = objectList.getEncodingType();
    LOG.debug("Encoding Type: {}", objectList.getEncodingType());
    List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
    List<String> commonPrefixes = objectList.getCommonPrefixes();
    boolean objectScanContinue = true;
    // Object seen in the previous iteration; it is the one that gets reported next.
    S3ObjectSummary prevObj = null;
    // NOTE(review): stocatorListKeyOrigin is declared but never used in this method.
    Boolean stocatorListKeyOrigin = null;
    // make it global
    boolean stocatorUnifiedObjectNameOrigin = false;
    String unifiedObjectName = null;
    boolean stocatorUnifiedObjectNameOriginSuccess = true;
    while (objectScanContinue) {
        for (S3ObjectSummary obj : objectSummaries) {
            if (prevObj == null) {
                // Very first object of the listing: remember it (with a decoded key) and move on.
                prevObj = obj;
                prevObj.setKey(correctPlusSign(targetListKey, decodePath(prevObj.getKey(), encoding)));
                continue;
            }
            // Keys are stored back on the summary in decoded form.
            obj.setKey(correctPlusSign(targetListKey, decodePath(obj.getKey(), encoding)));
            String objKey = obj.getKey();
            LOG.trace("Proceeding key {} from the list ", objKey);
            if (stocatorPath.isHadoopSuccessFormat(objKey)) {
                // object name of the form /<URI>/<parent>/_SUCCESS
                // in this case no need to check stocator origin
                unifiedObjectName = stocatorPath.removePartOrSuccess(objKey);
                stocatorUnifiedObjectNameOrigin = false;
                stocatorUnifiedObjectNameOriginSuccess = updateSuccessfullJobStatus(unifiedObjectName);
            } else if (stocatorPath.isHadoopStocatorDataFormat(objKey)) {
                // object key has part- and attempt_ or _SUCCESS
                // need to find unified name, as it was created with Stocator
                stocatorUnifiedObjectNameOrigin = true;
                if (unifiedObjectName != null && objKey.startsWith(unifiedObjectName)) {
                    // Same unified name as the previous key: only re-check the job status.
                    stocatorUnifiedObjectNameOriginSuccess = isJobSuccessful(unifiedObjectName);
                } else {
                    String unifiedCandidate = stocatorPath.removePartOrSuccess(objKey);
                    LOG.trace("Key: {}, unified name: {}, unified candidate {}", objKey, unifiedObjectName, unifiedCandidate);
                    if (unifiedCandidate.isEmpty() || unifiedCandidate.equals("/")) {
                        LOG.trace("Checking unified candidate {}", unifiedCandidate);
                        stocatorUnifiedObjectNameOriginSuccess = isJobSuccessful(unifiedCandidate);
                    } else {
                        // Try the candidate and then each shorter "/"-terminated parent prefix
                        // until one is reported successful or no further "/" is found.
                        int ind = 0;
                        LOG.trace("Unified candidate {}", unifiedCandidate);
                        while (ind >= 0) {
                            LOG.trace("processing {}", unifiedCandidate);
                            stocatorUnifiedObjectNameOriginSuccess = isJobSuccessful(unifiedCandidate);
                            if (stocatorUnifiedObjectNameOriginSuccess) {
                                break;
                            }
                            int endIndex = unifiedCandidate.length();
                            if (unifiedCandidate.endsWith("/")) {
                                // Start the backwards search before the trailing "/".
                                endIndex = unifiedCandidate.length() - 2;
                            }
                            ind = unifiedCandidate.lastIndexOf("/", endIndex);
                            if (ind >= 0) {
                                unifiedCandidate = unifiedCandidate.substring(0, ind + 1);
                            }
                        }
                    }
                    unifiedObjectName = unifiedCandidate;
                }
                LOG.trace("Candidate {} created by Stocator, " + " unifiedObjectName: {}, stocatorUnifiedObjectNameOrigin: {}," + " stocatorUnifiedObjectNameOriginSuccess {}", objKey, unifiedObjectName, stocatorUnifiedObjectNameOrigin, stocatorUnifiedObjectNameOriginSuccess);
            } else {
                LOG.trace("Candidate {} bypass Stocator check, " + " unifiedObjectName: {}, stocatorUnifiedObjectNameOrigin: {}," + " stocatorUnifiedObjectNameOriginSuccess {}", objKey, unifiedObjectName, stocatorUnifiedObjectNameOrigin, stocatorUnifiedObjectNameOriginSuccess);
                stocatorUnifiedObjectNameOrigin = false;
                stocatorUnifiedObjectNameOriginSuccess = true;
            }
            if (stocatorUnifiedObjectNameOrigin && !fullListing) {
                if (!stocatorUnifiedObjectNameOriginSuccess) {
                    // having unified name as a prefix
                    // (the current key is skipped; prevObj stays pending)
                    continue;
                }
                LOG.trace("{} created by Stocator", unifiedObjectName);
                // we need to make sure there are no failed attempts
                if (stocatorPath.nameWithoutTaskID(objKey).equals(stocatorPath.nameWithoutTaskID(prevObj.getKey()))) {
                    // found failed that was not aborted.
                    // The larger of the two colliding objects is kept as prevObj.
                    LOG.trace("Collision found between {} and {}", prevObj.getKey(), objKey);
                    if (prevObj.getSize() < obj.getSize()) {
                        LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
                        if (cleanup) {
                            String newMergedPath = getMergedPath(hostName, path, prevObj.getKey());
                            LOG.warn("Delete failed data part {}", newMergedPath);
                            delete(hostName, new Path(newMergedPath), true);
                        }
                        prevObj = obj;
                    } else {
                        if (cleanup) {
                            String newMergedPath = getMergedPath(hostName, path, obj.getKey());
                            LOG.warn("Delete failed data part {}", newMergedPath);
                            delete(hostName, new Path(newMergedPath), true);
                        }
                    }
                    continue;
                }
            }
            // Report the previous object now that the current one did not collide with it.
            FileStatus fs = createFileStatus(prevObj, hostName, path);
            if (fs.getLen() > 0 || fullListing) {
                if (filter == null) {
                    LOG.trace("Adding {} size {} to response list", fs.getPath(), fs.getLen());
                    tmpResult.add(fs);
                } else if (filter != null && filter.accept(fs.getPath())) {
                    tmpResult.add(fs);
                } else {
                    LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
                }
            } else {
                LOG.trace("Adding {} to the empty list", fs.getPath());
                emptyObjects.put(fs.getPath().toString(), fs);
            }
            prevObj = obj;
        }
        // add common prefixes
        LOG.trace("Going to examine common prefixes for {}", targetListKey);
        if (prevObj != null) {
            // The still-pending object is recorded as empty here when it has zero length
            // and is not named HADOOP_SUCCESS; it is not added to the result at this point.
            LOG.trace("Previous object registered as {}", prevObj.getKey());
            FileStatus fs = createFileStatus(prevObj, hostName, path);
            if (fs.getLen() == 0 && (!fs.getPath().getName().equals(HADOOP_SUCCESS))) {
                LOG.trace("Adding previous object {} to empty objects list", fs.getPath());
                emptyObjects.put(fs.getPath().toString(), fs);
            }
        }
        for (String comPrefix : commonPrefixes) {
            LOG.trace("Common prefix is {}", comPrefix);
            Path qualifiedPath = keyToQualifiedPath(hostName, comPrefix);
            qualifiedPath = COSUtils.decodePath(qualifiedPath, encoding);
            // Each accepted common prefix becomes a COSFileStatus and is stored in memoryCache.
            FileStatus status = new COSFileStatus(true, false, qualifiedPath);
            if (filter == null) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else if (filter != null && filter.accept(status.getPath())) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else {
                LOG.trace("Common prefix {} rejected by path filter during list. Filter {}", status.getPath(), filter);
            }
        }
        boolean isTruncated = objectList.isTruncated();
        if (isTruncated) {
            // More pages available: set the encoding type on the listing and fetch the next batch.
            objectList.setEncodingType("url");
            objectList = mClient.listNextBatchOfObjects(objectList);
            objectSummaries = objectList.getObjectSummaries();
            commonPrefixes = objectList.getCommonPrefixes();
        } else {
            objectScanContinue = false;
        }
    }
    // Because of the one-object lag, the last object is still pending after the paging loop.
    if (prevObj != null) {
        LOG.trace("Examine the last object {}", prevObj.getKey());
        FileStatus fs = createFileStatus(prevObj, hostName, path);
        LOG.trace("Last object fs path transormed to {}", fs.getPath());
        if (fs.getLen() > 0 || fullListing) {
            if (filter == null) {
                LOG.trace("Adding {} size {} to response list", fs.getPath(), fs.getLen());
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else if (filter != null && filter.accept(fs.getPath())) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else {
                LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
            }
        } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) {
            LOG.trace("Adding last object {} to empty objects list", fs.getPath());
            emptyObjects.put(fs.getPath().toString(), fs);
        }
    }
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
Also used : StocatorPath(com.ibm.stocator.fs.common.StocatorPath) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest)

Example 47 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project ats-framework by Axway.

the class S3Operations method deleteObjects.

/**
 * Delete all objects matching given prefix. This method is preferred for efficient deletion of many files.
 * <p>Objects are listed page by page and every page's matching keys are removed with a single
 * multi-object delete request. The total number of deleted objects is written to the log;
 * nothing is returned.</p>
 *
 * @param folderPrefix empty path is expected for objects in the "root" of the bucket
 * @param searchString regular expression to be matched. This pattern will be searched for in the
 *                     "short file name", i.e. the object's ID after last path separator (&quot;/&quot;).<br />
 *                     If null it means all ( string &quot;.*&quot;).
 * @param recursive if true searches recursively for matching in nested path levels (&quot;/&quot;)
 *
 * @throws S3OperationException in case of an error from server
 */
@PublicAtsApi
public void deleteObjects(String folderPrefix, String searchString, boolean recursive) {
    // Alternative but not documented in S3 API: getClient().listObjectsV2(bucket, "prefix")
    // A "/" delimiter limits the listing to the current level when not recursive.
    ListObjectsRequest request = new ListObjectsRequest(bucketName, folderPrefix, null, recursive ? null : "/", null);
    int totallyDeleted = 0;
    try {
        ObjectListing objectListing = s3Client.listObjects(request);
        int i = 0;
        if (searchString == null) {
            // any string
            searchString = ".*";
        }
        List<KeyVersion> keysForDelete = new ArrayList<KeyVersion>(100);
        Pattern searchStringPattern = Pattern.compile(searchString);
        while (true) {
            keysForDelete.clear();
            for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("listObject[" + (++i) + "]: " + objectSummary.toString());
                }
                String[] fileTokens = objectSummary.getKey().split("/");
                // split() drops trailing empty tokens, so a key made only of "/" characters
                // yields an empty array; treat its short name as empty instead of failing.
                String s3Object = fileTokens.length == 0 ? "" : fileTokens[fileTokens.length - 1];
                Matcher matcher = searchStringPattern.matcher(s3Object);
                if (matcher.find()) {
                    keysForDelete.add(new KeyVersion(objectSummary.getKey()));
                }
            }
            if (keysForDelete.size() > 0) {
                // delete current set / batch size
                DeleteObjectsRequest multiObjectDeleteRequest = new DeleteObjectsRequest(bucketName).withKeys(keysForDelete).withQuiet(false);
                DeleteObjectsResult delObjRes = s3Client.deleteObjects(multiObjectDeleteRequest);
                int currentlyDeletedCount = delObjRes.getDeletedObjects().size();
                totallyDeleted = totallyDeleted + currentlyDeletedCount;
                // verify size of deleted objects
                if (keysForDelete.size() != currentlyDeletedCount) {
                    LOG.warn("The number of actually deleted objects " + currentlyDeletedCount + " does not match the expected size of " + keysForDelete.size());
                } else {
                    LOG.debug("Number of deleted S3 objects in current batch is " + currentlyDeletedCount);
                }
            }
            // more objects to retrieve (1K batch size of objects)
            if (objectListing.isTruncated()) {
                objectListing = s3Client.listNextBatchOfObjects(objectListing);
            } else {
                break;
            }
        }
        LOG.info("Successfully deleted " + totallyDeleted + " objects");
    } catch (AmazonClientException e) {
        throw new S3OperationException("Error deleting multiple objects matching pattern " + searchString + ". Number of deleted objects is " + totallyDeleted, e);
    }
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) KeyVersion(com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion) AmazonClientException(com.amazonaws.AmazonClientException) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) PublicAtsApi(com.axway.ats.common.PublicAtsApi)

Example 48 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project spring-integration-aws by spring-projects.

the class S3Session method listNames.

@Override
public String[] listNames(String path) throws IOException {
    // The path is split into the bucket name and, when present, a key prefix.
    String[] bucketAndKey = splitPathToBucketAndKey(path, false);
    ListObjectsRequest pageRequest = new ListObjectsRequest().withBucketName(bucketAndKey[0]);
    if (bucketAndKey.length > 1) {
        pageRequest.setPrefix(bucketAndKey[1]);
    }

    // A single listing response may be truncated, so keep requesting pages,
    // moving the marker forward each time, until the listing is complete.
    List<String> keys = new ArrayList<>();
    boolean morePages = true;
    while (morePages) {
        ObjectListing page = this.amazonS3.listObjects(pageRequest);
        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            keys.add(summary.getKey());
        }
        pageRequest.setMarker(page.getNextMarker());
        morePages = page.isTruncated();
    }
    return keys.toArray(new String[keys.size()]);
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary)

Example 49 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project alluxio by Alluxio.

the class S3AUnderFileSystem method getObjectListingChunk.

@Override
@Nullable
protected ObjectListingChunk getObjectListingChunk(String key, boolean recursive) throws IOException {
    // A recursive listing uses an empty delimiter; otherwise the path separator is the delimiter.
    String delimiter;
    if (recursive) {
        delimiter = "";
    } else {
        delimiter = PATH_SEPARATOR;
    }
    // The root key normalizes to the bare separator and is listed with an empty prefix instead.
    String normalizedKey = PathUtils.normalizePath(key, PATH_SEPARATOR);
    String prefix = normalizedKey.equals(PATH_SEPARATOR) ? "" : normalizedKey;

    boolean useListObjectsV1 = mUfsConf.isSet(PropertyKey.UNDERFS_S3_LIST_OBJECTS_V1) && mUfsConf.getBoolean(PropertyKey.UNDERFS_S3_LIST_OBJECTS_V1);
    if (useListObjectsV1) {
        ListObjectsRequest v1Request = new ListObjectsRequest().withBucketName(mBucketName).withPrefix(prefix).withDelimiter(delimiter).withMaxKeys(getListingChunkLength(mUfsConf));
        ObjectListing v1Result = getObjectListingChunkV1(v1Request);
        if (v1Result == null) {
            return null;
        }
        return new S3AObjectListingChunkV1(v1Request, v1Result);
    }

    ListObjectsV2Request v2Request = new ListObjectsV2Request().withBucketName(mBucketName).withPrefix(prefix).withDelimiter(delimiter).withMaxKeys(getListingChunkLength(mUfsConf));
    ListObjectsV2Result v2Result = getObjectListingChunk(v2Request);
    if (v2Result == null) {
        return null;
    }
    return new S3AObjectListingChunk(v2Request, v2Result);
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) Nullable(javax.annotation.Nullable)

Example 50 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project onebusaway-application-modules by camsys.

the class S3FileServiceImpl method list.

/**
 * List the files in the given directory.
 *
 * @param directory key prefix passed to the listing request
 * @param maxResults maximum number of keys requested from the listing
 * @return the keys from the listing that do not end with "/"
 */
@Override
public List<String> list(String directory, int maxResults) {
    ObjectListing page = _s3.listObjects(new ListObjectsRequest(_bucketName, directory, null, null, maxResults));
    List<String> fileKeys = new ArrayList<String>();
    for (S3ObjectSummary entry : page.getObjectSummaries()) {
        String objectKey = entry.getKey();
        if (objectKey.endsWith("/")) {
            // keys with a trailing "/" are left out of the result
            continue;
        }
        fileKeys.add(objectKey);
    }
    return fileKeys;
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary)

Aggregations

ListObjectsRequest (com.amazonaws.services.s3.model.ListObjectsRequest)48 ObjectListing (com.amazonaws.services.s3.model.ObjectListing)46 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)32 ArrayList (java.util.ArrayList)23 AmazonClientException (com.amazonaws.AmazonClientException)11 IOException (java.io.IOException)9 Path (org.apache.hadoop.fs.Path)9 HashMap (java.util.HashMap)8 LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus)8 DeleteObjectsRequest (com.amazonaws.services.s3.model.DeleteObjectsRequest)6 FileStatus (org.apache.hadoop.fs.FileStatus)6 ListObjectsRequest (software.amazon.awssdk.services.s3.model.ListObjectsRequest)6 ListObjectsResponse (software.amazon.awssdk.services.s3.model.ListObjectsResponse)6 Date (java.util.Date)5 Test (org.junit.Test)5 S3Object (software.amazon.awssdk.services.s3.model.S3Object)5 StocatorPath (com.ibm.stocator.fs.common.StocatorPath)4 FileNotFoundException (java.io.FileNotFoundException)4 S3Exception (software.amazon.awssdk.services.s3.model.S3Exception)4 AmazonServiceException (com.amazonaws.AmazonServiceException)3