Example 1 with S3ObjectSummary

Use of com.amazonaws.services.s3.model.S3ObjectSummary in project deeplearning4j by deeplearning4j.

the class S3Downloader method keysForBucket.

    /**
     * Return the keys for a bucket
     * @param bucket the bucket to get the keys for
     * @return the bucket's keys
     */
public List<String> keysForBucket(String bucket) {
    AmazonS3 s3 = getClient();
    List<String> ret = new ArrayList<>();
    ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket);
    ObjectListing objectListing;
    do {
        objectListing = s3.listObjects(listObjectsRequest);
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
    } while (objectListing.isTruncated());
    return ret;
Also used : AmazonS3(com.amazonaws.services.s3.AmazonS3) ArrayList(java.util.ArrayList)

Example 2 with S3ObjectSummary

Use of com.amazonaws.services.s3.model.S3ObjectSummary in project hadoop by apache.

the class S3AFileSystem method innerRename.

  /**
   * The inner rename operation. See {@link #rename(Path, Path)} for
   * the description of the operation.
   * This operation throws an exception on any failure which needs to be
   * reported and downgraded to a failure.
   * @param src path to be renamed
   * @param dst new path after rename
   * @return true once the rename has completed
   * @throws RenameFailedException if some criteria for a state changing
   * rename was not met. This means work didn't happen; it's not something
   * which is reported upstream to the FileSystem APIs, for which the semantics
   * of "false" are pretty vague.
   * @throws FileNotFoundException there's no source file.
   * @throws IOException on IO failure.
   * @throws AmazonClientException on failures inside the AWS SDK
   */
private boolean innerRename(Path src, Path dst) throws RenameFailedException, FileNotFoundException, IOException, AmazonClientException {
    LOG.debug("Rename path {} to {}", src, dst);
    String srcKey = pathToKey(src);
    String dstKey = pathToKey(dst);
    if (srcKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "source is root directory");
    if (dstKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "dest is root directory");
    // get the source file status; this raises a FNFE if there is no source
    // file.
    S3AFileStatus srcStatus = getFileStatus(src);
    if (srcKey.equals(dstKey)) {
        LOG.debug("rename: src and dest refer to the same file or directory: {}", dst);
        throw new RenameFailedException(src, dst, "source and dest refer to the same file or directory").withExitCode(srcStatus.isFile());
    S3AFileStatus dstStatus = null;
    try {
        dstStatus = getFileStatus(dst);
        // whether or not it can be the destination of the rename.
        if (srcStatus.isDirectory()) {
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst, "source is a directory and dest is a file").withExitCode(srcStatus.isFile());
            } else if (!dstStatus.isEmptyDirectory()) {
                throw new RenameFailedException(src, dst, "Destination is a non-empty directory").withExitCode(false);
        // at this point the destination is an empty directory
        } else {
            // empty or not
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst, "Cannot rename onto an existing file").withExitCode(false);
    } catch (FileNotFoundException e) {
        LOG.debug("rename: destination path {} not found", dst);
        // Parent must exist
        Path parent = dst.getParent();
        if (!pathToKey(parent).isEmpty()) {
            try {
                S3AFileStatus dstParentStatus = getFileStatus(dst.getParent());
                if (!dstParentStatus.isDirectory()) {
                    throw new RenameFailedException(src, dst, "destination parent is not a directory");
            } catch (FileNotFoundException e2) {
                throw new RenameFailedException(src, dst, "destination has no parent ");
    // Ok! Time to start
    if (srcStatus.isFile()) {
        LOG.debug("rename: renaming file {} to {}", src, dst);
        if (dstStatus != null && dstStatus.isDirectory()) {
            String newDstKey = dstKey;
            if (!newDstKey.endsWith("/")) {
                newDstKey = newDstKey + "/";
            String filename = srcKey.substring(pathToKey(src.getParent()).length() + 1);
            newDstKey = newDstKey + filename;
            copyFile(srcKey, newDstKey, srcStatus.getLen());
        } else {
            copyFile(srcKey, dstKey, srcStatus.getLen());
        innerDelete(srcStatus, false);
    } else {
        LOG.debug("rename: renaming directory {} to {}", src, dst);
        // This is a directory to directory copy
        if (!dstKey.endsWith("/")) {
            dstKey = dstKey + "/";
        if (!srcKey.endsWith("/")) {
            srcKey = srcKey + "/";
        //Verify dest is not a child of the source directory
        if (dstKey.startsWith(srcKey)) {
            throw new RenameFailedException(srcKey, dstKey, "cannot rename a directory to a subdirectory o fitself ");
        List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<>();
        if (dstStatus != null && dstStatus.isEmptyDirectory()) {
            // delete unnecessary fake directory.
            keysToDelete.add(new DeleteObjectsRequest.KeyVersion(dstKey));
        ListObjectsRequest request = new ListObjectsRequest();
        ObjectListing objects = listObjects(request);
        while (true) {
            for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                keysToDelete.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
                String newDstKey = dstKey + summary.getKey().substring(srcKey.length());
                copyFile(summary.getKey(), newDstKey, summary.getSize());
                if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
                    removeKeys(keysToDelete, true, false);
            if (objects.isTruncated()) {
                objects = continueListObjects(objects);
            } else {
                if (!keysToDelete.isEmpty()) {
                    removeKeys(keysToDelete, false, false);
    if (src.getParent() != dst.getParent()) {
    return true;
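Also used : Path(org.apache.hadoop.fs.Path) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest)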

Example 3 with S3ObjectSummary

Use of com.amazonaws.services.s3.model.S3ObjectSummary in project crate by crate.

the class FileReadingCollectorTest method createBatchIterator.

private BatchIterator createBatchIterator(Collection<String> fileUris, String compression, final S3ObjectInputStream s3InputStream) {
    Reference raw = createReference("_raw", DataTypes.STRING);
    InputFactory.Context<LineCollectorExpression<?>> ctx = inputFactory.ctxForRefs(FileLineReferenceResolver::getImplementation);
    List<Input<?>> inputs = Collections.singletonList(ctx.add(raw));
    return FileReadingIterator.newInstance(fileUris, inputs, ctx.expressions(), compression, ImmutableMap.of(LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory(), S3FileInputFactory.NAME, () -> new S3FileInput(new S3ClientHelper() {

        protected AmazonS3 initClient(String accessKey, String secretKey) throws IOException {
            AmazonS3 client = mock(AmazonS3Client.class);
            ObjectListing objectListing = mock(ObjectListing.class);
            S3ObjectSummary summary = mock(S3ObjectSummary.class);
            S3Object s3Object = mock(S3Object.class);
            when(client.listObjects(anyString(), anyString())).thenReturn(objectListing);
            when(client.getObject("fakebucket", "foo")).thenReturn(s3Object);
            return client;
    })), false, 1, 0);
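Also used : InputFactory(io.crate.operation.InputFactory) AmazonS3(com.amazonaws.services.s3.AmazonS3) TestingHelpers.createReference(io.crate.testing.TestingHelpers.createReference) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) AmazonS3Client(com.amazonaws.services.s3.AmazonS3Client) FileLineReferenceResolver(io.crate.operation.reference.file.FileLineReferenceResolver) S3ClientHelper(io.crate.external.S3ClientHelper) S3Object(com.amazonaws.services.s3.model.S3Object)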

Example 4 with S3ObjectSummary

Use of com.amazonaws.services.s3.model.S3ObjectSummary in project crate by crate.

the class S3FileInputTest method objectSummaries.

private List<S3ObjectSummary> objectSummaries() {
    listObjectSummaries = new LinkedList<>();
    S3ObjectSummary firstObj = new S3ObjectSummary();
    S3ObjectSummary secondObj = new S3ObjectSummary();
    return listObjectSummaries;
Also used : S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary)

Example 5 with S3ObjectSummary

Use of com.amazonaws.services.s3.model.S3ObjectSummary in project jackrabbit-oak by apache.

the class S3Backend method deleteAllOlderThan.

public Set<DataIdentifier> deleteAllOlderThan(long min) throws DataStoreException {
    long start = System.currentTimeMillis();
    // S3 stores lastModified to lower boundary of timestamp in ms.
    // and hence min is reduced by 1000ms.
    min = min - 1000;
    Set<DataIdentifier> deleteIdSet = new HashSet<DataIdentifier>(30);
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        ObjectListing prevObjectListing = s3service.listObjects(bucket);
        while (true) {
            List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
            for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
                if (!s3ObjSumm.getKey().startsWith(META_KEY_PREFIX)) {
                    DataIdentifier identifier = new DataIdentifier(getIdentifierName(s3ObjSumm.getKey()));
                    long lastModified = s3ObjSumm.getLastModified().getTime();
                    LOG.debug("Identifier [{}]'s lastModified = [{}]", identifier, lastModified);
                    if (lastModified < min && store.confirmDelete(identifier) && //  order is important here
                    s3service.getObjectMetadata(bucket, s3ObjSumm.getKey()).getLastModified().getTime() < min) {
                        LOG.debug("add id [{}] to delete lists", s3ObjSumm.getKey());
                        deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
            if (deleteList.size() > 0) {
                DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
                DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
                if (dobjs.getDeletedObjects().size() != deleteList.size()) {
                    throw new DataStoreException("Incomplete delete object request. only  " + dobjs.getDeletedObjects().size() + " out of " + deleteList.size() + " are deleted");
                } else {
                    LOG.debug("[{}] records deleted from datastore", deleteList);
            if (!prevObjectListing.isTruncated()) {
            prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
    } finally {
        if (contextClassLoader != null) {
    }"deleteAllOlderThan: min=[{}] exit. Deleted[{}] records. Number of records deleted [{}] took [{}]ms", new Object[] { min, deleteIdSet, deleteIdSet.size(), (System.currentTimeMillis() - start) });
    return deleteIdSet;
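Also used : DataStoreException(org.apache.jackrabbit.core.data.DataStoreException) DataIdentifier(org.apache.jackrabbit.core.data.DataIdentifier) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) HashSet(java.util.HashSet)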


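S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary) ObjectListing (com.amazonaws.services.s3.model.ObjectListing) ArrayList (java.util.ArrayList)64 ListObjectsRequest (com.amazonaws.services.s3.model.ListObjectsRequest) Test (org.junit.Test)50 Date (java.util.Date)29 DeleteObjectsRequest (com.amazonaws.services.s3.model.DeleteObjectsRequest) ListObjectsV2Result (com.amazonaws.services.s3.model.ListObjectsV2Result) Test (org.testng.annotations.Test)25 AmazonS3 (com.amazonaws.services.s3.AmazonS3) S3Object (com.amazonaws.services.s3.model.S3Object) AmazonClientException (com.amazonaws.AmazonClientException)18 IOException (java.io.IOException) S3FileTransferRequestParamsDto (org.finra.herd.model.dto.S3FileTransferRequestParamsDto)16 AmazonServiceException (com.amazonaws.AmazonServiceException)14 ListObjectsV2Request (com.amazonaws.services.s3.model.ListObjectsV2Request) File (java.io.File) HashMap (java.util.HashMap)13 BusinessObjectDataKey (org.finra.herd.model.api.xml.BusinessObjectDataKey)13 StorageFile (org.finra.herd.model.api.xml.StorageFile)13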