Search in sources :

Example 1 with S3ObjectSummary

use of com.amazonaws.services.s3.model.S3ObjectSummary in project deeplearning4j by deeplearning4j.

the class S3Downloader method keysForBucket.

/**
 * Collects the key of every object stored in a bucket.
 * <p>
 * The listing is fetched page by page: after each page the request marker is
 * advanced to the listing's next marker, and fetching stops once a page
 * reports that it is not truncated.
 *
 * @param bucket the bucket to get the keys for
 * @return the bucket's keys, in the order the listings returned them
 */
public List<String> keysForBucket(String bucket) {
    final AmazonS3 client = getClient();
    final List<String> keys = new ArrayList<>();
    final ListObjectsRequest request = new ListObjectsRequest().withBucketName(bucket);
    boolean morePages = true;
    while (morePages) {
        final ObjectListing page = client.listObjects(request);
        for (S3ObjectSummary entry : page.getObjectSummaries()) {
            keys.add(entry.getKey());
        }
        // position the request at the start of the following page
        request.setMarker(page.getNextMarker());
        morePages = page.isTruncated();
    }
    return keys;
}
Also used : AmazonS3(com.amazonaws.services.s3.AmazonS3) ArrayList(java.util.ArrayList)

Example 2 with S3ObjectSummary

use of com.amazonaws.services.s3.model.S3ObjectSummary in project hadoop by apache.

the class S3AFileSystem method innerRename.

/**
 * The inner rename operation. See {@link #rename(Path, Path)} for
 * the description of the operation.
 * This operation throws an exception on any failure which needs to be
 * reported and downgraded to a failure.
 * <p>
 * A file is renamed by copying it to the destination key and then deleting
 * the source. A directory is renamed by listing every object under the
 * source prefix, copying each one under the destination prefix and removing
 * the source keys in batches.
 * @param src path to be renamed
 * @param dst new path after rename
 * @return true; every failure is signalled through an exception
 * @throws RenameFailedException if some criteria for a state changing
 * rename was not met. This means work didn't happen; it's not something
 * which is reported upstream to the FileSystem APIs, for which the semantics
 * of "false" are pretty vague.
 * @throws FileNotFoundException there's no source file.
 * @throws IOException on IO failure.
 * @throws AmazonClientException on failures inside the AWS SDK
 */
private boolean innerRename(Path src, Path dst) throws RenameFailedException, FileNotFoundException, IOException, AmazonClientException {
    LOG.debug("Rename path {} to {}", src, dst);
    incrementStatistic(INVOCATION_RENAME);
    String srcKey = pathToKey(src);
    String dstKey = pathToKey(dst);
    if (srcKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "source is root directory");
    }
    if (dstKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "dest is root directory");
    }
    // get the source file status; this raises a FNFE if there is no source
    // file.
    S3AFileStatus srcStatus = getFileStatus(src);
    if (srcKey.equals(dstKey)) {
        LOG.debug("rename: src and dest refer to the same file or directory: {}", dst);
        throw new RenameFailedException(src, dst, "source and dest refer to the same file or directory").withExitCode(srcStatus.isFile());
    }
    S3AFileStatus dstStatus = null;
    try {
        dstStatus = getFileStatus(dst);
        // the destination exists: check whether or not it can be the
        // destination of the rename.
        if (srcStatus.isDirectory()) {
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst, "source is a directory and dest is a file").withExitCode(srcStatus.isFile());
            } else if (!dstStatus.isEmptyDirectory()) {
                throw new RenameFailedException(src, dst, "Destination is a non-empty directory").withExitCode(false);
            }
        // at this point the destination is an empty directory
        } else {
            // source is a file: the destination must be a directory,
            // empty or not
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst, "Cannot rename onto an existing file").withExitCode(false);
            }
        }
    } catch (FileNotFoundException e) {
        LOG.debug("rename: destination path {} not found", dst);
        // Parent must exist
        Path parent = dst.getParent();
        if (!pathToKey(parent).isEmpty()) {
            try {
                S3AFileStatus dstParentStatus = getFileStatus(dst.getParent());
                if (!dstParentStatus.isDirectory()) {
                    throw new RenameFailedException(src, dst, "destination parent is not a directory");
                }
            } catch (FileNotFoundException e2) {
                throw new RenameFailedException(src, dst, "destination has no parent ");
            }
        }
    }
    // Ok! Time to start
    if (srcStatus.isFile()) {
        LOG.debug("rename: renaming file {} to {}", src, dst);
        if (dstStatus != null && dstStatus.isDirectory()) {
            // the destination is an existing directory: the file keeps its
            // name and is placed underneath it
            String newDstKey = dstKey;
            if (!newDstKey.endsWith("/")) {
                newDstKey = newDstKey + "/";
            }
            // Take the final path component directly. Slicing srcKey at
            // (parent key length + 1) drops the first character of the name
            // whenever the parent's key is empty, i.e. a root-level source.
            String filename = src.getName();
            newDstKey = newDstKey + filename;
            copyFile(srcKey, newDstKey, srcStatus.getLen());
        } else {
            copyFile(srcKey, dstKey, srcStatus.getLen());
        }
        innerDelete(srcStatus, false);
    } else {
        LOG.debug("rename: renaming directory {} to {}", src, dst);
        // This is a directory to directory copy
        if (!dstKey.endsWith("/")) {
            dstKey = dstKey + "/";
        }
        if (!srcKey.endsWith("/")) {
            srcKey = srcKey + "/";
        }
        //Verify dest is not a child of the source directory
        if (dstKey.startsWith(srcKey)) {
            throw new RenameFailedException(srcKey, dstKey, "cannot rename a directory to a subdirectory of itself ");
        }
        List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<>();
        if (dstStatus != null && dstStatus.isEmptyDirectory()) {
            // delete unnecessary fake directory.
            keysToDelete.add(new DeleteObjectsRequest.KeyVersion(dstKey));
        }
        ListObjectsRequest request = new ListObjectsRequest();
        request.setBucketName(bucket);
        request.setPrefix(srcKey);
        request.setMaxKeys(maxKeys);
        ObjectListing objects = listObjects(request);
        while (true) {
            for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                keysToDelete.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
                // re-root the key: swap the source prefix for the destination one
                String newDstKey = dstKey + summary.getKey().substring(srcKey.length());
                copyFile(summary.getKey(), newDstKey, summary.getSize());
                // flush the pending deletions once a full batch has built up
                if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
                    removeKeys(keysToDelete, true, false);
                }
            }
            if (objects.isTruncated()) {
                objects = continueListObjects(objects);
            } else {
                // last page: remove whatever is still queued
                if (!keysToDelete.isEmpty()) {
                    removeKeys(keysToDelete, false, false);
                }
                break;
            }
        }
    }
    // Compare the parents by value. The two Path instances are distinct
    // objects, so a reference comparison reports "different" even when source
    // and destination share the same parent directory.
    Path srcParent = src.getParent();
    Path dstParent = dst.getParent();
    boolean sameParent = srcParent == null ? dstParent == null : srcParent.equals(dstParent);
    if (!sameParent) {
        deleteUnnecessaryFakeDirectories(dstParent);
        createFakeDirectoryIfNecessary(srcParent);
    }
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest)

Example 3 with S3ObjectSummary

use of com.amazonaws.services.s3.model.S3ObjectSummary in project crate by crate.

the class FileReadingCollectorTest method createBatchIterator.

/**
 * Creates a file-reading iterator over the given URIs that exposes the
 * {@code _raw} column of every line.
 * <p>
 * Two input factories are registered: the local file system one and an S3 one
 * whose client is a mock. The mocked client lists a single object with key
 * {@code "foo"}, reports the listing as not truncated and serves
 * {@code s3InputStream} as the content of {@code fakebucket/foo}.
 *
 * @param fileUris      URIs of the files to read
 * @param compression   compression setting handed to the iterator
 * @param s3InputStream stream returned as the content of the mocked S3 object
 * @return the iterator built by {@code FileReadingIterator.newInstance}
 */
private BatchIterator createBatchIterator(Collection<String> fileUris, String compression, final S3ObjectInputStream s3InputStream) {
    Reference rawColumn = createReference("_raw", DataTypes.STRING);
    InputFactory.Context<LineCollectorExpression<?>> context =
        inputFactory.ctxForRefs(FileLineReferenceResolver::getImplementation);
    List<Input<?>> rawInputs = Collections.singletonList(context.add(rawColumn));
    return FileReadingIterator.newInstance(
        fileUris,
        rawInputs,
        context.expressions(),
        compression,
        ImmutableMap.of(
            LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory(),
            S3FileInputFactory.NAME, () -> new S3FileInput(new S3ClientHelper() {

                @Override
                protected AmazonS3 initClient(String accessKey, String secretKey) throws IOException {
                    AmazonS3 mockedClient = mock(AmazonS3Client.class);
                    ObjectListing mockedListing = mock(ObjectListing.class);
                    S3ObjectSummary mockedSummary = mock(S3ObjectSummary.class);
                    S3Object mockedObject = mock(S3Object.class);

                    // one summary with key "foo", delivered in a single page
                    when(mockedSummary.getKey()).thenReturn("foo");
                    when(mockedListing.getObjectSummaries()).thenReturn(Arrays.asList(mockedSummary));
                    when(mockedListing.isTruncated()).thenReturn(false);

                    // the object's content is the stream supplied by the test
                    when(mockedObject.getObjectContent()).thenReturn(s3InputStream);

                    when(mockedClient.listObjects(anyString(), anyString())).thenReturn(mockedListing);
                    when(mockedClient.listNextBatchOfObjects(any(ObjectListing.class))).thenReturn(mockedListing);
                    when(mockedClient.getObject("fakebucket", "foo")).thenReturn(mockedObject);
                    return mockedClient;
                }
            })),
        false,
        1,
        0);
}
Also used : InputFactory(io.crate.operation.InputFactory) AmazonS3(com.amazonaws.services.s3.AmazonS3) TestingHelpers.createReference(io.crate.testing.TestingHelpers.createReference) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) AmazonS3Client(com.amazonaws.services.s3.AmazonS3Client) FileLineReferenceResolver(io.crate.operation.reference.file.FileLineReferenceResolver) S3ClientHelper(io.crate.external.S3ClientHelper) S3Object(com.amazonaws.services.s3.model.S3Object)

Example 4 with S3ObjectSummary

use of com.amazonaws.services.s3.model.S3ObjectSummary in project crate by crate.

the class S3FileInputTest method objectSummaries.

/**
 * Resets the {@code listObjectSummaries} fixture to a fresh list holding two
 * summaries in {@code BUCKET_NAME}, keyed {@code prefix/test1.json.gz} and
 * {@code prefix/test2.json.gz} in that order.
 *
 * @return the freshly populated fixture list
 */
private List<S3ObjectSummary> objectSummaries() {
    listObjectSummaries = new LinkedList<>();
    for (String key : new String[] { "prefix/test1.json.gz", "prefix/test2.json.gz" }) {
        S3ObjectSummary summary = new S3ObjectSummary();
        summary.setBucketName(BUCKET_NAME);
        summary.setKey(key);
        listObjectSummaries.add(summary);
    }
    return listObjectSummaries;
}
Also used : S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary)

Example 5 with S3ObjectSummary

use of com.amazonaws.services.s3.model.S3ObjectSummary in project jackrabbit-oak by apache.

the class S3Backend method deleteAllOlderThan.

/**
 * Deletes the records in the bucket that were last modified before the given
 * time and returns their identifiers.
 * <p>
 * Keys starting with {@code META_KEY_PREFIX} are skipped. The bucket is
 * processed one listing page at a time, and the keys selected on a page are
 * removed with a single delete request before the next page is fetched. While
 * the method runs, the thread's context class loader is switched to this
 * class's loader; the previous loader is put back afterwards when it was not
 * null.
 *
 * @param min threshold in milliseconds; it is lowered by 1000ms before use
 * @return identifiers of the records that were queued for deletion
 * @throws DataStoreException if a delete request removed fewer objects than
 *                            were submitted
 */
@Override
public Set<DataIdentifier> deleteAllOlderThan(long min) throws DataStoreException {
    final long start = System.currentTimeMillis();
    // S3 stores lastModified to lower boundary of timestamp in ms,
    // hence the threshold is lowered by 1000ms.
    min -= 1000;
    final Set<DataIdentifier> deleteIdSet = new HashSet<DataIdentifier>(30);
    final Thread currentThread = Thread.currentThread();
    final ClassLoader savedLoader = currentThread.getContextClassLoader();
    try {
        currentThread.setContextClassLoader(getClass().getClassLoader());
        ObjectListing page = s3service.listObjects(bucket);
        boolean hasMorePages;
        do {
            final List<DeleteObjectsRequest.KeyVersion> batch = new ArrayList<DeleteObjectsRequest.KeyVersion>();
            for (S3ObjectSummary summary : page.getObjectSummaries()) {
                final String key = summary.getKey();
                if (key.startsWith(META_KEY_PREFIX)) {
                    continue;
                }
                final DataIdentifier identifier = new DataIdentifier(getIdentifierName(key));
                final long lastModified = summary.getLastModified().getTime();
                LOG.debug("Identifier [{}]'s lastModified = [{}]", identifier, lastModified);
                // The order of the three checks is important: the listed
                // timestamp first, then the store's confirmation, and only
                // then the metadata lookup against the service.
                if (lastModified >= min) {
                    continue;
                }
                if (!store.confirmDelete(identifier)) {
                    continue;
                }
                if (s3service.getObjectMetadata(bucket, key).getLastModified().getTime() >= min) {
                    continue;
                }
                store.deleteFromCache(identifier);
                LOG.debug("add id [{}] to delete lists", key);
                batch.add(new DeleteObjectsRequest.KeyVersion(key));
                deleteIdSet.add(identifier);
            }
            if (!batch.isEmpty()) {
                final DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucket);
                deleteRequest.setKeys(batch);
                final DeleteObjectsResult deleteResult = s3service.deleteObjects(deleteRequest);
                if (deleteResult.getDeletedObjects().size() != batch.size()) {
                    throw new DataStoreException("Incomplete delete object request. only  " + deleteResult.getDeletedObjects().size() + " out of " + batch.size() + " are deleted");
                }
                LOG.debug("[{}] records deleted from datastore", batch);
            }
            hasMorePages = page.isTruncated();
            if (hasMorePages) {
                page = s3service.listNextBatchOfObjects(page);
            }
        } while (hasMorePages);
    } finally {
        if (savedLoader != null) {
            currentThread.setContextClassLoader(savedLoader);
        }
    }
    LOG.info("deleteAllOlderThan: min=[{}] exit. Deleted[{}] records. Number of records deleted [{}] took [{}]ms", new Object[] { min, deleteIdSet, deleteIdSet.size(), (System.currentTimeMillis() - start) });
    return deleteIdSet;
}
Also used : DataStoreException(org.apache.jackrabbit.core.data.DataStoreException) DataIdentifier(org.apache.jackrabbit.core.data.DataIdentifier) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) HashSet(java.util.HashSet)

Aggregations

S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)194 ObjectListing (com.amazonaws.services.s3.model.ObjectListing)105 ArrayList (java.util.ArrayList)64 ListObjectsRequest (com.amazonaws.services.s3.model.ListObjectsRequest)61 Test (org.junit.Test)50 Date (java.util.Date)29 DeleteObjectsRequest (com.amazonaws.services.s3.model.DeleteObjectsRequest)27 ListObjectsV2Result (com.amazonaws.services.s3.model.ListObjectsV2Result)25 Test (org.testng.annotations.Test)25 AmazonS3 (com.amazonaws.services.s3.AmazonS3)23 S3Object (com.amazonaws.services.s3.model.S3Object)19 AmazonClientException (com.amazonaws.AmazonClientException)18 IOException (java.io.IOException)17 S3FileTransferRequestParamsDto (org.finra.herd.model.dto.S3FileTransferRequestParamsDto)16 AmazonServiceException (com.amazonaws.AmazonServiceException)14 ListObjectsV2Request (com.amazonaws.services.s3.model.ListObjectsV2Request)14 File (java.io.File)13 HashMap (java.util.HashMap)13 BusinessObjectDataKey (org.finra.herd.model.api.xml.BusinessObjectDataKey)13 StorageFile (org.finra.herd.model.api.xml.StorageFile)13