use of com.amazonaws.services.s3.model.S3ObjectSummary in project deeplearning4j by deeplearning4j.
the class S3Downloader method keysForBucket.
/**
* Return the keys for a bucket
* @param bucket the bucket to get the keys for
* @return the bucket's keys
*/
public List<String> keysForBucket(String bucket) {
    AmazonS3 s3 = getClient();
    List<String> ret = new ArrayList<>();
    ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket);
    ObjectListing objectListing;
    do {
        objectListing = s3.listObjects(listObjectsRequest);
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
            ret.add(objectSummary.getKey());
        }
        listObjectsRequest.setMarker(objectListing.getNextMarker());
    } while (objectListing.isTruncated());
    return ret;
}
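The do/while loop pages through results with markers, the pagination scheme of the original ListObjects API. For comparison, here is a minimal sketch of the same loop against the SDK's ListObjectsV2 API, which paginates with continuation tokens instead; the method name and the explicit client parameter are ours, not deeplearning4j's.

// Minimal sketch: the same listing loop using ListObjectsV2
// (AWS SDK for Java v1); assumes an AmazonS3 client built elsewhere.
public List<String> keysForBucketV2(AmazonS3 s3, String bucket) {
    List<String> keys = new ArrayList<>();
    ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(bucket);
    ListObjectsV2Result result;
    do {
        result = s3.listObjectsV2(request);
        for (S3ObjectSummary summary : result.getObjectSummaries()) {
            keys.add(summary.getKey());
        }
        // V2 carries pagination state in a continuation token, not a marker
        request.setContinuationToken(result.getNextContinuationToken());
    } while (result.isTruncated());
    return keys;
}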
use of com.amazonaws.services.s3.model.S3ObjectSummary in project hadoop by apache.
the class S3AFileSystem method innerRename.
/**
* The inner rename operation. See {@link #rename(Path, Path)} for
* the description of the operation.
* This operation throws an exception on any failure which needs to be
* reported and downgraded to a failure.
* @param src path to be renamed
* @param dst new path after rename
* @throws RenameFailedException if some criteria for a state-changing
* rename were not met. This means work didn't happen; it's not something
* which is reported upstream to the FileSystem APIs, for which the semantics
* of "false" are pretty vague.
* @throws FileNotFoundException there's no source file.
* @throws IOException on IO failure.
* @throws AmazonClientException on failures inside the AWS SDK
*/
private boolean innerRename(Path src, Path dst)
        throws RenameFailedException, FileNotFoundException, IOException, AmazonClientException {
    LOG.debug("Rename path {} to {}", src, dst);
    incrementStatistic(INVOCATION_RENAME);
    String srcKey = pathToKey(src);
    String dstKey = pathToKey(dst);
    if (srcKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "source is root directory");
    }
    if (dstKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "dest is root directory");
    }
    // get the source file status; this raises a FNFE if there is no source
    // file.
    S3AFileStatus srcStatus = getFileStatus(src);
    if (srcKey.equals(dstKey)) {
        LOG.debug("rename: src and dest refer to the same file or directory: {}", dst);
        throw new RenameFailedException(src, dst,
                "source and dest refer to the same file or directory")
                .withExitCode(srcStatus.isFile());
    }
    S3AFileStatus dstStatus = null;
    try {
        dstStatus = getFileStatus(dst);
        // the destination exists; the only question is whether or not
        // it can be the destination of the rename.
        if (srcStatus.isDirectory()) {
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst,
                        "source is a directory and dest is a file")
                        .withExitCode(srcStatus.isFile());
            } else if (!dstStatus.isEmptyDirectory()) {
                throw new RenameFailedException(src, dst,
                        "Destination is a non-empty directory")
                        .withExitCode(false);
            }
            // at this point the destination is an empty directory
        } else {
            // source is a file: dest may be a directory, empty or not,
            // but must not be an existing file
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst,
                        "Cannot rename onto an existing file")
                        .withExitCode(false);
            }
        }
    } catch (FileNotFoundException e) {
        LOG.debug("rename: destination path {} not found", dst);
        // Parent must exist
        Path parent = dst.getParent();
        if (!pathToKey(parent).isEmpty()) {
            try {
                S3AFileStatus dstParentStatus = getFileStatus(dst.getParent());
                if (!dstParentStatus.isDirectory()) {
                    throw new RenameFailedException(src, dst, "destination parent is not a directory");
                }
            } catch (FileNotFoundException e2) {
                throw new RenameFailedException(src, dst, "destination has no parent");
            }
        }
    }
    // Ok! Time to start
    if (srcStatus.isFile()) {
        LOG.debug("rename: renaming file {} to {}", src, dst);
        if (dstStatus != null && dstStatus.isDirectory()) {
            String newDstKey = dstKey;
            if (!newDstKey.endsWith("/")) {
                newDstKey = newDstKey + "/";
            }
            String filename = srcKey.substring(pathToKey(src.getParent()).length() + 1);
            newDstKey = newDstKey + filename;
            copyFile(srcKey, newDstKey, srcStatus.getLen());
        } else {
            copyFile(srcKey, dstKey, srcStatus.getLen());
        }
        innerDelete(srcStatus, false);
    } else {
        LOG.debug("rename: renaming directory {} to {}", src, dst);
        // This is a directory to directory copy
        if (!dstKey.endsWith("/")) {
            dstKey = dstKey + "/";
        }
        if (!srcKey.endsWith("/")) {
            srcKey = srcKey + "/";
        }
        // Verify dest is not a child of the source directory
        if (dstKey.startsWith(srcKey)) {
            throw new RenameFailedException(srcKey, dstKey,
                    "cannot rename a directory to a subdirectory of itself");
        }
        List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<>();
        if (dstStatus != null && dstStatus.isEmptyDirectory()) {
            // delete unnecessary fake directory.
            keysToDelete.add(new DeleteObjectsRequest.KeyVersion(dstKey));
        }
        ListObjectsRequest request = new ListObjectsRequest();
        request.setBucketName(bucket);
        request.setPrefix(srcKey);
        request.setMaxKeys(maxKeys);
        ObjectListing objects = listObjects(request);
        while (true) {
            for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                keysToDelete.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
                String newDstKey = dstKey + summary.getKey().substring(srcKey.length());
                copyFile(summary.getKey(), newDstKey, summary.getSize());
                if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
                    removeKeys(keysToDelete, true, false);
                }
            }
            if (objects.isTruncated()) {
                objects = continueListObjects(objects);
            } else {
                if (!keysToDelete.isEmpty()) {
                    removeKeys(keysToDelete, false, false);
                }
                break;
            }
        }
    }
    if (src.getParent() != dst.getParent()) {
        deleteUnnecessaryFakeDirectories(dst.getParent());
        createFakeDirectoryIfNecessary(src.getParent());
    }
    return true;
}
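How the boolean FileSystem contract is preserved: the public rename(Path, Path) entry point can catch RenameFailedException and return its exit code instead of propagating the error. A minimal sketch of such a wrapper follows; it reuses the names from the snippet above, but the body is an illustration rather than a verbatim copy of Hadoop's implementation.

// Sketch of a public rename() wrapper that downgrades the checked
// failures of innerRename() to the boolean contract of FileSystem.rename().
public boolean rename(Path src, Path dst) throws IOException {
    try {
        return innerRename(src, dst);
    } catch (RenameFailedException e) {
        // "work didn't happen" cases become a boolean result, not an error
        LOG.debug(e.getMessage());
        return e.getExitCode();
    } catch (FileNotFoundException e) {
        // no source file: callers expect false rather than an exception
        LOG.debug(e.toString());
        return false;
    }
}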
use of com.amazonaws.services.s3.model.S3ObjectSummary in project crate by crate.
the class FileReadingCollectorTest method createBatchIterator.
private BatchIterator createBatchIterator(Collection<String> fileUris,
                                          String compression,
                                          final S3ObjectInputStream s3InputStream) {
    Reference raw = createReference("_raw", DataTypes.STRING);
    InputFactory.Context<LineCollectorExpression<?>> ctx =
            inputFactory.ctxForRefs(FileLineReferenceResolver::getImplementation);
    List<Input<?>> inputs = Collections.singletonList(ctx.add(raw));
    return FileReadingIterator.newInstance(fileUris, inputs, ctx.expressions(), compression,
            ImmutableMap.of(
                    LocalFsFileInputFactory.NAME, new LocalFsFileInputFactory(),
                    S3FileInputFactory.NAME, () -> new S3FileInput(new S3ClientHelper() {

                        @Override
                        protected AmazonS3 initClient(String accessKey, String secretKey) throws IOException {
                            AmazonS3 client = mock(AmazonS3Client.class);
                            ObjectListing objectListing = mock(ObjectListing.class);
                            S3ObjectSummary summary = mock(S3ObjectSummary.class);
                            S3Object s3Object = mock(S3Object.class);
                            when(client.listObjects(anyString(), anyString())).thenReturn(objectListing);
                            when(objectListing.getObjectSummaries()).thenReturn(Arrays.asList(summary));
                            when(summary.getKey()).thenReturn("foo");
                            when(client.getObject("fakebucket", "foo")).thenReturn(s3Object);
                            when(s3Object.getObjectContent()).thenReturn(s3InputStream);
                            when(client.listNextBatchOfObjects(any(ObjectListing.class))).thenReturn(objectListing);
                            when(objectListing.isTruncated()).thenReturn(false);
                            return client;
                        }
                    })),
            false, 1, 0);
}
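Because isTruncated() is stubbed to false, code under test treats the mocked listing as a single page. To exercise the pagination path as well, a hypothetical variant of the stubbing (not part of the test above) could chain two pages:

// Hypothetical two-page stubbing: the first listing reports truncation,
// the second page ends the pagination loop.
ObjectListing firstPage = mock(ObjectListing.class);
ObjectListing secondPage = mock(ObjectListing.class);
when(client.listObjects(anyString(), anyString())).thenReturn(firstPage);
when(firstPage.isTruncated()).thenReturn(true);
when(client.listNextBatchOfObjects(any(ObjectListing.class))).thenReturn(secondPage);
when(secondPage.isTruncated()).thenReturn(false);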
use of com.amazonaws.services.s3.model.S3ObjectSummary in project crate by crate.
the class S3FileInputTest method objectSummaries.
private List<S3ObjectSummary> objectSummaries() {
    listObjectSummaries = new LinkedList<>();
    S3ObjectSummary firstObj = new S3ObjectSummary();
    S3ObjectSummary secondObj = new S3ObjectSummary();
    firstObj.setBucketName(BUCKET_NAME);
    secondObj.setBucketName(BUCKET_NAME);
    firstObj.setKey("prefix/test1.json.gz");
    secondObj.setKey("prefix/test2.json.gz");
    listObjectSummaries.add(firstObj);
    listObjectSummaries.add(secondObj);
    return listObjectSummaries;
}
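A fixture like this typically backs a mocked listing so the client under test sees the two keys without any network calls. A hypothetical wiring, with names assumed rather than taken from the test class:

// Hypothetical usage: serve the two fixture summaries from a mocked
// single-page ObjectListing.
ObjectListing listing = mock(ObjectListing.class);
when(listing.getObjectSummaries()).thenReturn(objectSummaries());
when(listing.isTruncated()).thenReturn(false);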
use of com.amazonaws.services.s3.model.S3ObjectSummary in project jackrabbit-oak by apache.
the class S3Backend method deleteAllOlderThan.
@Override
public Set<DataIdentifier> deleteAllOlderThan(long min) throws DataStoreException {
    long start = System.currentTimeMillis();
    // S3 truncates lastModified to the lower second boundary,
    // hence min is reduced by 1000 ms.
    min = min - 1000;
    Set<DataIdentifier> deleteIdSet = new HashSet<DataIdentifier>(30);
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ObjectListing prevObjectListing = s3service.listObjects(bucket);
        while (true) {
            List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
            for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
                if (!s3ObjSumm.getKey().startsWith(META_KEY_PREFIX)) {
                    DataIdentifier identifier = new DataIdentifier(getIdentifierName(s3ObjSumm.getKey()));
                    long lastModified = s3ObjSumm.getLastModified().getTime();
                    LOG.debug("Identifier [{}]'s lastModified = [{}]", identifier, lastModified);
                    // order is important here: confirm the delete with the store
                    // before re-checking lastModified against the live metadata
                    if (lastModified < min && store.confirmDelete(identifier)
                            && s3service.getObjectMetadata(bucket, s3ObjSumm.getKey()).getLastModified().getTime() < min) {
                        store.deleteFromCache(identifier);
                        LOG.debug("add id [{}] to delete lists", s3ObjSumm.getKey());
                        deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
                        deleteIdSet.add(identifier);
                    }
                }
            }
            if (deleteList.size() > 0) {
                DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
                delObjsReq.setKeys(deleteList);
                DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
                if (dobjs.getDeletedObjects().size() != deleteList.size()) {
                    throw new DataStoreException("Incomplete delete object request. Only "
                            + dobjs.getDeletedObjects().size() + " out of "
                            + deleteList.size() + " are deleted");
                } else {
                    LOG.debug("[{}] records deleted from datastore", deleteList);
                }
            }
            if (!prevObjectListing.isTruncated()) {
                break;
            }
            prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
    LOG.info("deleteAllOlderThan: min=[{}] exit. Deleted[{}] records. Number of records deleted [{}] took [{}]ms",
            new Object[] { min, deleteIdSet, deleteIdSet.size(), System.currentTimeMillis() - start });
    return deleteIdSet;
}
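The method issues at most one listing page of keys per DeleteObjectsRequest, which keeps each call under S3's limit of 1000 keys per multi-object delete. Code that assembles a larger key list up front has to chunk it; a minimal sketch under that assumption (the helper name, signature, and constant are ours, not jackrabbit-oak's):

// Minimal sketch: split an arbitrarily large key list into batches of
// 1000, the maximum number of keys S3 accepts per DeleteObjects request.
private static final int MAX_KEYS_PER_DELETE = 1000;

static void deleteInBatches(AmazonS3 s3, String bucket, List<DeleteObjectsRequest.KeyVersion> keys) {
    for (int i = 0; i < keys.size(); i += MAX_KEYS_PER_DELETE) {
        List<DeleteObjectsRequest.KeyVersion> batch =
                keys.subList(i, Math.min(i + MAX_KEYS_PER_DELETE, keys.size()));
        s3.deleteObjects(new DeleteObjectsRequest(bucket).withKeys(batch));
    }
}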