Example use of com.amazonaws.services.s3.model.ListObjectsRequest (AWS SDK for Java v1 — note the chained withXxx setters and ObjectListing below, which do not exist in the v2 software.amazon.awssdk API) in project presto by prestodb: class PrestoS3FileSystem, method listPrefix.
/**
 * Returns a lazy iterator over the file statuses directly under {@code path}.
 * Listing pages are fetched from S3 on demand as the iterator is consumed.
 */
private Iterator<LocatedFileStatus> listPrefix(Path path) {
    // Non-root keys get a trailing separator so only children of the
    // "directory" match the prefix.
    String prefix = keyFromPath(path);
    if (!prefix.isEmpty()) {
        prefix += PATH_SEPARATOR;
    }
    ListObjectsRequest request = new ListObjectsRequest()
            .withBucketName(uri.getHost())
            .withPrefix(prefix)
            .withDelimiter(PATH_SEPARATOR);
    STATS.newListObjectsCall();
    // Each subsequent page is requested only when the previous page has been
    // consumed and reported itself truncated.
    Iterator<ObjectListing> pages = new AbstractSequentialIterator<ObjectListing>(s3.listObjects(request)) {
        @Override
        protected ObjectListing computeNext(ObjectListing previous) {
            return previous.isTruncated() ? s3.listNextBatchOfObjects(previous) : null;
        }
    };
    return Iterators.concat(Iterators.transform(pages, this::statusFromListing));
}
Example use of com.amazonaws.services.s3.model.ListObjectsRequest (AWS SDK for Java v1) in project nifi by apache: class TestListS3, method testListIgnoreByMinAge.
/**
 * MIN_AGE filtering: objects younger than the configured minimum age ("now")
 * are skipped, objects already recorded in state ("minus-3hour") are not
 * re-listed, so only "minus-1hour" is emitted.
 */
@Test
public void testListIgnoreByMinAge() throws IOException {
    runner.setProperty(ListS3.REGION, "eu-west-1");
    runner.setProperty(ListS3.BUCKET, "test-bucket");
    runner.setProperty(ListS3.MIN_AGE, "30 sec");

    // Three objects with distinct ages relative to "now".
    Date now = new Date();
    Date oneHourAgo = DateUtils.addHours(now, -1);
    Date threeHoursAgo = DateUtils.addHours(now, -3);

    ObjectListing listing = new ObjectListing();
    S3ObjectSummary threeHourOld = new S3ObjectSummary();
    threeHourOld.setBucketName("test-bucket");
    threeHourOld.setKey("minus-3hour");
    threeHourOld.setLastModified(threeHoursAgo);
    listing.getObjectSummaries().add(threeHourOld);
    S3ObjectSummary oneHourOld = new S3ObjectSummary();
    oneHourOld.setBucketName("test-bucket");
    oneHourOld.setKey("minus-1hour");
    oneHourOld.setLastModified(oneHourAgo);
    listing.getObjectSummaries().add(oneHourOld);
    S3ObjectSummary brandNew = new S3ObjectSummary();
    brandNew.setBucketName("test-bucket");
    brandNew.setKey("now");
    brandNew.setLastModified(now);
    listing.getObjectSummaries().add(brandNew);
    Mockito.when(mockS3Client.listObjects(Mockito.any(ListObjectsRequest.class))).thenReturn(listing);

    // Pre-seed processor state so "minus-3hour" is considered already seen.
    Map<String, String> priorState = new HashMap<>();
    priorState.put(ListS3.CURRENT_TIMESTAMP, String.valueOf(threeHoursAgo.getTime()));
    priorState.put(ListS3.CURRENT_KEY_PREFIX + "0", "minus-3hour");
    runner.getStateManager().setState(priorState, Scope.CLUSTER);

    runner.run();

    // Exactly one listObjects call against the configured bucket, no versioned listing.
    ArgumentCaptor<ListObjectsRequest> requestCaptor = ArgumentCaptor.forClass(ListObjectsRequest.class);
    Mockito.verify(mockS3Client, Mockito.times(1)).listObjects(requestCaptor.capture());
    assertEquals("test-bucket", requestCaptor.getValue().getBucketName());
    Mockito.verify(mockS3Client, Mockito.never()).listVersions(Mockito.any());

    // Only the one-hour-old object passes both the state and MIN_AGE filters.
    runner.assertAllFlowFilesTransferred(ListS3.REL_SUCCESS, 1);
    List<MockFlowFile> successFiles = runner.getFlowFilesForRelationship(ListS3.REL_SUCCESS);
    MockFlowFile emitted = successFiles.get(0);
    emitted.assertAttributeEquals("filename", "minus-1hour");
    emitted.assertAttributeEquals("s3.bucket", "test-bucket");
    String expectedTimestamp = String.valueOf(oneHourAgo.getTime());
    emitted.assertAttributeEquals("s3.lastModified", expectedTimestamp);
    // State advances to the newest listed (non-ignored) modification time.
    runner.getStateManager().assertStateEquals(ListS3.CURRENT_TIMESTAMP, expectedTimestamp, Scope.CLUSTER);
}
Example use of com.amazonaws.services.s3.model.ListObjectsRequest (AWS SDK for Java v1) in project nifi by apache: class TestListS3, method testListObjectsNothingNew.
/**
 * When the only listed object matches the persisted state (same key, same
 * timestamp), the processor emits nothing.
 */
@Test
public void testListObjectsNothingNew() throws IOException {
    runner.setProperty(ListS3.REGION, "eu-west-1");
    runner.setProperty(ListS3.BUCKET, "test-bucket");

    // Calendar months are 0-based: (2017, 5, 2) is June 2, 2017.
    Calendar cal = Calendar.getInstance();
    cal.set(2017, 5, 2);
    Date lastModified = cal.getTime();

    // Persist state that already covers this exact object and timestamp.
    Map<String, String> previousState = new HashMap<>();
    previousState.put(ListS3.CURRENT_TIMESTAMP, String.valueOf(lastModified.getTime()));
    previousState.put(ListS3.CURRENT_KEY_PREFIX + "0", "test-key");
    runner.getStateManager().setState(previousState, Scope.CLUSTER);

    ObjectListing listing = new ObjectListing();
    S3ObjectSummary alreadySeen = new S3ObjectSummary();
    alreadySeen.setBucketName("test-bucket");
    alreadySeen.setKey("test-key");
    alreadySeen.setLastModified(lastModified);
    listing.getObjectSummaries().add(alreadySeen);
    Mockito.when(mockS3Client.listObjects(Mockito.any(ListObjectsRequest.class))).thenReturn(listing);

    runner.run();

    // One plain listing against the configured bucket; no versioned listing.
    ArgumentCaptor<ListObjectsRequest> requestCaptor = ArgumentCaptor.forClass(ListObjectsRequest.class);
    Mockito.verify(mockS3Client, Mockito.times(1)).listObjects(requestCaptor.capture());
    assertEquals("test-bucket", requestCaptor.getValue().getBucketName());
    Mockito.verify(mockS3Client, Mockito.never()).listVersions(Mockito.any());

    // Nothing new relative to state, so nothing is transferred.
    runner.assertAllFlowFilesTransferred(ListS3.REL_SUCCESS, 0);
}
Example use of com.amazonaws.services.s3.model.ListObjectsRequest (AWS SDK for Java v1) in project stocator by SparkTC: class COSAPIClient, method list.
/**
 * Lists the objects under {@code path} directly against COS and converts them
 * to FileStatus entries, deduplicating colliding Spark task attempts
 * (FTA logic) and surfacing common prefixes as directories.
 *
 * @param hostName host used to qualify the returned paths
 * @param path path being listed
 * @param fullListing when true, zero-length objects are also returned
 * @param prefixBased NOTE(review): not referenced anywhere in this body — confirm it is intentional
 * @param isDirectory when Boolean.TRUE, a trailing "/" is forced onto the key so only children match
 * @param flatListing when false, a "/" delimiter limits the listing to one level
 * @param filter optional PathFilter applied to every candidate result
 * @return matching file statuses
 * @throws FileNotFoundException declared, but not visibly thrown in this body — presumably from delete(); confirm
 * @throws IOException on listing failure
 */
@Override
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter) throws FileNotFoundException, IOException {
    LOG.debug("Native direct list status for {}", path);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    String key = pathToKey(path);
    // Explicit directory: force a trailing "/" so the prefix matches only the
    // directory's children (skipped for the bucket root itself).
    if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/") && !path.toString().equals(hostName)) {
        key = key + "/";
        LOG.debug("listNativeDirect modify key to {}", key);
    }
    // Zero-length objects keyed by qualified path; consulted in the
    // common-prefix pass at the bottom.
    Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>();
    ListObjectsRequest request = new ListObjectsRequest();
    request.setBucketName(mBucket);
    request.setMaxKeys(5000); // page size per listObjects round trip
    request.setPrefix(key);
    if (!flatListing) {
        // One-level listing: stop at the first "/" past the prefix.
        request.setDelimiter("/");
    }
    ObjectListing objectList = mClient.listObjects(request);
    List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
    // NOTE(review): commonPrefixes is captured from the first page only and is
    // never refreshed while paginating below — confirm prefixes from later
    // pages are not being dropped.
    List<String> commonPrefixes = objectList.getCommonPrefixes();
    boolean objectScanContinue = true;
    // Candidate held back one iteration so a colliding task attempt (same name
    // once the task id is stripped) can replace it before it is emitted.
    S3ObjectSummary prevObj = null;
    // start FTA logic
    boolean stocatorOrigin = isSparkOrigin(key, path.toString());
    if (stocatorOrigin) {
        LOG.debug("Stocator origin is true for {}", key);
        if (!isJobSuccessful(key)) {
            LOG.debug("{} created by failed Spark job. Skipped", key);
            if (fModeAutomaticDelete) {
                delete(hostName, new Path(key), true);
            }
            // Output of a failed job: expose nothing.
            return new FileStatus[0];
        }
    }
    while (objectScanContinue) {
        for (S3ObjectSummary obj : objectSummaries) {
            if (prevObj == null) {
                // First object of the scan: remember it; it is emitted once the
                // next object proves it is not a colliding attempt.
                prevObj = obj;
                prevObj.setKey(correctPlusSign(key, prevObj.getKey()));
                continue;
            }
            obj.setKey(correctPlusSign(key, obj.getKey()));
            String objKey = obj.getKey();
            String unifiedObjectName = extractUnifiedObjectName(objKey);
            LOG.trace("list candidate {}, unified name {}", objKey, unifiedObjectName);
            if (stocatorOrigin && !fullListing) {
                LOG.trace("{} created by Spark", unifiedObjectName);
                // we need to make sure there are no failed attempts
                if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
                    // found failed that was not aborted.
                    LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey);
                    if (prevObj.getSize() < obj.getSize()) {
                        // Keep the larger attempt as the surviving candidate.
                        LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
                        prevObj = obj;
                    }
                    continue;
                }
            }
            FileStatus fs = createFileStatus(prevObj, hostName, path);
            if (fs.getLen() > 0 || fullListing) {
                LOG.trace("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
                // NOTE(review): unlike the tail and common-prefix branches
                // below, statuses accepted here are NOT put into memoryCache —
                // confirm the asymmetry is intentional.
                if (filter == null) {
                    tmpResult.add(fs);
                } else if (filter != null && filter.accept(fs.getPath())) {
                    tmpResult.add(fs);
                } else {
                    LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
                }
            } else {
                // Zero-length object: remember for the common-prefix pass.
                emptyObjects.put(fs.getPath().toString(), fs);
            }
            prevObj = obj;
        }
        // Keep paging while the listing reports truncation.
        boolean isTruncated = objectList.isTruncated();
        if (isTruncated) {
            objectList = mClient.listNextBatchOfObjects(objectList);
            objectSummaries = objectList.getObjectSummaries();
        } else {
            objectScanContinue = false;
        }
    }
    // Emit the final held-back candidate, if any.
    if (prevObj != null) {
        FileStatus fs = createFileStatus(prevObj, hostName, path);
        LOG.trace("Adding the last object from the list {}", fs.getPath());
        if (fs.getLen() > 0 || fullListing) {
            LOG.trace("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
            if (filter == null) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else if (filter != null && filter.accept(fs.getPath())) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else {
                LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
            }
        } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) {
            // Empty object (other than the _SUCCESS marker): track it for the
            // common-prefix pass below.
            emptyObjects.put(fs.getPath().toString(), fs);
        }
    }
    // get common prefixes
    for (String comPrefix : commonPrefixes) {
        LOG.trace("Common prefix is {}", comPrefix);
        // Surface the prefix as a directory when it matches a known empty
        // object, or when no empty objects were collected at all.
        if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString()) || emptyObjects.isEmpty()) {
            FileStatus status = new COSFileStatus(true, false, keyToQualifiedPath(hostName, comPrefix));
            LOG.trace("Match between common prefix and empty object {}. Adding to result", comPrefix);
            if (filter == null) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else if (filter != null && filter.accept(status.getPath())) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else {
                LOG.trace("Common prefix {} rejected by path filter during list. Filter {}", status.getPath(), filter);
            }
        }
    }
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
Example use of com.amazonaws.services.s3.model.ListObjectsRequest (AWS SDK for Java v1) in project athenz by yahoo: class S3ChangeLogStore, method listObjects.
/**
 * Lists the objects in the zts bucket and adds their keys to the given
 * collection. If the mod time is specified as 0 then all objects are
 * included; otherwise only objects newer than the specified timestamp are.
 * Paginates through the full listing via listNextBatchOfObjects.
 * @param s3 AWS S3 client object
 * @param domains collection to be updated to include domain names
 * @param modTime only include domains newer than this timestamp (0 = all)
 */
void listObjects(AmazonS3 s3, Collection<String> domains, long modTime) {
    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("listObjects: Retrieving domains from {} with mod time > {}", s3BucketName, modTime);
    }
    ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(s3BucketName));
    while (objectListing != null) {
        // process each entry in our result set and add the domain
        // name to our return list
        final List<S3ObjectSummary> objectSummaries = objectListing.getObjectSummaries();
        boolean listTruncated = objectListing.isTruncated();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("listObjects: retrieved {} objects, more objects available - {}", objectSummaries.size(), listTruncated);
        }
        for (S3ObjectSummary objectSummary : objectSummaries) {
            // skip entries that are not newer than the requested mod time
            if (modTime > 0 && objectSummary.getLastModified().getTime() <= modTime) {
                continue;
            }
            // for now skip any folders/objects that start with '.'
            // (startsWith also guards against an empty key, which the previous
            // charAt(0) check would have thrown StringIndexOutOfBoundsException on)
            final String objectName = objectSummary.getKey();
            if (objectName.isEmpty() || objectName.startsWith(".")) {
                continue;
            }
            domains.add(objectName);
        }
        if (!listTruncated) {
            break;
        }
        objectListing = s3.listNextBatchOfObjects(objectListing);
    }
}
Aggregations