Use of com.amazonaws.services.s3.model.Filter in project trino by trinodb.
The class TrinoS3FileSystem, method listPrefix.
/**
 * Lists all objects whose absolute path matches the provided prefix.
 * <p>
 * The first page is requested right away. Follow-up pages are requested by the page iterator
 * only while the preceding page reports that it was truncated.
 *
 * @param prefix key prefix passed to the list request
 * @param initialMaxKeys optional max-keys value applied to the first request only
 * @param mode listing mode; {@code RECURSIVE_FILES_ONLY} lists without a delimiter, and any
 *        files-only mode removes non-file entries from the result
 * @return iterator over the statuses derived from every listed page
 */
private Iterator<LocatedFileStatus> listPrefix(String prefix, OptionalInt initialMaxKeys, ListingMode mode) {
    String delimiter = (mode == ListingMode.RECURSIVE_FILES_ONLY) ? null : PATH_SEPARATOR;
    Integer firstPageLimit = null;
    if (initialMaxKeys.isPresent()) {
        firstPageLimit = initialMaxKeys.getAsInt();
    }

    ListObjectsV2Request request = new ListObjectsV2Request()
            .withBucketName(getBucketName(uri))
            .withPrefix(prefix)
            .withDelimiter(delimiter)
            .withMaxKeys(firstPageLimit)
            .withRequesterPays(requesterPaysEnabled);

    STATS.newListObjectsCall();
    ListObjectsV2Result firstPage = s3.listObjectsV2(request);

    Iterator<ListObjectsV2Result> pages = new AbstractSequentialIterator<ListObjectsV2Result>(firstPage) {
        @Override
        protected ListObjectsV2Result computeNext(ListObjectsV2Result previous) {
            if (previous.isTruncated()) {
                // The max-keys limit only applies to the first batch; drop it before continuing
                request.withMaxKeys(null);
                request.setContinuationToken(previous.getNextContinuationToken());
                return s3.listObjectsV2(request);
            }
            // Returning null ends the sequence
            return null;
        }
    };

    Iterator<LocatedFileStatus> statuses = Iterators.concat(Iterators.transform(pages, this::statusFromListing));
    if (!mode.isFilesOnly()) {
        return statuses;
    }
    // Even a recursive listing can contain empty "directory" objects, so keep files only
    return Iterators.filter(statuses, LocatedFileStatus::isFile);
}
Use of com.amazonaws.services.s3.model.Filter in project sirius-biz by scireum.
The class ObjectStore, method listObjects.
/**
 * Iterates over all objects in a bucket.
 * <p>
 * Keep in mind that a bucket might contain a very large number of objects. This method must be used with care and
 * a lot of thought.
 * <p>
 * Objects are fetched batch by batch. Iteration ends when a batch is not truncated, when the consumer
 * returns {@code false}, or when the current task context is no longer active.
 *
 * @param bucket   the bucket to list objects for
 * @param prefix   the object name prefix used to filter
 * @param consumer the consumer to be supplied with each found object. As soon as {@code false} is returned,
 *                 the iteration stops.
 */
public void listObjects(BucketName bucket, @Nullable String prefix, Predicate<S3ObjectSummary> consumer) {
    TaskContext taskContext = CallContext.getCurrent().get(TaskContext.class);
    ObjectListing batch = null;

    while (true) {
        // Only the remote fetch is covered by the Operation; processing happens after it is closed
        try (Operation ignored = new Operation(() -> Strings.apply("S3: Fetching objects from %s (prefix: %s)", bucket.getName(), prefix), Duration.ofSeconds(10))) {
            batch = (batch == null)
                    ? getClient().listObjects(bucket.getName(), prefix)
                    : getClient().listNextBatchOfObjects(batch);
        }

        for (S3ObjectSummary summary : batch.getObjectSummaries()) {
            boolean keepGoing = consumer.test(summary) && taskContext.isActive();
            if (!keepGoing) {
                return;
            }
        }

        if (!batch.isTruncated() || !taskContext.isActive()) {
            return;
        }
    }
}
Use of com.amazonaws.services.s3.model.Filter in project presto by prestodb.
The class PrestoS3FileSystem, method listPrefix.
/**
 * Lists the objects located under the given path.
 * <p>
 * The key derived from the path gets a trailing separator appended unless it is empty. The first
 * page is requested right away; follow-up pages are requested by the page iterator only while
 * the preceding page reports that it was truncated.
 *
 * @param path path whose key is used as the listing prefix
 * @param initialMaxKeys optional max-keys value applied to the first request only
 * @param mode listing mode; {@code RECURSIVE_FILES_ONLY} lists without a delimiter, and any
 *        files-only mode removes non-file entries from the result
 * @return iterator over the statuses derived from every listed page
 */
private Iterator<LocatedFileStatus> listPrefix(Path path, OptionalInt initialMaxKeys, ListingMode mode) {
    String prefix = keyFromPath(path);
    if (!prefix.isEmpty()) {
        prefix = prefix + PATH_SEPARATOR;
    }

    String delimiter = (mode == ListingMode.RECURSIVE_FILES_ONLY) ? null : PATH_SEPARATOR;
    Integer firstPageLimit = null;
    if (initialMaxKeys.isPresent()) {
        firstPageLimit = initialMaxKeys.getAsInt();
    }

    ListObjectsRequest request = new ListObjectsRequest()
            .withBucketName(getBucketName(uri))
            .withPrefix(prefix)
            .withDelimiter(delimiter)
            .withMaxKeys(firstPageLimit);

    STATS.newListObjectsCall();
    ObjectListing firstPage = s3.listObjects(request);

    Iterator<ObjectListing> pages = new AbstractSequentialIterator<ObjectListing>(firstPage) {
        @Override
        protected ObjectListing computeNext(ObjectListing previous) {
            if (previous.isTruncated()) {
                // Clear any max keys set for the initial request before submitting subsequent requests.
                // Values < 0 are not sent in the request and the default limit is used
                previous.setMaxKeys(-1);
                return s3.listNextBatchOfObjects(previous);
            }
            // Returning null ends the sequence
            return null;
        }
    };

    Iterator<LocatedFileStatus> statuses = Iterators.concat(Iterators.transform(pages, this::statusFromListing));
    if (!mode.isFilesOnly()) {
        return statuses;
    }
    // Even a recursive listing can contain empty "directory" objects, so keep files only
    return Iterators.filter(statuses, LocatedFileStatus::isFile);
}
Use of com.amazonaws.services.s3.model.Filter in project druid by druid-io.
The class EC2AutoScaler, method idToIpLookup.
/**
 * Looks up the private IP address of each instance matching the given instance ids.
 * <p>
 * The ids are split into batches of at most {@code MAX_AWS_FILTER_VALUES} entries, and one
 * describe-instances request with an "instance-id" filter is issued per batch.
 *
 * @param nodeIds instance ids to resolve
 * @return private IP addresses of all instances returned for those ids
 */
@Override
public List<String> idToIpLookup(List<String> nodeIds) {
    // One describe-instances call per batch of ids
    final Function<List<String>, Iterable<Reservation>> describeBatch = new Function<List<String>, Iterable<Reservation>>() {
        @Override
        public Iterable<Reservation> apply(List<String> input) {
            DescribeInstancesRequest request = new DescribeInstancesRequest().withFilters(new Filter("instance-id", input));
            return amazonEC2Client.describeInstances(request).getReservations();
        }
    };

    // Flattens a reservation into the instances it contains
    final Function<Reservation, Iterable<Instance>> instancesOf = new Function<Reservation, Iterable<Instance>>() {
        @Override
        public Iterable<Instance> apply(Reservation reservation) {
            return reservation.getInstances();
        }
    };

    // Projects an instance onto its private IP address
    final Function<Instance, String> privateIpOf = new Function<Instance, String>() {
        @Override
        public String apply(Instance instance) {
            return instance.getPrivateIpAddress();
        }
    };

    final List<String> ipAddresses = FluentIterable
            .from(Lists.partition(nodeIds, MAX_AWS_FILTER_VALUES))
            .transformAndConcat(describeBatch)
            .transformAndConcat(instancesOf)
            .transform(privateIpOf)
            .toList();

    log.debug("Performing lookup: %s --> %s", nodeIds, ipAddresses);
    return ipAddresses;
}
Use of com.amazonaws.services.s3.model.Filter in project beam by apache.
The class S3FileSystem, method expandGlob.
/**
 * Expands a wildcard resource id into the concrete objects it matches.
 * <p>
 * S3 can list objects by prefix but not by wildcard. The listing is therefore restricted to the
 * longest wildcard-free prefix of the glob, and every returned key is matched against a regular
 * expression built from the full glob key.
 *
 * @param glob resource id that must be a wildcard
 * @return the matching resource ids, or an expansion carrying an {@link IOException} when a
 *         list request fails with an {@code AmazonClientException}
 */
private ExpandedGlob expandGlob(S3ResourceId glob) {
    checkArgument(glob.isWildcard(), "isWildcard");

    String keyPrefix = glob.getKeyNonWildcardPrefix();
    Pattern wildcardRegexp = Pattern.compile(wildcardToRegexp(glob.getKey()));
    LOG.debug("expanding bucket {}, prefix {}, against pattern {}", glob.getBucket(), keyPrefix, wildcardRegexp.toString());

    ImmutableList.Builder<S3ResourceId> matches = ImmutableList.builder();
    String nextToken = null;
    boolean hasMorePages = true;
    while (hasMorePages) {
        ListObjectsV2Request request = new ListObjectsV2Request()
                .withBucketName(glob.getBucket())
                .withPrefix(keyPrefix)
                .withContinuationToken(nextToken);

        ListObjectsV2Result page;
        try {
            page = amazonS3.get().listObjectsV2(request);
        } catch (AmazonClientException e) {
            // A failed listing is reported through the result instead of being thrown
            return ExpandedGlob.create(glob, new IOException(e));
        }
        nextToken = page.getNextContinuationToken();

        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            String objectKey = summary.getKey();
            // The prefix listing is a superset of the glob; keep only keys matching the regex
            if (!wildcardRegexp.matcher(objectKey).matches()) {
                continue;
            }
            S3ResourceId expandedPath = S3ResourceId
                    .fromComponents(glob.getScheme(), summary.getBucketName(), objectKey)
                    .withSize(summary.getSize())
                    .withLastModified(summary.getLastModified());
            LOG.debug("Expanded S3 object path {}", expandedPath);
            matches.add(expandedPath);
        }

        // A missing continuation token marks the last page
        hasMorePages = nextToken != null;
    }
    return ExpandedGlob.create(glob, matches.build());
}
Aggregations