Search in sources :

Example 21 with Filter

use of com.amazonaws.services.s3.model.Filter in project trino by trinodb.

the class TrinoS3FileSystem method listPrefix.

/**
 * Lists the objects in this file system's bucket whose key starts with {@code prefix}.
 * <p>
 * The first page is requested immediately; follow-up pages are requested from
 * {@code computeNext} for as long as the previous page reports that it was truncated.
 *
 * @param prefix         key prefix passed to the list request
 * @param initialMaxKeys optional max-keys limit applied to the first request only
 * @param mode           listing mode; {@code RECURSIVE_FILES_ONLY} lists without a delimiter, and any
 *                       mode for which {@code isFilesOnly()} holds drops non-file entries
 * @return an iterator over the statuses produced by {@code statusFromListing} for every page
 */
private Iterator<LocatedFileStatus> listPrefix(String prefix, OptionalInt initialMaxKeys, ListingMode mode) {
    // No delimiter for recursive listings, so keys below "sub directories" are returned as well
    String delimiter = (mode == ListingMode.RECURSIVE_FILES_ONLY) ? null : PATH_SEPARATOR;
    // A null limit leaves the max-keys setting unset on the request
    Integer firstPageLimit = initialMaxKeys.isPresent() ? Integer.valueOf(initialMaxKeys.getAsInt()) : null;

    ListObjectsV2Request request = new ListObjectsV2Request()
            .withBucketName(getBucketName(uri))
            .withPrefix(prefix)
            .withDelimiter(delimiter)
            .withMaxKeys(firstPageLimit)
            .withRequesterPays(requesterPaysEnabled);

    STATS.newListObjectsCall();
    Iterator<ListObjectsV2Result> pages = new AbstractSequentialIterator<>(s3.listObjectsV2(request)) {

        @Override
        protected ListObjectsV2Result computeNext(ListObjectsV2Result previous) {
            if (previous.isTruncated()) {
                // The initial max-keys limit only applies to the first page, so reset it before continuing
                request.withMaxKeys(null).setContinuationToken(previous.getNextContinuationToken());
                return s3.listObjectsV2(request);
            }
            // Returning null ends the sequence
            return null;
        }
    };

    Iterator<LocatedFileStatus> statuses = Iterators.concat(Iterators.transform(pages, this::statusFromListing));
    if (!mode.isFilesOnly()) {
        return statuses;
    }
    // Recursive listings may still include empty "directory" placeholder objects; keep files only
    return Iterators.filter(statuses, LocatedFileStatus::isFile);
}
Also used : ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator)

Example 22 with Filter

use of com.amazonaws.services.s3.model.Filter in project sirius-biz by scireum.

the class ObjectStore method listObjects.

/**
 * Iterates over all objects in a bucket, fetching them batch by batch.
 * <p>
 * Keep in mind that a bucket might contain a very large number of objects, so this method must be used
 * with care. Iteration ends when the listing is no longer truncated, when the consumer returns
 * {@code false}, or when the current {@link TaskContext} is no longer active.
 *
 * @param bucket   the bucket to list objects for
 * @param prefix   the object name prefix used to filter
 * @param consumer the consumer to be supplied with each found object. As soon as {@code false} is
 *                 returned, the iteration stops.
 */
public void listObjects(BucketName bucket, @Nullable String prefix, Predicate<S3ObjectSummary> consumer) {
    TaskContext taskContext = CallContext.getCurrent().get(TaskContext.class);
    ObjectListing page = null;
    while (true) {
        // Each fetch is wrapped in an Operation created with a 10 second duration
        try (Operation operation = new Operation(() -> Strings.apply("S3: Fetching objects from %s (prefix: %s)", bucket.getName(), prefix), Duration.ofSeconds(10))) {
            // First round starts a new listing, later rounds continue from the previous batch
            page = (page == null)
                   ? getClient().listObjects(bucket.getName(), prefix)
                   : getClient().listNextBatchOfObjects(page);
        }

        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            // The consumer is always asked first; the task state is only checked if it wants more
            boolean keepGoing = consumer.test(summary) && taskContext.isActive();
            if (!keepGoing) {
                return;
            }
        }

        if (!page.isTruncated() || !taskContext.isActive()) {
            return;
        }
    }
}
Also used : TaskContext(sirius.kernel.async.TaskContext) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) Operation(sirius.kernel.async.Operation)

Example 23 with Filter

use of com.amazonaws.services.s3.model.Filter in project presto by prestodb.

the class PrestoS3FileSystem method listPrefix.

/**
 * Lists the objects in this file system's bucket that live under the key derived from {@code path}.
 * <p>
 * A non-empty key gets {@code PATH_SEPARATOR} appended before it is used as the listing prefix.
 * The first page is requested immediately; follow-up pages are requested from {@code computeNext}
 * for as long as the previous page reports that it was truncated.
 *
 * @param path           path whose key (via {@code keyFromPath}) forms the listing prefix
 * @param initialMaxKeys optional max-keys limit applied to the first request only
 * @param mode           listing mode; {@code RECURSIVE_FILES_ONLY} lists without a delimiter, and any
 *                       mode for which {@code isFilesOnly()} holds drops non-file entries
 * @return an iterator over the statuses produced by {@code statusFromListing} for every page
 */
private Iterator<LocatedFileStatus> listPrefix(Path path, OptionalInt initialMaxKeys, ListingMode mode) {
    String rawKey = keyFromPath(path);
    String prefix = rawKey.isEmpty() ? rawKey : rawKey + PATH_SEPARATOR;

    String delimiter = (mode == ListingMode.RECURSIVE_FILES_ONLY) ? null : PATH_SEPARATOR;
    // A null limit leaves the max-keys setting unset on the request
    Integer firstPageLimit = initialMaxKeys.isPresent() ? Integer.valueOf(initialMaxKeys.getAsInt()) : null;

    ListObjectsRequest request = new ListObjectsRequest()
            .withBucketName(getBucketName(uri))
            .withPrefix(prefix)
            .withDelimiter(delimiter)
            .withMaxKeys(firstPageLimit);

    STATS.newListObjectsCall();
    Iterator<ObjectListing> pages = new AbstractSequentialIterator<ObjectListing>(s3.listObjects(request)) {

        @Override
        protected ObjectListing computeNext(ObjectListing previous) {
            if (previous.isTruncated()) {
                // Drop the max keys set for the initial request before asking for the next batch.
                // Values < 0 are not sent in the request and the default limit is used
                previous.setMaxKeys(-1);
                return s3.listNextBatchOfObjects(previous);
            }
            // Returning null ends the sequence
            return null;
        }
    };

    Iterator<LocatedFileStatus> statuses = Iterators.concat(Iterators.transform(pages, this::statusFromListing));
    if (!mode.isFilesOnly()) {
        return statuses;
    }
    // Recursive listings may still include empty "directory" placeholder objects; keep files only
    return Iterators.filter(statuses, LocatedFileStatus::isFile);
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator)

Example 24 with Filter

use of com.amazonaws.services.ec2.model.Filter in project druid by druid-io.

the class EC2AutoScaler method idToIpLookup.

/**
 * Resolves instance ids to the private IP addresses of the matching instances.
 * <p>
 * The ids are split into batches of at most {@code MAX_AWS_FILTER_VALUES}; each batch is sent as
 * one describe-instances request filtered on {@code instance-id}.
 *
 * @param nodeIds the instance ids to look up
 * @return the private IP address of every instance returned for those ids
 */
@Override
public List<String> idToIpLookup(List<String> nodeIds) {
    // One describe-instances call per batch of ids
    final Function<List<String>, Iterable<Reservation>> describeBatch =
        batch -> amazonEC2Client.describeInstances(
            new DescribeInstancesRequest().withFilters(new Filter("instance-id", batch))
        ).getReservations();
    final Function<Reservation, Iterable<Instance>> instancesOf = Reservation::getInstances;
    final Function<Instance, String> privateIpOf = Instance::getPrivateIpAddress;

    final List<String> retVal = FluentIterable
        .from(Lists.partition(nodeIds, MAX_AWS_FILTER_VALUES))
        .transformAndConcat(describeBatch)
        .transformAndConcat(instancesOf)
        .transform(privateIpOf)
        .toList();

    log.debug("Performing lookup: %s --> %s", nodeIds, retVal);
    return retVal;
}
Also used : Function(com.google.common.base.Function) Reservation(com.amazonaws.services.ec2.model.Reservation) FluentIterable(com.google.common.collect.FluentIterable) Filter(com.amazonaws.services.ec2.model.Filter) Instance(com.amazonaws.services.ec2.model.Instance) ArrayList(java.util.ArrayList) List(java.util.List) DescribeInstancesRequest(com.amazonaws.services.ec2.model.DescribeInstancesRequest)

Example 25 with Filter

use of com.amazonaws.services.s3.model.Filter in project beam by apache.

the class S3FileSystem method expandGlob.

/**
 * Expands a wildcard resource id into the concrete S3 objects that match it.
 * <p>
 * Objects are listed page by page under the glob's non-wildcard key prefix, and each returned key is
 * matched against a regex built from the full glob key. If a list request throws an
 * {@code AmazonClientException}, the result carries an {@code IOException} wrapping it instead of paths.
 *
 * @param glob a resource id for which {@code isWildcard()} must hold
 * @return the expansion result for {@code glob}: either the matching paths or the wrapped failure
 */
private ExpandedGlob expandGlob(S3ResourceId glob) {
    // S3 can only filter a listing by prefix, not by wildcard. So list everything below the
    // longest wildcard-free prefix and apply the wildcard as a regex on the client side.
    checkArgument(glob.isWildcard(), "isWildcard");
    final String keyPrefix = glob.getKeyNonWildcardPrefix();
    final Pattern wildcardRegexp = Pattern.compile(wildcardToRegexp(glob.getKey()));
    LOG.debug("expanding bucket {}, prefix {}, against pattern {}", glob.getBucket(), keyPrefix, wildcardRegexp.toString());

    ImmutableList.Builder<S3ResourceId> matches = ImmutableList.builder();
    String continuationToken = null;
    do {
        ListObjectsV2Request request = new ListObjectsV2Request()
                .withBucketName(glob.getBucket())
                .withPrefix(keyPrefix)
                .withContinuationToken(continuationToken);

        ListObjectsV2Result page;
        try {
            page = amazonS3.get().listObjectsV2(request);
        } catch (AmazonClientException e) {
            // Report the failure through the result rather than throwing
            return ExpandedGlob.create(glob, new IOException(e));
        }
        // A null token after this page ends the loop
        continuationToken = page.getNextContinuationToken();

        for (S3ObjectSummary summary : page.getObjectSummaries()) {
            if (!wildcardRegexp.matcher(summary.getKey()).matches()) {
                // Shares the prefix but does not match the wildcard pattern
                continue;
            }
            S3ResourceId expandedPath = S3ResourceId
                    .fromComponents(glob.getScheme(), summary.getBucketName(), summary.getKey())
                    .withSize(summary.getSize())
                    .withLastModified(summary.getLastModified());
            LOG.debug("Expanded S3 object path {}", expandedPath);
            matches.add(expandedPath);
        }
    } while (continuationToken != null);

    return ExpandedGlob.create(glob, matches.build());
}
Also used : Pattern(java.util.regex.Pattern) ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) AmazonClientException(com.amazonaws.AmazonClientException) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IOException(java.io.IOException)

Aggregations

Filter (com.amazonaws.services.ec2.model.Filter)96 ArrayList (java.util.ArrayList)70 List (java.util.List)52 Collectors (java.util.stream.Collectors)46 IOException (java.io.IOException)41 HashMap (java.util.HashMap)38 Map (java.util.Map)35 AmazonS3 (com.amazonaws.services.s3.AmazonS3)34 Set (java.util.Set)31 DescribeInstancesRequest (com.amazonaws.services.ec2.model.DescribeInstancesRequest)30 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)27 Instance (com.amazonaws.services.ec2.model.Instance)26 HashSet (java.util.HashSet)26 Reservation (com.amazonaws.services.ec2.model.Reservation)24 Collections (java.util.Collections)23 DescribeInstancesResult (com.amazonaws.services.ec2.model.DescribeInstancesResult)21 ObjectListing (com.amazonaws.services.s3.model.ObjectListing)21 DescribeSubnetsRequest (com.amazonaws.services.ec2.model.DescribeSubnetsRequest)20 Entry (java.util.Map.Entry)20 Tag (com.amazonaws.services.ec2.model.Tag)18