Search in sources :

Example 21 with Filter

use of Filter in project trino by trinodb.

the class TrinoS3FileSystem method listPrefix.

 * List all objects whose absolute path matches the provided prefix.
private Iterator<LocatedFileStatus> listPrefix(String prefix, OptionalInt initialMaxKeys, ListingMode mode) {
    ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(getBucketName(uri)).withPrefix(prefix).withDelimiter(mode == ListingMode.RECURSIVE_FILES_ONLY ? null : PATH_SEPARATOR).withMaxKeys(initialMaxKeys.isPresent() ? initialMaxKeys.getAsInt() : null).withRequesterPays(requesterPaysEnabled);
    Iterator<ListObjectsV2Result> listings = new AbstractSequentialIterator<>(s3.listObjectsV2(request)) {

        protected ListObjectsV2Result computeNext(ListObjectsV2Result previous) {
            if (!previous.isTruncated()) {
                return null;
            // Clear any max keys after the first batch completes
            return s3.listObjectsV2(request);
    Iterator<LocatedFileStatus> results = Iterators.concat(Iterators.transform(listings, this::statusFromListing));
    if (mode.isFilesOnly()) {
        // Even recursive listing can still contain empty "directory" objects, must filter them out
        results = Iterators.filter(results, LocatedFileStatus::isFile);
    return results;
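Also used : ListObjectsV2Request(com.amazonaws.services.s3.model.ListObjectsV2Request) ListObjectsV2Result(com.amazonaws.services.s3.model.ListObjectsV2Result) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator)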

Example 22 with Filter

use of Filter in project sirius-biz by scireum.

the class ObjectStore method listObjects.

 * Iterates over all objects in a bucket.
 * <p>
 * Keep in mind that a bucket might contain a very large amount of objects. This method must be used with care and
 * a lot of thought.
 * @param bucket   the bucket to list objects for
 * @param prefix   the object name prefix used to filter
 * @param consumer the consumer to be supplied with each found object. As soon as <tt>false</tt> is returned,
 *                 the iteration stops.
public void listObjects(BucketName bucket, @Nullable String prefix, Predicate<S3ObjectSummary> consumer) {
    ObjectListing objectListing = null;
    TaskContext taskContext = CallContext.getCurrent().get(TaskContext.class);
    do {
        try (Operation operation = new Operation(() -> Strings.apply("S3: Fetching objects from %s (prefix: %s)", bucket.getName(), prefix), Duration.ofSeconds(10))) {
            if (objectListing != null) {
                objectListing = getClient().listNextBatchOfObjects(objectListing);
            } else {
                objectListing = getClient().listObjects(bucket.getName(), prefix);
        for (S3ObjectSummary obj : objectListing.getObjectSummaries()) {
            if (!consumer.test(obj) || !taskContext.isActive()) {
    } while (objectListing.isTruncated() && taskContext.isActive());
Also used : TaskContext(sirius.kernel.async.TaskContext) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) Operation(sirius.kernel.async.Operation)

Example 23 with Filter

use of Filter in project presto by prestodb.

the class PrestoS3FileSystem method listPrefix.

private Iterator<LocatedFileStatus> listPrefix(Path path, OptionalInt initialMaxKeys, ListingMode mode) {
    String key = keyFromPath(path);
    if (!key.isEmpty()) {
        key += PATH_SEPARATOR;
    ListObjectsRequest request = new ListObjectsRequest().withBucketName(getBucketName(uri)).withPrefix(key).withDelimiter(mode == ListingMode.RECURSIVE_FILES_ONLY ? null : PATH_SEPARATOR).withMaxKeys(initialMaxKeys.isPresent() ? initialMaxKeys.getAsInt() : null);
    Iterator<ObjectListing> listings = new AbstractSequentialIterator<ObjectListing>(s3.listObjects(request)) {

        protected ObjectListing computeNext(ObjectListing previous) {
            if (!previous.isTruncated()) {
                return null;
            // Clear any max keys set for the initial request before submitting subsequent requests. Values < 0
            // are not sent in the request and the default limit is used
            return s3.listNextBatchOfObjects(previous);
    Iterator<LocatedFileStatus> result = Iterators.concat(Iterators.transform(listings, this::statusFromListing));
    if (mode.isFilesOnly()) {
        // Even recursive listing can still contain empty "directory" objects, must filter them out
        result = Iterators.filter(result, LocatedFileStatus::isFile);
    return result;
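Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) AbstractSequentialIterator(com.google.common.collect.AbstractSequentialIterator)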

Example 24 with Filter

use of Filter in project druid by druid-io.

the class EC2AutoScaler method idToIpLookup.

public List<String> idToIpLookup(List<String> nodeIds) {
    final List<String> retVal = FluentIterable.from(Lists.partition(nodeIds, MAX_AWS_FILTER_VALUES)).transformAndConcat(new Function<List<String>, Iterable<Reservation>>() {

        public Iterable<Reservation> apply(List<String> input) {
            return amazonEC2Client.describeInstances(new DescribeInstancesRequest().withFilters(new Filter("instance-id", input))).getReservations();
    }).transformAndConcat(new Function<Reservation, Iterable<Instance>>() {

        public Iterable<Instance> apply(Reservation reservation) {
            return reservation.getInstances();
    }).transform(new Function<Instance, String>() {

        public String apply(Instance instance) {
            return instance.getPrivateIpAddress();
    log.debug("Performing lookup: %s --> %s", nodeIds, retVal);
    return retVal;
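Also used : Function(com.google.common.base.Function) Reservation(com.amazonaws.services.ec2.model.Reservation) FluentIterable(com.google.common.collect.FluentIterable) Filter(com.amazonaws.services.ec2.model.Filter) Instance(com.amazonaws.services.ec2.model.Instance) ArrayList(java.util.ArrayList) List(java.util.List) DescribeInstancesRequest(com.amazonaws.services.ec2.model.DescribeInstancesRequest)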

Example 25 with Filter

use of Filter in project beam by apache.

the class S3FileSystem method expandGlob.

private ExpandedGlob expandGlob(S3ResourceId glob) {
    // The S3 API can list objects, filtered by prefix, but not by wildcard.
    // Here, we find the longest prefix without wildcard "*",
    // then filter the results with a regex.
    checkArgument(glob.isWildcard(), "isWildcard");
    String keyPrefix = glob.getKeyNonWildcardPrefix();
    Pattern wildcardRegexp = Pattern.compile(wildcardToRegexp(glob.getKey()));
    LOG.debug("expanding bucket {}, prefix {}, against pattern {}", glob.getBucket(), keyPrefix, wildcardRegexp.toString());
    ImmutableList.Builder<S3ResourceId> expandedPaths = ImmutableList.builder();
    String continuationToken = null;
    do {
        ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(glob.getBucket()).withPrefix(keyPrefix).withContinuationToken(continuationToken);
        ListObjectsV2Result result;
        try {
            result = amazonS3.get().listObjectsV2(request);
        } catch (AmazonClientException e) {
            return ExpandedGlob.create(glob, new IOException(e));
        continuationToken = result.getNextContinuationToken();
        for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
            // Filter against regex.
            if (wildcardRegexp.matcher(objectSummary.getKey()).matches()) {
                S3ResourceId expandedPath = S3ResourceId.fromComponents(glob.getScheme(), objectSummary.getBucketName(), objectSummary.getKey()).withSize(objectSummary.getSize()).withLastModified(objectSummary.getLastModified());
                LOG.debug("Expanded S3 object path {}", expandedPath);
    } while (continuationToken != null);
    return ExpandedGlob.create(glob,;
Also used : Pattern(java.util.regex.Pattern) ListObjectsV2Request( ListObjectsV2Result( ImmutableList( AmazonClientException(com.amazonaws.AmazonClientException) S3ObjectSummary( IOException(


Filter ( ArrayList (java.util.ArrayList)70 List (java.util.List)52 Collectors ( IOException ( HashMap (java.util.HashMap)38 Map (java.util.Map)35 AmazonS3 ( Set (java.util.Set)31 DescribeInstancesRequest ( S3ObjectSummary ( Instance ( HashSet (java.util.HashSet)26 Reservation ( Collections (java.util.Collections)23 DescribeInstancesResult ( ObjectListing ( DescribeSubnetsRequest ( Entry (java.util.Map.Entry)20 Tag (