Use of software.amazon.awssdk.services.s3.model.ListObjectsV2Request in project Singularity by HubSpot.
Class S3LogResource, method getS3LogsWithExecutorService.
// Fetching logs
/**
 * Lists S3 objects for every (service, prefix) pair on the given executor and then, again on the
 * executor, turns each listed object into log metadata (with pre-signed URLs unless the search is
 * list-only).
 *
 * <p>While listing, a continuation token is recorded in {@code continuationTokens} for each
 * prefix that is skipped or listed in paginated mode, keyed by
 * {@code CONTINUATION_TOKEN_KEY_FORMAT} applied to group, bucket and prefix.
 *
 * @param s3Configuration supplies the two wait timeouts (in seconds) and the link expiry (in millis)
 * @param executorService executor that runs the listing tasks and the per-object tasks
 * @param servicesToPrefixes prefixes to list, grouped by the S3 service that owns them
 * @param totalPrefixCount initial capacity for the list of listing futures
 * @param search search request providing incoming continuation tokens, the page size and the
 *     exclude-metadata / list-only flags
 * @param continuationTokens output map that receives the recorded token per prefix key
 * @param paginated when {@code true}, continuation tokens and the per-page target are honored;
 *     otherwise a single plain listing call is made per prefix
 * @return one metadata entry per listed object
 * @throws InterruptedException if the calling thread is interrupted while waiting on the futures
 * @throws ExecutionException if any submitted task fails
 * @throws TimeoutException if a wait exceeds the configured number of seconds
 */
private List<SingularityS3LogMetadata> getS3LogsWithExecutorService(S3Configuration s3Configuration, ListeningExecutorService executorService, Map<SingularityS3Service, Set<String>> servicesToPrefixes, int totalPrefixCount, final SingularityS3SearchRequest search, final ConcurrentHashMap<String, ContinuationToken> continuationTokens, final boolean paginated) throws InterruptedException, ExecutionException, TimeoutException {
  // Shared across all listing tasks so that, together, they stop once the page target is reached.
  final AtomicInteger resultCount = new AtomicInteger();
  List<ListenableFuture<List<S3ObjectSummaryHolder>>> listingFutures = Lists.newArrayListWithCapacity(totalPrefixCount);

  for (final Map.Entry<SingularityS3Service, Set<String>> serviceEntry : servicesToPrefixes.entrySet()) {
    final SingularityS3Service service = serviceEntry.getKey();
    final String s3Bucket = service.getBucket();
    final String group = service.getGroup();
    final AmazonS3 s3Client = service.getS3Client();

    for (final String s3Prefix : serviceEntry.getValue()) {
      final String key = String.format(CONTINUATION_TOKEN_KEY_FORMAT, group, s3Bucket, s3Prefix);

      // A prefix whose incoming token is flagged as the last page is not listed again;
      // its token is carried over unchanged.
      boolean alreadyExhausted = search.getContinuationTokens().containsKey(key) && search.getContinuationTokens().get(key).isLastPage();
      if (alreadyExhausted) {
        LOG.trace("No further content for prefix {} in bucket {}, skipping", s3Prefix, s3Bucket);
        continuationTokens.putIfAbsent(key, search.getContinuationTokens().get(key));
        continue;
      }

      listingFutures.add(executorService.submit(new Callable<List<S3ObjectSummaryHolder>>() {
        @Override
        public List<S3ObjectSummaryHolder> call() throws Exception {
          ListObjectsV2Request request = new ListObjectsV2Request().withBucketName(s3Bucket).withPrefix(s3Prefix);

          if (!paginated) {
            // Unpaginated: one plain listing call, everything it returns is kept.
            return toHolders(s3Client.listObjectsV2(request));
          }

          // Resume from the caller-supplied token when it carries a non-empty value.
          Optional<ContinuationToken> previousToken = Optional.absent();
          if (search.getContinuationTokens().containsKey(key) && !Strings.isNullOrEmpty(search.getContinuationTokens().get(key).getValue())) {
            request.setContinuationToken(search.getContinuationTokens().get(key).getValue());
            previousToken = Optional.of(search.getContinuationTokens().get(key));
          }

          int targetResultCount = search.getMaxPerPage().or(DEFAULT_TARGET_MAX_RESULTS);
          request.setMaxKeys(targetResultCount);

          if (resultCount.get() >= targetResultCount) {
            // Target already met by other prefixes: keep this prefix's position for a later page.
            continuationTokens.putIfAbsent(key, previousToken.or(new ContinuationToken(null, false)));
            return Collections.emptyList();
          }

          ListObjectsV2Result result = s3Client.listObjectsV2(request);
          if (result.getObjectSummaries().isEmpty()) {
            // Nothing returned: record this prefix as being on its last page.
            continuationTokens.putIfAbsent(key, new ContinuationToken(result.getNextContinuationToken(), true));
            return Collections.emptyList();
          }

          boolean addToList = incrementIfLessThan(resultCount, result.getObjectSummaries().size(), targetResultCount);
          if (!addToList) {
            // The shared counter was not incremented: drop this batch and leave the token where it was.
            continuationTokens.putIfAbsent(key, previousToken.or(new ContinuationToken(null, false)));
            return Collections.emptyList();
          }

          continuationTokens.putIfAbsent(key, new ContinuationToken(result.getNextContinuationToken(), !result.isTruncated()));
          return toHolders(result);
        }

        // Pairs every summary of a listing with the group of the service it was listed from.
        private List<S3ObjectSummaryHolder> toHolders(ListObjectsV2Result listing) {
          List<S3ObjectSummaryHolder> holders = new ArrayList<>();
          for (S3ObjectSummary summary : listing.getObjectSummaries()) {
            holders.add(new S3ObjectSummaryHolder(group, summary));
          }
          return holders;
        }
      }));
    }
  }

  final long start = System.currentTimeMillis();
  List<List<S3ObjectSummaryHolder>> listings = Futures.allAsList(listingFutures).get(s3Configuration.getWaitForS3ListSeconds(), TimeUnit.SECONDS);

  // Flatten the per-prefix listings, keeping their order.
  List<S3ObjectSummaryHolder> objects = Lists.newArrayListWithExpectedSize(listings.size() * 2);
  for (List<S3ObjectSummaryHolder> listing : listings) {
    objects.addAll(listing);
  }
  LOG.trace("Got {} objects from S3 after {}", objects.size(), JavaUtils.duration(start));

  List<ListenableFuture<SingularityS3LogMetadata>> logFutures = Lists.newArrayListWithCapacity(objects.size());
  // Every pre-signed URL generated below shares this expiry instant.
  final Date expireAt = new Date(System.currentTimeMillis() + s3Configuration.getExpireS3LinksAfterMillis());

  for (final S3ObjectSummaryHolder holder : objects) {
    final S3ObjectSummary s3Object = holder.getObjectSummary();
    final AmazonS3 objectClient = s3Services.getServiceByGroupAndBucketOrDefault(holder.getGroup(), s3Object.getBucketName()).getS3Client();

    logFutures.add(executorService.submit(new Callable<SingularityS3LogMetadata>() {
      @Override
      public SingularityS3LogMetadata call() throws Exception {
        Optional<Long> maybeStartTime = Optional.absent();
        Optional<Long> maybeEndTime = Optional.absent();

        // Start/end times come from the object's user metadata and need an extra S3 call,
        // which the search can opt out of.
        if (!search.isExcludeMetadata()) {
          GetObjectMetadataRequest metadataRequest = new GetObjectMetadataRequest(s3Object.getBucketName(), s3Object.getKey());
          Map<String, String> userMetadata = objectClient.getObjectMetadata(metadataRequest).getUserMetadata();
          maybeStartTime = getMetadataAsLong(userMetadata, SingularityS3Log.LOG_START_S3_ATTR);
          maybeEndTime = getMetadataAsLong(userMetadata, SingularityS3Log.LOG_END_S3_ATTR);
        }

        if (search.isListOnly()) {
          // List-only searches get metadata without any URLs.
          return new SingularityS3LogMetadata(s3Object.getKey(), s3Object.getLastModified().getTime(), s3Object.getSize(), maybeStartTime, maybeEndTime);
        }

        // Plain GET link.
        GeneratePresignedUrlRequest getUrlRequest = new GeneratePresignedUrlRequest(s3Object.getBucketName(), s3Object.getKey())
            .withMethod(HttpMethod.GET)
            .withExpiration(expireAt);
        String getUrl = objectClient.generatePresignedUrl(getUrlRequest).toString();

        // Download link: same object, with content-disposition/encoding response header overrides.
        ResponseHeaderOverrides downloadHeaders = new ResponseHeaderOverrides();
        downloadHeaders.setContentDisposition(CONTENT_DISPOSITION_DOWNLOAD_HEADER);
        downloadHeaders.setContentEncoding(CONTENT_ENCODING_DOWNLOAD_HEADER);
        GeneratePresignedUrlRequest downloadUrlRequest = new GeneratePresignedUrlRequest(s3Object.getBucketName(), s3Object.getKey())
            .withMethod(HttpMethod.GET)
            .withExpiration(expireAt)
            .withResponseHeaders(downloadHeaders);
        String downloadUrl = objectClient.generatePresignedUrl(downloadUrlRequest).toString();

        return new SingularityS3Log(getUrl, s3Object.getKey(), s3Object.getLastModified().getTime(), s3Object.getSize(), downloadUrl, maybeStartTime, maybeEndTime);
      }
    }));
  }

  return Futures.allAsList(logFutures).get(s3Configuration.getWaitForS3LinksSeconds(), TimeUnit.SECONDS);
}
Use of software.amazon.awssdk.services.s3.model.ListObjectsV2Request in project beam by apache.
Class S3FileSystem, method expandGlob.
/**
 * Expands a wildcard S3 path into the objects whose keys match it.
 *
 * <p>S3 listings can be filtered by prefix but not by wildcard, so this lists everything under
 * the longest wildcard-free prefix of the glob, page by page, and keeps only the keys that match
 * the glob translated to a regular expression.
 *
 * @param glob the wildcard path to expand; must satisfy {@code isWildcard()}
 * @return the expansion holding the matched paths, or an expansion holding an {@link IOException}
 *     if a listing call throws {@code AmazonClientException}
 */
private ExpandedGlob expandGlob(S3ResourceId glob) {
  checkArgument(glob.isWildcard(), "isWildcard");

  final String keyPrefix = glob.getKeyNonWildcardPrefix();
  final Pattern wildcardRegexp = Pattern.compile(wildcardToRegexp(glob.getKey()));
  LOG.debug("expanding bucket {}, prefix {}, against pattern {}", glob.getBucket(), keyPrefix, wildcardRegexp.toString());

  ImmutableList.Builder<S3ResourceId> matchedPaths = ImmutableList.builder();
  String nextToken = null;
  boolean morePages = true;
  while (morePages) {
    // The first request carries a null token; later ones resume where the previous page ended.
    ListObjectsV2Request pageRequest = new ListObjectsV2Request()
        .withBucketName(glob.getBucket())
        .withPrefix(keyPrefix)
        .withContinuationToken(nextToken);

    ListObjectsV2Result page;
    try {
      page = amazonS3.get().listObjectsV2(pageRequest);
    } catch (AmazonClientException e) {
      // A failed listing is reported through the result instead of being thrown.
      return ExpandedGlob.create(glob, new IOException(e));
    }

    for (S3ObjectSummary summary : page.getObjectSummaries()) {
      // The prefix listing is a superset of the glob; drop keys the full pattern rejects.
      if (!wildcardRegexp.matcher(summary.getKey()).matches()) {
        continue;
      }
      S3ResourceId expandedPath = S3ResourceId.fromComponents(glob.getScheme(), summary.getBucketName(), summary.getKey())
          .withSize(summary.getSize())
          .withLastModified(summary.getLastModified());
      LOG.debug("Expanded S3 object path {}", expandedPath);
      matchedPaths.add(expandedPath);
    }

    nextToken = page.getNextContinuationToken();
    morePages = nextToken != null;
  }

  return ExpandedGlob.create(glob, matchedPaths.build());
}
Use of software.amazon.awssdk.services.s3.model.ListObjectsV2Request in project beam by apache.
Class S3FileSystemTest, method matchGlob.
/**
 * Glob matching across two listing pages: the first page returns one matching and one
 * non-matching key plus a continuation token, the second page returns one more matching key and
 * no token. Only the two matching keys are expected in the result.
 */
@Test
public void matchGlob() throws IOException {
  S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("mys3"));
  S3ResourceId path = S3ResourceId.fromUri("mys3://testbucket/foo/bar*baz");
  final String bucket = path.getBucket();
  final String listingPrefix = path.getKeyNonWildcardPrefix();

  // --- Page one: requested without a continuation token. ---
  ListObjectsV2Request firstRequest = new ListObjectsV2Request()
      .withBucketName(bucket)
      .withPrefix(listingPrefix)
      .withContinuationToken(null);

  // Matches both the prefix and the wildcard pattern, so it must be returned.
  S3ObjectSummary firstPageHit = new S3ObjectSummary();
  firstPageHit.setBucketName(bucket);
  firstPageHit.setKey("foo/bar0baz");
  firstPageHit.setSize(100);
  firstPageHit.setLastModified(new Date(1540000000001L));

  // Matches the prefix only; the part after the wildcard differs, so it must be filtered out.
  S3ObjectSummary prefixOnlyHit = new S3ObjectSummary();
  prefixOnlyHit.setBucketName(bucket);
  prefixOnlyHit.setKey("foo/bar1qux");
  prefixOnlyHit.setSize(200);
  prefixOnlyHit.setLastModified(new Date(1540000000002L));

  ListObjectsV2Result firstResult = new ListObjectsV2Result();
  firstResult.setNextContinuationToken("token");
  firstResult.getObjectSummaries().add(firstPageHit);
  firstResult.getObjectSummaries().add(prefixOnlyHit);
  when(s3FileSystem.getAmazonS3Client().listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(firstRequest))))
      .thenReturn(firstResult);

  // --- Page two: requested with the token handed out by page one. ---
  ListObjectsV2Request secondRequest = new ListObjectsV2Request()
      .withBucketName(bucket)
      .withPrefix(listingPrefix)
      .withContinuationToken("token");

  // Matches both the prefix and the wildcard pattern, so it must be returned.
  S3ObjectSummary secondPageHit = new S3ObjectSummary();
  secondPageHit.setBucketName(bucket);
  secondPageHit.setKey("foo/bar2baz");
  secondPageHit.setSize(300);
  secondPageHit.setLastModified(new Date(1540000000003L));

  // No continuation token on this page, which ends the listing.
  ListObjectsV2Result secondResult = new ListObjectsV2Result();
  secondResult.setNextContinuationToken(null);
  secondResult.getObjectSummaries().add(secondPageHit);
  when(s3FileSystem.getAmazonS3Client().listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(secondRequest))))
      .thenReturn(secondResult);

  // Any object-metadata lookup reports an empty content encoding.
  ObjectMetadata metadata = new ObjectMetadata();
  metadata.setContentEncoding("");
  when(s3FileSystem.getAmazonS3Client().getObjectMetadata(anyObject())).thenReturn(metadata);

  MatchResult.Metadata expectedFromFirstPage = MatchResult.Metadata.builder()
      .setIsReadSeekEfficient(true)
      .setResourceId(S3ResourceId.fromComponents("mys3", firstPageHit.getBucketName(), firstPageHit.getKey()))
      .setSizeBytes(firstPageHit.getSize())
      .setLastModifiedMillis(firstPageHit.getLastModified().getTime())
      .build();
  MatchResult.Metadata expectedFromSecondPage = MatchResult.Metadata.builder()
      .setIsReadSeekEfficient(true)
      .setResourceId(S3ResourceId.fromComponents("mys3", secondPageHit.getBucketName(), secondPageHit.getKey()))
      .setSizeBytes(secondPageHit.getSize())
      .setLastModifiedMillis(secondPageHit.getLastModified().getTime())
      .build();

  assertThat(
      s3FileSystem.matchGlobPaths(ImmutableList.of(path)).get(0),
      MatchResultMatcher.create(ImmutableList.of(expectedFromFirstPage, expectedFromSecondPage)));
}
Use of software.amazon.awssdk.services.s3.model.ListObjectsV2Request in project beam by apache.
Class S3FileSystemTest, method matchGlobWithSlashesWithS3Options.
/**
 * Glob matching where the pattern contains a backslash: a key containing the same backslash must
 * match, while a key with a forward slash in that position must not. The listing is served as a
 * single page.
 */
@Test
public void matchGlobWithSlashesWithS3Options() throws IOException {
  S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options());
  S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/foo/bar\\baz*");
  final String bucket = path.getBucket();

  ListObjectsV2Request request = new ListObjectsV2Request()
      .withBucketName(bucket)
      .withPrefix(path.getKeyNonWildcardPrefix())
      .withContinuationToken(null);

  // Key with the backslash: matches the pattern, so it must be returned.
  S3ObjectSummary backslashKey = new S3ObjectSummary();
  backslashKey.setBucketName(bucket);
  backslashKey.setKey("foo/bar\\baz0");
  backslashKey.setSize(100);
  backslashKey.setLastModified(new Date(1540000000001L));

  // Key with a forward slash instead: does not match the pattern, so it must be filtered out.
  S3ObjectSummary forwardSlashKey = new S3ObjectSummary();
  forwardSlashKey.setBucketName(bucket);
  forwardSlashKey.setKey("foo/bar/baz1");
  forwardSlashKey.setSize(200);
  forwardSlashKey.setLastModified(new Date(1540000000002L));

  // The only page of the listing; no continuation token is set on it.
  ListObjectsV2Result result = new ListObjectsV2Result();
  result.getObjectSummaries().add(backslashKey);
  result.getObjectSummaries().add(forwardSlashKey);
  when(s3FileSystem.getAmazonS3Client().listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(request))))
      .thenReturn(result);

  // Any object-metadata lookup reports an empty content encoding.
  ObjectMetadata metadata = new ObjectMetadata();
  metadata.setContentEncoding("");
  when(s3FileSystem.getAmazonS3Client().getObjectMetadata(anyObject())).thenReturn(metadata);

  MatchResult.Metadata expectedMatch = MatchResult.Metadata.builder()
      .setIsReadSeekEfficient(true)
      .setResourceId(S3ResourceId.fromComponents("s3", backslashKey.getBucketName(), backslashKey.getKey()))
      .setSizeBytes(backslashKey.getSize())
      .setLastModifiedMillis(backslashKey.getLastModified().getTime())
      .build();

  assertThat(
      s3FileSystem.matchGlobPaths(ImmutableList.of(path)).get(0),
      MatchResultMatcher.create(ImmutableList.of(expectedMatch)));
}
Use of software.amazon.awssdk.services.s3.model.ListObjectsV2Request in project beam by apache.
Class S3FileSystem, method expandGlob.
/**
 * Expands a wildcard S3 path into the objects whose keys match it.
 *
 * <p>S3 listings can be filtered by prefix but not by wildcard, so this lists everything under
 * the longest wildcard-free prefix of the glob, page by page, and keeps only the keys that match
 * the glob translated to a regular expression.
 *
 * @param glob the wildcard path to expand; must satisfy {@code isWildcard()}
 * @return the expansion holding the matched paths, or an expansion holding an {@link IOException}
 *     if a listing call throws {@code SdkServiceException}
 */
private ExpandedGlob expandGlob(S3ResourceId glob) {
  checkArgument(glob.isWildcard(), "isWildcard");

  final String keyPrefix = glob.getKeyNonWildcardPrefix();
  final Pattern wildcardRegexp = Pattern.compile(wildcardToRegexp(glob.getKey()));
  LOG.debug("expanding bucket {}, prefix {}, against pattern {}", glob.getBucket(), keyPrefix, wildcardRegexp);

  ImmutableList.Builder<S3ResourceId> matchedPaths = ImmutableList.builder();
  String nextToken = null;
  boolean morePages = true;
  while (morePages) {
    // The first request carries a null token; later ones resume where the previous page ended.
    ListObjectsV2Request pageRequest = ListObjectsV2Request.builder()
        .bucket(glob.getBucket())
        .prefix(keyPrefix)
        .continuationToken(nextToken)
        .build();

    ListObjectsV2Response page;
    try {
      page = s3Client.get().listObjectsV2(pageRequest);
    } catch (SdkServiceException e) {
      // A failed listing is reported through the result instead of being thrown.
      return ExpandedGlob.create(glob, new IOException(e));
    }

    for (S3Object s3Object : page.contents()) {
      // The prefix listing is a superset of the glob; drop keys the full pattern rejects.
      if (!wildcardRegexp.matcher(s3Object.key()).matches()) {
        continue;
      }
      S3ResourceId expandedPath = S3ResourceId.fromComponents(glob.getScheme(), glob.getBucket(), s3Object.key())
          .withSize(s3Object.size())
          .withLastModified(Date.from(s3Object.lastModified()));
      LOG.debug("Expanded S3 object path {}", expandedPath);
      matchedPaths.add(expandedPath);
    }

    nextToken = page.nextContinuationToken();
    morePages = nextToken != null;
  }

  return ExpandedGlob.create(glob, matchedPaths.build());
}
Aggregations