Use of org.apache.beam.sdk.io.fs.MatchResult.Metadata in the Apache Beam project.
In class GcsFileSystem, method toMetadata.
/**
 * Builds the {@link Metadata} describing a single GCS {@link StorageObject}.
 *
 * <p>The resource id is derived from the object's GCS path. The size and last-modified time are
 * always set: a missing size falls back to {@link BigInteger#ZERO} and a missing update time to
 * {@code new DateTime(0L)}. The MD5 hash is recorded as the checksum only when the object
 * reports one.
 *
 * @param storageObject the GCS object to describe
 * @return the metadata built for {@code storageObject}
 */
private Metadata toMetadata(StorageObject storageObject) {
  // TODO: Address https://issues.apache.org/jira/browse/BEAM-1494
  // It is incorrect to set IsReadSeekEfficient true for files with content encoding set to gzip.
  Metadata.Builder builder =
      Metadata.builder()
          .setIsReadSeekEfficient(true)
          .setResourceId(GcsResourceId.fromGcsPath(GcsPath.fromObject(storageObject)));

  // Size and modification time are unconditional; absent values use the defaults above.
  builder.setSizeBytes(firstNonNull(storageObject.getSize(), BigInteger.ZERO).longValue());
  builder.setLastModifiedMillis(
      firstNonNull(storageObject.getUpdated(), new DateTime(0L)).getValue());

  // The checksum is optional, so it is only set when the object carries an MD5 hash.
  if (storageObject.getMd5Hash() != null) {
    builder.setChecksum(storageObject.getMd5Hash());
  }

  return builder.build();
}
Use of org.apache.beam.sdk.io.fs.MatchResult.Metadata in the Apache Beam project.
In class GcsFileSystem, method expand.
/**
 * Expands a pattern into {@link MatchResult}.
 *
 * <p>Lists the objects of the pattern's bucket under the pattern's non-wildcard prefix, one page
 * at a time, and keeps every object whose full name matches the regular expression derived from
 * the pattern. Names ending in {@code "/"} are treated as directories and skipped.
 *
 * <p>Paging continues until the listing returns no next-page token. A page without items does not
 * end the listing on its own: only the absence of a next-page token does, so matches on later
 * pages are not lost when an intermediate page happens to be empty.
 *
 * @param gcsPattern the GCS path pattern to expand
 * @return a {@link MatchResult} with {@code Status.OK} and the metadata of every matched object
 *     (possibly empty)
 * @throws IOException declared by this method; presumably raised when listing objects fails
 * @throws IllegalArgumentException if {@code gcsPattern} does not contain globs. NOTE(review): no
 *     such check is visible in this method itself; confirm whether a helper or the caller
 *     enforces it.
 */
@VisibleForTesting
MatchResult expand(GcsPath gcsPattern) throws IOException {
  String prefix = GcsUtil.getNonWildcardPrefix(gcsPattern.getObject());
  Pattern p = Pattern.compile(wildcardToRegexp(gcsPattern.getObject()));
  // Pass the Pattern itself: the logger formats it only when debug logging is enabled.
  LOG.debug(
      "matching files in bucket {}, prefix {} against pattern {}",
      gcsPattern.getBucket(),
      prefix,
      p);

  String pageToken = null;
  List<Metadata> results = new ArrayList<>();
  do {
    Objects objects = options.getGcsUtil().listObjects(gcsPattern.getBucket(), prefix, pageToken);
    // A page may carry no items; skip filtering but still follow the next-page token below.
    if (objects.getItems() != null) {
      // Filter objects based on the regex.
      for (StorageObject o : objects.getItems()) {
        String name = o.getName();
        // Skip directories, which end with a slash.
        if (p.matcher(name).matches() && !name.endsWith("/")) {
          LOG.debug("Matched object: {}", name);
          results.add(toMetadata(o));
        }
      }
    }
    pageToken = objects.getNextPageToken();
  } while (pageToken != null);
  return MatchResult.create(Status.OK, results);
}
Aggregations