use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class S3InputSourceTest method testCreateSplitsWithSplitHintSpecRespectingHint.
@Test
public void testCreateSplitsWithSplitHintSpecRespectingHint() {
EasyMock.reset(S3_CLIENT);
expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
EasyMock.replay(S3_CLIENT);
S3InputSource inputSource = new S3InputSource(SERVICE, SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, INPUT_DATA_CONFIG, null, PREFIXES, null, null);
Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), new MaxSizeSplitHintSpec(new HumanReadableBytes(CONTENT.length * 3L), null));
Assert.assertEquals(ImmutableList.of(EXPECTED_URIS.stream().map(CloudObjectLocation::new).collect(Collectors.toList())), splits.map(InputSplit::get).collect(Collectors.toList()));
EasyMock.verify(S3_CLIENT);
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class S3InputSourceTest method testWithPrefixesSplit.
@Test
public void testWithPrefixesSplit() {
EasyMock.reset(S3_CLIENT);
expectListObjects(PREFIXES.get(0), ImmutableList.of(EXPECTED_URIS.get(0)), CONTENT);
expectListObjects(PREFIXES.get(1), ImmutableList.of(EXPECTED_URIS.get(1)), CONTENT);
EasyMock.replay(S3_CLIENT);
S3InputSource inputSource = new S3InputSource(SERVICE, SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER, INPUT_DATA_CONFIG, null, PREFIXES, null, null);
Stream<InputSplit<List<CloudObjectLocation>>> splits = inputSource.createSplits(new JsonInputFormat(JSONPathSpec.DEFAULT, null, null), new MaxSizeSplitHintSpec(null, 1));
Assert.assertEquals(EXPECTED_COORDS, splits.map(InputSplit::get).collect(Collectors.toList()));
EasyMock.verify(S3_CLIENT);
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class S3TimestampVersionedDataFinder method getLatestVersion.
/**
* Gets the key with the most recently modified timestamp.
* `pattern` is evaluated against the entire key AFTER the path given in `uri`.
* The substring `pattern` is matched against will have a leading `/` removed.
* For example `s3://some_bucket/some_prefix/some_key` with a URI of `s3://some_bucket/some_prefix` will match against `some_key`.
* `s3://some_bucket/some_prefixsome_key` with a URI of `s3://some_bucket/some_prefix` will match against `some_key`
* `s3://some_bucket/some_prefix//some_key` with a URI of `s3://some_bucket/some_prefix` will match against `/some_key`
*
* @param uri The URI of in the form of `s3://some_bucket/some_key`
* @param pattern The pattern matcher to determine if a *key* is of interest, or `null` to match everything.
*
* @return A URI to the most recently modified object which matched the pattern.
*/
@Override
public URI getLatestVersion(final URI uri, @Nullable final Pattern pattern) {
try {
final CloudObjectLocation coords = new CloudObjectLocation(S3Utils.checkURI(uri));
long mostRecent = Long.MIN_VALUE;
URI latest = null;
final Iterator<S3ObjectSummary> objectSummaryIterator = S3Utils.objectSummaryIterator(s3Client, Collections.singletonList(uri), MAX_LISTING_KEYS);
while (objectSummaryIterator.hasNext()) {
final S3ObjectSummary objectSummary = objectSummaryIterator.next();
final CloudObjectLocation objectLocation = S3Utils.summaryToCloudObjectLocation(objectSummary);
// remove coords path prefix from object path
String keyString = StringUtils.maybeRemoveLeadingSlash(objectLocation.getPath().substring(coords.getPath().length()));
if (pattern != null && !pattern.matcher(keyString).matches()) {
continue;
}
final long latestModified = objectSummary.getLastModified().getTime();
if (latestModified >= mostRecent) {
mostRecent = latestModified;
latest = objectLocation.toUri(S3StorageDruidModule.SCHEME);
}
}
return latest;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class OssDataSegmentPuller method getVersion.
/**
* Returns the "version" (aka last modified timestamp) of the URI
*
* @param uri The URI to check the last timestamp
* @return The time in ms of the last modification of the URI in String format
* @throws IOException
*/
@Override
public String getVersion(URI uri) throws IOException {
try {
final CloudObjectLocation coords = new CloudObjectLocation(OssUtils.checkURI(uri));
final OSSObjectSummary objectSummary = OssUtils.getSingleObjectSummary(client, coords.getBucket(), coords.getPath());
return StringUtils.format("%d", objectSummary.getLastModified().getTime());
} catch (OSSException e) {
if (OssUtils.isServiceExceptionRecoverable(e)) {
// The recoverable logic is always true for IOException, so we want to only pass IOException if it is recoverable
throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri);
} else {
throw new RE(e, "Error fetching last modified timestamp from URI [%s]", uri);
}
}
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class OssDataSegmentPuller method buildFileObject.
private FileObject buildFileObject(final URI uri) throws OSSException {
final CloudObjectLocation coords = new CloudObjectLocation(OssUtils.checkURI(uri));
final OSSObjectSummary objectSummary = OssUtils.getSingleObjectSummary(client, coords.getBucket(), coords.getPath());
final String path = uri.getPath();
return new FileObject() {
OSSObject ossObject = null;
@Override
public URI toUri() {
return uri;
}
@Override
public String getName() {
final String ext = Files.getFileExtension(path);
return Files.getNameWithoutExtension(path) + (Strings.isNullOrEmpty(ext) ? "" : ("." + ext));
}
/**
* Returns an input stream for an OSS object. The returned input stream is not thread-safe.
*/
@Override
public InputStream openInputStream() throws IOException {
try {
if (ossObject == null) {
// lazily promote to full GET
ossObject = client.getObject(objectSummary.getBucketName(), objectSummary.getKey());
}
final InputStream in = ossObject.getObjectContent();
final Closer closer = Closer.create();
closer.register(in);
closer.register(ossObject);
return new FilterInputStream(in) {
@Override
public void close() throws IOException {
closer.close();
}
};
} catch (OSSException e) {
throw new IOE(e, "Could not load OSS URI [%s]", uri);
}
}
@Override
public OutputStream openOutputStream() {
throw new UOE("Cannot stream OSS output");
}
@Override
public Reader openReader(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open reader");
}
@Override
public CharSequence getCharContent(boolean ignoreEncodingErrors) {
throw new UOE("Cannot open character sequence");
}
@Override
public Writer openWriter() {
throw new UOE("Cannot open writer");
}
@Override
public long getLastModified() {
return objectSummary.getLastModified().getTime();
}
@Override
public boolean delete() {
throw new UOE("Cannot delete OSS items anonymously. jetS3t doesn't support authenticated deletes easily.");
}
};
}
Aggregations