use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class AzureUtils method deleteObjectsInPath.
/**
 * Deletes blobs from Azure Storage that live under the given bucket/prefix and are accepted by the filter.
 * <p>
 * Candidate blobs are listed through an {@link AzureCloudBlobIterable} created for the single
 * {@code azure} URI built from {@code bucket} and {@code prefix}. Each accepted blob is removed with its
 * own {@code deleteBucketKeys} call, using the container name and blob name reported by the blob itself.
 *
 * @param storage                       Azure Storage client handed to {@code deleteBucketKeys}
 * @param config                        supplies the maximum listing length used when creating the blob iterable
 * @param accountConfig                 supplies the maximum number of tries passed to {@code deleteBucketKeys}
 * @param azureCloudBlobIterableFactory factory used to create the iterable over the blobs under the prefix
 * @param bucket                        Azure Storage bucket
 * @param prefix                        the file prefix
 * @param filter                        returns true if the listed blob should be deleted and false otherwise
 * @throws Exception any exception raised while listing or deleting; nothing is caught here
 */
public static void deleteObjectsInPath(AzureStorage storage, AzureInputDataConfig config, AzureAccountConfig accountConfig, AzureCloudBlobIterableFactory azureCloudBlobIterableFactory, String bucket, String prefix, Predicate<CloudBlobHolder> filter) throws Exception {
  final AzureCloudBlobIterable blobsUnderPrefix = azureCloudBlobIterableFactory.create(
      ImmutableList.of(new CloudObjectLocation(bucket, prefix).toUri("azure")),
      config.getMaxListingLength()
  );
  for (Iterator<CloudBlobHolder> blobIterator = blobsUnderPrefix.iterator(); blobIterator.hasNext(); ) {
    final CloudBlobHolder candidate = blobIterator.next();
    if (!filter.apply(candidate)) {
      // Rejected by the caller-supplied filter: leave this blob in place.
      continue;
    }
    deleteBucketKeys(storage, accountConfig.getMaxTries(), candidate.getContainerName(), candidate.getName());
  }
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class S3Utils method deleteObjectsInPath.
/**
 * Deletes objects from S3 that live under the given bucket/prefix and are accepted by the filter.
 * <p>
 * Accepted keys are collected into a batch. Whenever the batch size reaches
 * {@code config.getMaxListingLength()} it is deleted with one {@code deleteBucketKeys} call and then
 * emptied; whatever is left once the listing is exhausted is deleted with a final call.
 *
 * @param s3Client s3 client
 * @param config   supplies the maximum listing length, which is also used as the delete batch size
 * @param bucket   s3 bucket
 * @param prefix   the file prefix
 * @param filter   returns true if the listed object should be deleted and false otherwise
 * @throws Exception any exception raised while listing or deleting; nothing is caught here
 */
public static void deleteObjectsInPath(ServerSideEncryptingAmazonS3 s3Client, S3InputDataConfig config, String bucket, String prefix, Predicate<S3ObjectSummary> filter) throws Exception {
  final List<DeleteObjectsRequest.KeyVersion> pendingKeys = new ArrayList<>(config.getMaxListingLength());
  final ObjectSummaryIterator summaries = new ObjectSummaryIterator(
      s3Client,
      ImmutableList.of(new CloudObjectLocation(bucket, prefix).toUri("s3")),
      config.getMaxListingLength()
  );

  while (summaries.hasNext()) {
    final S3ObjectSummary candidate = summaries.next();
    if (!filter.apply(candidate)) {
      continue;
    }

    pendingKeys.add(new DeleteObjectsRequest.KeyVersion(candidate.getKey()));

    // Flush as soon as the batch is full so the pending list never grows past the listing length.
    if (pendingKeys.size() == config.getMaxListingLength()) {
      deleteBucketKeys(s3Client, bucket, pendingKeys);
      log.info("Deleted %d files", pendingKeys.size());
      pendingKeys.clear();
    }
  }

  // Delete the final, partially filled batch (if any).
  if (!pendingKeys.isEmpty()) {
    deleteBucketKeys(s3Client, bucket, pendingKeys);
    log.info("Deleted %d files", pendingKeys.size());
  }
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class S3DataSegmentPuller method getVersion.
/**
 * Returns the "version" of the object behind the URI, i.e. its last-modified timestamp.
 *
 * @param uri the URI whose last-modified timestamp should be looked up
 * @return the last-modified time in milliseconds, rendered as a decimal string
 * @throws IOException if the S3 client fails with an exception that
 *                     {@code AWSClientUtil.isClientExceptionRecoverable} reports as recoverable;
 *                     non-recoverable client failures are rethrown as an unchecked {@code RE}
 */
@Override
public String getVersion(URI uri) throws IOException {
  try {
    final CloudObjectLocation location = new CloudObjectLocation(S3Utils.checkURI(uri));
    final S3ObjectSummary summary = S3Utils.getSingleObjectSummary(s3Client, location.getBucket(), location.getPath());
    final long lastModifiedMillis = summary.getLastModified().getTime();
    return StringUtils.format("%d", lastModifiedMillis);
  } catch (AmazonClientException e) {
    if (!AWSClientUtil.isClientExceptionRecoverable(e)) {
      throw new RE(e, "Error fetching last modified timestamp from URI [%s]", uri);
    }
    // IOException is always treated as recoverable by the recoverable check, so it must only be
    // thrown for failures that really are recoverable.
    throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri);
  }
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class S3DataSegmentPuller method buildFileObject.
/**
 * Wraps the S3 object addressed by {@code uri} in a read-only {@link FileObject}.
 * <p>
 * Nothing is fetched from S3 when the wrapper is built. The full object is requested on the first
 * {@code openInputStream()} call, and an object summary is requested by {@code getLastModified()} only
 * when the full object has not been fetched. All write, reader, char-content and delete operations
 * throw {@code UOE}.
 *
 * @param uri the URI of the S3 object; it is passed through {@code S3Utils.checkURI} to derive bucket and path
 * @return a lazily loading {@link FileObject} view of the S3 object
 * @throws AmazonServiceException declared by the signature; no S3 request is issued by this method itself
 */
public FileObject buildFileObject(final URI uri) throws AmazonServiceException {
  final CloudObjectLocation location = new CloudObjectLocation(S3Utils.checkURI(uri));
  final String uriPath = uri.getPath();

  return new FileObject() {
    // Both are populated lazily, on first use.
    S3Object fetchedObject = null;
    S3ObjectSummary fetchedSummary = null;

    @Override
    public URI toUri() {
      return uri;
    }

    /**
     * Returns the file name portion of the URI path, including its extension when there is one.
     */
    @Override
    public String getName() {
      final String extension = Files.getFileExtension(uriPath);
      final String baseName = Files.getNameWithoutExtension(uriPath);
      if (Strings.isNullOrEmpty(extension)) {
        return baseName + "";
      }
      return baseName + ("." + extension);
    }

    /**
     * Returns an input stream for a s3 object. The returned input stream is not thread-safe.
     * Closing the returned stream closes both the object content and the S3 object itself.
     */
    @Override
    public InputStream openInputStream() throws IOException {
      try {
        // lazily promote to full GET
        if (fetchedObject == null) {
          fetchedObject = s3Client.getObject(location.getBucket(), location.getPath());
        }
        // NOTE(review): the fetched object stays cached after the returned stream is closed, so a
        // second call presumably hands back the same, already closed content stream -- confirm that
        // callers open at most one stream per FileObject.
        final InputStream content = fetchedObject.getObjectContent();
        final Closer resources = Closer.create();
        resources.register(content);
        resources.register(fetchedObject);

        return new FilterInputStream(content) {
          @Override
          public void close() throws IOException {
            resources.close();
          }
        };
      } catch (AmazonServiceException e) {
        throw new IOE(e, "Could not load S3 URI [%s]", uri);
      }
    }

    @Override
    public OutputStream openOutputStream() {
      throw new UOE("Cannot stream S3 output");
    }

    @Override
    public Reader openReader(boolean ignoreEncodingErrors) {
      throw new UOE("Cannot open reader");
    }

    @Override
    public CharSequence getCharContent(boolean ignoreEncodingErrors) {
      throw new UOE("Cannot open character sequence");
    }

    @Override
    public Writer openWriter() {
      throw new UOE("Cannot open writer");
    }

    /**
     * Returns the last-modified time in milliseconds, taken from the already fetched object when
     * available and from a (cached) object summary otherwise.
     */
    @Override
    public long getLastModified() {
      if (fetchedObject == null) {
        if (fetchedSummary == null) {
          fetchedSummary = S3Utils.getSingleObjectSummary(s3Client, location.getBucket(), location.getPath());
        }
        return fetchedSummary.getLastModified().getTime();
      }
      return fetchedObject.getObjectMetadata().getLastModified().getTime();
    }

    @Override
    public boolean delete() {
      throw new UOE("Cannot delete S3 items anonymously. jetS3t doesn't support authenticated deletes easily.");
    }
  };
}
use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.
the class GoogleTimestampVersionedDataFinder method getLatestVersion.
/**
 * Finds the most recently updated object under {@code descriptorBase}.
 * <p>
 * Objects are listed with the path of {@code descriptorBase} as prefix. Directory placeholders are
 * skipped. When a pattern is given, it must fully match the object name with the base path (and a
 * leading slash, if any) removed. Among the remaining objects the one with the greatest update time
 * wins; on a tie the object listed later wins.
 *
 * @param descriptorBase URI whose bucket and path select the objects to examine
 * @param pattern        optional pattern the relative object name must match; {@code null} accepts every object
 * @return the {@code gs} URI of the newest matching object, or {@code null} when the listing is empty
 *         or nothing matches
 * @throws RuntimeException wrapping any {@link IOException} raised by the listing request
 */
@Override
public URI getLatestVersion(URI descriptorBase, @Nullable Pattern pattern) {
  try {
    long mostRecent = Long.MIN_VALUE;
    URI latest = null;
    final CloudObjectLocation baseLocation = new CloudObjectLocation(descriptorBase);
    // NOTE(review): only a single listing request is issued, so at most MAX_LISTING_KEYS objects are
    // examined; later result pages are not fetched.
    final Objects objects = storage.list(baseLocation.getBucket()).setPrefix(baseLocation.getPath()).setMaxResults(MAX_LISTING_KEYS).execute();
    if (objects.getItems() == null) {
      // The listing response carries no items when nothing lives under the prefix; treat that as
      // "no version found" instead of failing with a NullPointerException in the loop below.
      return null;
    }
    final int basePathLength = baseLocation.getPath().length();
    for (StorageObject storageObject : objects.getItems()) {
      if (GoogleUtils.isDirectoryPlaceholder(storageObject)) {
        continue;
      }
      final CloudObjectLocation objectLocation = new CloudObjectLocation(storageObject.getBucket(), storageObject.getName());
      // remove path prefix from file name
      final String keyString = StringUtils.maybeRemoveLeadingSlash(storageObject.getName().substring(basePathLength));
      if (pattern != null && !pattern.matcher(keyString).matches()) {
        continue;
      }
      final long latestModified = storageObject.getUpdated().getValue();
      // ">=" so that, among objects with equal update times, the one listed last is returned.
      if (latestModified >= mostRecent) {
        mostRecent = latestModified;
        latest = objectLocation.toUri(GoogleStorageDruidModule.SCHEME_GS);
      }
    }
    return latest;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Aggregations