Search in sources :

Example 21 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class AzureUtils method deleteObjectsInPath.

/**
 * Deletes every blob listed under the given bucket and prefix in Azure Storage for which the filter returns true.
 *
 * <p>The candidates are listed through an {@code AzureCloudBlobIterable} created for the single {@code azure} URI
 * built from {@code bucket} and {@code prefix}. Each accepted blob is deleted with its own call to
 * {@code deleteBucketKeys}, using the container name and blob name reported by the listing.
 *
 * @param storage                       Azure Storage client handed to each delete call
 * @param config                        supplies the max listing length used when creating the blob iterable
 * @param accountConfig                 supplies the max tries value passed to each delete call
 * @param azureCloudBlobIterableFactory factory that creates the iterable over the candidate blobs
 * @param bucket                        Azure Storage bucket
 * @param prefix                        the file prefix
 * @param filter                        function which returns true if the blob found should be deleted and false
 *                                      otherwise
 * @throws Exception declared by this method; presumably raised by the listing or by the delete calls
 */
public static void deleteObjectsInPath(AzureStorage storage, AzureInputDataConfig config, AzureAccountConfig accountConfig, AzureCloudBlobIterableFactory azureCloudBlobIterableFactory, String bucket, String prefix, Predicate<CloudBlobHolder> filter) throws Exception {
    final AzureCloudBlobIterable matchingBlobs = azureCloudBlobIterableFactory.create(
        ImmutableList.of(new CloudObjectLocation(bucket, prefix).toUri("azure")),
        config.getMaxListingLength()
    );
    for (Iterator<CloudBlobHolder> blobs = matchingBlobs.iterator(); blobs.hasNext(); ) {
        final CloudBlobHolder blob = blobs.next();
        if (!filter.apply(blob)) {
            // rejected by the caller's filter: leave this blob in place
            continue;
        }
        deleteBucketKeys(storage, accountConfig.getMaxTries(), blob.getContainerName(), blob.getName());
    }
}
Also used : CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) CloudBlobHolder(org.apache.druid.storage.azure.blob.CloudBlobHolder)

Example 22 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class S3Utils method deleteObjectsInPath.

/**
 * Deletes the objects in an S3 bucket that sit under a given prefix and are accepted by a filter.
 *
 * <p>Accepted keys are collected into a batch. Whenever the batch size reaches
 * {@code config.getMaxListingLength()} the batch is deleted with one {@code deleteBucketKeys} call and cleared;
 * any keys still pending once the listing is exhausted are deleted in a final call.
 *
 * @param s3Client s3 client
 * @param config   supplies the max listing length, used both for listing and as the delete batch size
 * @param bucket   s3 bucket
 * @param prefix   the file prefix
 * @param filter   function which returns true if the object found should be deleted and false otherwise
 * @throws Exception declared by this method; presumably raised by the listing or by the delete calls
 */
public static void deleteObjectsInPath(ServerSideEncryptingAmazonS3 s3Client, S3InputDataConfig config, String bucket, String prefix, Predicate<S3ObjectSummary> filter) throws Exception {
    final List<DeleteObjectsRequest.KeyVersion> pendingKeys = new ArrayList<>(config.getMaxListingLength());
    final ObjectSummaryIterator summaries = new ObjectSummaryIterator(
        s3Client,
        ImmutableList.of(new CloudObjectLocation(bucket, prefix).toUri("s3")),
        config.getMaxListingLength()
    );
    while (summaries.hasNext()) {
        final S3ObjectSummary summary = summaries.next();
        if (!filter.apply(summary)) {
            continue;
        }
        pendingKeys.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
        if (pendingKeys.size() != config.getMaxListingLength()) {
            // batch not full yet, keep collecting
            continue;
        }
        deleteBucketKeys(s3Client, bucket, pendingKeys);
        log.info("Deleted %d files", pendingKeys.size());
        pendingKeys.clear();
    }
    // flush whatever is left over from the last, partially filled batch
    if (!pendingKeys.isEmpty()) {
        deleteBucketKeys(s3Client, bucket, pendingKeys);
        log.info("Deleted %d files", pendingKeys.size());
    }
}
Also used : CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) ArrayList(java.util.ArrayList) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest)

Example 23 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class S3DataSegmentPuller method getVersion.

/**
 * Returns the "version" of the object behind the URI, which is its last modified timestamp.
 *
 * @param uri The URI whose last modified timestamp is looked up
 * @return The last modification time in ms, formatted as a decimal String
 * @throws IOException (as {@code IOE}) when the lookup fails with an {@code AmazonClientException} that
 *                     {@code AWSClientUtil.isClientExceptionRecoverable} reports as recoverable; a
 *                     non-recoverable failure is rethrown as the unchecked {@code RE} instead
 */
@Override
public String getVersion(URI uri) throws IOException {
    try {
        final CloudObjectLocation location = new CloudObjectLocation(S3Utils.checkURI(uri));
        final long lastModifiedMillis = S3Utils
            .getSingleObjectSummary(s3Client, location.getBucket(), location.getPath())
            .getLastModified()
            .getTime();
        return StringUtils.format("%d", lastModifiedMillis);
    } catch (AmazonClientException e) {
        // The recoverable logic is always true for IOException, so only a recoverable failure may surface as an
        // IOException; everything else is reported through the unchecked RE.
        if (!AWSClientUtil.isClientExceptionRecoverable(e)) {
            throw new RE(e, "Error fetching last modified timestamp from URI [%s]", uri);
        }
        throw new IOE(e, "Could not fetch last modified timestamp from URI [%s]", uri);
    }
}
Also used : RE(org.apache.druid.java.util.common.RE) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) AmazonClientException(com.amazonaws.AmazonClientException) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IOE(org.apache.druid.java.util.common.IOE)

Example 24 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class S3DataSegmentPuller method buildFileObject.

/**
 * Wraps an S3 URI in a read-only {@link FileObject}.
 *
 * <p>The returned object fetches from S3 lazily: the full object is requested the first time
 * {@code openInputStream()} is called, and {@code getLastModified()} falls back to a single object summary
 * lookup when the full object has not been fetched. Every write, reader, char-content and delete operation
 * throws {@code UOE}.
 *
 * @param uri the S3 URI to wrap; it is passed through {@code S3Utils.checkURI} before the bucket and path are
 *            extracted
 * @return a {@link FileObject} view of the S3 object
 * @throws AmazonServiceException declared by this method
 */
public FileObject buildFileObject(final URI uri) throws AmazonServiceException {
    final CloudObjectLocation location = new CloudObjectLocation(S3Utils.checkURI(uri));
    final String uriPath = uri.getPath();
    return new FileObject() {

        // Full object, populated on the first openInputStream() call.
        S3Object s3Object = null;

        // Summary of the object, populated by getLastModified() when the full object is not available.
        S3ObjectSummary objectSummary = null;

        @Override
        public URI toUri() {
            return uri;
        }

        @Override
        public String getName() {
            final String extension = Files.getFileExtension(uriPath);
            final String baseName = Files.getNameWithoutExtension(uriPath);
            if (Strings.isNullOrEmpty(extension)) {
                return baseName;
            }
            return baseName + "." + extension;
        }

        /**
         * Returns an input stream for a s3 object. The returned input stream is not thread-safe.
         */
        @Override
        public InputStream openInputStream() throws IOException {
            try {
                if (s3Object == null) {
                    // lazily promote to full GET
                    s3Object = s3Client.getObject(location.getBucket(), location.getPath());
                }
                // NOTE(review): s3Object stays cached after the returned stream closes it, so a second call
                // would hand out content from an already-closed object -- confirm callers open only once.
                final InputStream content = s3Object.getObjectContent();
                final Closer resources = Closer.create();
                resources.register(content);
                resources.register(s3Object);
                return new FilterInputStream(content) {

                    @Override
                    public void close() throws IOException {
                        // closing the stream releases both the content stream and the S3 object
                        resources.close();
                    }
                };
            } catch (AmazonServiceException e) {
                throw new IOE(e, "Could not load S3 URI [%s]", uri);
            }
        }

        @Override
        public OutputStream openOutputStream() {
            throw new UOE("Cannot stream S3 output");
        }

        @Override
        public Reader openReader(boolean ignoreEncodingErrors) {
            throw new UOE("Cannot open reader");
        }

        @Override
        public CharSequence getCharContent(boolean ignoreEncodingErrors) {
            throw new UOE("Cannot open character sequence");
        }

        @Override
        public Writer openWriter() {
            throw new UOE("Cannot open writer");
        }

        @Override
        public long getLastModified() {
            if (s3Object == null) {
                // no full object yet: use (and remember) the object summary
                if (objectSummary == null) {
                    objectSummary = S3Utils.getSingleObjectSummary(s3Client, location.getBucket(), location.getPath());
                }
                return objectSummary.getLastModified().getTime();
            }
            return s3Object.getObjectMetadata().getLastModified().getTime();
        }

        @Override
        public boolean delete() {
            throw new UOE("Cannot delete S3 items anonymously. jetS3t doesn't support authenticated deletes easily.");
        }
    };
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) FilterInputStream(java.io.FilterInputStream) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) InputStream(java.io.InputStream) AmazonServiceException(com.amazonaws.AmazonServiceException) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) UOE(org.apache.druid.java.util.common.UOE) FileObject(javax.tools.FileObject) S3Object(com.amazonaws.services.s3.model.S3Object) IOE(org.apache.druid.java.util.common.IOE)

Example 25 with CloudObjectLocation

use of org.apache.druid.data.input.impl.CloudObjectLocation in project druid by druid-io.

the class GoogleTimestampVersionedDataFinder method getLatestVersion.

/**
 * Finds the most recently updated object under a Google Storage location.
 *
 * <p>Objects are listed with the path of {@code descriptorBase} as prefix. Directory placeholders are skipped,
 * and when a pattern is given an object is only considered if the remainder of its name after the base path
 * (minus a leading slash) matches the pattern. Among the candidates the one with the greatest update timestamp
 * wins; on equal timestamps the object listed later wins.
 *
 * <p>NOTE(review): only the result of a single list call, limited to {@code MAX_LISTING_KEYS} results, is
 * examined; no further pages are requested -- confirm this is intended.
 *
 * @param descriptorBase URI providing the bucket and the path prefix to search under
 * @param pattern        optional pattern the object name relative to the base path must match; {@code null}
 *                       accepts every object
 * @return URI (scheme {@code GoogleStorageDruidModule.SCHEME_GS}) of the latest matching object, or
 *         {@code null} when the listing is empty or nothing matches
 * @throws RuntimeException wrapping any {@link IOException} raised while listing
 */
@Override
public URI getLatestVersion(URI descriptorBase, @Nullable Pattern pattern) {
    try {
        long mostRecent = Long.MIN_VALUE;
        URI latest = null;
        final CloudObjectLocation baseLocation = new CloudObjectLocation(descriptorBase);
        final Objects objects = storage.list(baseLocation.getBucket()).setPrefix(baseLocation.getPath()).setMaxResults(MAX_LISTING_KEYS).execute();
        // The list response carries no "items" field when nothing matches the prefix, in which case the
        // client model returns null rather than an empty list. Treat that as "no version found".
        if (objects.getItems() == null) {
            return null;
        }
        for (StorageObject storageObject : objects.getItems()) {
            if (GoogleUtils.isDirectoryPlaceholder(storageObject)) {
                continue;
            }
            final CloudObjectLocation objectLocation = new CloudObjectLocation(storageObject.getBucket(), storageObject.getName());
            // remove path prefix from file name
            final String keyString = StringUtils.maybeRemoveLeadingSlash(storageObject.getName().substring(baseLocation.getPath().length()));
            if (pattern != null && !pattern.matcher(keyString).matches()) {
                continue;
            }
            final long latestModified = storageObject.getUpdated().getValue();
            // ">=" so that, on equal timestamps, the object listed later is preferred
            if (latestModified >= mostRecent) {
                mostRecent = latestModified;
                latest = objectLocation.toUri(GoogleStorageDruidModule.SCHEME_GS);
            }
        }
        return latest;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : StorageObject(com.google.api.services.storage.model.StorageObject) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) Objects(com.google.api.services.storage.model.Objects) IOException(java.io.IOException) URI(java.net.URI)

Aggregations

CloudObjectLocation (org.apache.druid.data.input.impl.CloudObjectLocation)34 Test (org.junit.Test)21 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)15 InputSplit (org.apache.druid.data.input.InputSplit)13 JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat)11 OSSObjectSummary (com.aliyun.oss.model.OSSObjectSummary)6 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)6 MaxSizeSplitHintSpec (org.apache.druid.data.input.MaxSizeSplitHintSpec)6 URI (java.net.URI)5 File (java.io.File)4 FileInputStream (java.io.FileInputStream)4 FileOutputStream (java.io.FileOutputStream)4 OutputStream (java.io.OutputStream)4 Date (java.util.Date)4 GZIPOutputStream (java.util.zip.GZIPOutputStream)4 FileUtils (org.apache.druid.java.util.common.FileUtils)4 IOE (org.apache.druid.java.util.common.IOE)4 OSSException (com.aliyun.oss.OSSException)3 OSSObject (com.aliyun.oss.model.OSSObject)3 S3Object (com.amazonaws.services.s3.model.S3Object)3