Search in sources :

Example 6 with Bucket

use of Bucket in project hadoop by apache.

the class S3AFileSystem method copyFile.

   * Copy a single object in the bucket via a COPY operation.
   * @param srcKey source object path
   * @param dstKey destination object path
   * @param size object size
   * @throws AmazonClientException on failures inside the AWS SDK
   * @throws InterruptedIOException the operation was interrupted
   * @throws IOException Other IO problems
private void copyFile(String srcKey, String dstKey, long size) throws IOException, InterruptedIOException, AmazonClientException {
    LOG.debug("copyFile {} -> {} ", srcKey, dstKey);
    try {
        ObjectMetadata srcom = getObjectMetadata(srcKey);
        ObjectMetadata dstom = cloneObjectMetadata(srcom);
        CopyObjectRequest copyObjectRequest = new CopyObjectRequest(bucket, srcKey, bucket, dstKey);
        ProgressListener progressListener = new ProgressListener() {

            public void progressChanged(ProgressEvent progressEvent) {
                switch(progressEvent.getEventType()) {
                    case TRANSFER_PART_COMPLETED_EVENT:
        Copy copy = transfers.copy(copyObjectRequest);
        try {
            instrumentation.filesCopied(1, size);
        } catch (InterruptedException e) {
            throw new InterruptedIOException("Interrupted copying " + srcKey + " to " + dstKey + ", cancelling");
    } catch (AmazonClientException e) {
        throw translateException("copyFile(" + srcKey + ", " + dstKey + ")", srcKey, e);
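Also used : InterruptedIOException(java.io.InterruptedIOException) CopyObjectRequest(com.amazonaws.services.s3.model.CopyObjectRequest) ProgressListener(com.amazonaws.event.ProgressListener) Copy(com.amazonaws.services.s3.transfer.Copy) AmazonClientException(com.amazonaws.AmazonClientException) ProgressEvent(com.amazonaws.event.ProgressEvent) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata)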

Example 7 with Bucket

use of Bucket in project hadoop by apache.

the class S3AFileSystem method newPutObjectRequest.

   * Create a {@link PutObjectRequest} request.
   * The metadata is assumed to have been configured with the size of the
   * operation.
   * @param key key of object
   * @param metadata metadata header
   * @param inputStream source data.
   * @return the request
private PutObjectRequest newPutObjectRequest(String key, ObjectMetadata metadata, InputStream inputStream) {
    PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key, inputStream, metadata);
    return putObjectRequest;
Also used : PutObjectRequest(com.amazonaws.services.s3.model.PutObjectRequest)

Example 8 with Bucket

use of Bucket in project hadoop by apache.

the class S3AFileSystem method initialize.

/** Called after a new FileSystem instance is constructed.
   * @param name a uri whose authority section names the host, port, etc.
   *   for this FileSystem
   * @param originalConf the configuration to use for the FS. The
   * bucket-specific options are patched over the base ones before any use is
   * made of the config.
public void initialize(URI name, Configuration originalConf) throws IOException {
    uri = S3xLoginHelper.buildFSURI(name);
    // get the host; this is guaranteed to be non-null, non-empty
    bucket = name.getHost();
    // clone the configuration into one with propagated bucket options
    Configuration conf = propagateBucketOptions(originalConf, bucket);
    super.initialize(name, conf);
    try {
        instrumentation = new S3AInstrumentation(name);
        // Username is the current user at the time the FS was instantiated.
        username = UserGroupInformation.getCurrentUser().getShortUserName();
        workingDir = new Path("/user", username).makeQualified(this.uri, this.getWorkingDirectory());
        Class<? extends S3ClientFactory> s3ClientFactoryClass = conf.getClass(S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL, S3ClientFactory.class);
        s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf).createS3Client(name, uri);
        maxKeys = intOption(conf, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1);
        listing = new Listing(this);
        partSize = getMultipartSizeProperty(conf, MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
        multiPartThreshold = getMultipartSizeProperty(conf, MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);
        //check but do not store the block size
        longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
        enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
        readAhead = longBytesOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
        storageStatistics = (S3AStorageStatistics) GlobalStorageStatistics.INSTANCE.put(S3AStorageStatistics.NAME, new GlobalStorageStatistics.StorageStatisticsProvider() {

            public StorageStatistics provide() {
                return new S3AStorageStatistics();
        int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
        if (maxThreads < 2) {
            LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
            maxThreads = 2;
        int totalTasks = intOption(conf, MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
        long keepAliveTime = longOption(conf, KEEPALIVE_TIME, DEFAULT_KEEPALIVE_TIME, 0);
        boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(maxThreads, maxThreads + totalTasks, keepAliveTime, TimeUnit.SECONDS, "s3a-transfer-shared");
        unboundedThreadPool = new ThreadPoolExecutor(maxThreads, Integer.MAX_VALUE, keepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), BlockingThreadPoolExecutorService.newDaemonThreadFactory("s3a-transfer-unbounded"));
        serverSideEncryptionAlgorithm = S3AEncryptionMethods.getMethod(conf.getTrimmed(SERVER_SIDE_ENCRYPTION_ALGORITHM));
        if (S3AEncryptionMethods.SSE_C.equals(serverSideEncryptionAlgorithm) && StringUtils.isBlank(getServerSideEncryptionKey(getConf()))) {
            throw new IOException(Constants.SSE_C_NO_KEY_ERROR);
        if (S3AEncryptionMethods.SSE_S3.equals(serverSideEncryptionAlgorithm) && StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
            throw new IOException(Constants.SSE_S3_WITH_KEY_ERROR);
        LOG.debug("Using encryption {}", serverSideEncryptionAlgorithm);
        inputPolicy = S3AInputPolicy.getPolicy(conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
        blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD);
        if (blockUploadEnabled) {
            blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER);
            partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
            blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer);
            blockOutputActiveBlocks = intOption(conf, FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1);
            LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + " queue limit={}", blockOutputBuffer, partSize, blockOutputActiveBlocks);
        } else {
            LOG.debug("Using S3AOutputStream");
    } catch (AmazonClientException e) {
        throw translateException("initializing ", new Path(name), e);
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TransferManagerConfiguration(com.amazonaws.services.s3.transfer.TransferManagerConfiguration) GlobalStorageStatistics(org.apache.hadoop.fs.GlobalStorageStatistics) StorageStatistics(org.apache.hadoop.fs.StorageStatistics) AmazonClientException(com.amazonaws.AmazonClientException) PathIOException(org.apache.hadoop.fs.PathIOException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) GlobalStorageStatistics(org.apache.hadoop.fs.GlobalStorageStatistics)

Example 9 with Bucket

use of Bucket in project hadoop by apache.

the class S3AUtils method translateException.

   * Translate an exception raised in an operation into an IOException.
   * The specific type of IOException depends on the class of
   * {@link AmazonClientException} passed in, and any status codes included
   * in the operation. That is: HTTP error codes are examined and can be
   * used to build a more specific response.
   * @param operation operation
   * @param path path operated on (may be null)
   * @param exception amazon exception raised
   * @return an IOE which wraps the caught exception.
public static IOException translateException(String operation, String path, AmazonClientException exception) {
    String message = String.format("%s%s: %s", operation, path != null ? (" on " + path) : "", exception);
    if (!(exception instanceof AmazonServiceException)) {
        if (containsInterruptedException(exception)) {
            return (IOException) new InterruptedIOException(message).initCause(exception);
        return new AWSClientIOException(message, exception);
    } else {
        IOException ioe;
        AmazonServiceException ase = (AmazonServiceException) exception;
        // this exception is non-null if the service exception is an s3 one
        AmazonS3Exception s3Exception = ase instanceof AmazonS3Exception ? (AmazonS3Exception) ase : null;
        int status = ase.getStatusCode();
        switch(status) {
            case 301:
                if (s3Exception != null) {
                    if (s3Exception.getAdditionalDetails() != null && s3Exception.getAdditionalDetails().containsKey(ENDPOINT_KEY)) {
                        message = String.format("Received permanent redirect response to " + "endpoint %s.  This likely indicates that the S3 endpoint " + "configured in %s does not match the AWS region containing " + "the bucket.", s3Exception.getAdditionalDetails().get(ENDPOINT_KEY), ENDPOINT);
                    ioe = new AWSS3IOException(message, s3Exception);
                } else {
                    ioe = new AWSServiceIOException(message, ase);
            // permissions
            case 401:
            case 403:
                ioe = new AccessDeniedException(path, null, message);
            // the object isn't there
            case 404:
            case 410:
                ioe = new FileNotFoundException(message);
            // out of range. This may happen if an object is overwritten with
            // a shorter one while it is being read.
            case 416:
                ioe = new EOFException(message);
                // no specific exit code. Choose an IOE subclass based on the class
                // of the caught exception
                ioe = s3Exception != null ? new AWSS3IOException(message, s3Exception) : new AWSServiceIOException(message, ase);
        return ioe;
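Also used : InterruptedIOException(java.io.InterruptedIOException) AccessDeniedException(java.nio.file.AccessDeniedException) AmazonServiceException(com.amazonaws.AmazonServiceException) FileNotFoundException(java.io.FileNotFoundException) EOFException(java.io.EOFException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) AmazonS3Exception(com.amazonaws.services.s3.model.AmazonS3Exception)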

Example 10 with Bucket

use of Bucket in project hadoop by apache.

the class S3AFileSystem method innerRename.

   * The inner rename operation. See {@link #rename(Path, Path)} for
   * the description of the operation.
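   * This operation throws an exception on any failure which needs to be
   * reported and downgraded to a failure. That is: if a rename cannot go
   * ahead, the reason is raised as a {@link RenameFailedException} rather
   * than being returned as a bare {@code false}.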
   * @param src path to be renamed
   * @param dst new path after rename
   * @throws RenameFailedException if some criteria for a state changing
   * rename was not met. This means work didn't happen; it's not something
   * which is reported upstream to the FileSystem APIs, for which the semantics
   * of "false" are pretty vague.
   * @throws FileNotFoundException there's no source file.
   * @throws IOException on IO failure.
   * @throws AmazonClientException on failures inside the AWS SDK
private boolean innerRename(Path src, Path dst) throws RenameFailedException, FileNotFoundException, IOException, AmazonClientException {
    LOG.debug("Rename path {} to {}", src, dst);
    String srcKey = pathToKey(src);
    String dstKey = pathToKey(dst);
    if (srcKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "source is root directory");
    if (dstKey.isEmpty()) {
        throw new RenameFailedException(src, dst, "dest is root directory");
    // get the source file status; this raises a FNFE if there is no source
    // file.
    S3AFileStatus srcStatus = getFileStatus(src);
    if (srcKey.equals(dstKey)) {
        LOG.debug("rename: src and dest refer to the same file or directory: {}", dst);
        throw new RenameFailedException(src, dst, "source and dest refer to the same file or directory").withExitCode(srcStatus.isFile());
    S3AFileStatus dstStatus = null;
    try {
        dstStatus = getFileStatus(dst);
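        // if there is no destination entry, a FileNotFoundException is raised
        // and handled below; so here something exists at the destination path,
        // and the only detail left to check is what it is and
        // whether or not it can be the destination of the rename.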
        if (srcStatus.isDirectory()) {
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst, "source is a directory and dest is a file").withExitCode(srcStatus.isFile());
            } else if (!dstStatus.isEmptyDirectory()) {
                throw new RenameFailedException(src, dst, "Destination is a non-empty directory").withExitCode(false);
        // at this point the destination is an empty directory
        } else {
            // source is a file; the destination must be a directory,
            // empty or not
            if (dstStatus.isFile()) {
                throw new RenameFailedException(src, dst, "Cannot rename onto an existing file").withExitCode(false);
    } catch (FileNotFoundException e) {
        LOG.debug("rename: destination path {} not found", dst);
        // Parent must exist
        Path parent = dst.getParent();
        if (!pathToKey(parent).isEmpty()) {
            try {
                S3AFileStatus dstParentStatus = getFileStatus(dst.getParent());
                if (!dstParentStatus.isDirectory()) {
                    throw new RenameFailedException(src, dst, "destination parent is not a directory");
            } catch (FileNotFoundException e2) {
                throw new RenameFailedException(src, dst, "destination has no parent ");
    // Ok! Time to start
    if (srcStatus.isFile()) {
        LOG.debug("rename: renaming file {} to {}", src, dst);
        if (dstStatus != null && dstStatus.isDirectory()) {
            String newDstKey = dstKey;
            if (!newDstKey.endsWith("/")) {
                newDstKey = newDstKey + "/";
            String filename = srcKey.substring(pathToKey(src.getParent()).length() + 1);
            newDstKey = newDstKey + filename;
            copyFile(srcKey, newDstKey, srcStatus.getLen());
        } else {
            copyFile(srcKey, dstKey, srcStatus.getLen());
        innerDelete(srcStatus, false);
    } else {
        LOG.debug("rename: renaming directory {} to {}", src, dst);
        // This is a directory to directory copy
        if (!dstKey.endsWith("/")) {
            dstKey = dstKey + "/";
        if (!srcKey.endsWith("/")) {
            srcKey = srcKey + "/";
        //Verify dest is not a child of the source directory
        if (dstKey.startsWith(srcKey)) {
            throw new RenameFailedException(srcKey, dstKey, "cannot rename a directory to a subdirectory o fitself ");
        List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<>();
        if (dstStatus != null && dstStatus.isEmptyDirectory()) {
            // delete unnecessary fake directory.
            keysToDelete.add(new DeleteObjectsRequest.KeyVersion(dstKey));
        ListObjectsRequest request = new ListObjectsRequest();
        ObjectListing objects = listObjects(request);
        while (true) {
            for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                keysToDelete.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
                String newDstKey = dstKey + summary.getKey().substring(srcKey.length());
                copyFile(summary.getKey(), newDstKey, summary.getSize());
                if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
                    removeKeys(keysToDelete, true, false);
            if (objects.isTruncated()) {
                objects = continueListObjects(objects);
            } else {
                if (!keysToDelete.isEmpty()) {
                    removeKeys(keysToDelete, false, false);
    if (src.getParent() != dst.getParent()) {
    return true;
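Also used : Path(org.apache.hadoop.fs.Path) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest)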


AmazonServiceException (com.amazonaws.AmazonServiceException)44 AmazonS3 ( ObjectListing ( S3ObjectSummary ( ObjectMetadata ( DataStoreException ( PutObjectRequest ( AmazonClientException (com.amazonaws.AmazonClientException)20 ArrayList (java.util.ArrayList)20 IOException ( AmazonS3Client ( ListObjectsRequest ( Test (org.junit.Test)15 DeleteObjectsRequest ( S3Object ( Date (java.util.Date)11 Path (org.apache.hadoop.fs.Path)11 Bucket ( CopyObjectRequest ( Copy (