Example 1 with Transfer

Use of com.amazonaws.services.s3.transfer.Transfer in project hadoop by apache.

From the class S3AFileSystem, method initialize:

/** Called after a new FileSystem instance is constructed.
   * @param name a uri whose authority section names the host, port, etc.
   *   for this FileSystem
   * @param originalConf the configuration to use for the FS. The
   * bucket-specific options are patched over the base ones before any use is
   * made of the config.
   */
public void initialize(URI name, Configuration originalConf) throws IOException {
    uri = S3xLoginHelper.buildFSURI(name);
    // get the host; this is guaranteed to be non-null, non-empty
    bucket = name.getHost();
    // clone the configuration into one with propagated bucket options
    Configuration conf = propagateBucketOptions(originalConf, bucket);
    patchSecurityCredentialProviders(conf);
    super.initialize(name, conf);
    setConf(conf);
    try {
        instrumentation = new S3AInstrumentation(name);
        // Username is the current user at the time the FS was instantiated.
        username = UserGroupInformation.getCurrentUser().getShortUserName();
        workingDir = new Path("/user", username).makeQualified(this.uri, this.getWorkingDirectory());
        Class<? extends S3ClientFactory> s3ClientFactoryClass = conf.getClass(S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL, S3ClientFactory.class);
        s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf).createS3Client(name, uri);
        maxKeys = intOption(conf, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1);
        listing = new Listing(this);
        partSize = getMultipartSizeProperty(conf, MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
        multiPartThreshold = getMultipartSizeProperty(conf, MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);
        //check but do not store the block size
        longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
        enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
        readAhead = longBytesOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
        storageStatistics = (S3AStorageStatistics) GlobalStorageStatistics.INSTANCE.put(S3AStorageStatistics.NAME, new GlobalStorageStatistics.StorageStatisticsProvider() {

            @Override
            public StorageStatistics provide() {
                return new S3AStorageStatistics();
            }
        });
        int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
        if (maxThreads < 2) {
            LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
            maxThreads = 2;
        }
        int totalTasks = intOption(conf, MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
        long keepAliveTime = longOption(conf, KEEPALIVE_TIME, DEFAULT_KEEPALIVE_TIME, 0);
        boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(maxThreads, maxThreads + totalTasks, keepAliveTime, TimeUnit.SECONDS, "s3a-transfer-shared");
        unboundedThreadPool = new ThreadPoolExecutor(maxThreads, Integer.MAX_VALUE, keepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), BlockingThreadPoolExecutorService.newDaemonThreadFactory("s3a-transfer-unbounded"));
        initTransferManager();
        initCannedAcls(conf);
        verifyBucketExists();
        initMultipartUploads(conf);
        serverSideEncryptionAlgorithm = S3AEncryptionMethods.getMethod(conf.getTrimmed(SERVER_SIDE_ENCRYPTION_ALGORITHM));
        if (S3AEncryptionMethods.SSE_C.equals(serverSideEncryptionAlgorithm) && StringUtils.isBlank(getServerSideEncryptionKey(getConf()))) {
            throw new IOException(Constants.SSE_C_NO_KEY_ERROR);
        }
        if (S3AEncryptionMethods.SSE_S3.equals(serverSideEncryptionAlgorithm) && StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
            throw new IOException(Constants.SSE_S3_WITH_KEY_ERROR);
        }
        LOG.debug("Using encryption {}", serverSideEncryptionAlgorithm);
        inputPolicy = S3AInputPolicy.getPolicy(conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
        blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD);
        if (blockUploadEnabled) {
            blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER);
            partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
            blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer);
            blockOutputActiveBlocks = intOption(conf, FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1);
            LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + " queue limit={}", blockOutputBuffer, partSize, blockOutputActiveBlocks);
        } else {
            LOG.debug("Using S3AOutputStream");
        }
    } catch (AmazonClientException e) {
        throw translateException("initializing ", new Path(name), e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), TransferManagerConfiguration (com.amazonaws.services.s3.transfer.TransferManagerConfiguration), GlobalStorageStatistics (org.apache.hadoop.fs.GlobalStorageStatistics), StorageStatistics (org.apache.hadoop.fs.StorageStatistics), AmazonClientException (com.amazonaws.AmazonClientException), PathIOException (org.apache.hadoop.fs.PathIOException), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue), ObjectListing (com.amazonaws.services.s3.model.ObjectListing), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)
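
For orientation, a minimal hedged sketch of how this initialize() path is typically reached from client code: FileSystem.get() constructs the S3AFileSystem and invokes initialize() for you. The bucket name and option values below are placeholders, and hadoop-aws plus valid AWS credentials are assumed to be available.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class S3AInitSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Real S3A keys; the values are illustrative.
        conf.set("fs.s3a.multipart.size", "67108864");   // feeds MULTIPART_SIZE above
        conf.setBoolean("fs.s3a.fast.upload", true);     // selects the block-upload branch
        // FileSystem.get() instantiates S3AFileSystem and calls initialize(uri, conf).
        FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
        // The working directory is "/user/<shortname>", as set in initialize().
        Path wd = fs.getWorkingDirectory();
        System.out.println("working dir: " + wd);
        fs.close();
    }
}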

Example 2 with Transfer

Use of com.amazonaws.services.s3.transfer.Transfer in project hadoop by apache.

From the class S3AFileSystem, method putObjectDirect:

/**
   * PUT an object directly (i.e. not via the transfer manager).
   * Byte length is calculated from the file length, or, if there is no
   * file, from the content length of the header.
   * <i>Important: this call will close any input stream in the request.</i>
   * @param putObjectRequest the request
   * @return the upload initiated
   * @throws AmazonClientException on problems
   */
public PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest) throws AmazonClientException {
    long len;
    if (putObjectRequest.getFile() != null) {
        len = putObjectRequest.getFile().length();
    } else {
        len = putObjectRequest.getMetadata().getContentLength();
    }
    incrementPutStartStatistics(len);
    try {
        PutObjectResult result = s3.putObject(putObjectRequest);
        incrementPutCompletedStatistics(true, len);
        return result;
    } catch (AmazonClientException e) {
        incrementPutCompletedStatistics(false, len);
        throw e;
    }
}
Also used: PutObjectResult (com.amazonaws.services.s3.model.PutObjectResult), AmazonClientException (com.amazonaws.AmazonClientException)
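
A short, hedged sketch exercising the no-file branch above, where the byte length comes from the request metadata. The bucket and key are placeholders, and fs stands for an already-initialized S3AFileSystem; note the javadoc's warning that the call closes the request's input stream.

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;

byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
ObjectMetadata meta = new ObjectMetadata();
meta.setContentLength(data.length);  // read by the else branch of putObjectDirect
PutObjectRequest req = new PutObjectRequest(
    "example-bucket", "dir/key", new ByteArrayInputStream(data), meta);
// fs is an initialized S3AFileSystem (placeholder); the stream is closed by the call.
PutObjectResult result = fs.putObjectDirect(req);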

Example 3 with Transfer

Use of com.amazonaws.services.s3.transfer.Transfer in project alluxio by Alluxio.

From the class S3AUnderFileSystem, method createInstance:

/**
   * Constructs a new instance of {@link S3AUnderFileSystem}.
   *
   * @param uri the {@link AlluxioURI} for this UFS
   * @return the created {@link S3AUnderFileSystem} instance
   */
public static S3AUnderFileSystem createInstance(AlluxioURI uri) {
    String bucketName = uri.getHost();
    // Set the aws credential system properties based on Alluxio properties, if they are set
    if (Configuration.containsKey(PropertyKey.S3A_ACCESS_KEY)) {
        System.setProperty(SDKGlobalConfiguration.ACCESS_KEY_SYSTEM_PROPERTY, Configuration.get(PropertyKey.S3A_ACCESS_KEY));
    }
    if (Configuration.containsKey(PropertyKey.S3A_SECRET_KEY)) {
        System.setProperty(SDKGlobalConfiguration.SECRET_KEY_SYSTEM_PROPERTY, Configuration.get(PropertyKey.S3A_SECRET_KEY));
    }
    // Checks, in order, env variables, system properties, profile file, and instance profile
    AWSCredentialsProvider credentials = new AWSCredentialsProviderChain(new DefaultAWSCredentialsProviderChain());
    // Set the client configuration based on Alluxio configuration values
    ClientConfiguration clientConf = new ClientConfiguration();
    // Socket timeout
    clientConf.setSocketTimeout(Configuration.getInt(PropertyKey.UNDERFS_S3A_SOCKET_TIMEOUT_MS));
    // HTTP protocol
    if (Configuration.getBoolean(PropertyKey.UNDERFS_S3A_SECURE_HTTP_ENABLED)) {
        clientConf.setProtocol(Protocol.HTTPS);
    } else {
        clientConf.setProtocol(Protocol.HTTP);
    }
    // Proxy host
    if (Configuration.containsKey(PropertyKey.UNDERFS_S3_PROXY_HOST)) {
        clientConf.setProxyHost(Configuration.get(PropertyKey.UNDERFS_S3_PROXY_HOST));
    }
    // Proxy port
    if (Configuration.containsKey(PropertyKey.UNDERFS_S3_PROXY_PORT)) {
        clientConf.setProxyPort(Configuration.getInt(PropertyKey.UNDERFS_S3_PROXY_PORT));
    }
    int numAdminThreads = Configuration.getInt(PropertyKey.UNDERFS_S3_ADMIN_THREADS_MAX);
    int numTransferThreads = Configuration.getInt(PropertyKey.UNDERFS_S3_UPLOAD_THREADS_MAX);
    int numThreads = Configuration.getInt(PropertyKey.UNDERFS_S3_THREADS_MAX);
    if (numThreads < numAdminThreads + numTransferThreads) {
        LOG.warn("Configured s3 max threads: {} is less than # admin threads: {} plus transfer " + "threads {}. Using admin threads + transfer threads as max threads instead.");
        numThreads = numAdminThreads + numTransferThreads;
    }
    clientConf.setMaxConnections(numThreads);
    // Set client request timeout for all requests since multipart copy is used, and copy parts can
    // only be set with the client configuration.
    clientConf.setRequestTimeout(Configuration.getInt(PropertyKey.UNDERFS_S3A_REQUEST_TIMEOUT));
    AmazonS3Client amazonS3Client = new AmazonS3Client(credentials, clientConf);
    // Set a custom endpoint.
    if (Configuration.containsKey(PropertyKey.UNDERFS_S3_ENDPOINT)) {
        amazonS3Client.setEndpoint(Configuration.get(PropertyKey.UNDERFS_S3_ENDPOINT));
    }
    // Disable DNS style buckets, this enables path style requests.
    if (Configuration.getBoolean(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS)) {
        S3ClientOptions clientOptions = S3ClientOptions.builder().setPathStyleAccess(true).build();
        amazonS3Client.setS3ClientOptions(clientOptions);
    }
    ExecutorService service = ExecutorServiceFactories.fixedThreadPoolExecutorServiceFactory("alluxio-s3-transfer-manager-worker", numTransferThreads).create();
    TransferManager transferManager = new TransferManager(amazonS3Client, service);
    TransferManagerConfiguration transferConf = new TransferManagerConfiguration();
    transferConf.setMultipartCopyThreshold(MULTIPART_COPY_THRESHOLD);
    transferManager.setConfiguration(transferConf);
    // Default to readable and writable by the user (octal 0700).
    short bucketMode = (short) 0700;
    // There is no known account owner by default.
    String accountOwner = "";
    // If ACLs are enabled, inherit the bucket ACL for all objects.
    if (Configuration.getBoolean(PropertyKey.UNDERFS_S3A_INHERIT_ACL)) {
        String accountOwnerId = amazonS3Client.getS3AccountOwner().getId();
        // Gets the owner from user-defined static mapping from S3 canonical user
        // id to Alluxio user name.
        String owner = CommonUtils.getValueFromStaticMapping(Configuration.get(PropertyKey.UNDERFS_S3_OWNER_ID_TO_USERNAME_MAPPING), accountOwnerId);
        // If there is no user-defined mapping, use the display name.
        if (owner == null) {
            owner = amazonS3Client.getS3AccountOwner().getDisplayName();
        }
        accountOwner = owner == null ? accountOwnerId : owner;
        AccessControlList acl = amazonS3Client.getBucketAcl(bucketName);
        bucketMode = S3AUtils.translateBucketAcl(acl, accountOwnerId);
    }
    return new S3AUnderFileSystem(uri, amazonS3Client, bucketName, bucketMode, accountOwner, transferManager);
}
Also used: DefaultAWSCredentialsProviderChain (com.amazonaws.auth.DefaultAWSCredentialsProviderChain), AccessControlList (com.amazonaws.services.s3.model.AccessControlList), TransferManager (com.amazonaws.services.s3.transfer.TransferManager), AWSCredentialsProviderChain (com.amazonaws.auth.AWSCredentialsProviderChain), AmazonS3Client (com.amazonaws.services.s3.AmazonS3Client), TransferManagerConfiguration (com.amazonaws.services.s3.transfer.TransferManagerConfiguration), S3ClientOptions (com.amazonaws.services.s3.S3ClientOptions), ExecutorService (java.util.concurrent.ExecutorService), AWSCredentialsProvider (com.amazonaws.auth.AWSCredentialsProvider), ClientConfiguration (com.amazonaws.ClientConfiguration)
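
Since this page indexes Transfer, a brief hedged sketch of how the transferManager built above is typically driven. Upload implements Transfer, so its state can be polled or awaited; the bucket, key, and file path are placeholders.

import java.io.File;
import com.amazonaws.services.s3.transfer.Transfer;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.Upload;

// transferManager is the instance constructed in createInstance() above.
Upload upload = transferManager.upload("example-bucket", "backups/data.bin", new File("/tmp/data.bin"));
Transfer.TransferState state = upload.getState();  // e.g. Waiting, InProgress, Completed
upload.waitForCompletion();                        // blocks; throws AmazonClientException on failure
transferManager.shutdownNow(false);                // false: keep the shared S3 client alive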

Example 4 with Transfer

Use of com.amazonaws.services.s3.transfer.Transfer in project jackrabbit-oak by apache.

From the class S3Backend, method init:

public void init(CachingDataStore store, String homeDir, Properties prop) throws DataStoreException {
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        startTime = new Date();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        LOG.debug("init");
        this.store = store;
        s3ReqDecorator = new S3RequestDecorator(prop);
        s3service = Utils.openService(prop);
        if (bucket == null || "".equals(bucket.trim())) {
            bucket = prop.getProperty(S3Constants.S3_BUCKET);
            // Alternately check if the 'container' property is set
            if (Strings.isNullOrEmpty(bucket)) {
                bucket = prop.getProperty(S3Constants.S3_CONTAINER);
            }
        }
        String region = prop.getProperty(S3Constants.S3_REGION);
        Region s3Region = null;
        if (StringUtils.isNullOrEmpty(region)) {
            com.amazonaws.regions.Region ec2Region = Regions.getCurrentRegion();
            if (ec2Region != null) {
                s3Region = Region.fromValue(ec2Region.getName());
            } else {
                throw new AmazonClientException("parameter [" + S3Constants.S3_REGION + "] not configured and cannot be derived from environment");
            }
        } else {
            if (Utils.DEFAULT_AWS_BUCKET_REGION.equals(region)) {
                s3Region = Region.US_Standard;
            } else if (Region.EU_Ireland.toString().equals(region)) {
                s3Region = Region.EU_Ireland;
            } else {
                s3Region = Region.fromValue(region);
            }
        }
        if (!s3service.doesBucketExist(bucket)) {
            s3service.createBucket(bucket, s3Region);
            LOG.info("Created bucket [{}] in [{}] ", bucket, region);
        } else {
            LOG.info("Using bucket [{}] in [{}] ", bucket, region);
        }
        int writeThreads = 10;
        String writeThreadsStr = prop.getProperty(S3Constants.S3_WRITE_THREADS);
        if (writeThreadsStr != null) {
            writeThreads = Integer.parseInt(writeThreadsStr);
        }
        LOG.info("Using thread pool of [{}] threads in S3 transfer manager.", writeThreads);
        tmx = new TransferManager(s3service, (ThreadPoolExecutor) Executors.newFixedThreadPool(writeThreads, new NamedThreadFactory("s3-transfer-manager-worker")));
        int asyncWritePoolSize = 10;
        String maxConnsStr = prop.getProperty(S3Constants.S3_MAX_CONNS);
        if (maxConnsStr != null) {
            asyncWritePoolSize = Integer.parseInt(maxConnsStr) - writeThreads;
        }
        asyncWriteExecuter = (ThreadPoolExecutor) Executors.newFixedThreadPool(asyncWritePoolSize, new NamedThreadFactory("s3-write-worker"));
        String renameKeyProp = prop.getProperty(S3Constants.S3_RENAME_KEYS);
        boolean renameKeyBool = (renameKeyProp == null || "".equals(renameKeyProp)) ? false : Boolean.parseBoolean(renameKeyProp);
        LOG.info("Rename keys [{}]", renameKeyBool);
        if (renameKeyBool) {
            renameKeys();
        }
        LOG.debug("S3 Backend initialized in [{}] ms", +(System.currentTimeMillis() - startTime.getTime()));
    } catch (Exception e) {
        LOG.debug("  error ", e);
        Map<String, String> filteredMap = Maps.newHashMap();
        if (prop != null) {
            filteredMap = Maps.filterKeys(Maps.fromProperties(prop), new Predicate<String>() {

                @Override
                public boolean apply(String input) {
                    return !input.equals(S3Constants.ACCESS_KEY) && !input.equals(S3Constants.SECRET_KEY);
                }
            });
        }
        throw new DataStoreException("Could not initialize S3 from " + filteredMap, e);
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
}
Also used: TransferManager (com.amazonaws.services.s3.transfer.TransferManager), DataStoreException (org.apache.jackrabbit.core.data.DataStoreException), NamedThreadFactory (org.apache.jackrabbit.core.data.util.NamedThreadFactory), AmazonClientException (com.amazonaws.AmazonClientException), AmazonServiceException (com.amazonaws.AmazonServiceException), Date (java.util.Date), IOException (java.io.IOException), S3RequestDecorator (org.apache.jackrabbit.oak.blob.cloud.s3.S3RequestDecorator), Region (com.amazonaws.services.s3.model.Region), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor), Map (java.util.Map)
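
One caveat in the sizing above: if S3Constants.S3_MAX_CONNS is configured at or below writeThreads, asyncWritePoolSize goes nonpositive and Executors.newFixedThreadPool() throws IllegalArgumentException. A defensive variant (a sketch, not the project's actual code) would clamp the value:

int asyncWritePoolSize = 10;
String maxConnsStr = prop.getProperty(S3Constants.S3_MAX_CONNS);
if (maxConnsStr != null) {
    // Clamp to at least 1 so Executors.newFixedThreadPool() is never handed a nonpositive size.
    asyncWritePoolSize = Math.max(1, Integer.parseInt(maxConnsStr) - writeThreads);
}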

Example 5 with Transfer

Use of com.amazonaws.services.s3.transfer.Transfer in project jackrabbit-oak by apache.

From the class S3Backend, method init (no-argument variant):

public void init() throws DataStoreException {
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        startTime = new Date();
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        LOG.debug("init");
        s3ReqDecorator = new S3RequestDecorator(properties);
        s3service = Utils.openService(properties);
        if (bucket == null || "".equals(bucket.trim())) {
            bucket = properties.getProperty(S3Constants.S3_BUCKET);
            // Alternately check if the 'container' property is set
            if (Strings.isNullOrEmpty(bucket)) {
                bucket = properties.getProperty(S3Constants.S3_CONTAINER);
            }
        }
        String region = properties.getProperty(S3Constants.S3_REGION);
        Region s3Region;
        if (StringUtils.isNullOrEmpty(region)) {
            com.amazonaws.regions.Region ec2Region = Regions.getCurrentRegion();
            if (ec2Region != null) {
                s3Region = Region.fromValue(ec2Region.getName());
            } else {
                throw new AmazonClientException("parameter [" + S3Constants.S3_REGION + "] not configured and cannot be derived from environment");
            }
        } else {
            if (Utils.DEFAULT_AWS_BUCKET_REGION.equals(region)) {
                s3Region = Region.US_Standard;
            } else if (Region.EU_Ireland.toString().equals(region)) {
                s3Region = Region.EU_Ireland;
            } else {
                s3Region = Region.fromValue(region);
            }
        }
        if (!s3service.doesBucketExist(bucket)) {
            s3service.createBucket(bucket, s3Region);
            LOG.info("Created bucket [{}] in [{}] ", bucket, region);
        } else {
            LOG.info("Using bucket [{}] in [{}] ", bucket, region);
        }
        int writeThreads = 10;
        String writeThreadsStr = properties.getProperty(S3Constants.S3_WRITE_THREADS);
        if (writeThreadsStr != null) {
            writeThreads = Integer.parseInt(writeThreadsStr);
        }
        LOG.info("Using thread pool of [{}] threads in S3 transfer manager.", writeThreads);
        tmx = new TransferManager(s3service, Executors.newFixedThreadPool(writeThreads, new NamedThreadFactory("s3-transfer-manager-worker")));
        String renameKeyProp = properties.getProperty(S3Constants.S3_RENAME_KEYS);
        boolean renameKeyBool = (renameKeyProp == null || "".equals(renameKeyProp)) ? false : Boolean.parseBoolean(renameKeyProp);
        LOG.info("Rename keys [{}]", renameKeyBool);
        if (renameKeyBool) {
            renameKeys();
        }
        LOG.debug("S3 Backend initialized in [{}] ms", +(System.currentTimeMillis() - startTime.getTime()));
    } catch (Exception e) {
        LOG.error("Error ", e);
        Map<String, Object> filteredMap = Maps.newHashMap();
        if (properties != null) {
            filteredMap = Maps.filterKeys(Utils.asMap(properties), new Predicate<String>() {

                @Override
                public boolean apply(String input) {
                    return !input.equals(S3Constants.ACCESS_KEY) && !input.equals(S3Constants.SECRET_KEY);
                }
            });
        }
        throw new DataStoreException("Could not initialize S3 from " + filteredMap, e);
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
}
Also used: TransferManager (com.amazonaws.services.s3.transfer.TransferManager), DataStoreException (org.apache.jackrabbit.core.data.DataStoreException), NamedThreadFactory (org.apache.jackrabbit.core.data.util.NamedThreadFactory), AmazonClientException (com.amazonaws.AmazonClientException), AmazonServiceException (com.amazonaws.AmazonServiceException), Date (java.util.Date), IOException (java.io.IOException), Region (com.amazonaws.services.s3.model.Region), Map (java.util.Map)
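
A hedged sketch of wiring this init() variant. The property keys are the S3Constants referenced above, the values are placeholders, and the setProperties() setter is an assumption about this oak version; credentials should come from a secured source rather than literals.

import java.util.Properties;

Properties props = new Properties();
props.setProperty(S3Constants.S3_BUCKET, "example-bucket");  // placeholder bucket
props.setProperty(S3Constants.S3_REGION, "us-west-2");       // placeholder region
props.setProperty(S3Constants.S3_WRITE_THREADS, "10");
S3Backend backend = new S3Backend();
backend.setProperties(props);  // assumption: this setter exists in the version shown
backend.init();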

Aggregations

File (java.io.File): 18
S3FileTransferRequestParamsDto (org.finra.herd.model.dto.S3FileTransferRequestParamsDto): 18
S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary): 14
Test (org.junit.Test): 14
TransferManager (com.amazonaws.services.s3.transfer.TransferManager): 13
AmazonClientException (com.amazonaws.AmazonClientException): 12
AmazonServiceException (com.amazonaws.AmazonServiceException): 10
Tag (com.amazonaws.services.s3.model.Tag): 8
Transfer (com.amazonaws.services.s3.transfer.Transfer): 8
IOException (java.io.IOException): 8
AmazonS3Client (com.amazonaws.services.s3.AmazonS3Client): 7
ObjectMetadata (com.amazonaws.services.s3.model.ObjectMetadata): 7
S3FileTransferResultsDto (org.finra.herd.model.dto.S3FileTransferResultsDto): 7
PutObjectRequest (com.amazonaws.services.s3.model.PutObjectRequest): 5
ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor): 5
BusinessObjectDataKey (org.finra.herd.model.api.xml.BusinessObjectDataKey): 5
StorageFile (org.finra.herd.model.api.xml.StorageFile): 5
ClientConfiguration (com.amazonaws.ClientConfiguration): 4
TransferState (com.amazonaws.services.s3.transfer.Transfer.TransferState): 4
ArrayList (java.util.ArrayList): 4