Use of com.amazonaws.services.s3.transfer.Transfer in project hadoop by apache.
The class S3AFileSystem, method initialize.
/** Called after a new FileSystem instance is constructed.
* @param name a uri whose authority section names the host, port, etc.
* for this FileSystem
* @param originalConf the configuration to use for the FS. The
* bucket-specific options are patched over the base ones before any use is
* made of the config.
*/
public void initialize(URI name, Configuration originalConf) throws IOException {
uri = S3xLoginHelper.buildFSURI(name);
// get the host; this is guaranteed to be non-null, non-empty
bucket = name.getHost();
// clone the configuration into one with propagated bucket options
Configuration conf = propagateBucketOptions(originalConf, bucket);
patchSecurityCredentialProviders(conf);
super.initialize(name, conf);
setConf(conf);
try {
instrumentation = new S3AInstrumentation(name);
// Username is the current user at the time the FS was instantiated.
username = UserGroupInformation.getCurrentUser().getShortUserName();
workingDir = new Path("/user", username).makeQualified(this.uri, this.getWorkingDirectory());
Class<? extends S3ClientFactory> s3ClientFactoryClass = conf.getClass(S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL, S3ClientFactory.class);
s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf).createS3Client(name, uri);
maxKeys = intOption(conf, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1);
listing = new Listing(this);
partSize = getMultipartSizeProperty(conf, MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
multiPartThreshold = getMultipartSizeProperty(conf, MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);
// Check but do not store the block size.
longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
readAhead = longBytesOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
storageStatistics = (S3AStorageStatistics) GlobalStorageStatistics.INSTANCE.put(S3AStorageStatistics.NAME, new GlobalStorageStatistics.StorageStatisticsProvider() {
@Override
public StorageStatistics provide() {
return new S3AStorageStatistics();
}
});
int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
if (maxThreads < 2) {
LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
maxThreads = 2;
}
int totalTasks = intOption(conf, MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
long keepAliveTime = longOption(conf, KEEPALIVE_TIME, DEFAULT_KEEPALIVE_TIME, 0);
boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(maxThreads, maxThreads + totalTasks, keepAliveTime, TimeUnit.SECONDS, "s3a-transfer-shared");
unboundedThreadPool = new ThreadPoolExecutor(maxThreads, Integer.MAX_VALUE, keepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), BlockingThreadPoolExecutorService.newDaemonThreadFactory("s3a-transfer-unbounded"));
initTransferManager();
initCannedAcls(conf);
verifyBucketExists();
initMultipartUploads(conf);
serverSideEncryptionAlgorithm = S3AEncryptionMethods.getMethod(conf.getTrimmed(SERVER_SIDE_ENCRYPTION_ALGORITHM));
if (S3AEncryptionMethods.SSE_C.equals(serverSideEncryptionAlgorithm) && StringUtils.isBlank(getServerSideEncryptionKey(getConf()))) {
throw new IOException(Constants.SSE_C_NO_KEY_ERROR);
}
if (S3AEncryptionMethods.SSE_S3.equals(serverSideEncryptionAlgorithm) && StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
throw new IOException(Constants.SSE_S3_WITH_KEY_ERROR);
}
LOG.debug("Using encryption {}", serverSideEncryptionAlgorithm);
inputPolicy = S3AInputPolicy.getPolicy(conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD);
if (blockUploadEnabled) {
blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER);
partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer);
blockOutputActiveBlocks = intOption(conf, FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1);
LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + " queue limit={}", blockOutputBuffer, partSize, blockOutputActiveBlocks);
} else {
LOG.debug("Using S3AOutputStream");
}
} catch (AmazonClientException e) {
throw translateException("initializing ", new Path(name), e);
}
}
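The method above is driven entirely by fs.s3a.* configuration keys. Below is a minimal sketch of how a caller might set a few of those keys before obtaining the filesystem; the class name, bucket, and option values are illustrative, not project defaults.

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class S3AInitSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Options read by initialize(); the values here are examples only.
        conf.set("fs.s3a.multipart.size", "64M");      // MULTIPART_SIZE
        conf.setInt("fs.s3a.threads.max", 16);         // MAX_THREADS
        conf.setBoolean("fs.s3a.fast.upload", true);   // FAST_UPLOAD: selects S3ABlockOutputStream
        conf.set("fs.s3a.fast.upload.buffer", "disk"); // FAST_UPLOAD_BUFFER
        // FileSystem.get() constructs the S3AFileSystem and calls initialize(uri, conf).
        FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
        System.out.println("Working directory: " + fs.getWorkingDirectory());
    }
}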
Use of com.amazonaws.services.s3.transfer.Transfer in project hadoop by apache.
The class S3AFileSystem, method putObjectDirect.
/**
* PUT an object directly (i.e. not via the transfer manager).
* Byte length is calculated from the file length, or, if there is no
* file, from the content length of the header.
* <i>Important: this call will close any input stream in the request.</i>
* @param putObjectRequest the request
* @return the upload initiated
* @throws AmazonClientException on problems
*/
public PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest) throws AmazonClientException {
long len;
if (putObjectRequest.getFile() != null) {
len = putObjectRequest.getFile().length();
} else {
len = putObjectRequest.getMetadata().getContentLength();
}
incrementPutStartStatistics(len);
try {
PutObjectResult result = s3.putObject(putObjectRequest);
incrementPutCompletedStatistics(true, len);
return result;
} catch (AmazonClientException e) {
incrementPutCompletedStatistics(false, len);
throw e;
}
}
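Because the request may carry either a file or an explicit content length, here is a hedged sketch of the no-file branch, assuming an already-initialized S3AFileSystem instance named fs; the bucket, key, and payload are placeholders.

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;

byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);
ObjectMetadata metadata = new ObjectMetadata();
// With no file in the request, putObjectDirect reads this content length.
metadata.setContentLength(payload.length);
PutObjectRequest request = new PutObjectRequest(
    "example-bucket", "example/key", new ByteArrayInputStream(payload), metadata);
// The call closes the input stream, per the Javadoc warning above.
PutObjectResult result = fs.putObjectDirect(request);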
Use of com.amazonaws.services.s3.transfer.Transfer in project alluxio by Alluxio.
The class S3AUnderFileSystem, method createInstance.
/**
* Constructs a new instance of {@link S3AUnderFileSystem}.
*
* @param uri the {@link AlluxioURI} for this UFS
* @return the created {@link S3AUnderFileSystem} instance
*/
public static S3AUnderFileSystem createInstance(AlluxioURI uri) {
String bucketName = uri.getHost();
// Set the aws credential system properties based on Alluxio properties, if they are set
if (Configuration.containsKey(PropertyKey.S3A_ACCESS_KEY)) {
System.setProperty(SDKGlobalConfiguration.ACCESS_KEY_SYSTEM_PROPERTY, Configuration.get(PropertyKey.S3A_ACCESS_KEY));
}
if (Configuration.containsKey(PropertyKey.S3A_SECRET_KEY)) {
System.setProperty(SDKGlobalConfiguration.SECRET_KEY_SYSTEM_PROPERTY, Configuration.get(PropertyKey.S3A_SECRET_KEY));
}
// Checks, in order, env variables, system properties, profile file, and instance profile
AWSCredentialsProvider credentials = new AWSCredentialsProviderChain(new DefaultAWSCredentialsProviderChain());
// Set the client configuration based on Alluxio configuration values
ClientConfiguration clientConf = new ClientConfiguration();
// Socket timeout
clientConf.setSocketTimeout(Configuration.getInt(PropertyKey.UNDERFS_S3A_SOCKET_TIMEOUT_MS));
// HTTP protocol
if (Configuration.getBoolean(PropertyKey.UNDERFS_S3A_SECURE_HTTP_ENABLED)) {
clientConf.setProtocol(Protocol.HTTPS);
} else {
clientConf.setProtocol(Protocol.HTTP);
}
// Proxy host
if (Configuration.containsKey(PropertyKey.UNDERFS_S3_PROXY_HOST)) {
clientConf.setProxyHost(Configuration.get(PropertyKey.UNDERFS_S3_PROXY_HOST));
}
// Proxy port
if (Configuration.containsKey(PropertyKey.UNDERFS_S3_PROXY_PORT)) {
clientConf.setProxyPort(Configuration.getInt(PropertyKey.UNDERFS_S3_PROXY_PORT));
}
int numAdminThreads = Configuration.getInt(PropertyKey.UNDERFS_S3_ADMIN_THREADS_MAX);
int numTransferThreads = Configuration.getInt(PropertyKey.UNDERFS_S3_UPLOAD_THREADS_MAX);
int numThreads = Configuration.getInt(PropertyKey.UNDERFS_S3_THREADS_MAX);
if (numThreads < numAdminThreads + numTransferThreads) {
LOG.warn("Configured s3 max threads: {} is less than # admin threads: {} plus transfer " + "threads {}. Using admin threads + transfer threads as max threads instead.");
numThreads = numAdminThreads + numTransferThreads;
}
clientConf.setMaxConnections(numThreads);
// Set the client request timeout for all requests: multipart copy is used, and the timeout
// for copy-part requests can only be set via the client configuration.
clientConf.setRequestTimeout(Configuration.getInt(PropertyKey.UNDERFS_S3A_REQUEST_TIMEOUT));
AmazonS3Client amazonS3Client = new AmazonS3Client(credentials, clientConf);
// Set a custom endpoint.
if (Configuration.containsKey(PropertyKey.UNDERFS_S3_ENDPOINT)) {
amazonS3Client.setEndpoint(Configuration.get(PropertyKey.UNDERFS_S3_ENDPOINT));
}
// Disable DNS-style buckets; this enables path-style requests.
if (Configuration.getBoolean(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS)) {
S3ClientOptions clientOptions = S3ClientOptions.builder().setPathStyleAccess(true).build();
amazonS3Client.setS3ClientOptions(clientOptions);
}
ExecutorService service = ExecutorServiceFactories.fixedThreadPoolExecutorServiceFactory("alluxio-s3-transfer-manager-worker", numTransferThreads).create();
TransferManager transferManager = new TransferManager(amazonS3Client, service);
TransferManagerConfiguration transferConf = new TransferManagerConfiguration();
transferConf.setMultipartCopyThreshold(MULTIPART_COPY_THRESHOLD);
transferManager.setConfiguration(transferConf);
// Default to readable and writable by the user (octal 0700).
short bucketMode = (short) 0700;
// There is no known account owner by default.
String accountOwner = "";
// If ACLs are enabled, inherit the bucket ACL for all objects.
if (Configuration.getBoolean(PropertyKey.UNDERFS_S3A_INHERIT_ACL)) {
String accountOwnerId = amazonS3Client.getS3AccountOwner().getId();
// Gets the owner from user-defined static mapping from S3 canonical user
// id to Alluxio user name.
String owner = CommonUtils.getValueFromStaticMapping(Configuration.get(PropertyKey.UNDERFS_S3_OWNER_ID_TO_USERNAME_MAPPING), accountOwnerId);
// If there is no user-defined mapping, use the display name.
if (owner == null) {
owner = amazonS3Client.getS3AccountOwner().getDisplayName();
}
accountOwner = owner == null ? accountOwnerId : owner;
AccessControlList acl = amazonS3Client.getBucketAcl(bucketName);
bucketMode = S3AUtils.translateBucketAcl(acl, accountOwnerId);
}
return new S3AUnderFileSystem(uri, amazonS3Client, bucketName, bucketMode, accountOwner, transferManager);
}
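The manager built here is given a multipart copy threshold, which is also why the client-wide request timeout is configured above. A sketch of a server-side copy through the same manager; the bucket and keys are placeholders.

import com.amazonaws.services.s3.transfer.Copy;

// Copies above MULTIPART_COPY_THRESHOLD are split into copy-part requests automatically.
Copy copy = transferManager.copy("example-bucket", "src/key", "example-bucket", "dst/key");
// Blocks until the copy finishes; throws AmazonClientException on failure
// and InterruptedException if the wait is interrupted.
copy.waitForCompletion();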
Use of com.amazonaws.services.s3.transfer.Transfer in project jackrabbit-oak by apache.
The class S3Backend, method init.
public void init(CachingDataStore store, String homeDir, Properties prop) throws DataStoreException {
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
startTime = new Date();
Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
LOG.debug("init");
this.store = store;
s3ReqDecorator = new S3RequestDecorator(prop);
s3service = Utils.openService(prop);
if (bucket == null || "".equals(bucket.trim())) {
bucket = prop.getProperty(S3Constants.S3_BUCKET);
// Alternatively, check if the 'container' property is set
if (Strings.isNullOrEmpty(bucket)) {
bucket = prop.getProperty(S3Constants.S3_CONTAINER);
}
}
String region = prop.getProperty(S3Constants.S3_REGION);
Region s3Region = null;
if (StringUtils.isNullOrEmpty(region)) {
com.amazonaws.regions.Region ec2Region = Regions.getCurrentRegion();
if (ec2Region != null) {
s3Region = Region.fromValue(ec2Region.getName());
} else {
throw new AmazonClientException("parameter [" + S3Constants.S3_REGION + "] not configured and cannot be derived from environment");
}
} else {
if (Utils.DEFAULT_AWS_BUCKET_REGION.equals(region)) {
s3Region = Region.US_Standard;
} else if (Region.EU_Ireland.toString().equals(region)) {
s3Region = Region.EU_Ireland;
} else {
s3Region = Region.fromValue(region);
}
}
if (!s3service.doesBucketExist(bucket)) {
s3service.createBucket(bucket, s3Region);
LOG.info("Created bucket [{}] in [{}] ", bucket, region);
} else {
LOG.info("Using bucket [{}] in [{}] ", bucket, region);
}
int writeThreads = 10;
String writeThreadsStr = prop.getProperty(S3Constants.S3_WRITE_THREADS);
if (writeThreadsStr != null) {
writeThreads = Integer.parseInt(writeThreadsStr);
}
LOG.info("Using thread pool of [{}] threads in S3 transfer manager.", writeThreads);
tmx = new TransferManager(s3service, (ThreadPoolExecutor) Executors.newFixedThreadPool(writeThreads, new NamedThreadFactory("s3-transfer-manager-worker")));
int asyncWritePoolSize = 10;
String maxConnsStr = prop.getProperty(S3Constants.S3_MAX_CONNS);
if (maxConnsStr != null) {
asyncWritePoolSize = Integer.parseInt(maxConnsStr) - writeThreads;
}
asyncWriteExecuter = (ThreadPoolExecutor) Executors.newFixedThreadPool(asyncWritePoolSize, new NamedThreadFactory("s3-write-worker"));
String renameKeyProp = prop.getProperty(S3Constants.S3_RENAME_KEYS);
boolean renameKeyBool = (renameKeyProp == null || "".equals(renameKeyProp)) ? false : Boolean.parseBoolean(renameKeyProp);
LOG.info("Rename keys [{}]", renameKeyBool);
if (renameKeyBool) {
renameKeys();
}
LOG.debug("S3 Backend initialized in [{}] ms", +(System.currentTimeMillis() - startTime.getTime()));
} catch (Exception e) {
LOG.debug(" error ", e);
Map<String, String> filteredMap = Maps.newHashMap();
if (prop != null) {
filteredMap = Maps.filterKeys(Maps.fromProperties(prop), new Predicate<String>() {
@Override
public boolean apply(String input) {
return !input.equals(S3Constants.ACCESS_KEY) && !input.equals(S3Constants.SECRET_KEY);
}
});
}
throw new DataStoreException("Could not initialize S3 from " + filteredMap, e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
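A sketch of the Properties that drive this init() path, using only the S3Constants keys the method reads; the values, and the backend/store/homeDir variables, are placeholders.

import java.util.Properties;

Properties prop = new Properties();
prop.setProperty(S3Constants.S3_BUCKET, "example-bucket"); // falls back to S3_CONTAINER if unset
prop.setProperty(S3Constants.S3_REGION, "us-west-2");
prop.setProperty(S3Constants.S3_WRITE_THREADS, "10");      // transfer manager pool size
prop.setProperty(S3Constants.S3_MAX_CONNS, "20");          // async pool = maxConns - writeThreads
prop.setProperty(S3Constants.S3_RENAME_KEYS, "false");
backend.init(store, homeDir, prop);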
Use of com.amazonaws.services.s3.transfer.Transfer in project jackrabbit-oak by apache.
The class S3Backend, method init (no-argument overload).
public void init() throws DataStoreException {
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
startTime = new Date();
Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
LOG.debug("init");
s3ReqDecorator = new S3RequestDecorator(properties);
s3service = Utils.openService(properties);
if (bucket == null || "".equals(bucket.trim())) {
bucket = properties.getProperty(S3Constants.S3_BUCKET);
// Alternatively, check if the 'container' property is set
if (Strings.isNullOrEmpty(bucket)) {
bucket = properties.getProperty(S3Constants.S3_CONTAINER);
}
}
String region = properties.getProperty(S3Constants.S3_REGION);
Region s3Region;
if (StringUtils.isNullOrEmpty(region)) {
com.amazonaws.regions.Region ec2Region = Regions.getCurrentRegion();
if (ec2Region != null) {
s3Region = Region.fromValue(ec2Region.getName());
} else {
throw new AmazonClientException("parameter [" + S3Constants.S3_REGION + "] not configured and cannot be derived from environment");
}
} else {
if (Utils.DEFAULT_AWS_BUCKET_REGION.equals(region)) {
s3Region = Region.US_Standard;
} else if (Region.EU_Ireland.toString().equals(region)) {
s3Region = Region.EU_Ireland;
} else {
s3Region = Region.fromValue(region);
}
}
if (!s3service.doesBucketExist(bucket)) {
s3service.createBucket(bucket, s3Region);
LOG.info("Created bucket [{}] in [{}] ", bucket, region);
} else {
LOG.info("Using bucket [{}] in [{}] ", bucket, region);
}
int writeThreads = 10;
String writeThreadsStr = properties.getProperty(S3Constants.S3_WRITE_THREADS);
if (writeThreadsStr != null) {
writeThreads = Integer.parseInt(writeThreadsStr);
}
LOG.info("Using thread pool of [{}] threads in S3 transfer manager.", writeThreads);
tmx = new TransferManager(s3service, Executors.newFixedThreadPool(writeThreads, new NamedThreadFactory("s3-transfer-manager-worker")));
String renameKeyProp = properties.getProperty(S3Constants.S3_RENAME_KEYS);
boolean renameKeyBool = (renameKeyProp == null || "".equals(renameKeyProp)) ? false : Boolean.parseBoolean(renameKeyProp);
LOG.info("Rename keys [{}]", renameKeyBool);
if (renameKeyBool) {
renameKeys();
}
LOG.debug("S3 Backend initialized in [{}] ms", +(System.currentTimeMillis() - startTime.getTime()));
} catch (Exception e) {
LOG.error("Error ", e);
Map<String, Object> filteredMap = Maps.newHashMap();
if (properties != null) {
filteredMap = Maps.filterKeys(Utils.asMap(properties), new Predicate<String>() {
@Override
public boolean apply(String input) {
return !input.equals(S3Constants.ACCESS_KEY) && !input.equals(S3Constants.SECRET_KEY);
}
});
}
throw new DataStoreException("Could not initialize S3 from " + filteredMap, e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
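Both init() variants leave a TransferManager (tmx) wired over a fixed thread pool. A hedged sketch of how such a manager performs an asynchronous upload, with progress polled through the com.amazonaws.services.s3.transfer.Transfer interface that this page aggregates; the bucket, key, and file are placeholders.

import java.io.File;
import com.amazonaws.services.s3.transfer.Transfer;
import com.amazonaws.services.s3.transfer.Upload;

Upload upload = tmx.upload("example-bucket", "example/key", new File("/tmp/data.bin"));
// Upload implements Transfer, so progress can be polled generically.
Transfer transfer = upload;
System.out.printf("%.1f%% transferred%n", transfer.getProgress().getPercentTransferred());
// Blocks until the upload completes; throws on failure or interruption.
upload.waitForCompletion();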