
Example 1 with StorageStatistics

Use of org.apache.hadoop.fs.StorageStatistics in the Apache Hadoop project.

From the class WebHdfsFileSystem, method initialize.

@Override
public synchronized void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    setConf(conf);
    // set user and acl patterns based on configuration file
    UserParam.setUserPattern(conf.get(HdfsClientConfigKeys.DFS_WEBHDFS_USER_PATTERN_KEY, HdfsClientConfigKeys.DFS_WEBHDFS_USER_PATTERN_DEFAULT));
    AclPermissionParam.setAclPermissionPattern(conf.get(HdfsClientConfigKeys.DFS_WEBHDFS_ACL_PERMISSION_PATTERN_KEY, HdfsClientConfigKeys.DFS_WEBHDFS_ACL_PERMISSION_PATTERN_DEFAULT));
    boolean isOAuth = conf.getBoolean(HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_KEY, HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_DEFAULT);
    if (isOAuth) {
        LOG.debug("Enabling OAuth2 in WebHDFS");
        connectionFactory = URLConnectionFactory.newOAuth2URLConnectionFactory(conf);
    } else {
        LOG.debug("Not enabling OAuth2 in WebHDFS");
        connectionFactory = URLConnectionFactory.newDefaultURLConnectionFactory(conf);
    }
    ugi = UserGroupInformation.getCurrentUser();
    this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
    this.nnAddrs = resolveNNAddr();
    boolean isHA = HAUtilClient.isClientFailoverConfigured(conf, this.uri);
    boolean isLogicalUri = isHA && HAUtilClient.isLogicalUri(conf, this.uri);
    // In non-HA or non-logical URI case, the code needs to call
    // getCanonicalUri() in order to handle the case where no port is
    // specified in the URI
    this.tokenServiceName = isLogicalUri ? HAUtilClient.buildTokenServiceForLogicalUri(uri, getScheme()) : SecurityUtil.buildTokenService(getCanonicalUri());
    if (!isHA) {
        this.retryPolicy = RetryUtils.getDefaultRetryPolicy(conf, HdfsClientConfigKeys.HttpClient.RETRY_POLICY_ENABLED_KEY, HdfsClientConfigKeys.HttpClient.RETRY_POLICY_ENABLED_DEFAULT, HdfsClientConfigKeys.HttpClient.RETRY_POLICY_SPEC_KEY, HdfsClientConfigKeys.HttpClient.RETRY_POLICY_SPEC_DEFAULT, HdfsConstants.SAFEMODE_EXCEPTION_CLASS_NAME);
    } else {
        int maxFailoverAttempts = conf.getInt(HdfsClientConfigKeys.HttpClient.FAILOVER_MAX_ATTEMPTS_KEY, HdfsClientConfigKeys.HttpClient.FAILOVER_MAX_ATTEMPTS_DEFAULT);
        int maxRetryAttempts = conf.getInt(HdfsClientConfigKeys.HttpClient.RETRY_MAX_ATTEMPTS_KEY, HdfsClientConfigKeys.HttpClient.RETRY_MAX_ATTEMPTS_DEFAULT);
        int failoverSleepBaseMillis = conf.getInt(HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_BASE_KEY, HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_BASE_DEFAULT);
        int failoverSleepMaxMillis = conf.getInt(HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_MAX_KEY, HdfsClientConfigKeys.HttpClient.FAILOVER_SLEEPTIME_MAX_DEFAULT);
        this.retryPolicy = RetryPolicies.failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts, maxRetryAttempts, failoverSleepBaseMillis, failoverSleepMaxMillis);
    }
    this.workingDir = makeQualified(new Path(getHomeDirectoryString(ugi)));
    this.canRefreshDelegationToken = UserGroupInformation.isSecurityEnabled();
    this.disallowFallbackToInsecureCluster = !conf.getBoolean(CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT);
    this.initializeRestCsrf(conf);
    this.delegationToken = null;
    storageStatistics = (DFSOpsCountStatistics) GlobalStorageStatistics.INSTANCE.put(DFSOpsCountStatistics.NAME, new StorageStatisticsProvider() {

        @Override
        public StorageStatistics provide() {
            return new DFSOpsCountStatistics();
        }
    });
}
Also used : Path(org.apache.hadoop.fs.Path) GlobalStorageStatistics(org.apache.hadoop.fs.GlobalStorageStatistics) StorageStatistics(org.apache.hadoop.fs.StorageStatistics) DFSOpsCountStatistics(org.apache.hadoop.hdfs.DFSOpsCountStatistics) StorageStatisticsProvider(org.apache.hadoop.fs.GlobalStorageStatistics.StorageStatisticsProvider)
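
Once initialize() has run, the DFSOpsCountStatistics registered above can be looked up again from the JVM-wide registry and its counters enumerated. The following is a minimal sketch, not taken from the Hadoop sources above; it assumes only the GlobalStorageStatistics.get(String) lookup and the StorageStatistics.getLongStatistics() iterator.

// Minimal sketch (not part of WebHdfsFileSystem): reading back the statistics
// that initialize() registered under DFSOpsCountStatistics.NAME.
import java.util.Iterator;
import org.apache.hadoop.fs.GlobalStorageStatistics;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.hdfs.DFSOpsCountStatistics;

public class DumpDfsOpCounts {
    public static void main(String[] args) {
        StorageStatistics stats =
            GlobalStorageStatistics.INSTANCE.get(DFSOpsCountStatistics.NAME);
        if (stats == null) {
            // nothing registered yet: no WebHDFS/DFS FileSystem has been initialized
            System.out.println("No " + DFSOpsCountStatistics.NAME + " registered");
            return;
        }
        Iterator<StorageStatistics.LongStatistic> it = stats.getLongStatistics();
        while (it.hasNext()) {
            StorageStatistics.LongStatistic s = it.next();
            System.out.println(s.getName() + " = " + s.getValue());
        }
    }
}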

Example 2 with StorageStatistics

Use of org.apache.hadoop.fs.StorageStatistics in the Apache Hadoop project.

From the class S3AFileSystem, method initialize.

/** Called after a new FileSystem instance is constructed.
   * @param name a uri whose authority section names the host, port, etc.
   *   for this FileSystem
   * @param originalConf the configuration to use for the FS. The
   * bucket-specific options are patched over the base ones before any use is
   * made of the config.
   */
public void initialize(URI name, Configuration originalConf) throws IOException {
    uri = S3xLoginHelper.buildFSURI(name);
    // get the host; this is guaranteed to be non-null, non-empty
    bucket = name.getHost();
    // clone the configuration into one with propagated bucket options
    Configuration conf = propagateBucketOptions(originalConf, bucket);
    patchSecurityCredentialProviders(conf);
    super.initialize(name, conf);
    setConf(conf);
    try {
        instrumentation = new S3AInstrumentation(name);
        // Username is the current user at the time the FS was instantiated.
        username = UserGroupInformation.getCurrentUser().getShortUserName();
        workingDir = new Path("/user", username).makeQualified(this.uri, this.getWorkingDirectory());
        Class<? extends S3ClientFactory> s3ClientFactoryClass = conf.getClass(S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL, S3ClientFactory.class);
        s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf).createS3Client(name, uri);
        maxKeys = intOption(conf, MAX_PAGING_KEYS, DEFAULT_MAX_PAGING_KEYS, 1);
        listing = new Listing(this);
        partSize = getMultipartSizeProperty(conf, MULTIPART_SIZE, DEFAULT_MULTIPART_SIZE);
        multiPartThreshold = getMultipartSizeProperty(conf, MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);
        //check but do not store the block size
        longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
        enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
        readAhead = longBytesOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
        storageStatistics = (S3AStorageStatistics) GlobalStorageStatistics.INSTANCE.put(S3AStorageStatistics.NAME, new GlobalStorageStatistics.StorageStatisticsProvider() {

            @Override
            public StorageStatistics provide() {
                return new S3AStorageStatistics();
            }
        });
        int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
        if (maxThreads < 2) {
            LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
            maxThreads = 2;
        }
        int totalTasks = intOption(conf, MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
        long keepAliveTime = longOption(conf, KEEPALIVE_TIME, DEFAULT_KEEPALIVE_TIME, 0);
        boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(maxThreads, maxThreads + totalTasks, keepAliveTime, TimeUnit.SECONDS, "s3a-transfer-shared");
        unboundedThreadPool = new ThreadPoolExecutor(maxThreads, Integer.MAX_VALUE, keepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), BlockingThreadPoolExecutorService.newDaemonThreadFactory("s3a-transfer-unbounded"));
        initTransferManager();
        initCannedAcls(conf);
        verifyBucketExists();
        initMultipartUploads(conf);
        serverSideEncryptionAlgorithm = S3AEncryptionMethods.getMethod(conf.getTrimmed(SERVER_SIDE_ENCRYPTION_ALGORITHM));
        if (S3AEncryptionMethods.SSE_C.equals(serverSideEncryptionAlgorithm) && StringUtils.isBlank(getServerSideEncryptionKey(getConf()))) {
            throw new IOException(Constants.SSE_C_NO_KEY_ERROR);
        }
        if (S3AEncryptionMethods.SSE_S3.equals(serverSideEncryptionAlgorithm) && StringUtils.isNotBlank(getServerSideEncryptionKey(getConf()))) {
            throw new IOException(Constants.SSE_S3_WITH_KEY_ERROR);
        }
        LOG.debug("Using encryption {}", serverSideEncryptionAlgorithm);
        inputPolicy = S3AInputPolicy.getPolicy(conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL));
        blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD);
        if (blockUploadEnabled) {
            blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, DEFAULT_FAST_UPLOAD_BUFFER);
            partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize);
            blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer);
            blockOutputActiveBlocks = intOption(conf, FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1);
            LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + " queue limit={}", blockOutputBuffer, partSize, blockOutputActiveBlocks);
        } else {
            LOG.debug("Using S3AOutputStream");
        }
    } catch (AmazonClientException e) {
        throw translateException("initializing ", new Path(name), e);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TransferManagerConfiguration(com.amazonaws.services.s3.transfer.TransferManagerConfiguration) GlobalStorageStatistics(org.apache.hadoop.fs.GlobalStorageStatistics) StorageStatistics(org.apache.hadoop.fs.StorageStatistics) AmazonClientException(com.amazonaws.AmazonClientException) PathIOException(org.apache.hadoop.fs.PathIOException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor)
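
After the S3A initialize() above runs, fs.getStorageStatistics() returns the same S3AStorageStatistics that was registered with GlobalStorageStatistics, and individual counters can be read by the symbol of a Statistic, as the huge-file test in Example 4 does. A short hedged sketch of that usage; the bucket URI is hypothetical and credentials are assumed to be present in the Configuration.

// Sketch only: example-bucket is a placeholder, not a real bucket.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.Statistic;

public class DumpS3APutCounters {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf)) {
            StorageStatistics stats = ((S3AFileSystem) fs).getStorageStatistics();
            // counters are keyed by the Statistic symbol, e.g. object PUT requests/bytes
            System.out.println("PUT requests: "
                + stats.getLong(Statistic.OBJECT_PUT_REQUESTS.getSymbol()));
            System.out.println("PUT bytes: "
                + stats.getLong(Statistic.OBJECT_PUT_BYTES.getSymbol()));
        }
    }
}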

Example 3 with StorageStatistics

Use of org.apache.hadoop.fs.StorageStatistics in the Apache Hadoop project.

From the class DistributedFileSystem, method initialize.

@Override
public void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    setConf(conf);
    String host = uri.getHost();
    if (host == null) {
        throw new IOException("Incomplete HDFS URI, no host: " + uri);
    }
    homeDirPrefix = conf.get(HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_KEY, HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT);
    this.dfs = new DFSClient(uri, conf, statistics);
    this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
    this.workingDir = getHomeDirectory();
    storageStatistics = (DFSOpsCountStatistics) GlobalStorageStatistics.INSTANCE.put(DFSOpsCountStatistics.NAME, new StorageStatisticsProvider() {

        @Override
        public StorageStatistics provide() {
            return new DFSOpsCountStatistics();
        }
    });
}
Also used : GlobalStorageStatistics(org.apache.hadoop.fs.GlobalStorageStatistics) StorageStatistics(org.apache.hadoop.fs.StorageStatistics) IOException(java.io.IOException) StorageStatisticsProvider(org.apache.hadoop.fs.GlobalStorageStatistics.StorageStatisticsProvider)
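
The provider passed to GlobalStorageStatistics.INSTANCE.put is only invoked when no statistics object with that name is registered yet; otherwise the existing instance is returned, which is why the result is cast rather than constructed directly. A hedged sketch of that assumed registry behaviour, illustrating why every DistributedFileSystem in a JVM ends up sharing one DFSOpsCountStatistics:

// Sketch of the assumed registry semantics (put() hands back the existing
// instance when one is already registered under the same name).
import org.apache.hadoop.fs.GlobalStorageStatistics;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.hdfs.DFSOpsCountStatistics;

public class SharedStatisticsSketch {
    public static void main(String[] args) {
        StorageStatistics first = GlobalStorageStatistics.INSTANCE.put(
            DFSOpsCountStatistics.NAME, () -> new DFSOpsCountStatistics());
        StorageStatistics second = GlobalStorageStatistics.INSTANCE.put(
            DFSOpsCountStatistics.NAME, () -> new DFSOpsCountStatistics());
        // both calls return the same object, so operation counts from all
        // DistributedFileSystem instances aggregate in one place
        System.out.println("same instance: " + (first == second));
    }
}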

Example 4 with StorageStatistics

Use of org.apache.hadoop.fs.StorageStatistics in the Apache Hadoop project.

From the class AbstractSTestS3AHugeFiles, method test_010_CreateHugeFile.

@Test
public void test_010_CreateHugeFile() throws IOException {
    assertFalse("Please run this test sequentially to avoid timeouts" + " and bandwidth problems", isParallelExecution());
    long filesize = getTestPropertyBytes(getConf(), KEY_HUGE_FILESIZE, DEFAULT_HUGE_FILESIZE);
    long filesizeMB = filesize / _1MB;
    // clean up from any previous attempts
    deleteHugeFile();
    describe("Creating file %s of size %d MB" + " with partition size %d buffered by %s", hugefile, filesizeMB, partitionSize, getBlockOutputBufferName());
    // now do a check of available upload time, with a pessimistic bandwidth
    // (that of remote upload tests). If the test times out then not only is
    // the test outcome lost, as the follow-on tests continue, they will
    // overlap with the ongoing upload test, for much confusion.
    int timeout = getTestTimeoutSeconds();
    // assume 1 MB/s upload bandwidth
    int bandwidth = _1MB;
    long uploadTime = filesize / bandwidth;
    assertTrue(String.format("Timeout set in %s seconds is too low;" + " estimating upload time of %d seconds at 1 MB/s." + " Rerun tests with -D%s=%d", timeout, uploadTime, KEY_TEST_TIMEOUT, uploadTime * 2), uploadTime < timeout);
    assertEquals("File size set in " + KEY_HUGE_FILESIZE + " = " + filesize + " is not a multiple of " + uploadBlockSize, 0, filesize % uploadBlockSize);
    byte[] data = new byte[uploadBlockSize];
    for (int i = 0; i < uploadBlockSize; i++) {
        data[i] = (byte) (i % 256);
    }
    long blocks = filesize / uploadBlockSize;
    long blocksPerMB = _1MB / uploadBlockSize;
    // perform the upload.
    // there's lots of logging here, so that a tail -f on the output log
    // can give a view of what is happening.
    S3AFileSystem fs = getFileSystem();
    StorageStatistics storageStatistics = fs.getStorageStatistics();
    String putRequests = Statistic.OBJECT_PUT_REQUESTS.getSymbol();
    String putBytes = Statistic.OBJECT_PUT_BYTES.getSymbol();
    Statistic putRequestsActive = Statistic.OBJECT_PUT_REQUESTS_ACTIVE;
    Statistic putBytesPending = Statistic.OBJECT_PUT_BYTES_PENDING;
    ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
    S3AInstrumentation.OutputStreamStatistics streamStatistics;
    long blocksPer10MB = blocksPerMB * 10;
    ProgressCallback progress = new ProgressCallback(timer);
    try (FSDataOutputStream out = fs.create(hugefile, true, uploadBlockSize, progress)) {
        try {
            streamStatistics = getOutputStreamStatistics(out);
        } catch (ClassCastException e) {
            LOG.info("Wrapped output stream is not block stream: {}", out.getWrappedStream());
            streamStatistics = null;
        }
        for (long block = 1; block <= blocks; block++) {
            out.write(data);
            long written = block * uploadBlockSize;
            // every 10 MB and on file upload @ 100%, print some stats
            if (block % blocksPer10MB == 0 || written == filesize) {
                long percentage = written * 100 / filesize;
                double elapsedTime = timer.elapsedTime() / 1.0e9;
                double writtenMB = 1.0 * written / _1MB;
                LOG.info(String.format("[%02d%%] Buffered %.2f MB out of %d MB;" + " PUT %d bytes (%d pending) in %d operations (%d active);" + " elapsedTime=%.2fs; write to buffer bandwidth=%.2f MB/s", percentage, writtenMB, filesizeMB, storageStatistics.getLong(putBytes), gaugeValue(putBytesPending), storageStatistics.getLong(putRequests), gaugeValue(putRequestsActive), elapsedTime, writtenMB / elapsedTime));
            }
        }
        // now close the file
        LOG.info("Closing stream {}", out);
        LOG.info("Statistics : {}", streamStatistics);
        ContractTestUtils.NanoTimer closeTimer = new ContractTestUtils.NanoTimer();
        out.close();
        closeTimer.end("time to close() output stream");
    }
    timer.end("time to write %d MB in blocks of %d", filesizeMB, uploadBlockSize);
    logFSState();
    bandwidth(timer, filesize);
    LOG.info("Statistics after stream closed: {}", streamStatistics);
    long putRequestCount = storageStatistics.getLong(putRequests);
    Long putByteCount = storageStatistics.getLong(putBytes);
    LOG.info("PUT {} bytes in {} operations; {} MB/operation", putByteCount, putRequestCount, putByteCount / (putRequestCount * _1MB));
    LOG.info("Time per PUT {} nS", toHuman(timer.nanosPerOperation(putRequestCount)));
    assertEquals("active put requests in \n" + fs, 0, gaugeValue(putRequestsActive));
    ContractTestUtils.assertPathExists(fs, "Huge file", hugefile);
    S3AFileStatus status = fs.getFileStatus(hugefile);
    ContractTestUtils.assertIsFile(hugefile, status);
    assertEquals("File size in " + status, filesize, status.getLen());
    if (progress != null) {
        progress.verifyNoFailures("Put file " + hugefile + " of size " + filesize);
    }
    if (streamStatistics != null) {
        assertEquals("actively allocated blocks in " + streamStatistics, 0, streamStatistics.blocksActivelyAllocated());
    }
}
Also used : S3AFileSystem(org.apache.hadoop.fs.s3a.S3AFileSystem) StorageStatistics(org.apache.hadoop.fs.StorageStatistics) ContractTestUtils(org.apache.hadoop.fs.contract.ContractTestUtils) S3AFileStatus(org.apache.hadoop.fs.s3a.S3AFileStatus) Statistic(org.apache.hadoop.fs.s3a.Statistic) AtomicLong(java.util.concurrent.atomic.AtomicLong) S3AInstrumentation(org.apache.hadoop.fs.s3a.S3AInstrumentation) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
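
The write-to-buffer bandwidth logged every 10 MB above is simply writtenMB / elapsedTime. A small worked example with made-up numbers, mirroring that arithmetic in the progress logging:

// Worked example with hypothetical numbers, mirroring the progress log above.
public class BandwidthExample {
    public static void main(String[] args) {
        final long _1MB = 1024 * 1024;
        long written = 128L * _1MB;          // bytes buffered so far
        long elapsedNanos = 16_000_000_000L; // what timer.elapsedTime() might return
        double elapsedTime = elapsedNanos / 1.0e9;   // 16.0 s
        double writtenMB = 1.0 * written / _1MB;     // 128.0 MB
        double bandwidth = writtenMB / elapsedTime;  // 8.0 MB/s written to buffer
        System.out.printf("%.2f MB in %.2fs = %.2f MB/s%n",
            writtenMB, elapsedTime, bandwidth);
    }
}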

Aggregations

StorageStatistics (org.apache.hadoop.fs.StorageStatistics): 4 usages
GlobalStorageStatistics (org.apache.hadoop.fs.GlobalStorageStatistics): 3 usages
IOException (java.io.IOException): 2 usages
StorageStatisticsProvider (org.apache.hadoop.fs.GlobalStorageStatistics.StorageStatisticsProvider): 2 usages
Path (org.apache.hadoop.fs.Path): 2 usages
AmazonClientException (com.amazonaws.AmazonClientException): 1 usage
ObjectListing (com.amazonaws.services.s3.model.ObjectListing): 1 usage
TransferManagerConfiguration (com.amazonaws.services.s3.transfer.TransferManagerConfiguration): 1 usage
InterruptedIOException (java.io.InterruptedIOException): 1 usage
LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue): 1 usage
ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor): 1 usage
AtomicLong (java.util.concurrent.atomic.AtomicLong): 1 usage
Configuration (org.apache.hadoop.conf.Configuration): 1 usage
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 1 usage
PathIOException (org.apache.hadoop.fs.PathIOException): 1 usage
ContractTestUtils (org.apache.hadoop.fs.contract.ContractTestUtils): 1 usage
S3AFileStatus (org.apache.hadoop.fs.s3a.S3AFileStatus): 1 usage
S3AFileSystem (org.apache.hadoop.fs.s3a.S3AFileSystem): 1 usage
S3AInstrumentation (org.apache.hadoop.fs.s3a.S3AInstrumentation): 1 usage
Statistic (org.apache.hadoop.fs.s3a.Statistic): 1 usage