Search in sources :

Example 1 with S3AFileStatus

Use of org.apache.hadoop.fs.s3a.S3AFileStatus in the Apache Hadoop project.

From the class AbstractSTestS3AHugeFiles, method test_040_PositionedReadHugeFile.

/**
 * Time positioned reads of the huge file: one at the start, one near the
 * end, then one back at the start again. Each read is individually timed;
 * the final log line records nanoseconds per positioned read.
 */
@Test
public void test_040_PositionedReadHugeFile() throws Throwable {
    assumeHugeFileExists();
    // Note whether the file was written with server-side encryption.
    final String sseAlgorithm = getConf().getTrimmed(SERVER_SIDE_ENCRYPTION_ALGORITHM);
    final boolean isEncrypted = sseAlgorithm != null;
    if (isEncrypted) {
        LOG.info("File is encrypted with algorithm {}", sseAlgorithm);
    }
    final String fileDescription = isEncrypted ? "encrypted file" : "file";
    describe("Positioned reads of %s %s", fileDescription, hugefile);
    final S3AFileSystem fs = getFileSystem();
    final S3AFileStatus status = fs.getFileStatus(hugefile);
    final long filesize = status.getLen();
    final int bufferSize = 8192;
    final byte[] buffer = new byte[bufferSize];
    final long eof = filesize - 1;
    int ops = 0;
    final ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
    ContractTestUtils.NanoTimer startRead;
    ContractTestUtils.NanoTimer endRead;
    ContractTestUtils.NanoTimer repeatStartRead;
    try (FSDataInputStream in = fs.open(hugefile, uploadBlockSize)) {
        // read at offset 0
        startRead = new ContractTestUtils.NanoTimer();
        in.readFully(0, buffer);
        startRead.end("time to read data at start of file");
        ops++;
        // read the last bufferSize bytes before EOF
        endRead = new ContractTestUtils.NanoTimer();
        in.readFully(eof - bufferSize, buffer);
        endRead.end("time to read data at end of file");
        ops++;
        // read at offset 0 again, after the backwards seek
        repeatStartRead = new ContractTestUtils.NanoTimer();
        in.readFully(0, buffer);
        repeatStartRead.end("time to read data at start of file again");
        ops++;
        LOG.info("Final stream state: {}", in);
    }
    final long mb = Math.max(filesize / _1MB, 1);
    logFSState();
    timer.end("time to performed positioned reads of %s of %d MB ", fileDescription, mb);
    LOG.info("Time per positioned read = {} nS", toHuman(timer.nanosPerOperation(ops)));
}
Also used : S3AFileSystem(org.apache.hadoop.fs.s3a.S3AFileSystem) S3AFileStatus(org.apache.hadoop.fs.s3a.S3AFileStatus) ContractTestUtils(org.apache.hadoop.fs.contract.ContractTestUtils) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)

Example 2 with S3AFileStatus

Use of org.apache.hadoop.fs.s3a.S3AFileStatus in the Apache Hadoop project.

From the class AbstractSTestS3AHugeFiles, method test_050_readHugeFile.

/**
 * Sequentially read the whole huge file in uploadBlockSize chunks,
 * then report per-MB read time and overall bandwidth.
 */
@Test
public void test_050_readHugeFile() throws Throwable {
    assumeHugeFileExists();
    describe("Reading %s", hugefile);
    final S3AFileSystem fs = getFileSystem();
    final S3AFileStatus status = fs.getFileStatus(hugefile);
    final long filesize = status.getLen();
    final long blockCount = filesize / uploadBlockSize;
    final byte[] readBuffer = new byte[uploadBlockSize];
    final ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
    try (FSDataInputStream in = fs.open(hugefile, uploadBlockSize)) {
        // one full-buffer read per block of the file
        for (long i = 0; i < blockCount; i++) {
            in.readFully(readBuffer);
        }
        LOG.info("Final stream state: {}", in);
    }
    final long mb = Math.max(filesize / _1MB, 1);
    timer.end("time to read file of %d MB ", mb);
    LOG.info("Time per MB to read = {} nS", toHuman(timer.nanosPerOperation(mb)));
    bandwidth(timer, filesize);
    logFSState();
}
Also used : S3AFileSystem(org.apache.hadoop.fs.s3a.S3AFileSystem) S3AFileStatus(org.apache.hadoop.fs.s3a.S3AFileStatus) ContractTestUtils(org.apache.hadoop.fs.contract.ContractTestUtils) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) Test(org.junit.Test)

Example 3 with S3AFileStatus

Use of org.apache.hadoop.fs.s3a.S3AFileStatus in the Apache Hadoop project.

From the class AbstractSTestS3AHugeFiles, method test_100_renameHugeFile.

/**
 * Rename the huge file to a second path and back again, timing both
 * directions and verifying the file length survives the first rename.
 */
@Test
public void test_100_renameHugeFile() throws Throwable {
    assumeHugeFileExists();
    describe("renaming %s to %s", hugefile, hugefileRenamed);
    final S3AFileSystem fs = getFileSystem();
    final S3AFileStatus sourceStatus = fs.getFileStatus(hugefile);
    final long filesize = sourceStatus.getLen();
    // ensure the destination is absent before timing the rename
    fs.delete(hugefileRenamed, false);
    final ContractTestUtils.NanoTimer renameTimer = new ContractTestUtils.NanoTimer();
    fs.rename(hugefile, hugefileRenamed);
    final long mb = Math.max(filesize / _1MB, 1);
    renameTimer.end("time to rename file of %d MB", mb);
    LOG.info("Time per MB to rename = {} nS", toHuman(renameTimer.nanosPerOperation(mb)));
    bandwidth(renameTimer, filesize);
    logFSState();
    final S3AFileStatus destStatus = fs.getFileStatus(hugefileRenamed);
    assertEquals(filesize, destStatus.getLen());
    // rename back so later tests find the file at its original path
    final ContractTestUtils.NanoTimer renameBackTimer = new ContractTestUtils.NanoTimer();
    fs.rename(hugefileRenamed, hugefile);
    renameBackTimer.end("Renaming back");
    LOG.info("Time per MB to rename = {} nS", toHuman(renameBackTimer.nanosPerOperation(mb)));
    bandwidth(renameBackTimer, filesize);
}
Also used : S3AFileSystem(org.apache.hadoop.fs.s3a.S3AFileSystem) S3AFileStatus(org.apache.hadoop.fs.s3a.S3AFileStatus) ContractTestUtils(org.apache.hadoop.fs.contract.ContractTestUtils) Test(org.junit.Test)

Example 4 with S3AFileStatus

Use of org.apache.hadoop.fs.s3a.S3AFileStatus in the Apache Hadoop project.

From the class AbstractSTestS3AHugeFiles, method test_010_CreateHugeFile.

/**
 * Create the huge file. First validates the configured test timeout
 * against a pessimistic 1 MB/s upload estimate and requires the file
 * size to be a multiple of the upload block size; then writes the file
 * block by block with progress logged every 10 MB and at 100%; finally
 * verifies the file exists with the expected length and that no PUT
 * requests or buffered blocks remain active.
 *
 * @throws IOException on any filesystem failure
 */
@Test
public void test_010_CreateHugeFile() throws IOException {
    assertFalse("Please run this test sequentially to avoid timeouts" + " and bandwidth problems", isParallelExecution());
    long filesize = getTestPropertyBytes(getConf(), KEY_HUGE_FILESIZE, DEFAULT_HUGE_FILESIZE);
    long filesizeMB = filesize / _1MB;
    // clean up from any previous attempts
    deleteHugeFile();
    describe("Creating file %s of size %d MB" + " with partition size %d buffered by %s", hugefile, filesizeMB, partitionSize, getBlockOutputBufferName());
    // now do a check of available upload time, with a pessimistic bandwidth
    // (that of remote upload tests). If the test times out then not only is
    // the test outcome lost, as the follow-on tests continue, they will
    // overlap with the ongoing upload test, for much confusion.
    int timeout = getTestTimeoutSeconds();
    // assume 1 MB/s upload bandwidth
    int bandwidth = _1MB;
    long uploadTime = filesize / bandwidth;
    // fail fast (with a rerun hint) rather than let the upload outlive the timeout
    assertTrue(String.format("Timeout set in %s seconds is too low;" + " estimating upload time of %d seconds at 1 MB/s." + " Rerun tests with -D%s=%d", timeout, uploadTime, KEY_TEST_TIMEOUT, uploadTime * 2), uploadTime < timeout);
    // whole-block writes below rely on the file being an exact number of blocks
    assertEquals("File size set in " + KEY_HUGE_FILESIZE + " = " + filesize + " is not a multiple of " + uploadBlockSize, 0, filesize % uploadBlockSize);
    // fill one block with a repeating 0..255 byte pattern
    byte[] data = new byte[uploadBlockSize];
    for (int i = 0; i < uploadBlockSize; i++) {
        data[i] = (byte) (i % 256);
    }
    long blocks = filesize / uploadBlockSize;
    long blocksPerMB = _1MB / uploadBlockSize;
    // perform the upload.
    // there's lots of logging here, so that a tail -f on the output log
    // can give a view of what is happening.
    S3AFileSystem fs = getFileSystem();
    StorageStatistics storageStatistics = fs.getStorageStatistics();
    String putRequests = Statistic.OBJECT_PUT_REQUESTS.getSymbol();
    String putBytes = Statistic.OBJECT_PUT_BYTES.getSymbol();
    Statistic putRequestsActive = Statistic.OBJECT_PUT_REQUESTS_ACTIVE;
    Statistic putBytesPending = Statistic.OBJECT_PUT_BYTES_PENDING;
    ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
    S3AInstrumentation.OutputStreamStatistics streamStatistics;
    long blocksPer10MB = blocksPerMB * 10;
    ProgressCallback progress = new ProgressCallback(timer);
    try (FSDataOutputStream out = fs.create(hugefile, true, uploadBlockSize, progress)) {
        try {
            streamStatistics = getOutputStreamStatistics(out);
        } catch (ClassCastException e) {
            // stream statistics are only available from the S3A block output
            // stream; fall back to null (and skip the stats assertions later)
            LOG.info("Wrapped output stream is not block stream: {}", out.getWrappedStream());
            streamStatistics = null;
        }
        for (long block = 1; block <= blocks; block++) {
            out.write(data);
            long written = block * uploadBlockSize;
            // every 10 MB and on file upload @ 100%, print some stats
            if (block % blocksPer10MB == 0 || written == filesize) {
                long percentage = written * 100 / filesize;
                double elapsedTime = timer.elapsedTime() / 1.0e9;
                double writtenMB = 1.0 * written / _1MB;
                LOG.info(String.format("[%02d%%] Buffered %.2f MB out of %d MB;" + " PUT %d bytes (%d pending) in %d operations (%d active);" + " elapsedTime=%.2fs; write to buffer bandwidth=%.2f MB/s", percentage, writtenMB, filesizeMB, storageStatistics.getLong(putBytes), gaugeValue(putBytesPending), storageStatistics.getLong(putRequests), gaugeValue(putRequestsActive), elapsedTime, writtenMB / elapsedTime));
            }
        }
        // now close the file
        // explicit close() so its duration can be measured; the implicit
        // close from try-with-resources then sees an already-closed stream
        // (presumably idempotent -- TODO confirm for this stream type)
        LOG.info("Closing stream {}", out);
        LOG.info("Statistics : {}", streamStatistics);
        ContractTestUtils.NanoTimer closeTimer = new ContractTestUtils.NanoTimer();
        out.close();
        closeTimer.end("time to close() output stream");
    }
    timer.end("time to write %d MB in blocks of %d", filesizeMB, uploadBlockSize);
    logFSState();
    bandwidth(timer, filesize);
    LOG.info("Statistics after stream closed: {}", streamStatistics);
    long putRequestCount = storageStatistics.getLong(putRequests);
    Long putByteCount = storageStatistics.getLong(putBytes);
    // NOTE(review): integer division gives whole MB per PUT; would divide by
    // zero if no PUT requests were recorded -- confirm that cannot happen here
    LOG.info("PUT {} bytes in {} operations; {} MB/operation", putByteCount, putRequestCount, putByteCount / (putRequestCount * _1MB));
    LOG.info("Time per PUT {} nS", toHuman(timer.nanosPerOperation(putRequestCount)));
    assertEquals("active put requests in \n" + fs, 0, gaugeValue(putRequestsActive));
    ContractTestUtils.assertPathExists(fs, "Huge file", hugefile);
    S3AFileStatus status = fs.getFileStatus(hugefile);
    ContractTestUtils.assertIsFile(hugefile, status);
    assertEquals("File size in " + status, filesize, status.getLen());
    // progress is always non-null here (assigned above); check kept defensively
    if (progress != null) {
        progress.verifyNoFailures("Put file " + hugefile + " of size " + filesize);
    }
    if (streamStatistics != null) {
        assertEquals("actively allocated blocks in " + streamStatistics, 0, streamStatistics.blocksActivelyAllocated());
    }
}
Also used : S3AFileSystem(org.apache.hadoop.fs.s3a.S3AFileSystem) StorageStatistics(org.apache.hadoop.fs.StorageStatistics) ContractTestUtils(org.apache.hadoop.fs.contract.ContractTestUtils) S3AFileStatus(org.apache.hadoop.fs.s3a.S3AFileStatus) Statistic(org.apache.hadoop.fs.s3a.Statistic) AtomicLong(java.util.concurrent.atomic.AtomicLong) S3AInstrumentation(org.apache.hadoop.fs.s3a.S3AInstrumentation) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)

Aggregations

ContractTestUtils (org.apache.hadoop.fs.contract.ContractTestUtils)4 S3AFileStatus (org.apache.hadoop.fs.s3a.S3AFileStatus)4 S3AFileSystem (org.apache.hadoop.fs.s3a.S3AFileSystem)4 Test (org.junit.Test)4 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)1 StorageStatistics (org.apache.hadoop.fs.StorageStatistics)1 S3AInstrumentation (org.apache.hadoop.fs.s3a.S3AInstrumentation)1 Statistic (org.apache.hadoop.fs.s3a.Statistic)1