Use of org.apache.hadoop.fs.s3a.S3AFileSystem in project hadoop by apache.
From the class ITestS3ADeleteManyFiles, the method testBulkRenameAndDelete:
/**
* CAUTION: If this test starts failing, please make sure that the
* {@link org.apache.hadoop.fs.s3a.Constants#MAX_THREADS} configuration is not
* set too low. Alternatively, consider reducing the
* <code>scale.test.operation.count</code> parameter in
* <code>getOperationCount()</code>.
*
* @see #getOperationCount()
*/
@Test
public void testBulkRenameAndDelete() throws Throwable {
  final Path scaleTestDir = path("testBulkRenameAndDelete");
  final Path srcDir = new Path(scaleTestDir, "src");
  final Path finalDir = new Path(scaleTestDir, "final");
  final long count = getOperationCount();
  final S3AFileSystem fs = getFileSystem();
  ContractTestUtils.rm(fs, scaleTestDir, true, false);
  fs.mkdirs(srcDir);
  fs.mkdirs(finalDir);
  int testBufferSize = fs.getConf().getInt(ContractTestUtils.IO_CHUNK_BUFFER_SIZE,
      ContractTestUtils.DEFAULT_IO_CHUNK_BUFFER_SIZE);
  // use Executor to speed up file creation
  ExecutorService exec = Executors.newFixedThreadPool(16);
  final ExecutorCompletionService<Boolean> completionService =
      new ExecutorCompletionService<>(exec);
  try {
    final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z');
    for (int i = 0; i < count; ++i) {
      final String fileName = "foo-" + i;
      completionService.submit(new Callable<Boolean>() {
        @Override
        public Boolean call() throws IOException {
          ContractTestUtils.createFile(fs, new Path(srcDir, fileName), false, data);
          return fs.exists(new Path(srcDir, fileName));
        }
      });
    }
    for (int i = 0; i < count; ++i) {
      final Future<Boolean> future = completionService.take();
      try {
        if (!future.get()) {
          LOG.warn("cannot create file");
        }
      } catch (ExecutionException e) {
        LOG.warn("Error while uploading file", e.getCause());
        throw e;
      }
    }
  } finally {
    exec.shutdown();
  }
  int nSrcFiles = fs.listStatus(srcDir).length;
  fs.rename(srcDir, finalDir);
  assertEquals(nSrcFiles, fs.listStatus(finalDir).length);
  ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename",
      new Path(srcDir, "foo-" + 0));
  ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename",
      new Path(srcDir, "foo-" + count / 2));
  ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename",
      new Path(srcDir, "foo-" + (count - 1)));
  ContractTestUtils.assertPathExists(fs, "not renamed to dest dir",
      new Path(finalDir, "foo-" + 0));
  ContractTestUtils.assertPathExists(fs, "not renamed to dest dir",
      new Path(finalDir, "foo-" + count / 2));
  ContractTestUtils.assertPathExists(fs, "not renamed to dest dir",
      new Path(finalDir, "foo-" + (count - 1)));
  ContractTestUtils.assertDeleted(fs, finalDir, true, false);
}
Use of org.apache.hadoop.fs.s3a.S3AFileSystem in project hadoop by apache.
From the class ITestS3ADirectoryPerformance, the method testListOperations:
@Test
public void testListOperations() throws Throwable {
  describe("Test recursive list operations");
  final Path scaleTestDir = path("testListOperations");
  final Path listDir = new Path(scaleTestDir, "lists");
  S3AFileSystem fs = getFileSystem();
  // scale factor.
  int scale = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
  int width = scale;
  int depth = scale;
  int files = scale;
  MetricDiff metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
  MetricDiff listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
  MetricDiff listContinueRequests = new MetricDiff(fs, OBJECT_CONTINUE_LIST_REQUESTS);
  MetricDiff listStatusCalls = new MetricDiff(fs, INVOCATION_LIST_FILES);
  MetricDiff getFileStatusCalls = new MetricDiff(fs, INVOCATION_GET_FILE_STATUS);
  NanoTimer createTimer = new NanoTimer();
  TreeScanResults created = createSubdirs(fs, listDir, depth, width, files, 0);
  // add some empty directories
  int emptyDepth = 1 * scale;
  int emptyWidth = 3 * scale;
  created.add(createSubdirs(fs, listDir, emptyDepth, emptyWidth, 0, 0,
      "empty", "f-", ""));
  createTimer.end("Time to create %s", created);
  LOG.info("Time per operation: {}",
      toHuman(createTimer.nanosPerOperation(created.totalCount())));
  printThenReset(LOG, metadataRequests, listRequests, listContinueRequests,
      listStatusCalls, getFileStatusCalls);
  describe("Listing files via treewalk");
  try {
    // Scan the directory via an explicit tree walk.
    // This is the baseline for any listing speedups.
    NanoTimer treeWalkTimer = new NanoTimer();
    TreeScanResults treewalkResults = treeWalk(fs, listDir);
    treeWalkTimer.end("List status via treewalk of %s", created);
    printThenReset(LOG, metadataRequests, listRequests, listContinueRequests,
        listStatusCalls, getFileStatusCalls);
    assertEquals("Files found in treewalk"
        + " created=" + created + " listed=" + treewalkResults,
        created.getFileCount(), treewalkResults.getFileCount());
    describe("Listing files via listFiles(recursive=true)");
    // listFiles() does the recursion internally
    NanoTimer listFilesRecursiveTimer = new NanoTimer();
    TreeScanResults listFilesResults =
        new TreeScanResults(fs.listFiles(listDir, true));
    listFilesRecursiveTimer.end("listFiles(recursive=true) of %s", created);
    assertEquals("Files found in listFiles(recursive=true)"
        + " created=" + created + " listed=" + listFilesResults,
        created.getFileCount(), listFilesResults.getFileCount());
    // only two list operations should have taken place
    print(LOG, metadataRequests, listRequests, listContinueRequests,
        listStatusCalls, getFileStatusCalls);
    assertEquals(listRequests.toString(), 2, listRequests.diff());
    reset(metadataRequests, listRequests, listContinueRequests,
        listStatusCalls, getFileStatusCalls);
  } finally {
    describe("deletion");
    // deletion at the end of the run
    NanoTimer deleteTimer = new NanoTimer();
    fs.delete(listDir, true);
    deleteTimer.end("Deleting directory tree");
    printThenReset(LOG, metadataRequests, listRequests, listContinueRequests,
        listStatusCalls, getFileStatusCalls);
  }
}
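The MetricDiff objects are what make the "only two list operations" assertion possible: each one records a counter's value at construction (or reset()) and reports the growth since then via diff(). A minimal sketch of that capture-and-diff idea, with a hypothetical AtomicLong-backed counter standing in for the S3A instrumentation:

import java.util.concurrent.atomic.AtomicLong;

public class CounterDiffSketch {

  // Captures a counter's value now; diff() reports growth since the capture,
  // mirroring how MetricDiff brackets the listing calls in the test above.
  static class CounterDiff {
    private final AtomicLong counter;
    private long baseline;

    CounterDiff(AtomicLong counter) {
      this.counter = counter;
      reset();
    }

    final void reset() {
      baseline = counter.get();
    }

    long diff() {
      return counter.get() - baseline;
    }
  }

  public static void main(String[] args) {
    AtomicLong listRequests = new AtomicLong();   // hypothetical request counter
    CounterDiff listDiff = new CounterDiff(listRequests);
    listRequests.addAndGet(2);                    // pretend two LISTs were issued
    System.out.println("list requests since capture: " + listDiff.diff()); // 2
    listDiff.reset();                             // start a fresh window
  }
}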
Use of org.apache.hadoop.fs.s3a.S3AFileSystem in project hadoop by apache.
From the class AbstractSTestS3AHugeFiles, the method test_010_CreateHugeFile:
@Test
public void test_010_CreateHugeFile() throws IOException {
  assertFalse("Please run this test sequentially to avoid timeouts"
      + " and bandwidth problems", isParallelExecution());
  long filesize = getTestPropertyBytes(getConf(), KEY_HUGE_FILESIZE,
      DEFAULT_HUGE_FILESIZE);
  long filesizeMB = filesize / _1MB;
  // clean up from any previous attempts
  deleteHugeFile();
  describe("Creating file %s of size %d MB"
      + " with partition size %d buffered by %s",
      hugefile, filesizeMB, partitionSize, getBlockOutputBufferName());
  // now do a check of available upload time, with a pessimistic bandwidth
  // (that of remote upload tests). If the test times out then not only is
  // the test outcome lost; as the follow-on tests continue, they will
  // overlap with the ongoing upload, causing much confusion.
  int timeout = getTestTimeoutSeconds();
  // assume 1 MB/s upload bandwidth
  int bandwidth = _1MB;
  long uploadTime = filesize / bandwidth;
  assertTrue(String.format("Timeout set in %s seconds is too low;"
      + " estimating upload time of %d seconds at 1 MB/s."
      + " Rerun tests with -D%s=%d",
      timeout, uploadTime, KEY_TEST_TIMEOUT, uploadTime * 2),
      uploadTime < timeout);
  assertEquals("File size set in " + KEY_HUGE_FILESIZE + " = " + filesize
      + " is not a multiple of " + uploadBlockSize,
      0, filesize % uploadBlockSize);
  byte[] data = new byte[uploadBlockSize];
  for (int i = 0; i < uploadBlockSize; i++) {
    data[i] = (byte) (i % 256);
  }
  long blocks = filesize / uploadBlockSize;
  long blocksPerMB = _1MB / uploadBlockSize;
  // perform the upload.
  // there's lots of logging here, so that a tail -f on the output log
  // can give a view of what is happening.
  S3AFileSystem fs = getFileSystem();
  StorageStatistics storageStatistics = fs.getStorageStatistics();
  String putRequests = Statistic.OBJECT_PUT_REQUESTS.getSymbol();
  String putBytes = Statistic.OBJECT_PUT_BYTES.getSymbol();
  Statistic putRequestsActive = Statistic.OBJECT_PUT_REQUESTS_ACTIVE;
  Statistic putBytesPending = Statistic.OBJECT_PUT_BYTES_PENDING;
  ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
  S3AInstrumentation.OutputStreamStatistics streamStatistics;
  long blocksPer10MB = blocksPerMB * 10;
  ProgressCallback progress = new ProgressCallback(timer);
  try (FSDataOutputStream out =
      fs.create(hugefile, true, uploadBlockSize, progress)) {
    try {
      streamStatistics = getOutputStreamStatistics(out);
    } catch (ClassCastException e) {
      LOG.info("Wrapped output stream is not block stream: {}",
          out.getWrappedStream());
      streamStatistics = null;
    }
    for (long block = 1; block <= blocks; block++) {
      out.write(data);
      long written = block * uploadBlockSize;
      // every 10 MB and on file upload @ 100%, print some stats
      if (block % blocksPer10MB == 0 || written == filesize) {
        long percentage = written * 100 / filesize;
        double elapsedTime = timer.elapsedTime() / 1.0e9;
        double writtenMB = 1.0 * written / _1MB;
        LOG.info(String.format("[%02d%%] Buffered %.2f MB out of %d MB;"
            + " PUT %d bytes (%d pending) in %d operations (%d active);"
            + " elapsedTime=%.2fs; write to buffer bandwidth=%.2f MB/s",
            percentage, writtenMB, filesizeMB,
            storageStatistics.getLong(putBytes),
            gaugeValue(putBytesPending),
            storageStatistics.getLong(putRequests),
            gaugeValue(putRequestsActive),
            elapsedTime, writtenMB / elapsedTime));
      }
    }
    // now close the file
    LOG.info("Closing stream {}", out);
    LOG.info("Statistics : {}", streamStatistics);
    ContractTestUtils.NanoTimer closeTimer = new ContractTestUtils.NanoTimer();
    out.close();
    closeTimer.end("time to close() output stream");
  }
  timer.end("time to write %d MB in blocks of %d", filesizeMB, uploadBlockSize);
  logFSState();
  bandwidth(timer, filesize);
  LOG.info("Statistics after stream closed: {}", streamStatistics);
  long putRequestCount = storageStatistics.getLong(putRequests);
  Long putByteCount = storageStatistics.getLong(putBytes);
  LOG.info("PUT {} bytes in {} operations; {} MB/operation",
      putByteCount, putRequestCount, putByteCount / (putRequestCount * _1MB));
  LOG.info("Time per PUT {} nS", toHuman(timer.nanosPerOperation(putRequestCount)));
  assertEquals("active put requests in \n" + fs, 0, gaugeValue(putRequestsActive));
  ContractTestUtils.assertPathExists(fs, "Huge file", hugefile);
  S3AFileStatus status = fs.getFileStatus(hugefile);
  ContractTestUtils.assertIsFile(hugefile, status);
  assertEquals("File size in " + status, filesize, status.getLen());
  if (progress != null) {
    progress.verifyNoFailures("Put file " + hugefile + " of size " + filesize);
  }
  if (streamStatistics != null) {
    assertEquals("actively allocated blocks in " + streamStatistics,
        0, streamStatistics.blocksActivelyAllocated());
  }
}
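One detail worth calling out: before writing a byte, the test estimates upload time from a pessimistic 1 MB/s bandwidth and fails fast if the configured timeout cannot cover it, so a timed-out upload cannot bleed into the follow-on tests. That check reduces to a few lines; the names below are illustrative, not the test's own constants:

public class TimeoutCheckSketch {
  private static final long ONE_MB = 1024 * 1024;

  /** Fail fast if an upload at the assumed bandwidth cannot finish in time. */
  static void requireSufficientTimeout(long filesizeBytes, long timeoutSeconds) {
    long bandwidthBytesPerSecond = ONE_MB;        // pessimistic: 1 MB/s
    long uploadTimeSeconds = filesizeBytes / bandwidthBytesPerSecond;
    if (uploadTimeSeconds >= timeoutSeconds) {
      throw new IllegalStateException(String.format(
          "Timeout of %d seconds is too low; estimated upload time is %d seconds"
              + " at 1 MB/s. Rerun with a timeout of at least %d seconds.",
          timeoutSeconds, uploadTimeSeconds, uploadTimeSeconds * 2));
    }
  }

  public static void main(String[] args) {
    requireSufficientTimeout(64 * ONE_MB, 600);   // 64 MB in a 600s window: fine
  }
}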
Use of org.apache.hadoop.fs.s3a.S3AFileSystem in project hadoop by apache.
From the class ITestS3ADirectoryPerformance, the method testTimeToStatNonEmptyDirectory:
@Test
public void testTimeToStatNonEmptyDirectory() throws Throwable {
  describe("Time to stat a non-empty directory");
  Path path = path("dir");
  S3AFileSystem fs = getFileSystem();
  fs.mkdirs(path);
  touch(fs, new Path(path, "file"));
  timeToStatPath(path);
}
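Why a non-empty directory specifically? In the S3A client of this era, a directory with children has no object of its own, so getFileStatus can only discover it by falling through its HEAD probes to a LIST request, the most expensive path. A hypothetical sketch of that probe order follows; the helper methods are illustrative stubs, not the real S3AFileSystem internals:

import java.io.FileNotFoundException;
import java.io.IOException;

public class StatProbeSketch {

  // Illustrative probe order: file object, empty-dir marker, then LIST.
  String statViaProbes(String key) throws IOException {
    if (headObject(key)) {
      return "file";                     // a plain object exists at the key
    }
    if (headObject(key + "/")) {
      return "empty directory";          // an empty-directory marker exists
    }
    if (listHasChildren(key + "/")) {
      return "non-empty directory";      // only a LIST reveals the children
    }
    throw new FileNotFoundException(key);
  }

  // Stubs standing in for HEAD and LIST requests against the object store.
  boolean headObject(String key) { return false; }
  boolean listHasChildren(String prefix) { return true; }

  public static void main(String[] args) throws IOException {
    System.out.println(new StatProbeSketch().statViaProbes("dir"));
  }
}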
Use of org.apache.hadoop.fs.s3a.S3AFileSystem in project hadoop by apache.
From the class ITestS3ADirectoryPerformance, the method timeToStatPath:
private void timeToStatPath(Path path) throws IOException {
  describe("Timing getFileStatus(\"%s\")", path);
  S3AFileSystem fs = getFileSystem();
  MetricDiff metadataRequests =
      new MetricDiff(fs, Statistic.OBJECT_METADATA_REQUESTS);
  MetricDiff listRequests =
      new MetricDiff(fs, Statistic.OBJECT_LIST_REQUESTS);
  long attempts = getOperationCount();
  NanoTimer timer = new NanoTimer();
  for (long l = 0; l < attempts; l++) {
    fs.getFileStatus(path);
  }
  timer.end("Time to execute %d getFileStatus calls", attempts);
  LOG.info("Time per call: {}", toHuman(timer.nanosPerOperation(attempts)));
  LOG.info("metadata: {}", metadataRequests);
  LOG.info("metadata per operation: {}", metadataRequests.diff() / attempts);
  LOG.info("listObjects: {}", listRequests);
  LOG.info("listObjects per operation: {}", listRequests.diff() / attempts);
}