Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class TestFsDatasetCache, the method testReCacheAfterUncache:
@Test(timeout = 60000)
public void testReCacheAfterUncache() throws Exception {
  final int TOTAL_BLOCKS_PER_CACHE =
      Ints.checkedCast(CACHE_CAPACITY / BLOCK_SIZE);
  BlockReaderTestUtil.enableHdfsCachingTracing();
  Assert.assertEquals(0, CACHE_CAPACITY % BLOCK_SIZE);
  // Create a small file
  final Path SMALL_FILE = new Path("/smallFile");
  DFSTestUtil.createFile(fs, SMALL_FILE, BLOCK_SIZE, (short) 1, 0xcafe);
  // Create a file that will take up the whole cache
  final Path BIG_FILE = new Path("/bigFile");
  DFSTestUtil.createFile(fs, BIG_FILE,
      TOTAL_BLOCKS_PER_CACHE * BLOCK_SIZE, (short) 1, 0xbeef);
  final DistributedFileSystem dfs = cluster.getFileSystem();
  dfs.addCachePool(new CachePoolInfo("pool"));
  final long bigCacheDirectiveId = dfs.addCacheDirective(
      new CacheDirectiveInfo.Builder()
          .setPool("pool")
          .setPath(BIG_FILE)
          .setReplication((short) 1)
          .build());
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
      long blocksCached =
          MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
      if (blocksCached != TOTAL_BLOCKS_PER_CACHE) {
        LOG.info("waiting for " + TOTAL_BLOCKS_PER_CACHE + " to "
            + "be cached. Right now only " + blocksCached
            + " blocks are cached.");
        return false;
      }
      LOG.info(TOTAL_BLOCKS_PER_CACHE + " blocks are now cached.");
      return true;
    }
  }, 1000, 30000);
  // Try to cache a smaller file. It should fail.
  final long shortCacheDirectiveId = dfs.addCacheDirective(
      new CacheDirectiveInfo.Builder()
          .setPool("pool")
          .setPath(SMALL_FILE)
          .setReplication((short) 1)
          .build());
  Thread.sleep(10000);
  MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
  Assert.assertEquals(TOTAL_BLOCKS_PER_CACHE,
      MetricsAsserts.getLongCounter("BlocksCached", dnMetrics));
  // Uncache the big file and verify that the small file can now be
  // cached (regression test for HDFS-6107)
  dfs.removeCacheDirective(bigCacheDirectiveId);
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      RemoteIterator<CacheDirectiveEntry> iter;
      try {
        iter = dfs.listCacheDirectives(
            new CacheDirectiveInfo.Builder().build());
        CacheDirectiveEntry entry;
        do {
          entry = iter.next();
        } while (entry.getInfo().getId() != shortCacheDirectiveId);
        if (entry.getStats().getFilesCached() != 1) {
          LOG.info("waiting for directive " + shortCacheDirectiveId
              + " to be cached. stats = " + entry.getStats());
          return false;
        }
        LOG.info("directive " + shortCacheDirectiveId + " has been cached.");
      } catch (IOException e) {
        Assert.fail("unexpected exception" + e.toString());
      }
      return true;
    }
  }, 1000, 30000);
  dfs.removeCacheDirective(shortCacheDirectiveId);
}
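The supplier above drains a RemoteIterator by hand with hasNext()/next(). Because RemoteIterator is not a java.util.Iterator (both of its methods can throw IOException, and it cannot be used in a for-each loop), that drain loop is a recurring pattern. A minimal sketch of factoring it into a helper follows; the class and method names (RemoteIteratorUtil, toList) are illustrative, not part of Hadoop's API.

// A minimal sketch, not part of the test above: draining a RemoteIterator
// into a java.util.List. RemoteIteratorUtil and toList are illustrative names.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.RemoteIterator;

public final class RemoteIteratorUtil {
  private RemoteIteratorUtil() {
  }

  /** Copy every remaining element of the iterator into a List. */
  public static <T> List<T> toList(RemoteIterator<T> iter) throws IOException {
    List<T> results = new ArrayList<>();
    while (iter.hasNext()) {
      results.add(iter.next());
    }
    return results;
  }
}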
Use of org.apache.hadoop.fs.RemoteIterator in project druid by druid-io.
In class HdfsDataSegmentPuller, the method getSegmentFiles:
public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir)
    throws SegmentLoadingException {
  try {
    final FileSystem fs = path.getFileSystem(config);
    if (fs.isDirectory(path)) {
      try {
        return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {
          @Override
          public FileUtils.FileCopyResult call() throws Exception {
            if (!fs.exists(path)) {
              throw new SegmentLoadingException("No files found at [%s]", path.toString());
            }
            final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
            final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
            final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
            while (children.hasNext()) {
              final LocatedFileStatus child = children.next();
              final Path childPath = child.getPath();
              final String fname = childPath.getName();
              if (fs.isDirectory(childPath)) {
                log.warn("[%s] is a child directory, skipping", childPath.toString());
              } else {
                final File outFile = new File(outDir, fname);
                // Actual copy
                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                result.addFile(outFile);
              }
            }
            log.info("Copied %d bytes from [%s] to [%s]", result.size(),
                path.toString(), outDir.getAbsolutePath());
            return result;
          }
        }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    } else if (CompressionUtils.isZip(path.getName())) {
      // -------- zip ---------
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          }, outDir, shouldRetryPredicate(), false);
      log.info("Unzipped %d bytes from [%s] to [%s]", result.size(),
          path.toString(), outDir.getAbsolutePath());
      return result;
    } else if (CompressionUtils.isGz(path.getName())) {
      // -------- gzip ---------
      final String fname = path.getName();
      final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          }, outFile);
      log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(),
          path.toString(), outFile.getAbsolutePath());
      return result;
    } else {
      throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
  }
}
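getSegmentFiles asks for a non-recursive listing (fs.listFiles(path, false)) and therefore has to skip child directories itself. As a hedged aside, the recursive form of the same FileSystem#listFiles call returns only files, which removes that per-entry check. The sketch below is not Druid code; HdfsSizeUtil and totalBytes are made-up names.

// A hedged sketch, not Druid code: totalling the size of every file under a
// directory with the recursive form of FileSystem#listFiles.
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class HdfsSizeUtil {
  private HdfsSizeUtil() {
  }

  public static long totalBytes(FileSystem fs, Path dir) throws IOException {
    long total = 0L;
    // listFiles(dir, true) walks the tree and returns only files, so there is
    // no per-entry isDirectory() check as in the non-recursive listing above.
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(dir, true);
    while (files.hasNext()) {
      total += files.next().getLen();
    }
    return total;
  }
}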
Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class ViewFileSystem, the method listLocatedStatus:
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
    final PathFilter filter) throws FileNotFoundException, IOException {
  final InodeTree.ResolveResult<FileSystem> res =
      fsState.resolve(getUriPath(f), true);
  final RemoteIterator<LocatedFileStatus> statusIter =
      res.targetFileSystem.listLocatedStatus(res.remainingPath);
  if (res.isInternalDir()) {
    return statusIter;
  }
  return new RemoteIterator<LocatedFileStatus>() {
    @Override
    public boolean hasNext() throws IOException {
      return statusIter.hasNext();
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      final LocatedFileStatus status = statusIter.next();
      return (LocatedFileStatus) fixFileStatus(status,
          getChrootedPath(res, status, f));
    }
  };
}
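listLocatedStatus shows the second common RemoteIterator pattern: wrapping a delegate iterator and transforming each element on the way out (here via fixFileStatus and getChrootedPath). A generic version of that wrapper might look like the sketch below; the helper name mappingIterator and the use of java.util.function.Function are assumptions for illustration, not Hadoop API.

// A hedged sketch of the wrap-and-transform pattern above, made generic.
// RemoteIteratorMapping and mappingIterator are illustrative names.
import java.io.IOException;
import java.util.function.Function;
import org.apache.hadoop.fs.RemoteIterator;

public final class RemoteIteratorMapping {
  private RemoteIteratorMapping() {
  }

  public static <S, T> RemoteIterator<T> mappingIterator(
      final RemoteIterator<S> source, final Function<S, T> mapper) {
    return new RemoteIterator<T>() {
      @Override
      public boolean hasNext() throws IOException {
        return source.hasNext();
      }

      @Override
      public T next() throws IOException {
        // Pull from the delegate and transform each element lazily.
        return mapper.apply(source.next());
      }
    };
  }
}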
Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class DistributedFileSystem, the method listCacheDirectives:
/**
 * List cache directives. Incrementally fetches results from the server.
 *
 * @param filter Filter parameters to use when listing the directives, null to
 *               list all directives visible to us.
 * @return A RemoteIterator which returns CacheDirectiveEntry objects.
 */
public RemoteIterator<CacheDirectiveEntry> listCacheDirectives(
    CacheDirectiveInfo filter) throws IOException {
  if (filter == null) {
    filter = new CacheDirectiveInfo.Builder().build();
  }
  if (filter.getPath() != null) {
    filter = new CacheDirectiveInfo.Builder(filter)
        .setPath(new Path(getPathName(fixRelativePart(filter.getPath()))))
        .build();
  }
  final RemoteIterator<CacheDirectiveEntry> iter =
      dfs.listCacheDirectives(filter);
  return new RemoteIterator<CacheDirectiveEntry>() {
    @Override
    public boolean hasNext() throws IOException {
      return iter.hasNext();
    }

    @Override
    public CacheDirectiveEntry next() throws IOException {
      // Although the paths we get back from the NameNode should always be
      // absolute, we call makeQualified to add the scheme and authority of
      // this DistributedFilesystem.
      CacheDirectiveEntry desc = iter.next();
      CacheDirectiveInfo info = desc.getInfo();
      Path p = info.getPath().makeQualified(getUri(), getWorkingDirectory());
      return new CacheDirectiveEntry(
          new CacheDirectiveInfo.Builder(info).setPath(p).build(),
          desc.getStats());
    }
  };
}
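From the caller's side, the returned iterator is consumed with the usual hasNext()/next() loop. A short usage sketch follows; printDirectives is a hypothetical method, and it assumes an already-initialized DistributedFileSystem and an existing cache pool named "pool".

// A caller-side sketch, not Hadoop code. CacheDirectiveLister and
// printDirectives are hypothetical names.
import java.io.IOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;

public final class CacheDirectiveLister {
  static void printDirectives(DistributedFileSystem dfs) throws IOException {
    CacheDirectiveInfo filter =
        new CacheDirectiveInfo.Builder().setPool("pool").build();
    RemoteIterator<CacheDirectiveEntry> it = dfs.listCacheDirectives(filter);
    while (it.hasNext()) {
      CacheDirectiveEntry entry = it.next();
      // Paths come back fully qualified with this filesystem's scheme and
      // authority, as the javadoc above notes.
      System.out.println(entry.getInfo().getId() + " -> "
          + entry.getInfo().getPath());
    }
  }
}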
Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class TestWebHDFS, the method testLargeDirectory:
@Test(timeout = 300000)
public void testLargeDirectory() throws Exception {
  final Configuration conf = WebHdfsTestUtil.createConf();
  final int listLimit = 2;
  // force small chunking of directory listing
  conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, listLimit);
  // force paths to be only owner-accessible to ensure ugi isn't changing
  // during listStatus
  FsPermission.setUMask(conf, new FsPermission((short) 0077));
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  try {
    cluster.waitActive();
    WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsConstants.WEBHDFS_SCHEME)
        .setPermission(new Path("/"),
            new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    // trick the NN into believing the login user is not the superuser so we
    // can tell if the correct user is used by listStatus
    UserGroupInformation.setLoginUser(
        UserGroupInformation.createUserForTesting("not-superuser",
            new String[] { "not-supergroup" }));
    UserGroupInformation.createUserForTesting("me", new String[] { "my-group" })
        .doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws IOException, URISyntaxException {
            FileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf,
                WebHdfsConstants.WEBHDFS_SCHEME);
            Path d = new Path("/my-dir");
            Assert.assertTrue(fs.mkdirs(d));
            // Iterator should have no items when dir is empty
            RemoteIterator<FileStatus> it = fs.listStatusIterator(d);
            assertFalse(it.hasNext());
            Path p = new Path(d, "file-" + 0);
            Assert.assertTrue(fs.createNewFile(p));
            // Iterator should have an item when dir is not empty
            it = fs.listStatusIterator(d);
            assertTrue(it.hasNext());
            it.next();
            assertFalse(it.hasNext());
            for (int i = 1; i < listLimit * 3; i++) {
              p = new Path(d, "file-" + i);
              Assert.assertTrue(fs.createNewFile(p));
            }
            // Check the FileStatus[] listing
            FileStatus[] statuses = fs.listStatus(d);
            Assert.assertEquals(listLimit * 3, statuses.length);
            // Check the iterator-based listing
            GenericTestUtils.setLogLevel(WebHdfsFileSystem.LOG, Level.TRACE);
            GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.TRACE);
            it = fs.listStatusIterator(d);
            int count = 0;
            while (it.hasNext()) {
              FileStatus stat = it.next();
              assertEquals("FileStatuses not equal", statuses[count], stat);
              count++;
            }
            assertEquals("Different # of statuses!", statuses.length, count);
            // Do some more basic iterator tests
            it = fs.listStatusIterator(d);
            // Try advancing the iterator without calling hasNext()
            for (int i = 0; i < statuses.length; i++) {
              FileStatus stat = it.next();
              assertEquals("FileStatuses not equal", statuses[i], stat);
            }
            assertFalse("No more items expected", it.hasNext());
            // Try doing next when out of items
            try {
              it.next();
              fail("Iterator should error if out of elements.");
            } catch (IllegalStateException e) {
              // pass
            }
            return null;
          }
        });
  } finally {
    cluster.shutdown();
  }
}
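The iterator returned by listStatusIterator is what makes the small DFS_LIST_LIMIT interesting here: as the test's own comment notes, the listing is fetched in small chunks behind the scenes, while the caller only sees hasNext()/next(). A minimal caller-side sketch of that usage is below; DirectoryCounter and countEntries are illustrative names, not part of the test.

// A caller-side sketch: counting directory entries through
// FileSystem#listStatusIterator. DirectoryCounter and countEntries are
// illustrative names.
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class DirectoryCounter {
  private DirectoryCounter() {
  }

  public static int countEntries(FileSystem fs, Path dir) throws IOException {
    int count = 0;
    RemoteIterator<FileStatus> it = fs.listStatusIterator(dir);
    while (it.hasNext()) {
      it.next();
      count++;
    }
    return count;
  }
}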