Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class TestFsDatasetCache, the method testReCacheAfterUncache:
@Test(timeout = 60000)
public void testReCacheAfterUncache() throws Exception {
  final int TOTAL_BLOCKS_PER_CACHE =
      Ints.checkedCast(CACHE_CAPACITY / BLOCK_SIZE);
  BlockReaderTestUtil.enableHdfsCachingTracing();
  Assert.assertEquals(0, CACHE_CAPACITY % BLOCK_SIZE);
  // Create a small file
  final Path SMALL_FILE = new Path("/smallFile");
  DFSTestUtil.createFile(fs, SMALL_FILE, BLOCK_SIZE, (short) 1, 0xcafe);
  // Create a file that will take up the whole cache
  final Path BIG_FILE = new Path("/bigFile");
  DFSTestUtil.createFile(fs, BIG_FILE,
      TOTAL_BLOCKS_PER_CACHE * BLOCK_SIZE, (short) 1, 0xbeef);
  final DistributedFileSystem dfs = cluster.getFileSystem();
  dfs.addCachePool(new CachePoolInfo("pool"));
  final long bigCacheDirectiveId = dfs.addCacheDirective(
      new CacheDirectiveInfo.Builder()
          .setPool("pool")
          .setPath(BIG_FILE)
          .setReplication((short) 1)
          .build());
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
      long blocksCached =
          MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
      if (blocksCached != TOTAL_BLOCKS_PER_CACHE) {
        LOG.info("waiting for " + TOTAL_BLOCKS_PER_CACHE + " to "
            + "be cached. Right now only " + blocksCached
            + " blocks are cached.");
        return false;
      }
      LOG.info(TOTAL_BLOCKS_PER_CACHE + " blocks are now cached.");
      return true;
    }
  }, 1000, 30000);
  // Try to cache a smaller file. It should fail.
  final long shortCacheDirectiveId = dfs.addCacheDirective(
      new CacheDirectiveInfo.Builder()
          .setPool("pool")
          .setPath(SMALL_FILE)
          .setReplication((short) 1)
          .build());
  Thread.sleep(10000);
  MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
  Assert.assertEquals(TOTAL_BLOCKS_PER_CACHE,
      MetricsAsserts.getLongCounter("BlocksCached", dnMetrics));
  // Uncache the big file and verify that the small file can now be
  // cached (regression test for HDFS-6107)
  dfs.removeCacheDirective(bigCacheDirectiveId);
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      RemoteIterator<CacheDirectiveEntry> iter;
      try {
        iter = dfs.listCacheDirectives(
            new CacheDirectiveInfo.Builder().build());
        CacheDirectiveEntry entry;
        do {
          entry = iter.next();
        } while (entry.getInfo().getId() != shortCacheDirectiveId);
        if (entry.getStats().getFilesCached() != 1) {
          LOG.info("waiting for directive " + shortCacheDirectiveId
              + " to be cached. stats = " + entry.getStats());
          return false;
        }
        LOG.info("directive " + shortCacheDirectiveId + " has been cached.");
      } catch (IOException e) {
        Assert.fail("unexpected exception" + e.toString());
      }
      return true;
    }
  }, 1000, 30000);
  dfs.removeCacheDirective(shortCacheDirectiveId);
}
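The supplier above drains a RemoteIterator by hand with hasNext()/next(). Because RemoteIterator is not a java.util.Iterator (both of its methods can throw IOException, and it cannot be used in a for-each loop), that drain loop is a recurring pattern. A minimal sketch of factoring it into a helper follows; the class and method names (RemoteIteratorUtil, toList) are illustrative, not part of Hadoop's API.

// A minimal sketch, not part of the test above: draining a RemoteIterator
// into a java.util.List. RemoteIteratorUtil and toList are illustrative names.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.RemoteIterator;

public final class RemoteIteratorUtil {
  private RemoteIteratorUtil() {
  }

  /** Copy every remaining element of the iterator into a List. */
  public static <T> List<T> toList(RemoteIterator<T> iter) throws IOException {
    List<T> results = new ArrayList<>();
    while (iter.hasNext()) {
      results.add(iter.next());
    }
    return results;
  }
}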
Use of org.apache.hadoop.fs.RemoteIterator in project druid by druid-io.
In class HdfsDataSegmentPuller, the method getSegmentFiles:
public FileUtils.FileCopyResult getSegmentFiles(final Path path, final File outDir)
    throws SegmentLoadingException {
  try {
    final FileSystem fs = path.getFileSystem(config);
    if (fs.isDirectory(path)) {
      try {
        return RetryUtils.retry(new Callable<FileUtils.FileCopyResult>() {
          @Override
          public FileUtils.FileCopyResult call() throws Exception {
            if (!fs.exists(path)) {
              throw new SegmentLoadingException("No files found at [%s]", path.toString());
            }
            final RemoteIterator<LocatedFileStatus> children = fs.listFiles(path, false);
            final ArrayList<FileUtils.FileCopyResult> localChildren = new ArrayList<>();
            final FileUtils.FileCopyResult result = new FileUtils.FileCopyResult();
            while (children.hasNext()) {
              final LocatedFileStatus child = children.next();
              final Path childPath = child.getPath();
              final String fname = childPath.getName();
              if (fs.isDirectory(childPath)) {
                log.warn("[%s] is a child directory, skipping", childPath.toString());
              } else {
                final File outFile = new File(outDir, fname);
                // Actual copy
                fs.copyToLocalFile(childPath, new Path(outFile.toURI()));
                result.addFile(outFile);
              }
            }
            log.info("Copied %d bytes from [%s] to [%s]", result.size(),
                path.toString(), outDir.getAbsolutePath());
            return result;
          }
        }, shouldRetryPredicate(), DEFAULT_RETRY_COUNT);
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    } else if (CompressionUtils.isZip(path.getName())) {
      // -------- zip ---------
      final FileUtils.FileCopyResult result = CompressionUtils.unzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          }, outDir, shouldRetryPredicate(), false);
      log.info("Unzipped %d bytes from [%s] to [%s]", result.size(),
          path.toString(), outDir.getAbsolutePath());
      return result;
    } else if (CompressionUtils.isGz(path.getName())) {
      // -------- gzip ---------
      final String fname = path.getName();
      final File outFile = new File(outDir, CompressionUtils.getGzBaseName(fname));
      final FileUtils.FileCopyResult result = CompressionUtils.gunzip(
          new ByteSource() {
            @Override
            public InputStream openStream() throws IOException {
              return getInputStream(path);
            }
          }, outFile);
      log.info("Gunzipped %d bytes from [%s] to [%s]", result.size(),
          path.toString(), outFile.getAbsolutePath());
      return result;
    } else {
      throw new SegmentLoadingException("Do not know how to handle file type at [%s]", path.toString());
    }
  } catch (IOException e) {
    throw new SegmentLoadingException(e, "Error loading [%s]", path.toString());
  }
}
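getSegmentFiles asks for a non-recursive listing (fs.listFiles(path, false)) and therefore has to skip child directories itself. As a hedged aside, the recursive form of the same FileSystem#listFiles call returns only files, which removes that per-entry check. The sketch below is not Druid code; HdfsSizeUtil and totalBytes are made-up names.

// A hedged sketch, not Druid code: totalling the size of every file under a
// directory with the recursive form of FileSystem#listFiles.
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class HdfsSizeUtil {
  private HdfsSizeUtil() {
  }

  public static long totalBytes(FileSystem fs, Path dir) throws IOException {
    long total = 0L;
    // listFiles(dir, true) walks the tree and returns only files, so there is
    // no per-entry isDirectory() check as in the non-recursive listing above.
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(dir, true);
    while (files.hasNext()) {
      total += files.next().getLen();
    }
    return total;
  }
}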
Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class ViewFileSystem, the method listLocatedStatus:
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
    final PathFilter filter) throws FileNotFoundException, IOException {
  final InodeTree.ResolveResult<FileSystem> res =
      fsState.resolve(getUriPath(f), true);
  final RemoteIterator<LocatedFileStatus> statusIter =
      res.targetFileSystem.listLocatedStatus(res.remainingPath);
  if (res.isInternalDir()) {
    return statusIter;
  }
  return new RemoteIterator<LocatedFileStatus>() {
    @Override
    public boolean hasNext() throws IOException {
      return statusIter.hasNext();
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      final LocatedFileStatus status = statusIter.next();
      return (LocatedFileStatus) fixFileStatus(status,
          getChrootedPath(res, status, f));
    }
  };
}
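listLocatedStatus shows the second common RemoteIterator pattern: wrapping a delegate iterator and transforming each element on the way out (here via fixFileStatus and getChrootedPath). A generic version of that wrapper might look like the sketch below; the helper name mappingIterator and the use of java.util.function.Function are assumptions for illustration, not Hadoop API.

// A hedged sketch of the wrap-and-transform pattern above, made generic.
// RemoteIteratorMapping and mappingIterator are illustrative names.
import java.io.IOException;
import java.util.function.Function;
import org.apache.hadoop.fs.RemoteIterator;

public final class RemoteIteratorMapping {
  private RemoteIteratorMapping() {
  }

  public static <S, T> RemoteIterator<T> mappingIterator(
      final RemoteIterator<S> source, final Function<S, T> mapper) {
    return new RemoteIterator<T>() {
      @Override
      public boolean hasNext() throws IOException {
        return source.hasNext();
      }

      @Override
      public T next() throws IOException {
        // Pull from the delegate and transform each element lazily.
        return mapper.apply(source.next());
      }
    };
  }
}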
Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class DistributedFileSystem, the method listCacheDirectives:
/**
 * List cache directives. Incrementally fetches results from the server.
 *
 * @param filter Filter parameters to use when listing the directives, null to
 *               list all directives visible to us.
 * @return A RemoteIterator which returns CacheDirectiveEntry objects.
 */
public RemoteIterator<CacheDirectiveEntry> listCacheDirectives(
    CacheDirectiveInfo filter) throws IOException {
  if (filter == null) {
    filter = new CacheDirectiveInfo.Builder().build();
  }
  if (filter.getPath() != null) {
    filter = new CacheDirectiveInfo.Builder(filter)
        .setPath(new Path(getPathName(fixRelativePart(filter.getPath()))))
        .build();
  }
  final RemoteIterator<CacheDirectiveEntry> iter =
      dfs.listCacheDirectives(filter);
  return new RemoteIterator<CacheDirectiveEntry>() {
    @Override
    public boolean hasNext() throws IOException {
      return iter.hasNext();
    }

    @Override
    public CacheDirectiveEntry next() throws IOException {
      // Although the paths we get back from the NameNode should always be
      // absolute, we call makeQualified to add the scheme and authority of
      // this DistributedFilesystem.
      CacheDirectiveEntry desc = iter.next();
      CacheDirectiveInfo info = desc.getInfo();
      Path p = info.getPath().makeQualified(getUri(), getWorkingDirectory());
      return new CacheDirectiveEntry(
          new CacheDirectiveInfo.Builder(info).setPath(p).build(),
          desc.getStats());
    }
  };
}
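From the caller's side, the returned iterator is consumed with the usual hasNext()/next() loop. A short usage sketch follows; printDirectives is a hypothetical method, and it assumes an already-initialized DistributedFileSystem and an existing cache pool named "pool".

// A caller-side sketch, not Hadoop code. CacheDirectiveLister and
// printDirectives are hypothetical names.
import java.io.IOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;

public final class CacheDirectiveLister {
  static void printDirectives(DistributedFileSystem dfs) throws IOException {
    CacheDirectiveInfo filter =
        new CacheDirectiveInfo.Builder().setPool("pool").build();
    RemoteIterator<CacheDirectiveEntry> it = dfs.listCacheDirectives(filter);
    while (it.hasNext()) {
      CacheDirectiveEntry entry = it.next();
      // Paths come back fully qualified with this filesystem's scheme and
      // authority, as the javadoc above notes.
      System.out.println(entry.getInfo().getId() + " -> "
          + entry.getInfo().getPath());
    }
  }
}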
Use of org.apache.hadoop.fs.RemoteIterator in project hadoop by apache.
In class TestWebHDFS, the method testLargeDirectory:
@Test(timeout = 300000)
public void testLargeDirectory() throws Exception {
  final Configuration conf = WebHdfsTestUtil.createConf();
  final int listLimit = 2;
  // force small chunking of directory listing
  conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, listLimit);
  // force paths to be only owner-accessible to ensure ugi isn't changing
  // during listStatus
  FsPermission.setUMask(conf, new FsPermission((short) 0077));
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  try {
    cluster.waitActive();
    WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsConstants.WEBHDFS_SCHEME)
        .setPermission(new Path("/"),
            new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    // trick the NN into believing the login user is not the superuser so we
    // can tell if the correct user is used by listStatus
    UserGroupInformation.setLoginUser(
        UserGroupInformation.createUserForTesting("not-superuser",
            new String[] { "not-supergroup" }));
    UserGroupInformation.createUserForTesting("me", new String[] { "my-group" })
        .doAs(new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws IOException, URISyntaxException {
            FileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf,
                WebHdfsConstants.WEBHDFS_SCHEME);
            Path d = new Path("/my-dir");
            Assert.assertTrue(fs.mkdirs(d));
            // Iterator should have no items when dir is empty
            RemoteIterator<FileStatus> it = fs.listStatusIterator(d);
            assertFalse(it.hasNext());
            Path p = new Path(d, "file-" + 0);
            Assert.assertTrue(fs.createNewFile(p));
            // Iterator should have an item when dir is not empty
            it = fs.listStatusIterator(d);
            assertTrue(it.hasNext());
            it.next();
            assertFalse(it.hasNext());
            for (int i = 1; i < listLimit * 3; i++) {
              p = new Path(d, "file-" + i);
              Assert.assertTrue(fs.createNewFile(p));
            }
            // Check the FileStatus[] listing
            FileStatus[] statuses = fs.listStatus(d);
            Assert.assertEquals(listLimit * 3, statuses.length);
            // Check the iterator-based listing
            GenericTestUtils.setLogLevel(WebHdfsFileSystem.LOG, Level.TRACE);
            GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.TRACE);
            it = fs.listStatusIterator(d);
            int count = 0;
            while (it.hasNext()) {
              FileStatus stat = it.next();
              assertEquals("FileStatuses not equal", statuses[count], stat);
              count++;
            }
            assertEquals("Different # of statuses!", statuses.length, count);
            // Do some more basic iterator tests
            it = fs.listStatusIterator(d);
            // Try advancing the iterator without calling hasNext()
            for (int i = 0; i < statuses.length; i++) {
              FileStatus stat = it.next();
              assertEquals("FileStatuses not equal", statuses[i], stat);
            }
            assertFalse("No more items expected", it.hasNext());
            // Try doing next when out of items
            try {
              it.next();
              fail("Iterator should error if out of elements.");
            } catch (IllegalStateException e) {
              // pass
            }
            return null;
          }
        });
  } finally {
    cluster.shutdown();
  }
}
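The iterator returned by listStatusIterator is what makes the small DFS_LIST_LIMIT interesting here: as the test's own comment notes, the listing is fetched in small chunks behind the scenes, while the caller only sees hasNext()/next(). A minimal caller-side sketch of that usage is below; DirectoryCounter and countEntries are illustrative names, not part of the test.

// A caller-side sketch: counting directory entries through
// FileSystem#listStatusIterator. DirectoryCounter and countEntries are
// illustrative names.
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public final class DirectoryCounter {
  private DirectoryCounter() {
  }

  public static int countEntries(FileSystem fs, Path dir) throws IOException {
    int count = 0;
    RemoteIterator<FileStatus> it = fs.listStatusIterator(dir);
    while (it.hasNext()) {
      it.next();
      count++;
    }
    return count;
  }
}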