Search in sources :

Example 56 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hbase by apache.

the class TestBlockReorder method testBlockLocation.

  /** Test that the reorder algorithm works as we expect. */
public void testBlockLocation() throws Exception {
    // Flow: start a mini HBase cluster, create a file with repCount replicas, then repeatedly
    // fetch its block locations straight from the namenode and check how
    // HFileSystem.ReorderWALBlocks reorders them for a non-WAL name and for a WAL-style name.
    // We need to start HBase to get HConstants.HBASE_DIR set in conf
    MiniHBaseCluster hbm = htu.startMiniHBaseCluster(1, 1);
    conf = hbm.getConfiguration();
    // The "/" is mandatory, without it we've got a null pointer exception on the namenode
    final String fileName = "/helloWorld";
    Path p = new Path(fileName);
    final int repCount = 3;
    // The cluster must have at least as many datanodes as requested replicas.
    Assert.assertTrue((short) cluster.getDataNodes().size() >= repCount);
    // Let's write the file
    // NOTE(review): fop and toWrite are never used again in this excerpt; the write/close
    // statements appear to be missing here - confirm against the upstream test.
    FSDataOutputStream fop = dfs.create(p, (short) repCount);
    final double toWrite = 875.5613;
    for (int i = 0; i < 10; i++) {
        // The interceptor is not set in this test, so we get the raw list at this point
        LocatedBlocks l;
        // Deadline for the polling loop below: 10000 ms from now.
        final long max = System.currentTimeMillis() + 10000;
        do {
            l = getNamenode(dfs.getClient()).getBlockLocations(fileName, 0, 1);
            // NOTE(review): arguments are passed as (actual, expected), the reverse of the
            // JUnit (expected, actual) convention; only the failure message is affected.
            Assert.assertEquals(l.getLocatedBlocks().size(), 1);
            // Fails the test once the deadline has passed without seeing repCount locations.
            Assert.assertTrue("Expecting " + repCount + " , got " + l.get(0).getLocations().length, System.currentTimeMillis() < max);
        } while (l.get(0).getLocations().length != repCount);
        // Should be filtered, the name is different => The order won't change
        Object[] originalList = l.getLocatedBlocks().toArray();
        HFileSystem.ReorderWALBlocks lrb = new HFileSystem.ReorderWALBlocks();
        lrb.reorderBlocks(conf, l, fileName);
        Assert.assertArrayEquals(originalList, l.getLocatedBlocks().toArray());
        // Should be reordered, as we pretend to be a file whose name follows the WAL layout:
        // <HBASE_DIR>/<HREGION_LOGDIR_NAME>/<host1>,6977,6576/mylogfile
        String pseudoLogFile = conf.get(HConstants.HBASE_DIR) + "/" + HConstants.HREGION_LOGDIR_NAME + "/" + host1 + ",6977,6576" + "/mylogfile";
        // Check that it will be possible to extract a ServerName from our construction
        Assert.assertNotNull("log= " + pseudoLogFile, AbstractFSWALProvider.getServerNameFromWALDirectoryName(dfs.getConf(), pseudoLogFile));
        // And check we're doing the right reorder: host1 is expected at index 2, i.e. the last
        // of the repCount (3) locations.
        lrb.reorderBlocks(conf, l, pseudoLogFile);
        Assert.assertEquals(host1, l.get(0).getLocations()[2].getHostName());
        // Check again, it should remain the same.
        lrb.reorderBlocks(conf, l, pseudoLogFile);
        Assert.assertEquals(host1, l.get(0).getLocations()[2].getHostName());
        // NOTE(review): the closing braces of the for loop and of the method are not present
        // in this excerpt.
Also used : Path(org.apache.hadoop.fs.Path) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) MiniHBaseCluster(org.apache.hadoop.hbase.MiniHBaseCluster) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)

Example 57 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.

the class NamenodeWebHdfsMethods method chooseDatanode.

/**
 * Chooses the datanode that should serve a WebHDFS operation.
 *
 * <p>As visible in this excerpt: for CREATE, a target is requested near the datanode found for
 * {@code remoteAddr}; for OPEN, GETFILECHECKSUM and APPEND, a node is picked via
 * {@code bestNode} from the locations of the block at the relevant offset; otherwise (or when
 * neither branch returns) a random node from the network topology is returned, skipping the
 * excluded nodes.
 *
 * <p>NOTE(review): this excerpt is missing several closing braces and the body of the
 * {@code else} branch of the exclude-list parsing; confirm details against the upstream file.
 *
 * @param namenode         namenode whose namesystem and RPC server are consulted
 * @param path             file path the operation targets
 * @param op               the WebHDFS operation being served
 * @param openOffset       requested read offset; only checked and used when op is OPEN
 * @param blocksize        block size passed to {@code chooseTarget4WebHDFS} for CREATE
 * @param excludeDatanodes list of datanodes to exclude, may be null; entries containing ':'
 *                         are split into host and port
 * @param remoteAddr       address used to look up the client's datanode for CREATE
 * @return the chosen datanode
 * @throws IOException if the namesystem is null, if the file is not found
 *                     (FileNotFoundException), or if the OPEN offset is out of range
 */
static DatanodeInfo chooseDatanode(final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset, final long blocksize, final String excludeDatanodes, final String remoteAddr) throws IOException {
    FSNamesystem fsn = namenode.getNamesystem();
    if (fsn == null) {
        throw new IOException("Namesystem has not been intialized yet.");
    final BlockManager bm = fsn.getBlockManager();
    // Build the set of nodes that must not be chosen.
    HashSet<Node> excludes = new HashSet<Node>();
    if (excludeDatanodes != null) {
        for (String host : StringUtils.getTrimmedStringCollection(excludeDatanodes)) {
            int idx = host.indexOf(":");
            if (idx != -1) {
                // "host:port" form: resolve the datanode by its transfer address.
                excludes.add(bm.getDatanodeManager().getDatanodeByXferAddr(host.substring(0, idx), Integer.parseInt(host.substring(idx + 1))));
            } else {
    // NOTE(review): the else body and the closing braces of the loop/if are missing above.
    if (op == PutOpParam.Op.CREATE) {
        // choose a datanode near to client
        final DatanodeDescriptor clientNode = bm.getDatanodeManager().getDatanodeByHost(remoteAddr);
        if (clientNode != null) {
            final DatanodeStorageInfo[] storages = bm.chooseTarget4WebHDFS(path, clientNode, excludes, blocksize);
            if (storages.length > 0) {
                return storages[0].getDatanodeDescriptor();
    } else if (op == GetOpParam.Op.OPEN || op == GetOpParam.Op.GETFILECHECKSUM || op == PostOpParam.Op.APPEND) {
        // choose a datanode containing a replica
        final NamenodeProtocols np = getRPCServer(namenode);
        final HdfsFileStatus status = np.getFileInfo(path);
        if (status == null) {
            throw new FileNotFoundException("File " + path + " not found.");
        final long len = status.getLen();
        if (op == GetOpParam.Op.OPEN) {
            // A negative offset is always rejected; an offset at or past the end is rejected
            // only for non-empty files.
            if (openOffset < 0L || (openOffset >= len && len > 0)) {
                throw new IOException("Offset=" + openOffset + " out of the range [0, " + len + "); " + op + ", path=" + path);
        if (len > 0) {
            // OPEN looks at the block holding the requested offset; the other operations look
            // at the block holding the last byte of the file.
            final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
            final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
            final int count = locations.locatedBlockCount();
            if (count > 0) {
                return bestNode(locations.get(0).getLocations(), excludes);
    // Fallback: any random node in the topology that is not excluded.
    return (DatanodeDescriptor) bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT, excludes);
Also used : NamenodeProtocols(org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols) NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) Node(org.apache.hadoop.net.Node) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) HashSet(java.util.HashSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 58 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hbase by apache.

the class TestBlockReorder method testBlockLocationReorder.

  /** Test that we can add a hook, and that this hook works when we try to read the file in HDFS. */
public void testBlockLocationReorder() throws Exception {
    // Flow: write a file with two replicas, read it once, locate the datanode serving the
    // first replica, install a location-reorder hook that moves that host out of first
    // position, occupy the datanode's ports, then check that reads still complete quickly.
    // NOTE(review): this excerpt has lost text in several places (truncated statements,
    // orphaned string literals, missing closing braces); confirm against the upstream test.
    Path p = new Path("hello");
    Assert.assertTrue((short) cluster.getDataNodes().size() > 1);
    final int repCount = 2;
    // Let's write the file
    FSDataOutputStream fop = dfs.create(p, (short) repCount);
    final double toWrite = 875.5613;
    // Let's check we can read it when everybody's there
    long start = System.currentTimeMillis();
    // NOTE(review): the right-hand side of this assignment is missing in the excerpt.
    FSDataInputStream fin =;
    Assert.assertTrue(toWrite == fin.readDouble());
    // NOTE(review): the call that consumed the "readtime= " message appears to have been
    // dropped from the next line (presumably a logging call - confirm).
    long end = System.currentTimeMillis();"readtime= " + (end - start));
    Assert.assertTrue((end - start) < 30 * 1000);
    // Let's kill the first location. But actually the first location returned will change
    // The first thing to do is to get the location, then the port
    FileStatus f = dfs.getFileStatus(p);
    BlockLocation[] lbs;
    do {
        lbs = dfs.getFileBlockLocations(f, 0, 1);
        // NOTE(review): with &&, the loop exits as soon as exactly one BlockLocation is
        // returned, and lbs[0] is dereferenced whenever lbs.length != 1 (including 0).
        // It also compares getLength() with the replica count - confirm the intended check.
    } while (lbs.length != 1 && lbs[0].getLength() != repCount);
    // Names are expected in "host:port" form; extract the port of the first replica.
    final String name = lbs[0].getNames()[0];
    Assert.assertTrue(name.indexOf(':') > 0);
    String portS = name.substring(name.indexOf(':') + 1);
    final int port = Integer.parseInt(portS);"port= " + port);
    int ipcPort = -1;
    // Let's find the DN to kill. cluster.getDataNodes(int) is not on the same port, so we need
    // to iterate ourselves.
    boolean ok = false;
    final String lookup = lbs[0].getHosts()[0];
    // Collects the host names seen, for the failure message below.
    StringBuilder sb = new StringBuilder();
    for (DataNode dn : cluster.getDataNodes()) {
        final String dnName = getHostName(dn);
        sb.append(dnName).append(' ');
        if (lookup.equals(dnName)) {
            ok = true;
            // NOTE(review): the statements around the two orphaned messages below (and the
            // call that actually stops the datanode) are missing from this excerpt.
  "killing datanode " + name + " / " + lookup);
            ipcPort = dn.ipcServer.getListenerAddress().getPort();
  "killed datanode " + name + " / " + lookup);
    Assert.assertTrue("didn't find the server to kill, was looking for " + lookup + " found " + sb, ok);"ipc port= " + ipcPort);
    // Add the hook, with an implementation checking that we don't use the port we've just killed.
    Assert.assertTrue(HFileSystem.addLocationsOrderInterceptor(conf, new HFileSystem.ReorderBlocks() {

        public void reorderBlocks(Configuration c, LocatedBlocks lbs, String src) {
            for (LocatedBlock lb : lbs.getLocatedBlocks()) {
                if (lb.getLocations().length > 1) {
                    DatanodeInfo[] infos = lb.getLocations();
                    // If the looked-up host is listed first, swap it with the second entry.
                    if (infos[0].getHostName().equals(lookup)) {
              "HFileSystem bad host, inverting");
                        DatanodeInfo tmp = infos[0];
                        infos[0] = infos[1];
                        infos[1] = tmp;
    final int retries = 10;
    ServerSocket ss = null;
    ServerSocket ssI;
    try {
        // We're taking the port to have a timeout issue later.
        ss = new ServerSocket(port);
        ssI = new ServerSocket(ipcPort);
    } catch (BindException be) {
        LOG.warn("Got bind exception trying to set up socket on " + port + " or " + ipcPort + ", this means that the datanode has not closed the socket or" + " someone else took it. It may happen, skipping this test for this time.", be);
        if (ss != null) {
    // so we try retries times;  with the reorder it will never last more than a few milli seconds
    for (int i = 0; i < retries; i++) {
        start = System.currentTimeMillis();
        // NOTE(review): the right-hand side of this assignment is missing in the excerpt.
        fin =;
        Assert.assertTrue(toWrite == fin.readDouble());
        end = System.currentTimeMillis();"HFileSystem readtime= " + (end - start));
        // Each read must finish within 60000 ms.
        Assert.assertFalse("We took too much time to read", (end - start) > 60000);
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) FileStatus(org.apache.hadoop.fs.FileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) BindException(java.net.BindException) ServerSocket(java.net.ServerSocket) BlockLocation(org.apache.hadoop.fs.BlockLocation) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)

Example 59 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.

the class DFSInputStream method fetchBlockAt.

/**
 * Fetch a block from namenode and cache it.
 *
 * <p>Runs while holding {@code infoLock}. The block containing {@code offset} is looked up in
 * {@code locatedBlocks}; when it is not cached, or when {@code useCache} is false, block
 * locations are requested through {@code dfsClient} and inserted into {@code locatedBlocks}
 * before the entry is returned.
 *
 * <p>NOTE(review): the closing braces of the inner blocks and of the method are missing in
 * this excerpt.
 *
 * @param offset   file offset whose block is wanted
 * @param length   length of the range to request; 0 selects the two-argument
 *                 {@code getLocatedBlocks(src, offset)} overload
 * @param useCache whether a cached entry may be returned without asking the namenode
 * @return the located block stored at the target index
 * @throws IOException  on failure of the underlying calls
 * @throws EOFException (an IOException) if the fetch returns null or no blocks
 */
private LocatedBlock fetchBlockAt(long offset, long length, boolean useCache) throws IOException {
    synchronized (infoLock) {
        int targetBlockIdx = locatedBlocks.findBlock(offset);
        if (targetBlockIdx < 0) {
            // block is not cached: turn the negative result into the index where the block
            // should be inserted, and force a fetch
            targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
            useCache = false;
        if (!useCache) {
            // fetch blocks
            final LocatedBlocks newBlocks = (length == 0) ? dfsClient.getLocatedBlocks(src, offset) : dfsClient.getLocatedBlocks(src, offset, length);
            if (newBlocks == null || newBlocks.locatedBlockCount() == 0) {
                throw new EOFException("Could not find target position " + offset);
            // Merge the freshly fetched blocks into the cached list at the target index.
            locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
        return locatedBlocks.get(targetBlockIdx);
Also used : LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) EOFException(java.io.EOFException)

Example 60 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.

the class DFSClient method getBlockLocations.

  /**
   * Get block location info about file
   * getBlockLocations() returns a list of hostnames that store
   * data for a specific file region.  It returns a set of hostnames
   * for every block within the indicated region.
   * This function is very useful when writing code that considers
   * data-placement when performing operations.  For example, the
   * MapReduce system tries to schedule tasks on the same machines
   * as the data-block the task processes.
   */
public BlockLocation[] getBlockLocations(String src, long start, long length) throws IOException {
    // The trace scope is only opened for its side effect and closed by try-with-resources;
    // the variable itself is intentionally unused.
    try (TraceScope ignored = newPathTraceScope("getBlockLocations", src)) {
        LocatedBlocks blocks = getLocatedBlocks(src, start, length);
        BlockLocation[] locations = DFSUtilClient.locatedBlocks2Locations(blocks);
        // Wrap each BlockLocation together with the LocatedBlock at the same index.
        // NOTE(review): this relies on locations[i] corresponding to blocks.get(i) - confirm
        // against locatedBlocks2Locations.
        HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
        for (int i = 0; i < locations.length; i++) {
            hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
        // NOTE(review): the closing braces of the loop, the try block and the method are
        // missing in this excerpt.
        return hdfsLocations;
Also used : HdfsBlockLocation(org.apache.hadoop.fs.HdfsBlockLocation) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) TraceScope(org.apache.htrace.core.TraceScope) BlockLocation(org.apache.hadoop.fs.BlockLocation) HdfsBlockLocation(org.apache.hadoop.fs.HdfsBlockLocation)


LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks)118 Test (org.junit.Test)67 Path (org.apache.hadoop.fs.Path)65 LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock)52 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)33 DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo)32 Configuration (org.apache.hadoop.conf.Configuration)29 IOException ( ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock)20 DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode)20 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)18 FileSystem (org.apache.hadoop.fs.FileSystem)17 LocatedStripedBlock (org.apache.hadoop.hdfs.protocol.LocatedStripedBlock)17 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)13 Block (org.apache.hadoop.hdfs.protocol.Block)11 InetSocketAddress ( HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration)10 FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)9 HdfsFileStatus (org.apache.hadoop.hdfs.protocol.HdfsFileStatus)7 BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager)7