Example 56 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hbase by apache.

the class TestBlockReorder method testBlockLocation.

/**
   * Test that the reorder algorithm works as we expect.
   */
@Test
public void testBlockLocation() throws Exception {
    // We need to start HBase to get HConstants.HBASE_DIR set in conf
    htu.startMiniZKCluster();
    MiniHBaseCluster hbm = htu.startMiniHBaseCluster(1, 1);
    conf = hbm.getConfiguration();
    // The "/" is mandatory, without it we've got a null pointer exception on the namenode
    final String fileName = "/helloWorld";
    Path p = new Path(fileName);
    final int repCount = 3;
    Assert.assertTrue(cluster.getDataNodes().size() >= repCount);
    // Let's write the file
    FSDataOutputStream fop = dfs.create(p, (short) repCount);
    final double toWrite = 875.5613;
    fop.writeDouble(toWrite);
    fop.close();
    for (int i = 0; i < 10; i++) {
        // The interceptor is not set in this test, so we get the raw list at this point
        LocatedBlocks l;
        final long max = System.currentTimeMillis() + 10000;
        do {
            l = getNamenode(dfs.getClient()).getBlockLocations(fileName, 0, 1);
            Assert.assertNotNull(l.getLocatedBlocks());
            Assert.assertEquals(1, l.getLocatedBlocks().size());
            Assert.assertTrue("Expecting " + repCount + ", got " + l.get(0).getLocations().length, System.currentTimeMillis() < max);
        } while (l.get(0).getLocations().length != repCount);
        // Should be filtered out: the file name doesn't match a WAL path, so the order won't change
        Object[] originalList = l.getLocatedBlocks().toArray();
        HFileSystem.ReorderWALBlocks lrb = new HFileSystem.ReorderWALBlocks();
        lrb.reorderBlocks(conf, l, fileName);
        Assert.assertArrayEquals(originalList, l.getLocatedBlocks().toArray());
        // Should be reordered, as we use a file name that looks like a WAL file name
        Assert.assertNotNull(conf.get(HConstants.HBASE_DIR));
        Assert.assertFalse(conf.get(HConstants.HBASE_DIR).isEmpty());
        String pseudoLogFile = conf.get(HConstants.HBASE_DIR) + "/" + HConstants.HREGION_LOGDIR_NAME + "/" + host1 + ",6977,6576" + "/mylogfile";
        // Check that it will be possible to extract a ServerName from our construction
        Assert.assertNotNull("log= " + pseudoLogFile, AbstractFSWALProvider.getServerNameFromWALDirectoryName(dfs.getConf(), pseudoLogFile));
        // And check we're doing the right reorder.
        lrb.reorderBlocks(conf, l, pseudoLogFile);
        Assert.assertEquals(host1, l.get(0).getLocations()[2].getHostName());
        // Check again, it should remain the same.
        lrb.reorderBlocks(conf, l, pseudoLogFile);
        Assert.assertEquals(host1, l.get(0).getLocations()[2].getHostName());
    }
}
Also used : Path(org.apache.hadoop.fs.Path) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) MiniHBaseCluster(org.apache.hadoop.hbase.MiniHBaseCluster) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
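
The reorder hook only fires for paths that parse as WAL directory names, which is why the test builds a pseudo log file path by hand. Below is a minimal standalone sketch of that path convention: the host name, port, and start code are illustrative, and it assumes hbase.rootdir is set in the configuration; only the constants and the AbstractFSWALProvider call come from the test above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;

public class WalPathProbe {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // WAL files live under <hbase.rootdir>/<HREGION_LOGDIR_NAME>/<host,port,startcode>/
        String walPath = conf.get(HConstants.HBASE_DIR)
            + "/" + HConstants.HREGION_LOGDIR_NAME
            + "/host1.example.org,16020,1534567890123"  // illustrative ServerName triple
            + "/wal.0000000001";
        ServerName sn = AbstractFSWALProvider.getServerNameFromWALDirectoryName(conf, walPath);
        // Prints the parsed ServerName, or null if the path does not match the WAL layout
        System.out.println(sn);
    }
}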

Example 57 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.

the class NamenodeWebHdfsMethods method chooseDatanode.

@VisibleForTesting
static DatanodeInfo chooseDatanode(final NameNode namenode, final String path, final HttpOpParam.Op op, final long openOffset, final long blocksize, final String excludeDatanodes, final String remoteAddr) throws IOException {
    FSNamesystem fsn = namenode.getNamesystem();
    if (fsn == null) {
        throw new IOException("Namesystem has not been intialized yet.");
    }
    final BlockManager bm = fsn.getBlockManager();
    HashSet<Node> excludes = new HashSet<Node>();
    if (excludeDatanodes != null) {
        for (String host : StringUtils.getTrimmedStringCollection(excludeDatanodes)) {
            int idx = host.indexOf(":");
            if (idx != -1) {
                excludes.add(bm.getDatanodeManager().getDatanodeByXferAddr(host.substring(0, idx), Integer.parseInt(host.substring(idx + 1))));
            } else {
                excludes.add(bm.getDatanodeManager().getDatanodeByHost(host));
            }
        }
    }
    if (op == PutOpParam.Op.CREATE) {
        // choose a datanode near the client
        final DatanodeDescriptor clientNode = bm.getDatanodeManager().getDatanodeByHost(remoteAddr);
        if (clientNode != null) {
            final DatanodeStorageInfo[] storages = bm.chooseTarget4WebHDFS(path, clientNode, excludes, blocksize);
            if (storages.length > 0) {
                return storages[0].getDatanodeDescriptor();
            }
        }
    } else if (op == GetOpParam.Op.OPEN || op == GetOpParam.Op.GETFILECHECKSUM || op == PostOpParam.Op.APPEND) {
        // choose a datanode containing a replica
        final NamenodeProtocols np = getRPCServer(namenode);
        final HdfsFileStatus status = np.getFileInfo(path);
        if (status == null) {
            throw new FileNotFoundException("File " + path + " not found.");
        }
        final long len = status.getLen();
        if (op == GetOpParam.Op.OPEN) {
            if (openOffset < 0L || (openOffset >= len && len > 0)) {
                throw new IOException("Offset=" + openOffset + " out of the range [0, " + len + "); " + op + ", path=" + path);
            }
        }
        if (len > 0) {
            final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
            final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
            final int count = locations.locatedBlockCount();
            if (count > 0) {
                return bestNode(locations.get(0).getLocations(), excludes);
            }
        }
    }
    return (DatanodeDescriptor) bm.getDatanodeManager().getNetworkTopology().chooseRandom(NodeBase.ROOT, excludes);
}
Also used : NamenodeProtocols(org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols) NameNode(org.apache.hadoop.hdfs.server.namenode.NameNode) Node(org.apache.hadoop.net.Node) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) DatanodeDescriptor(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor) DatanodeStorageInfo(org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo) BlockManager(org.apache.hadoop.hdfs.server.blockmanagement.BlockManager) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) FSNamesystem(org.apache.hadoop.hdfs.server.namenode.FSNamesystem) HashSet(java.util.HashSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
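
The excludeDatanodes parameter accepts both bare hostnames and host:port pairs, as the parsing loop above shows. Here is a standalone sketch of just that parsing step; the two datanode addresses are made up, and the DatanodeManager lookups are left as comments since they require a live namenode.

import org.apache.hadoop.util.StringUtils;

public class ExcludeListDemo {
    public static void main(String[] args) {
        String excludeDatanodes = "dn1.example.org:9866, dn2.example.org";
        for (String host : StringUtils.getTrimmedStringCollection(excludeDatanodes)) {
            int idx = host.indexOf(':');
            if (idx != -1) {
                // a host:port entry would resolve via getDatanodeByXferAddr(host, port)
                System.out.println("xfer addr: " + host.substring(0, idx)
                    + ", port: " + Integer.parseInt(host.substring(idx + 1)));
            } else {
                // a bare hostname would resolve via getDatanodeByHost(host)
                System.out.println("host: " + host);
            }
        }
    }
}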

Example 58 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hbase by apache.

the class TestBlockReorder method testBlockLocationReorder.

/**
   * Test that we can add a hook, and that this hook works when we try to read the file in HDFS.
   */
@Test
public void testBlockLocationReorder() throws Exception {
    Path p = new Path("hello");
    Assert.assertTrue(cluster.getDataNodes().size() > 1);
    final int repCount = 2;
    // Let's write the file
    FSDataOutputStream fop = dfs.create(p, (short) repCount);
    final double toWrite = 875.5613;
    fop.writeDouble(toWrite);
    fop.close();
    // Let's check we can read it when everybody's there
    long start = System.currentTimeMillis();
    FSDataInputStream fin = dfs.open(p);
    Assert.assertTrue(toWrite == fin.readDouble());
    long end = System.currentTimeMillis();
    LOG.info("readtime= " + (end - start));
    fin.close();
    Assert.assertTrue((end - start) < 30 * 1000);
    // Let's kill the first location. But actually the first location returned will change
    // The first thing to do is to get the location, then the port
    FileStatus f = dfs.getFileStatus(p);
    BlockLocation[] lbs;
    do {
        lbs = dfs.getFileBlockLocations(f, 0, 1);
    } while (lbs.length != 1 || lbs[0].getHosts().length != repCount);
    final String name = lbs[0].getNames()[0];
    Assert.assertTrue(name.indexOf(':') > 0);
    String portS = name.substring(name.indexOf(':') + 1);
    final int port = Integer.parseInt(portS);
    LOG.info("port= " + port);
    int ipcPort = -1;
    // Let's find the DN to kill. cluster.getDataNodes(int) is not indexed by port, so we need
    // to iterate over the datanodes ourselves.
    boolean ok = false;
    final String lookup = lbs[0].getHosts()[0];
    StringBuilder sb = new StringBuilder();
    for (DataNode dn : cluster.getDataNodes()) {
        final String dnName = getHostName(dn);
        sb.append(dnName).append(' ');
        if (lookup.equals(dnName)) {
            ok = true;
            LOG.info("killing datanode " + name + " / " + lookup);
            ipcPort = dn.ipcServer.getListenerAddress().getPort();
            dn.shutdown();
            LOG.info("killed datanode " + name + " / " + lookup);
            break;
        }
    }
    Assert.assertTrue("didn't find the server to kill, was looking for " + lookup + " found " + sb, ok);
    LOG.info("ipc port= " + ipcPort);
    // Add the hook, with an implementation checking that we don't use the port we've just killed.
    Assert.assertTrue(HFileSystem.addLocationsOrderInterceptor(conf, new HFileSystem.ReorderBlocks() {

        @Override
        public void reorderBlocks(Configuration c, LocatedBlocks lbs, String src) {
            for (LocatedBlock lb : lbs.getLocatedBlocks()) {
                if (lb.getLocations().length > 1) {
                    DatanodeInfo[] infos = lb.getLocations();
                    if (infos[0].getHostName().equals(lookup)) {
                        LOG.info("HFileSystem bad host, inverting");
                        DatanodeInfo tmp = infos[0];
                        infos[0] = infos[1];
                        infos[1] = tmp;
                    }
                }
            }
        }
    }));
    final int retries = 10;
    ServerSocket ss = null;
    ServerSocket ssI;
    try {
        // We occupy the port so that connection attempts to the dead datanode time out later.
        ss = new ServerSocket(port);
        ssI = new ServerSocket(ipcPort);
    } catch (BindException be) {
        LOG.warn("Got bind exception trying to set up socket on " + port + " or " + ipcPort + ", this means that the datanode has not closed the socket or" + " someone else took it. It may happen, skipping this test for this time.", be);
        if (ss != null) {
            ss.close();
        }
        return;
    }
    // So we try 'retries' times; with the reorder in place, a read should never take more than a few milliseconds.
    for (int i = 0; i < retries; i++) {
        start = System.currentTimeMillis();
        fin = dfs.open(p);
        Assert.assertTrue(toWrite == fin.readDouble());
        fin.close();
        end = System.currentTimeMillis();
        LOG.info("HFileSystem readtime= " + (end - start));
        Assert.assertFalse("We took too much time to read", (end - start) > 60000);
    }
    ss.close();
    ssI.close();
}
Also used : Path(org.apache.hadoop.fs.Path) DatanodeInfo(org.apache.hadoop.hdfs.protocol.DatanodeInfo) FileStatus(org.apache.hadoop.fs.FileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) LocatedBlock(org.apache.hadoop.hdfs.protocol.LocatedBlock) BindException(java.net.BindException) ServerSocket(java.net.ServerSocket) BlockLocation(org.apache.hadoop.fs.BlockLocation) DataNode(org.apache.hadoop.hdfs.server.datanode.DataNode) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)
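
The anonymous hook above swaps exactly two replica entries. For reuse outside a test, the same idea fits in a named class; the sketch below instead demotes replicas on an undesired host to the end of the list. The class name AvoidHostReorder is made up and it assumes at most one replica per host; the ReorderBlocks interface and the in-place mutation of getLocations() are the ones from the example above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;

/** Moves replicas on an undesired host to the back of the list. */
class AvoidHostReorder implements HFileSystem.ReorderBlocks {

    private final String badHost;

    AvoidHostReorder(String badHost) {
        this.badHost = badHost;
    }

    @Override
    public void reorderBlocks(Configuration conf, LocatedBlocks lbs, String src) {
        for (LocatedBlock lb : lbs.getLocatedBlocks()) {
            DatanodeInfo[] infos = lb.getLocations();
            for (int i = 0; i < infos.length - 1; i++) {
                if (infos[i].getHostName().equals(badHost)) {
                    DatanodeInfo bad = infos[i];
                    // shift the remaining replicas forward and put the bad host last
                    System.arraycopy(infos, i + 1, infos, i, infos.length - 1 - i);
                    infos[infos.length - 1] = bad;
                    break; // assumes at most one replica per host
                }
            }
        }
    }
}

It would be registered the same way as in the test: HFileSystem.addLocationsOrderInterceptor(conf, new AvoidHostReorder(lookup)).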

Example 59 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.

the class DFSInputStream method fetchBlockAt.

/** Fetch a block from the namenode and cache it. */
private LocatedBlock fetchBlockAt(long offset, long length, boolean useCache) throws IOException {
    synchronized (infoLock) {
        int targetBlockIdx = locatedBlocks.findBlock(offset);
        if (targetBlockIdx < 0) {
            // block is not cached
            targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
            useCache = false;
        }
        if (!useCache) {
            // fetch blocks
            final LocatedBlocks newBlocks = (length == 0) ? dfsClient.getLocatedBlocks(src, offset) : dfsClient.getLocatedBlocks(src, offset, length);
            if (newBlocks == null || newBlocks.locatedBlockCount() == 0) {
                throw new EOFException("Could not find target position " + offset);
            }
            locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
        }
        return locatedBlocks.get(targetBlockIdx);
    }
}
Also used : LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) EOFException(java.io.EOFException)
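
findBlock follows java.util.Collections.binarySearch-style semantics: on a miss it returns -(insertionPoint) - 1, and getInsertIndex recovers the insertion point so the freshly fetched blocks land in the right slot of the cache. Below is a standalone illustration of that convention using plain JDK calls; it is a sketch of the index arithmetic, not Hadoop code.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class InsertIndexDemo {
    public static void main(String[] args) {
        // Pretend these are the start offsets of the cached blocks.
        List<Long> cachedOffsets = Arrays.asList(0L, 128L, 256L);
        int r = Collections.binarySearch(cachedOffsets, 200L);  // miss: r == -3
        // On a miss, recover the insertion point the same way getInsertIndex does.
        int insertIdx = (r >= 0) ? r : -(r + 1);
        System.out.println(insertIdx);  // 2: new blocks are inserted before offset 256
    }
}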

Example 60 with LocatedBlocks

use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.

the class DFSClient method getBlockLocations.

/**
   * Get block location info about a file.
   *
   * getBlockLocations() returns a list of hostnames that store
   * data for a specific file region.  It returns a set of hostnames
   * for every block within the indicated region.
   *
   * This function is very useful when writing code that considers
   * data placement when performing operations.  For example, the
   * MapReduce system tries to schedule tasks on the same machines
   * as the data-block the task processes.
   */
public BlockLocation[] getBlockLocations(String src, long start, long length) throws IOException {
    checkOpen();
    try (TraceScope ignored = newPathTraceScope("getBlockLocations", src)) {
        LocatedBlocks blocks = getLocatedBlocks(src, start, length);
        BlockLocation[] locations = DFSUtilClient.locatedBlocks2Locations(blocks);
        HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
        for (int i = 0; i < locations.length; i++) {
            hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
        }
        return hdfsLocations;
    }
}
Also used : HdfsBlockLocation(org.apache.hadoop.fs.HdfsBlockLocation) LocatedBlocks(org.apache.hadoop.hdfs.protocol.LocatedBlocks) TraceScope(org.apache.htrace.core.TraceScope) BlockLocation(org.apache.hadoop.fs.BlockLocation) HdfsBlockLocation(org.apache.hadoop.fs.HdfsBlockLocation)
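
Application code usually reaches the same information through the public FileSystem API rather than DFSClient directly. Below is a minimal, self-contained sketch of consuming block locations for data placement; the path is illustrative and fs.defaultFS is assumed to point at the target cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LocalityProbe {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FileStatus st = fs.getFileStatus(new Path("/data/input.bin"));  // illustrative path
        // One BlockLocation per block overlapping the requested byte range.
        for (BlockLocation bl : fs.getFileBlockLocations(st, 0, st.getLen())) {
            System.out.println("offset=" + bl.getOffset()
                + " length=" + bl.getLength()
                + " hosts=" + String.join(",", bl.getHosts()));
        }
    }
}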

Aggregations

LocatedBlocks (org.apache.hadoop.hdfs.protocol.LocatedBlocks): 118
Test (org.junit.Test): 67
Path (org.apache.hadoop.fs.Path): 65
LocatedBlock (org.apache.hadoop.hdfs.protocol.LocatedBlock): 52
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 33
DatanodeInfo (org.apache.hadoop.hdfs.protocol.DatanodeInfo): 32
Configuration (org.apache.hadoop.conf.Configuration): 29
IOException (java.io.IOException): 20
ExtendedBlock (org.apache.hadoop.hdfs.protocol.ExtendedBlock): 20
DataNode (org.apache.hadoop.hdfs.server.datanode.DataNode): 20
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 18
FileSystem (org.apache.hadoop.fs.FileSystem): 17
LocatedStripedBlock (org.apache.hadoop.hdfs.protocol.LocatedStripedBlock): 17
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 13
Block (org.apache.hadoop.hdfs.protocol.Block): 11
InetSocketAddress (java.net.InetSocketAddress): 10
HdfsConfiguration (org.apache.hadoop.hdfs.HdfsConfiguration): 10
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 9
HdfsFileStatus (org.apache.hadoop.hdfs.protocol.HdfsFileStatus): 7
BlockManager (org.apache.hadoop.hdfs.server.blockmanagement.BlockManager): 7