Use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hbase by apache.
The class TestBlockReorder, method testBlockLocation.
/**
 * Test that the reorder algorithm works as we expect.
 */
@Test
public void testBlockLocation() throws Exception {
  // We need to start HBase to get HConstants.HBASE_DIR set in conf
  htu.startMiniZKCluster();
  MiniHBaseCluster hbm = htu.startMiniHBaseCluster(1, 1);
  conf = hbm.getConfiguration();
  // The "/" is mandatory, without it we get a NullPointerException on the namenode
  final String fileName = "/helloWorld";
  Path p = new Path(fileName);
  final int repCount = 3;
  Assert.assertTrue((short) cluster.getDataNodes().size() >= repCount);
  // Let's write the file
  FSDataOutputStream fop = dfs.create(p, (short) repCount);
  final double toWrite = 875.5613;
  fop.writeDouble(toWrite);
  fop.close();
  for (int i = 0; i < 10; i++) {
    // The interceptor is not set in this test, so we get the raw list at this point
    LocatedBlocks l;
    final long max = System.currentTimeMillis() + 10000;
    do {
      l = getNamenode(dfs.getClient()).getBlockLocations(fileName, 0, 1);
      Assert.assertNotNull(l.getLocatedBlocks());
      Assert.assertEquals(l.getLocatedBlocks().size(), 1);
      Assert.assertTrue("Expecting " + repCount + " , got " + l.get(0).getLocations().length,
        System.currentTimeMillis() < max);
    } while (l.get(0).getLocations().length != repCount);
    // Should be filtered: the name is different, so the order won't change
    Object[] originalList = l.getLocatedBlocks().toArray();
    HFileSystem.ReorderWALBlocks lrb = new HFileSystem.ReorderWALBlocks();
    lrb.reorderBlocks(conf, l, fileName);
    Assert.assertArrayEquals(originalList, l.getLocatedBlocks().toArray());
    // Should be reordered, as we pretend to be a WAL file name with a compliant structure
    Assert.assertNotNull(conf.get(HConstants.HBASE_DIR));
    Assert.assertFalse(conf.get(HConstants.HBASE_DIR).isEmpty());
    String pseudoLogFile = conf.get(HConstants.HBASE_DIR) + "/"
      + HConstants.HREGION_LOGDIR_NAME + "/" + host1 + ",6977,6576" + "/mylogfile";
    // Check that it will be possible to extract a ServerName from our construction
    Assert.assertNotNull("log= " + pseudoLogFile,
      AbstractFSWALProvider.getServerNameFromWALDirectoryName(dfs.getConf(), pseudoLogFile));
    // And check we're doing the right reorder.
    lrb.reorderBlocks(conf, l, pseudoLogFile);
    Assert.assertEquals(host1, l.get(0).getLocations()[2].getHostName());
    // Check again, it should remain the same.
    lrb.reorderBlocks(conf, l, pseudoLogFile);
    Assert.assertEquals(host1, l.get(0).getLocations()[2].getHostName());
  }
}
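For orientation, here is a minimal sketch, not the actual HBase implementation, of a reorder policy in the spirit of ReorderWALBlocks: it demotes the replica hosted on the WAL writer's machine to the end of each block's location list, which is exactly what the assertion on getLocations()[2] above checks. The class name DemoteHostReorder is hypothetical; only the HFileSystem.ReorderBlocks callback signature is taken from the snippets on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;

// Hypothetical illustration: push the replica hosted on 'walServerHost' to the end
// of each block's location list, matching the effect the test asserts above
// (host1 ends up at index 2 of 3 after reorderBlocks).
public class DemoteHostReorder implements HFileSystem.ReorderBlocks {
  private final String walServerHost;

  public DemoteHostReorder(String walServerHost) {
    this.walServerHost = walServerHost;
  }

  @Override
  public void reorderBlocks(Configuration conf, LocatedBlocks blocks, String src) {
    for (LocatedBlock lb : blocks.getLocatedBlocks()) {
      DatanodeInfo[] locs = lb.getLocations();
      for (int i = 0; i < locs.length - 1; i++) {
        if (walServerHost.equals(locs[i].getHostName())) {
          // Shift the matching replica to the end, keeping the relative order
          // of the other replicas unchanged.
          DatanodeInfo demoted = locs[i];
          System.arraycopy(locs, i + 1, locs, i, locs.length - i - 1);
          locs[locs.length - 1] = demoted;
          break;
        }
      }
    }
  }
}

Such a policy could be registered the same way the second HBase snippet below does, via HFileSystem.addLocationsOrderInterceptor(conf, new DemoteHostReorder(host)).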
Use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.
The class NamenodeWebHdfsMethods, method chooseDatanode.
@VisibleForTesting
static DatanodeInfo chooseDatanode(final NameNode namenode, final String path,
    final HttpOpParam.Op op, final long openOffset, final long blocksize,
    final String excludeDatanodes, final String remoteAddr) throws IOException {
  FSNamesystem fsn = namenode.getNamesystem();
  if (fsn == null) {
    throw new IOException("Namesystem has not been initialized yet.");
  }
  final BlockManager bm = fsn.getBlockManager();
  HashSet<Node> excludes = new HashSet<Node>();
  if (excludeDatanodes != null) {
    for (String host : StringUtils.getTrimmedStringCollection(excludeDatanodes)) {
      int idx = host.indexOf(":");
      if (idx != -1) {
        excludes.add(bm.getDatanodeManager().getDatanodeByXferAddr(host.substring(0, idx),
          Integer.parseInt(host.substring(idx + 1))));
      } else {
        excludes.add(bm.getDatanodeManager().getDatanodeByHost(host));
      }
    }
  }
  if (op == PutOpParam.Op.CREATE) {
    // choose a datanode near the client
    final DatanodeDescriptor clientNode = bm.getDatanodeManager().getDatanodeByHost(remoteAddr);
    if (clientNode != null) {
      final DatanodeStorageInfo[] storages =
        bm.chooseTarget4WebHDFS(path, clientNode, excludes, blocksize);
      if (storages.length > 0) {
        return storages[0].getDatanodeDescriptor();
      }
    }
  } else if (op == GetOpParam.Op.OPEN || op == GetOpParam.Op.GETFILECHECKSUM
      || op == PostOpParam.Op.APPEND) {
    // choose a datanode containing a replica
    final NamenodeProtocols np = getRPCServer(namenode);
    final HdfsFileStatus status = np.getFileInfo(path);
    if (status == null) {
      throw new FileNotFoundException("File " + path + " not found.");
    }
    final long len = status.getLen();
    if (op == GetOpParam.Op.OPEN) {
      if (openOffset < 0L || (openOffset >= len && len > 0)) {
        throw new IOException("Offset=" + openOffset + " out of the range [0, " + len + "); "
          + op + ", path=" + path);
      }
    }
    if (len > 0) {
      final long offset = op == GetOpParam.Op.OPEN ? openOffset : len - 1;
      final LocatedBlocks locations = np.getBlockLocations(path, offset, 1);
      final int count = locations.locatedBlockCount();
      if (count > 0) {
        return bestNode(locations.get(0).getLocations(), excludes);
      }
    }
  }
  return (DatanodeDescriptor) bm.getDatanodeManager().getNetworkTopology()
    .chooseRandom(NodeBase.ROOT, excludes);
}
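The replica-selection branch above boils down to: fetch a single LocatedBlock at the requested offset, pick a location that is not in the exclude set (bestNode), and otherwise fall back to a random node in the topology. A self-contained sketch of that idea follows, with the caveat that the helper name pickReplica and the plain String host set are assumptions for illustration; the real bestNode operates on the Node/DatanodeInfo machinery used above.

import java.io.IOException;
import java.util.Set;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;

// Hypothetical helper: choose the first replica of the first block whose host
// is not excluded; mirrors the locations.get(0).getLocations() + bestNode()
// pattern in chooseDatanode above.
static DatanodeInfo pickReplica(LocatedBlocks locations, Set<String> excludedHosts)
    throws IOException {
  if (locations == null || locations.locatedBlockCount() == 0) {
    throw new IOException("No located blocks for the requested range");
  }
  for (DatanodeInfo dn : locations.get(0).getLocations()) {
    if (!excludedHosts.contains(dn.getHostName())) {
      return dn;
    }
  }
  throw new IOException("All replicas are excluded");
}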
Use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hbase by apache.
The class TestBlockReorder, method testBlockLocationReorder.
/**
 * Test that we can add a hook, and that this hook works when we try to read the file in HDFS.
 */
@Test
public void testBlockLocationReorder() throws Exception {
  Path p = new Path("hello");
  Assert.assertTrue((short) cluster.getDataNodes().size() > 1);
  final int repCount = 2;
  // Let's write the file
  FSDataOutputStream fop = dfs.create(p, (short) repCount);
  final double toWrite = 875.5613;
  fop.writeDouble(toWrite);
  fop.close();
  // Let's check we can read it when everybody's there
  long start = System.currentTimeMillis();
  FSDataInputStream fin = dfs.open(p);
  Assert.assertTrue(toWrite == fin.readDouble());
  long end = System.currentTimeMillis();
  LOG.info("readtime= " + (end - start));
  fin.close();
  Assert.assertTrue((end - start) < 30 * 1000);
  // Let's kill the first location. But actually the first location returned will change.
  // The first thing to do is to get the location, then the port
  FileStatus f = dfs.getFileStatus(p);
  BlockLocation[] lbs;
  do {
    lbs = dfs.getFileBlockLocations(f, 0, 1);
  } while (lbs.length != 1 && lbs[0].getLength() != repCount);
  final String name = lbs[0].getNames()[0];
  Assert.assertTrue(name.indexOf(':') > 0);
  String portS = name.substring(name.indexOf(':') + 1);
  final int port = Integer.parseInt(portS);
  LOG.info("port= " + port);
  int ipcPort = -1;
  // Let's find the DN to kill. cluster.getDataNodes(int) is not on the same port, so we need
  // to iterate ourselves.
  boolean ok = false;
  final String lookup = lbs[0].getHosts()[0];
  StringBuilder sb = new StringBuilder();
  for (DataNode dn : cluster.getDataNodes()) {
    final String dnName = getHostName(dn);
    sb.append(dnName).append(' ');
    if (lookup.equals(dnName)) {
      ok = true;
      LOG.info("killing datanode " + name + " / " + lookup);
      ipcPort = dn.ipcServer.getListenerAddress().getPort();
      dn.shutdown();
      LOG.info("killed datanode " + name + " / " + lookup);
      break;
    }
  }
  Assert.assertTrue("didn't find the server to kill, was looking for " + lookup + " found " + sb, ok);
  LOG.info("ipc port= " + ipcPort);
  // Add the hook, with an implementation checking that we don't use the port we've just killed.
  Assert.assertTrue(HFileSystem.addLocationsOrderInterceptor(conf, new HFileSystem.ReorderBlocks() {
    @Override
    public void reorderBlocks(Configuration c, LocatedBlocks lbs, String src) {
      for (LocatedBlock lb : lbs.getLocatedBlocks()) {
        if (lb.getLocations().length > 1) {
          DatanodeInfo[] infos = lb.getLocations();
          if (infos[0].getHostName().equals(lookup)) {
            LOG.info("HFileSystem bad host, inverting");
            DatanodeInfo tmp = infos[0];
            infos[0] = infos[1];
            infos[1] = tmp;
          }
        }
      }
    }
  }));
  final int retries = 10;
  ServerSocket ss = null;
  ServerSocket ssI;
  try {
    // We're taking the port to provoke a timeout issue later.
    ss = new ServerSocket(port);
    ssI = new ServerSocket(ipcPort);
  } catch (BindException be) {
    LOG.warn("Got bind exception trying to set up socket on " + port + " or " + ipcPort
      + ", this means that the datanode has not closed the socket or"
      + " someone else took it. It may happen, skipping this test for this time.", be);
    if (ss != null) {
      ss.close();
    }
    return;
  }
  // So we try 'retries' times; with the reorder it will never last more than a few milliseconds
  for (int i = 0; i < retries; i++) {
    start = System.currentTimeMillis();
    fin = dfs.open(p);
    Assert.assertTrue(toWrite == fin.readDouble());
    fin.close();
    end = System.currentTimeMillis();
    LOG.info("HFileSystem readtime= " + (end - start));
    Assert.assertFalse("We took too much time to read", (end - start) > 60000);
  }
  ss.close();
  ssI.close();
}
Use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.
The class DFSInputStream, method fetchBlockAt.
/** Fetch a block from namenode and cache it */
private LocatedBlock fetchBlockAt(long offset, long length, boolean useCache) throws IOException {
  synchronized (infoLock) {
    int targetBlockIdx = locatedBlocks.findBlock(offset);
    if (targetBlockIdx < 0) {
      // block is not cached
      targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx);
      useCache = false;
    }
    if (!useCache) {
      // fetch blocks
      final LocatedBlocks newBlocks = (length == 0)
        ? dfsClient.getLocatedBlocks(src, offset)
        : dfsClient.getLocatedBlocks(src, offset, length);
      if (newBlocks == null || newBlocks.locatedBlockCount() == 0) {
        throw new EOFException("Could not find target position " + offset);
      }
      locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks());
    }
    return locatedBlocks.get(targetBlockIdx);
  }
}
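One detail worth spelling out is the index arithmetic: findBlock appears to follow the java.util.Collections.binarySearch convention of returning -(insertionPoint) - 1 when the offset is not covered by a cached block, and LocatedBlocks.getInsertIndex turns that back into the position where the freshly fetched range should be spliced in. Below is a stand-alone sketch of that convention, simplified to block start offsets with made-up values.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Demonstrates the binarySearch insertion-point convention that the
// findBlock()/getInsertIndex() pair in fetchBlockAt relies on.
public class InsertIndexDemo {
  public static void main(String[] args) {
    // Pretend these are the start offsets of the blocks already cached.
    List<Long> blockStarts = Arrays.asList(0L, 128L, 384L);
    long offset = 200L;   // not the start of any cached block
    int idx = Collections.binarySearch(blockStarts, offset);
    // idx is negative: -(insertionPoint) - 1
    int insertIndex = idx >= 0 ? idx : -(idx + 1);
    System.out.println("binarySearch result = " + idx + ", insert at " + insertIndex);
    // prints: binarySearch result = -3, insert at 2
  }
}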
Use of org.apache.hadoop.hdfs.protocol.LocatedBlocks in project hadoop by apache.
The class DFSClient, method getBlockLocations.
/**
 * Get block location info about file
 *
 * getBlockLocations() returns a list of hostnames that store
 * data for a specific file region. It returns a set of hostnames
 * for every block within the indicated region.
 *
 * This function is very useful when writing code that considers
 * data-placement when performing operations. For example, the
 * MapReduce system tries to schedule tasks on the same machines
 * as the data-block the task processes.
 */
public BlockLocation[] getBlockLocations(String src, long start, long length) throws IOException {
  checkOpen();
  try (TraceScope ignored = newPathTraceScope("getBlockLocations", src)) {
    LocatedBlocks blocks = getLocatedBlocks(src, start, length);
    BlockLocation[] locations = DFSUtilClient.locatedBlocks2Locations(blocks);
    HdfsBlockLocation[] hdfsLocations = new HdfsBlockLocation[locations.length];
    for (int i = 0; i < locations.length; i++) {
      hdfsLocations[i] = new HdfsBlockLocation(locations[i], blocks.get(i));
    }
    return hdfsLocations;
  }
}
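At the application level this is usually reached through the public FileSystem.getFileBlockLocations API rather than DFSClient directly; on HDFS the call funnels into the method above. A minimal usage sketch follows (the path argument and the printed fields are illustrative, not part of the snippet above).

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// List, for each block of a file, the hosts that store a replica.
public class ShowBlockHosts {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path(args[0]);
    FileStatus status = fs.getFileStatus(file);
    BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
    for (BlockLocation block : blocks) {
      System.out.println("offset=" + block.getOffset()
        + " length=" + block.getLength()
        + " hosts=" + Arrays.toString(block.getHosts()));
    }
    fs.close();
  }
}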