Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestReadStripedFileWithDecoding, method findFirstDataNode.
private int findFirstDataNode(Path file, long length) throws IOException {
  BlockLocation[] locs = fs.getFileBlockLocations(file, 0, length);
  String name = (locs[0].getNames())[0];
  int dnIndex = 0;
  for (DataNode dn : cluster.getDataNodes()) {
    int port = dn.getXferPort();
    if (name.contains(Integer.toString(port))) {
      return dnIndex;
    }
    dnIndex++;
  }
  return -1;
}
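The helper above works because each entry from BlockLocation.getNames() is a "host:port" string, where the port is the DataNode's data transfer port. As a hedged, minimal sketch (not part of the test class), the method below shows the kind of information getFileBlockLocations exposes; the FileSystem and Path arguments are assumed to be set up elsewhere.
private void printBlockLocations(FileSystem fs, Path file) throws IOException {
  FileStatus status = fs.getFileStatus(file);
  BlockLocation[] blocks = fs.getFileBlockLocations(status, 0, status.getLen());
  for (BlockLocation block : blocks) {
    // Each BlockLocation describes one block: its offset and length within the
    // file, plus the replicas that hold it.
    System.out.println("offset=" + block.getOffset() + ", length=" + block.getLength());
    for (String name : block.getNames()) {
      System.out.println("  replica (host:port): " + name);
    }
    for (String host : block.getHosts()) {
      System.out.println("  replica host: " + host);
    }
  }
}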
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestReadStripedFileWithDecoding, method testReadWithDNFailure.
private void testReadWithDNFailure(int fileLength, int dnFailureNum) throws Exception {
  String fileType = fileLength < (blockSize * dataBlocks) ? "smallFile" : "largeFile";
  String src = "/dnFailure_" + dnFailureNum + "_" + fileType;
  LOG.info("testReadWithDNFailure: file = " + src + ", fileSize = " + fileLength
      + ", dnFailureNum = " + dnFailureNum);
  Path testPath = new Path(src);
  final byte[] bytes = StripedFileTestUtil.generateBytes(fileLength);
  DFSTestUtil.writeFile(fs, testPath, bytes);
  StripedFileTestUtil.waitBlockGroupsReported(fs, src);
  // shut down the DN that holds an internal data block
  BlockLocation[] locs = fs.getFileBlockLocations(testPath, cellSize * 5, cellSize);
  for (int failedDnIdx = 0; failedDnIdx < dnFailureNum; failedDnIdx++) {
    String name = (locs[0].getNames())[failedDnIdx];
    for (DataNode dn : cluster.getDataNodes()) {
      int port = dn.getXferPort();
      if (name.contains(Integer.toString(port))) {
        dn.shutdown();
      }
    }
  }
  // check file length, pread, stateful read and seek
  verifyRead(testPath, fileLength, bytes);
}
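The verifyRead helper called at the end is not shown in this snippet; in the Hadoop test it relies on StripedFileTestUtil helpers. As a hedged sketch of the checks it is described as performing (file length, pread, stateful read and seek), here is a hypothetical stand-in named verifyReadSketch that uses only standard FileSystem and JUnit calls.
private void verifyReadSketch(FileSystem fs, Path file, int fileLength, byte[] expected)
    throws IOException {
  // 1. file length as reported by the NameNode
  Assert.assertEquals(fileLength, fs.getFileStatus(file).getLen());
  // 2. positional read (pread) of the whole file
  byte[] buf = new byte[fileLength];
  try (FSDataInputStream in = fs.open(file)) {
    in.readFully(0, buf);
    Assert.assertArrayEquals(expected, buf);
  }
  // 3. stateful read after a seek into the middle of the file
  try (FSDataInputStream in = fs.open(file)) {
    int mid = fileLength / 2;
    in.seek(mid);
    byte[] tail = new byte[fileLength - mid];
    in.readFully(tail);
    for (int i = 0; i < tail.length; i++) {
      Assert.assertEquals(expected[mid + i], tail[i]);
    }
  }
}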
Use of org.apache.hadoop.fs.BlockLocation in project hadoop by apache.
The class TestOverReplicatedBlocks, method testChooseReplicaToDelete.
/**
 * The test verifies that the replica chosen for deletion lives on the node
 * with the oldest heartbeat, when that heartbeat is older than the tolerable
 * heartbeat interval.
 * It creates a file with several blocks and replication 4.
 * The last DN is configured to send heartbeats rarely.
 *
 * The test waits until the tolerable heartbeat interval expires, then reduces
 * the replication of the file. All replica deletions should be scheduled for
 * the last node. No replicas will actually be deleted, since the last DN
 * doesn't send heartbeats.
 */
@Test
public void testChooseReplicaToDelete() throws Exception {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, SMALL_BLOCK_SIZE);
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 300);
    cluster.startDataNodes(conf, 1, true, null, null, null);
    DataNode lastDN = cluster.getDataNodes().get(3);
    DatanodeRegistration dnReg = InternalDataNodeTestUtils.getDNRegistrationForBP(
        lastDN, namesystem.getBlockPoolId());
    String lastDNid = dnReg.getDatanodeUuid();
    final Path fileName = new Path("/foo2");
    DFSTestUtil.createFile(fs, fileName, SMALL_FILE_LENGTH, (short) 4, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 4);
    // Wait for tolerable number of heartbeats plus one
    DatanodeDescriptor nodeInfo = null;
    long lastHeartbeat = 0;
    long waitTime = DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT * 1000
        * (DFSConfigKeys.DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_DEFAULT + 1);
    do {
      nodeInfo = bm.getDatanodeManager().getDatanode(dnReg);
      lastHeartbeat = nodeInfo.getLastUpdateMonotonic();
    } while (monotonicNow() - lastHeartbeat < waitTime);
    fs.setReplication(fileName, (short) 3);
    BlockLocation[] locs = fs.getFileBlockLocations(
        fs.getFileStatus(fileName), 0, Long.MAX_VALUE);
    // All replicas for deletion should be scheduled on lastDN.
    // And should not actually be deleted, because lastDN does not heartbeat.
    namesystem.readLock();
    final int dnBlocks = bm.getExcessSize4Testing(dnReg.getDatanodeUuid());
    assertEquals("Replicas on node " + lastDNid + " should have been deleted",
        SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks);
    namesystem.readUnlock();
    for (BlockLocation location : locs) {
      assertEquals("Block should still have 4 replicas", 4, location.getNames().length);
    }
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
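As a small, hedged illustration (not part of the test), the same getFileBlockLocations result can also be used to list each block's replicas by name rather than only counting them, which helps when debugging why an excess replica is still being reported; System.out is used here only to keep the sketch self-contained.
for (BlockLocation location : locs) {
  StringBuilder replicas = new StringBuilder();
  for (String replicaName : location.getNames()) {
    replicas.append(replicaName).append(' ');
  }
  System.out.println("block at offset " + location.getOffset() + " replicas: " + replicas);
}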
Use of org.apache.hadoop.fs.BlockLocation in project hbase by apache.
The class TestBlockReorder, method testBlockLocationReorder.
/**
 * Test that we can add a hook, and that this hook works when we try to read the file in HDFS.
 */
@Test
public void testBlockLocationReorder() throws Exception {
  Path p = new Path("hello");
  Assert.assertTrue((short) cluster.getDataNodes().size() > 1);
  final int repCount = 2;
  // Let's write the file
  FSDataOutputStream fop = dfs.create(p, (short) repCount);
  final double toWrite = 875.5613;
  fop.writeDouble(toWrite);
  fop.close();
  // Let's check we can read it when everybody's there
  long start = System.currentTimeMillis();
  FSDataInputStream fin = dfs.open(p);
  Assert.assertTrue(toWrite == fin.readDouble());
  long end = System.currentTimeMillis();
  LOG.info("readtime= " + (end - start));
  fin.close();
  Assert.assertTrue((end - start) < 30 * 1000);
  // Let's kill the first location. But actually the first location returned will change.
  // The first thing to do is to get the location, then the port
  FileStatus f = dfs.getFileStatus(p);
  BlockLocation[] lbs;
  do {
    lbs = dfs.getFileBlockLocations(f, 0, 1);
  } while (lbs.length != 1 && lbs[0].getLength() != repCount);
  final String name = lbs[0].getNames()[0];
  Assert.assertTrue(name.indexOf(':') > 0);
  String portS = name.substring(name.indexOf(':') + 1);
  final int port = Integer.parseInt(portS);
  LOG.info("port= " + port);
  int ipcPort = -1;
  // Let's find the DN to kill. cluster.getDataNodes(int) is not on the same port, so we need
  // to iterate ourselves.
  boolean ok = false;
  final String lookup = lbs[0].getHosts()[0];
  StringBuilder sb = new StringBuilder();
  for (DataNode dn : cluster.getDataNodes()) {
    final String dnName = getHostName(dn);
    sb.append(dnName).append(' ');
    if (lookup.equals(dnName)) {
      ok = true;
      LOG.info("killing datanode " + name + " / " + lookup);
      ipcPort = dn.ipcServer.getListenerAddress().getPort();
      dn.shutdown();
      LOG.info("killed datanode " + name + " / " + lookup);
      break;
    }
  }
  Assert.assertTrue("didn't find the server to kill, was looking for " + lookup + " found " + sb, ok);
  LOG.info("ipc port= " + ipcPort);
  // Add the hook, with an implementation checking that we don't use the port we've just killed.
  Assert.assertTrue(HFileSystem.addLocationsOrderInterceptor(conf, new HFileSystem.ReorderBlocks() {
    @Override
    public void reorderBlocks(Configuration c, LocatedBlocks lbs, String src) {
      for (LocatedBlock lb : lbs.getLocatedBlocks()) {
        if (lb.getLocations().length > 1) {
          DatanodeInfo[] infos = lb.getLocations();
          if (infos[0].getHostName().equals(lookup)) {
            LOG.info("HFileSystem bad host, inverting");
            DatanodeInfo tmp = infos[0];
            infos[0] = infos[1];
            infos[1] = tmp;
          }
        }
      }
    }
  }));
  final int retries = 10;
  ServerSocket ss = null;
  ServerSocket ssI;
  try {
    // We're taking the port to have a timeout issue later.
    ss = new ServerSocket(port);
    ssI = new ServerSocket(ipcPort);
  } catch (BindException be) {
    LOG.warn("Got bind exception trying to set up socket on " + port + " or " + ipcPort
        + ", this means that the datanode has not closed the socket or"
        + " someone else took it. It may happen, skipping this test for this time.", be);
    if (ss != null) {
      ss.close();
    }
    return;
  }
  // so we try retries times; with the reorder it will never last more than a few milliseconds
  for (int i = 0; i < retries; i++) {
    start = System.currentTimeMillis();
    fin = dfs.open(p);
    Assert.assertTrue(toWrite == fin.readDouble());
    fin.close();
    end = System.currentTimeMillis();
    LOG.info("HFileSystem readtime= " + (end - start));
    Assert.assertFalse("We took too much time to read", (end - start) > 60000);
  }
  ss.close();
  ssI.close();
}
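The anonymous ReorderBlocks above only swaps the first two locations. As a hedged, generalized sketch of the same hook pattern (not part of the test), the snippet below bubbles an assumed badHost value to the end of every block's location list, using the same HBase and HDFS types seen above.
final String badHost = "bad.example.org";  // assumption: the hostname to demote
HFileSystem.ReorderBlocks demoteHost = new HFileSystem.ReorderBlocks() {
  @Override
  public void reorderBlocks(Configuration c, LocatedBlocks lbs, String src) {
    for (LocatedBlock lb : lbs.getLocatedBlocks()) {
      DatanodeInfo[] infos = lb.getLocations();
      for (int i = 0; i < infos.length - 1; i++) {
        if (infos[i].getHostName().equals(badHost)) {
          // shift the bad host one step toward the end of the array
          DatanodeInfo tmp = infos[i];
          infos[i] = infos[i + 1];
          infos[i + 1] = tmp;
        }
      }
    }
  }
};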
Use of org.apache.hadoop.fs.BlockLocation in project hbase by apache.
The class TestBlockReorder, method testHBaseCluster.
/**
 * Test that the hook works within HBase, including when there are multiple blocks.
 */
@Test()
public void testHBaseCluster() throws Exception {
  byte[] sb = "sb".getBytes();
  htu.startMiniZKCluster();
  MiniHBaseCluster hbm = htu.startMiniHBaseCluster(1, 1);
  hbm.waitForActiveAndReadyMaster();
  HRegionServer targetRs = hbm.getMaster();
  // We want to have a datanode with the same name as the region server, so
  // we're going to get the regionservername, and start a new datanode with this name.
  String host4 = targetRs.getServerName().getHostname();
  LOG.info("Starting a new datanode with the name=" + host4);
  cluster.startDataNodes(conf, 1, true, null, new String[] { "/r4" }, new String[] { host4 }, null);
  cluster.waitClusterUp();
  final int repCount = 3;
  // We use the regionserver file system & conf as we expect it to have the hook.
  conf = targetRs.getConfiguration();
  HFileSystem rfs = (HFileSystem) targetRs.getFileSystem();
  Table h = htu.createTable(TableName.valueOf(name.getMethodName()), sb);
  // Now, we have 4 datanodes and a replication count of 3. So we don't know if the datanode
  // with the same node will be used. We can't really stop an existing datanode, this would
  // make us fall in nasty hdfs bugs/issues. So we're going to try multiple times.
  // Now we need to find the log file, its locations, and look at it
  String rootDir = new Path(FSUtils.getRootDir(conf) + "/" + HConstants.HREGION_LOGDIR_NAME
      + "/" + targetRs.getServerName().toString()).toUri().getPath();
  DistributedFileSystem mdfs = (DistributedFileSystem)
      hbm.getMaster().getMasterFileSystem().getFileSystem();
  int nbTest = 0;
  while (nbTest < 10) {
    final List<Region> regions = targetRs.getOnlineRegions(h.getName());
    final CountDownLatch latch = new CountDownLatch(regions.size());
    // listen for successful log rolls
    final WALActionsListener listener = new WALActionsListener.Base() {
      @Override
      public void postLogRoll(final Path oldPath, final Path newPath) throws IOException {
        latch.countDown();
      }
    };
    for (Region region : regions) {
      ((HRegion) region).getWAL().registerWALActionsListener(listener);
    }
    htu.getAdmin().rollWALWriter(targetRs.getServerName());
    // wait
    try {
      latch.await();
    } catch (InterruptedException exception) {
      LOG.warn("Interrupted while waiting for the wal of '" + targetRs + "' to roll. If later "
          + "tests fail, it's probably because we should still be waiting.");
      Thread.currentThread().interrupt();
    }
    for (Region region : regions) {
      ((HRegion) region).getWAL().unregisterWALActionsListener(listener);
    }
    // We need a sleep as the namenode is informed asynchronously
    Thread.sleep(100);
    // insert one put to ensure a minimal size
    Put p = new Put(sb);
    p.addColumn(sb, sb, sb);
    h.put(p);
    DirectoryListing dl = dfs.getClient().listPaths(rootDir, HdfsFileStatus.EMPTY_NAME);
    HdfsFileStatus[] hfs = dl.getPartialListing();
    // As we wrote a put, we should have at least one log file.
    Assert.assertTrue(hfs.length >= 1);
    for (HdfsFileStatus hf : hfs) {
      // Because this is a live cluster, log files might get archived while we're processing
      try {
        LOG.info("Log file found: " + hf.getLocalName() + " in " + rootDir);
        String logFile = rootDir + "/" + hf.getLocalName();
        FileStatus fsLog = rfs.getFileStatus(new Path(logFile));
        LOG.info("Checking log file: " + logFile);
        // Now checking that the hook is up and running
        // We can't call directly getBlockLocations, it's not available in HFileSystem
        // We're trying multiple times to be sure, as the order is random
        BlockLocation[] bls = rfs.getFileBlockLocations(fsLog, 0, 1);
        if (bls.length > 0) {
          BlockLocation bl = bls[0];
          LOG.info(bl.getHosts().length + " replicas for block 0 in " + logFile + " ");
          for (int i = 0; i < bl.getHosts().length - 1; i++) {
            LOG.info(bl.getHosts()[i] + " " + logFile);
            Assert.assertNotSame(bl.getHosts()[i], host4);
          }
          String last = bl.getHosts()[bl.getHosts().length - 1];
          LOG.info(last + " " + logFile);
          if (host4.equals(last)) {
            nbTest++;
            LOG.info(logFile + " is on the new datanode and is ok");
            if (bl.getHosts().length == 3) {
              // We can test this case from the file system as well
              // Checking the underlying file system. Multiple times as the order is random
              testFromDFS(dfs, logFile, repCount, host4);
              // now from the master
              testFromDFS(mdfs, logFile, repCount, host4);
            }
          }
        }
      } catch (FileNotFoundException exception) {
        LOG.debug("Failed to find log file '" + hf.getLocalName() + "'; it probably was "
            + "archived out from under us so we'll ignore and retry. If this test hangs "
            + "indefinitely you should treat this failure as a symptom.", exception);
      } catch (RemoteException exception) {
        if (exception.unwrapRemoteException() instanceof FileNotFoundException) {
          LOG.debug("Failed to find log file '" + hf.getLocalName() + "'; it probably was "
              + "archived out from under us so we'll ignore and retry. If this test hangs "
              + "indefinitely you should treat this failure as a symptom.", exception);
        } else {
          throw exception;
        }
      }
    }
  }
}
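The testFromDFS helper used above is not included in this snippet. As a hypothetical stand-in (the real implementation may differ), a check along these lines would wait until the file reports the expected replica count and then assert that the region server's host has been reordered to the last position.
private void testFromDFSSketch(FileSystem fs, String src, int repCount, String localhost)
    throws Exception {
  FileStatus status = fs.getFileStatus(new Path(src));
  BlockLocation[] bls;
  do {
    Thread.sleep(100);
    bls = fs.getFileBlockLocations(status, 0, 1);
  } while (bls.length == 0 || bls[0].getHosts().length != repCount);
  String[] hosts = bls[0].getHosts();
  // With the reorder hook in place, the datanode sharing the region server's
  // hostname should be the last candidate, never one of the first ones.
  Assert.assertEquals(localhost, hosts[hosts.length - 1]);
  for (int i = 0; i < hosts.length - 1; i++) {
    Assert.assertNotSame(localhost, hosts[i]);
  }
}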