use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestNameNodeMXBean method testNameNodeMXBeanInfo.
@SuppressWarnings({ "unchecked" })
@Test
public void testNameNodeMXBeanInfo() throws Exception {
Configuration conf = new Configuration();
conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, NativeIO.POSIX.getCacheManipulator().getMemlockLimit());
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
cluster.waitActive();
// Set upgrade domain on the first DN.
String upgradeDomain = "abcd";
DatanodeManager dm = cluster.getNameNode().getNamesystem().getBlockManager().getDatanodeManager();
DatanodeDescriptor dd = dm.getDatanode(cluster.getDataNodes().get(0).getDatanodeId());
dd.setUpgradeDomain(upgradeDomain);
String dnXferAddrWithUpgradeDomainSet = dd.getXferAddr();
// Put the second DN to maintenance state.
DatanodeDescriptor maintenanceNode = dm.getDatanode(cluster.getDataNodes().get(1).getDatanodeId());
maintenanceNode.setInMaintenance();
String dnXferAddrInMaintenance = maintenanceNode.getXferAddr();
FSNamesystem fsn = cluster.getNameNode().namesystem;
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
ObjectName mxbeanName = new ObjectName("Hadoop:service=NameNode,name=NameNodeInfo");
// get attribute "ClusterId"
String clusterId = (String) mbs.getAttribute(mxbeanName, "ClusterId");
assertEquals(fsn.getClusterId(), clusterId);
// get attribute "BlockPoolId"
String blockpoolId = (String) mbs.getAttribute(mxbeanName, "BlockPoolId");
assertEquals(fsn.getBlockPoolId(), blockpoolId);
// get attribute "Version"
String version = (String) mbs.getAttribute(mxbeanName, "Version");
assertEquals(fsn.getVersion(), version);
assertTrue(version.equals(VersionInfo.getVersion() + ", r" + VersionInfo.getRevision()));
// get attribute "Used"
Long used = (Long) mbs.getAttribute(mxbeanName, "Used");
assertEquals(fsn.getUsed(), used.longValue());
// get attribute "Total"
Long total = (Long) mbs.getAttribute(mxbeanName, "Total");
assertEquals(fsn.getTotal(), total.longValue());
// get attribute "safemode"
String safemode = (String) mbs.getAttribute(mxbeanName, "Safemode");
assertEquals(fsn.getSafemode(), safemode);
// get attribute nondfs
Long nondfs = (Long) (mbs.getAttribute(mxbeanName, "NonDfsUsedSpace"));
assertEquals(fsn.getNonDfsUsedSpace(), nondfs.longValue());
// get attribute percentremaining
Float percentremaining = (Float) (mbs.getAttribute(mxbeanName, "PercentRemaining"));
assertEquals(fsn.getPercentRemaining(), percentremaining, DELTA);
// get attribute Totalblocks
Long totalblocks = (Long) (mbs.getAttribute(mxbeanName, "TotalBlocks"));
assertEquals(fsn.getTotalBlocks(), totalblocks.longValue());
// get attribute alivenodeinfo
String alivenodeinfo = (String) (mbs.getAttribute(mxbeanName, "LiveNodes"));
Map<String, Map<String, Object>> liveNodes = (Map<String, Map<String, Object>>) JSON.parse(alivenodeinfo);
assertTrue(liveNodes.size() == 2);
for (Map<String, Object> liveNode : liveNodes.values()) {
assertTrue(liveNode.containsKey("nonDfsUsedSpace"));
assertTrue(((Long) liveNode.get("nonDfsUsedSpace")) >= 0);
assertTrue(liveNode.containsKey("capacity"));
assertTrue(((Long) liveNode.get("capacity")) > 0);
assertTrue(liveNode.containsKey("numBlocks"));
assertTrue(((Long) liveNode.get("numBlocks")) == 0);
assertTrue(liveNode.containsKey("lastBlockReport"));
// a. By default the upgrade domain isn't defined on any DN.
// b. If the upgrade domain is set on a DN, JMX should have the same
// value.
String xferAddr = (String) liveNode.get("xferaddr");
if (!xferAddr.equals(dnXferAddrWithUpgradeDomainSet)) {
assertTrue(!liveNode.containsKey("upgradeDomain"));
} else {
assertTrue(liveNode.get("upgradeDomain").equals(upgradeDomain));
}
// "adminState" is set to maintenance only for the specific dn.
boolean inMaintenance = liveNode.get("adminState").equals(DatanodeInfo.AdminStates.IN_MAINTENANCE.toString());
assertFalse(xferAddr.equals(dnXferAddrInMaintenance) ^ inMaintenance);
}
assertEquals(fsn.getLiveNodes(), alivenodeinfo);
// get attributes DeadNodes
String deadNodeInfo = (String) (mbs.getAttribute(mxbeanName, "DeadNodes"));
assertEquals(fsn.getDeadNodes(), deadNodeInfo);
// get attribute NodeUsage
String nodeUsage = (String) (mbs.getAttribute(mxbeanName, "NodeUsage"));
assertEquals("Bad value for NodeUsage", fsn.getNodeUsage(), nodeUsage);
// get attribute NameJournalStatus
String nameJournalStatus = (String) (mbs.getAttribute(mxbeanName, "NameJournalStatus"));
assertEquals("Bad value for NameJournalStatus", fsn.getNameJournalStatus(), nameJournalStatus);
// get attribute JournalTransactionInfo
String journalTxnInfo = (String) mbs.getAttribute(mxbeanName, "JournalTransactionInfo");
assertEquals("Bad value for NameTxnIds", fsn.getJournalTransactionInfo(), journalTxnInfo);
// get attribute "CompileInfo"
String compileInfo = (String) mbs.getAttribute(mxbeanName, "CompileInfo");
assertEquals("Bad value for CompileInfo", fsn.getCompileInfo(), compileInfo);
// get attribute CorruptFiles
String corruptFiles = (String) (mbs.getAttribute(mxbeanName, "CorruptFiles"));
assertEquals("Bad value for CorruptFiles", fsn.getCorruptFiles(), corruptFiles);
// get attribute NameDirStatuses
String nameDirStatuses = (String) (mbs.getAttribute(mxbeanName, "NameDirStatuses"));
assertEquals(fsn.getNameDirStatuses(), nameDirStatuses);
Map<String, Map<String, String>> statusMap = (Map<String, Map<String, String>>) JSON.parse(nameDirStatuses);
Collection<URI> nameDirUris = cluster.getNameDirs(0);
for (URI nameDirUri : nameDirUris) {
File nameDir = new File(nameDirUri);
System.out.println("Checking for the presence of " + nameDir + " in active name dirs.");
assertTrue(statusMap.get("active").containsKey(nameDir.getAbsolutePath()));
}
assertEquals(2, statusMap.get("active").size());
assertEquals(0, statusMap.get("failed").size());
// This will cause the first dir to fail.
File failedNameDir = new File(nameDirUris.iterator().next());
assertEquals(0, FileUtil.chmod(new File(failedNameDir, "current").getAbsolutePath(), "000"));
cluster.getNameNodeRpc().rollEditLog();
nameDirStatuses = (String) (mbs.getAttribute(mxbeanName, "NameDirStatuses"));
statusMap = (Map<String, Map<String, String>>) JSON.parse(nameDirStatuses);
for (URI nameDirUri : nameDirUris) {
File nameDir = new File(nameDirUri);
String expectedStatus = nameDir.equals(failedNameDir) ? "failed" : "active";
System.out.println("Checking for the presence of " + nameDir + " in " + expectedStatus + " name dirs.");
assertTrue(statusMap.get(expectedStatus).containsKey(nameDir.getAbsolutePath()));
}
assertEquals(1, statusMap.get("active").size());
assertEquals(1, statusMap.get("failed").size());
assertEquals(0L, mbs.getAttribute(mxbeanName, "CacheUsed"));
assertEquals(NativeIO.POSIX.getCacheManipulator().getMemlockLimit() * cluster.getDataNodes().size(), mbs.getAttribute(mxbeanName, "CacheCapacity"));
assertNull("RollingUpgradeInfo should be null when there is no rolling" + " upgrade", mbs.getAttribute(mxbeanName, "RollingUpgradeStatus"));
} finally {
if (cluster != null) {
for (URI dir : cluster.getNameDirs(0)) {
FileUtil.chmod(new File(new File(dir), "current").getAbsolutePath(), "755");
}
cluster.shutdown();
}
}
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestNameNodeReconfigure method testReconfigureHearbeatCheck.
/**
* Test to reconfigure interval of heart beat check and re-check.
*/
@Test
public void testReconfigureHearbeatCheck() throws ReconfigurationException {
final NameNode nameNode = cluster.getNameNode();
final DatanodeManager datanodeManager = nameNode.namesystem.getBlockManager().getDatanodeManager();
// change properties
nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, "" + 6);
nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, "" + (10 * 60 * 1000));
// try invalid values
try {
nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, "text");
fail("ReconfigurationException expected");
} catch (ReconfigurationException expected) {
assertTrue(expected.getCause() instanceof NumberFormatException);
}
try {
nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, "text");
fail("ReconfigurationException expected");
} catch (ReconfigurationException expected) {
assertTrue(expected.getCause() instanceof NumberFormatException);
}
// verify change
assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", 6, nameNode.getConf().getLong(DFS_HEARTBEAT_INTERVAL_KEY, DFS_HEARTBEAT_INTERVAL_DEFAULT));
assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", 6, datanodeManager.getHeartbeatInterval());
assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", 10 * 60 * 1000, nameNode.getConf().getInt(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT));
assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", 10 * 60 * 1000, datanodeManager.getHeartbeatRecheckInterval());
// revert to defaults
nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, null);
nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, null);
// verify defaults
assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", null, nameNode.getConf().get(DFS_HEARTBEAT_INTERVAL_KEY));
assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", DFS_HEARTBEAT_INTERVAL_DEFAULT, datanodeManager.getHeartbeatInterval());
assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", null, nameNode.getConf().get(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY));
assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT, datanodeManager.getHeartbeatRecheckInterval());
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestFsck method testBlockIdCKMaintenance.
/**
* Test for blockIdCK with datanode maintenance.
*/
@Test(timeout = 90000)
public void testBlockIdCKMaintenance() throws Exception {
final short replFactor = 2;
short numDn = 2;
final long blockSize = 512;
String[] hosts = { "host1", "host2" };
String[] racks = { "/rack1", "/rack2" };
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY, replFactor);
DistributedFileSystem dfs;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts).racks(racks).build();
assertNotNull("Failed Cluster Creation", cluster);
cluster.waitClusterUp();
dfs = cluster.getFileSystem();
assertNotNull("Failed to get FileSystem", dfs);
DFSTestUtil util = new DFSTestUtil.Builder().setName(getClass().getSimpleName()).setNumFiles(1).build();
//create files
final String pathString = new String("/testfile");
final Path path = new Path(pathString);
util.createFile(dfs, path, 1024, replFactor, 1000L);
util.waitReplication(dfs, path, replFactor);
StringBuilder sb = new StringBuilder();
for (LocatedBlock lb : util.getAllBlocks(dfs, path)) {
sb.append(lb.getBlock().getLocalBlock().getBlockName() + " ");
}
String[] bIds = sb.toString().split(" ");
//make sure datanode that has replica is fine before maintenance
String outStr = runFsck(conf, 0, true, "/", "-maintenance", "-blockId", bIds[0]);
System.out.println(outStr);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
BlockManager bm = fsn.getBlockManager();
DatanodeManager dnm = bm.getDatanodeManager();
DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0).getDatanodeId());
bm.getDatanodeManager().getDecomManager().startMaintenance(dn, Long.MAX_VALUE);
final String dnName = dn.getXferAddr();
//wait for the node to enter maintenance state
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
DatanodeInfo datanodeInfo = null;
try {
for (DatanodeInfo info : dfs.getDataNodeStats()) {
if (dnName.equals(info.getXferAddr())) {
datanodeInfo = info;
}
}
if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
String fsckOut = runFsck(conf, 5, false, "/", "-maintenance", "-blockId", bIds[0]);
assertTrue(fsckOut.contains(NamenodeFsck.ENTERING_MAINTENANCE_STATUS));
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
}
return false;
}
}, 500, 30000);
// Start 3rd DataNode
cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack3" }, new String[] { "host3" }, null, false);
// Wait for 1st node to reach in maintenance state
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
try {
DatanodeInfo datanodeInfo = null;
for (DatanodeInfo info : dfs.getDataNodeStats()) {
if (dnName.equals(info.getXferAddr())) {
datanodeInfo = info;
}
}
if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
}
return false;
}
}, 500, 30000);
//check in maintenance node
String fsckOut = runFsck(conf, 4, false, "/", "-maintenance", "-blockId", bIds[0]);
assertTrue(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
//check in maintenance node are not printed when not requested
fsckOut = runFsck(conf, 4, false, "/", "-blockId", bIds[0]);
assertFalse(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class TestFsck method testFsckWithMaintenanceReplicas.
/**
* Test for blocks on maintenance hosts are not shown as missing.
*/
@Test(timeout = 90000)
public void testFsckWithMaintenanceReplicas() throws Exception {
final short replFactor = 2;
short numDn = 2;
final long blockSize = 512;
String[] hosts = { "host1", "host2" };
String[] racks = { "/rack1", "/rack2" };
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, replFactor);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_MAINTENANCE_REPLICATION_MIN_KEY, replFactor);
DistributedFileSystem dfs;
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDn).hosts(hosts).racks(racks).build();
assertNotNull("Failed Cluster Creation", cluster);
cluster.waitClusterUp();
dfs = cluster.getFileSystem();
assertNotNull("Failed to get FileSystem", dfs);
DFSTestUtil util = new DFSTestUtil.Builder().setName(getClass().getSimpleName()).setNumFiles(1).build();
//create files
final String testFile = new String("/testfile");
final Path path = new Path(testFile);
util.createFile(dfs, path, 1024, replFactor, 1000L);
util.waitReplication(dfs, path, replFactor);
StringBuilder sb = new StringBuilder();
for (LocatedBlock lb : util.getAllBlocks(dfs, path)) {
sb.append(lb.getBlock().getLocalBlock().getBlockName() + " ");
}
String[] bIds = sb.toString().split(" ");
//make sure datanode that has replica is fine before maintenance
String outStr = runFsck(conf, 0, true, testFile);
System.out.println(outStr);
assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
FSNamesystem fsn = cluster.getNameNode().getNamesystem();
BlockManager bm = fsn.getBlockManager();
DatanodeManager dnm = bm.getDatanodeManager();
DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0).getDatanodeId());
bm.getDatanodeManager().getDecomManager().startMaintenance(dn, Long.MAX_VALUE);
final String dnName = dn.getXferAddr();
//wait for the node to enter maintenance state
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
DatanodeInfo datanodeInfo = null;
try {
for (DatanodeInfo info : dfs.getDataNodeStats()) {
if (dnName.equals(info.getXferAddr())) {
datanodeInfo = info;
}
}
if (datanodeInfo != null && datanodeInfo.isEnteringMaintenance()) {
// verify fsck returns Healthy status
String fsckOut = runFsck(conf, 0, true, testFile, "-maintenance");
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
}
return false;
}
}, 500, 30000);
// Start 3rd DataNode and wait for node to reach in maintenance state
cluster.startDataNodes(conf, 1, true, null, new String[] { "/rack3" }, new String[] { "host3" }, null, false);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
DatanodeInfo datanodeInfo = null;
try {
for (DatanodeInfo info : dfs.getDataNodeStats()) {
if (dnName.equals(info.getXferAddr())) {
datanodeInfo = info;
}
}
if (datanodeInfo != null && datanodeInfo.isInMaintenance()) {
return true;
}
} catch (Exception e) {
LOG.warn("Unexpected exception: " + e);
return false;
}
return false;
}
}, 500, 30000);
// verify fsck returns Healthy status
String fsckOut = runFsck(conf, 0, true, testFile, "-maintenance");
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
// verify fsck returns Healthy status even without maintenance option
fsckOut = runFsck(conf, 0, true, testFile);
assertTrue(fsckOut.contains(NamenodeFsck.HEALTHY_STATUS));
}
use of org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager in project hadoop by apache.
the class AdminStatesBaseTest method putNodeInService.
/* Ask a specific NN to put the datanode in service and wait for it
* to reach the NORMAL state.
*/
protected void putNodeInService(int nnIndex, DatanodeInfo outOfServiceNode) throws IOException {
LOG.info("Putting node: " + outOfServiceNode + " in service");
ArrayList<String> decommissionNodes = new ArrayList<>();
Map<String, Long> maintenanceNodes = new HashMap<>();
DatanodeManager dm = cluster.getNamesystem(nnIndex).getBlockManager().getDatanodeManager();
List<DatanodeDescriptor> nodes = dm.getDatanodeListForReport(DatanodeReportType.ALL);
for (DatanodeDescriptor node : nodes) {
if (node.isMaintenance()) {
maintenanceNodes.put(node.getName(), node.getMaintenanceExpireTimeInMS());
} else if (node.isDecommissionInProgress() || node.isDecommissioned()) {
decommissionNodes.add(node.getName());
}
}
decommissionNodes.remove(outOfServiceNode.getName());
maintenanceNodes.remove(outOfServiceNode.getName());
hostsFileWriter.initOutOfServiceHosts(decommissionNodes, maintenanceNodes);
refreshNodes(nnIndex);
waitNodeState(outOfServiceNode, AdminStates.NORMAL);
}
Aggregations