Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in the Apache Hadoop project.
Class TestDecommissionWithStriped, method testDecommissionWithURBlockForSameBlockGroup.
/**
 * Decommissions a datanode that holds one internal block of a striped block
 * group while a second datanode of the same block group is stopped and marked
 * dead, and every datanode outside the block group is stopped as well.
 *
 * <p>Decommissioning runs in a background thread. After a grace period the
 * datanodes outside the block group are restarted, and the test then checks
 * the live/dead decommissioned counters, the number of live datanodes and the
 * file contents.
 *
 * @throws Exception if cluster manipulation fails, or if the background
 *         decommission thread recorded a failure
 */
@Test(timeout = 120000)
public void testDecommissionWithURBlockForSameBlockGroup() throws Exception {
  LOG.info("Starting test testDecommissionWithURBlocksForSameBlockGroup");
  final Path ecFile = new Path(ecDir, "testDecommissionWithCorruptBlocks");
  int writeBytes = cellSize * dataBlocks * 2;
  writeStripedFile(dfs, ecFile, writeBytes);
  Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks());

  final List<DatanodeInfo> decommisionNodes = new ArrayList<DatanodeInfo>();
  LocatedBlock lb = dfs.getClient().getLocatedBlocks(ecFile.toString(), 0).get(0);
  DatanodeInfo[] dnLocs = lb.getLocations();
  assertEquals(dataBlocks + parityBlocks, dnLocs.length);
  int decommNodeIndex = dataBlocks - 1;
  int stopNodeIndex = 1;

  // add the nodes which will be decommissioning
  decommisionNodes.add(dnLocs[decommNodeIndex]);

  // stop excess dns to avoid immediate reconstruction.
  // NOTE(review): "client" here resolves to a name declared outside this
  // method; the local "client" declared further down is not yet in scope.
  DatanodeInfo[] info = client.datanodeReport(DatanodeReportType.LIVE);
  List<DataNodeProperties> stoppedDns = new ArrayList<>();
  for (DatanodeInfo liveDn : info) {
    boolean usedNode = false;
    for (DatanodeInfo datanodeInfo : dnLocs) {
      if (liveDn.getXferAddr().equals(datanodeInfo.getXferAddr())) {
        usedNode = true;
        break;
      }
    }
    if (!usedNode) {
      DataNode dn = cluster.getDataNode(liveDn.getIpcPort());
      stoppedDns.add(cluster.stopDataNode(liveDn.getXferAddr()));
      cluster.setDataNodeDead(dn.getDatanodeId());
      LOG.info("stop datanode " + dn.getDatanodeId().getHostName());
    }
  }

  // Stop one datanode of the block group itself and mark it dead.
  DataNode dn = cluster.getDataNode(dnLocs[stopNodeIndex].getIpcPort());
  cluster.stopDataNode(dnLocs[stopNodeIndex].getXferAddr());
  cluster.setDataNodeDead(dn.getDatanodeId());
  numDNs = numDNs - 1;

  // Decommission node in a new thread. Verify that node is decommissioned.
  // The latch starts at 1 so that await() really blocks until the thread runs;
  // a count of 0 would make await() return immediately.
  final CountDownLatch decomStarted = new CountDownLatch(1);
  // Failures in the background thread are recorded here and rethrown on the
  // test thread; an assertion raised inside the other thread would not fail
  // the test by itself.
  final Throwable[] decomFailure = new Throwable[1];
  Thread decomTh = new Thread() {
    @Override
    public void run() {
      try {
        decomStarted.countDown();
        decommissionNode(0, decommisionNodes, AdminStates.DECOMMISSIONED);
      } catch (Throwable t) {
        LOG.error("Exception while decommissioning", t);
        decomFailure[0] = t;
      }
    }
  };
  int deadDecomissioned = fsn.getNumDecomDeadDataNodes();
  int liveDecomissioned = fsn.getNumDecomLiveDataNodes();
  decomTh.start();
  Assert.assertTrue("Decommission thread did not start within 5 seconds",
      decomStarted.await(5, TimeUnit.SECONDS));

  // grace period to trigger decommissioning call
  Thread.sleep(3000);

  // start datanode so that decommissioning live node will be finished
  for (DataNodeProperties dnp : stoppedDns) {
    cluster.restartDataNode(dnp);
    LOG.info("Restarts stopped datanode:{} to trigger block reconstruction", dnp.datanode);
  }
  cluster.waitActive();

  LOG.info("Waiting to finish decommissioning node:{}", decommisionNodes);
  // waiting 20secs to finish decommission
  decomTh.join(20000);
  if (decomFailure[0] != null) {
    throw new AssertionError("Shouldn't throw exception!", decomFailure[0]);
  }
  LOG.info("Finished decommissioning node:{}", decommisionNodes);
  assertEquals(deadDecomissioned, fsn.getNumDecomDeadDataNodes());
  assertEquals(liveDecomissioned + decommisionNodes.size(), fsn.getNumDecomLiveDataNodes());

  // Ensure decommissioned datanode is not automatically shutdown
  DFSClient client = getDfsClient(cluster.getNameNode(0), conf);
  assertEquals("All datanodes must be alive", numDNs, client.datanodeReport(DatanodeReportType.LIVE).length);
  assertNull(checkFile(dfs, ecFile, 9, decommisionNodes, numDNs));
  StripedFileTestUtil.checkData(dfs, ecFile, writeBytes, decommisionNodes, null, blockGroupSize);
  cleanupFile(dfs, ecFile);
}
Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in the Apache HBase project.
Class TestAsyncLogRolling, method testLogRollOnDatanodeDeath.
/**
 * Stops and restarts the first datanode in the WAL's write pipeline and
 * checks that exactly one additional rolled log file is reported after the
 * next put.
 *
 * @throws IOException if a cluster or table operation fails
 * @throws InterruptedException if the test thread is interrupted while waiting
 */
@Test(timeout = 180000)
public void testLogRollOnDatanodeDeath() throws IOException, InterruptedException {
  // Start three datanodes before creating the table.
  dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), 3, true, null, null);

  tableName = getName();
  Table testTable = createTestTable(tableName);
  TEST_UTIL.waitUntilAllRegionsAssigned(testTable.getName());
  doPut(testTable, 1);

  // Locate the WAL of the first online region of the table.
  server = TEST_UTIL.getRSForFirstRegionInTable(testTable.getName());
  HRegionInfo firstRegion =
      server.getOnlineRegions(testTable.getName()).get(0).getRegionInfo();
  AsyncFSWAL asyncWal = (AsyncFSWAL) server.getWAL(firstRegion);
  int rolledBefore = AsyncFSWALProvider.getNumRolledLogFiles(asyncWal);

  // Bounce the first datanode of the current pipeline.
  DatanodeInfo[] pipeline = asyncWal.getPipeline();
  String firstPipelineNode = pipeline[0].getName();
  DataNodeProperties bouncedNode = TEST_UTIL.getDFSCluster().stopDataNode(firstPipelineNode);
  TEST_UTIL.getDFSCluster().restartDataNode(bouncedNode);

  // The next write is expected to add exactly one rolled log file.
  doPut(testTable, 2);
  int expectedRolled = rolledBefore + 1;
  assertEquals(expectedRolled, AsyncFSWALProvider.getNumRolledLogFiles(asyncWal));
}
Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in the Apache Hadoop project.
Class TestNameNodeMetadataConsistency, method testGenerationStampInFuture.
/**
 * This test creates a file and modifies the block generation stamp to a
 * number that the name node has not seen yet. It then asserts that the name
 * node reports the bytes with future generation stamps and that its safe
 * mode tip names that reason, while it is in the startup path.
 *
 * @throws Exception if any cluster operation fails
 */
@Test
public void testGenerationStampInFuture() throws Exception {
  cluster.waitActive();

  FileSystem fs = cluster.getFileSystem();
  // try-with-resources closes the stream even when the write fails.
  try (OutputStream ostream = fs.create(filePath1)) {
    ostream.write(TEST_DATA_IN_FUTURE.getBytes());
  }

  // Re-write the Generation Stamp to a Generation Stamp in future.
  ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath1);
  final long genStamp = block.getGenerationStamp();
  final int datanodeIndex = 0;
  cluster.changeGenStampOfBlock(datanodeIndex, block, genStamp + 1);
  // stop the data node so that it won't remove block
  final DataNodeProperties dnProps = cluster.stopDataNode(datanodeIndex);

  // Simulate Namenode forgetting a Block
  cluster.restartNameNode(true);
  cluster.getNameNode().getNamesystem().writeLock();
  try {
    BlockInfo bInfo = cluster.getNameNode().getNamesystem().getBlockManager().getStoredBlock(block.getLocalBlock());
    bInfo.delete();
    cluster.getNameNode().getNamesystem().getBlockManager().removeBlock(bInfo);
  } finally {
    // Always release the write lock, even if the block lookup or removal
    // throws, so a failure here cannot leave the namesystem locked.
    cluster.getNameNode().getNamesystem().writeUnlock();
  }

  // we also need to tell block manager that we are in the startup path
  BlockManagerTestUtil.setStartupSafeModeForTest(cluster.getNameNode().getNamesystem().getBlockManager());
  cluster.restartDataNode(dnProps);
  waitForNumBytes(TEST_DATA_IN_FUTURE.length());

  // Make sure that we find all written bytes in future block
  assertEquals(TEST_DATA_IN_FUTURE.length(), cluster.getNameNode().getBytesWithFutureGenerationStamps());
  // Assert safemode reason
  assertTrue(cluster.getNameNode().getNamesystem().getSafeModeTip().contains("Name node detected blocks with generation stamps in future"));
}
Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in the Apache Hadoop project.
Class TestSafeMode, method testInitializeReplQueuesEarly.
/**
 * Verifies that the NameNode initializes its under-replicated blocks queue
 * before it is ready to leave safemode (HDFS-1476).
 *
 * <p>The test writes a 15-block file with replication 1, stops three
 * datanodes, restarts the NameNode with a replication-queue threshold of
 * 1/15, and then restarts a single datanode. Once that node's block reports
 * arrive, the replication queues must be initialized although only some of
 * the blocks are safe.
 */
@Test(timeout = 45000)
public void testInitializeReplQueuesEarly() throws Exception {
  LOG.info("Starting testInitializeReplQueuesEarly");

  // Have writes spread blocks over the datanodes we add, rather than
  // placing every block on the first node.
  BlockManagerTestUtil.setWritingPrefersLocalNode(cluster.getNamesystem().getBlockManager(), false);
  cluster.startDataNodes(conf, 2, true, StartupOption.REGULAR, null);
  cluster.waitActive();

  LOG.info("Creating files");
  DFSTestUtil.createFile(fs, TEST_PATH, 15 * BLOCK_SIZE, (short) 1, 1L);

  LOG.info("Stopping all DataNodes");
  List<DataNodeProperties> stoppedNodes = Lists.newLinkedList();
  // Index 0 is stopped three times in a row, collecting each node's properties.
  for (int i = 0; i < 3; i++) {
    stoppedNodes.add(cluster.stopDataNode(0));
  }

  cluster.getConfiguration(0).setFloat(DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 1f / 15f);

  LOG.info("Restarting NameNode");
  cluster.restartNameNode();
  final NameNode nameNode = cluster.getNameNode();

  // With no datanodes reporting, no block is safe yet.
  String actualStatus = nameNode.getNamesystem().getSafemode();
  String expectedStatus =
      "Safe mode is ON. The reported blocks 0 needs additional 14 blocks"
          + " to reach the threshold 0.9990 of total blocks 15."
          + NEWLINE
          + "The number of live datanodes 0 has reached the minimum number 0."
          + " Safe mode will be turned off automatically once the thresholds"
          + " have been reached.";
  assertEquals(expectedStatus, actualStatus);
  assertFalse(
      "Mis-replicated block queues should not be initialized until threshold is crossed",
      NameNodeAdapter.safeModeInitializedReplQueues(nameNode));

  LOG.info("Restarting one DataNode");
  DataNodeProperties firstStopped = stoppedNodes.remove(0);
  cluster.restartDataNode(firstStopped);

  // Block until the storage block report counter equals the number of
  // storages per datanode, i.e. the restarted node has reported them all.
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      return getLongCounter("StorageBlockReportOps", getMetrics(NN_METRICS)) == cluster.getStoragesPerDatanode();
    }
  }, 10, 10000);

  final long safeBlocks = NameNodeAdapter.getSafeModeSafeBlocks(nameNode);
  assertTrue("Expected first block report to make some blocks safe.", safeBlocks > 0);
  assertTrue("Did not expect first block report to make all blocks safe.", safeBlocks < 15);
  assertTrue(NameNodeAdapter.safeModeInitializedReplQueues(nameNode));

  // UnderReplicatedBlocks must climb to 15 - safeBlocks. Mis-replicated
  // blocks are processed asynchronously, so poll until the count matches;
  // if it never does, the test fails through its timeout.
  final long expectedUnderReplicated = 15 - safeBlocks;
  while (true) {
    BlockManagerTestUtil.updateState(nameNode.getNamesystem().getBlockManager());
    long actualUnderReplicated = nameNode.getNamesystem().getUnderReplicatedBlocks();
    if (actualUnderReplicated == expectedUnderReplicated) {
      break;
    }
    LOG.info("UnderReplicatedBlocks expected=" + expectedUnderReplicated + ", actual=" + actualUnderReplicated);
    Thread.sleep(100);
  }

  cluster.restartDataNodes();
}
Use of org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties in the Apache Hadoop project.
Class TestRBWBlockInvalidation, method testRWRInvalidation.
/**
 * Regression test for HDFS-4799: after a restart, when RWR replicas with
 * out-of-date generation stamps exist, the NN could accidentally delete the
 * good replicas instead of the bad ones.
 *
 * <p>The test writes to several files on a two-datanode cluster, stops one
 * datanode mid-write so that its replicas keep the old generation stamp,
 * finishes the files on the remaining node, then restarts the NameNode and
 * both datanodes (stale one first) and verifies that every file is still
 * fully readable after invalidations were processed.
 */
@Test(timeout = 120000)
public void testRWRInvalidation() throws Exception {
  Configuration conf = new HdfsConfiguration();

  // Use a randomized deletion policy instead of the default one. The default
  // depends on disk space, which the test cannot control; a random choice is
  // closer to real clusters, where nodes have random amounts of free space.
  conf.setClass(DFSConfigKeys.DFS_BLOCK_REPLICATOR_CLASSNAME_KEY, RandomDeleterPolicy.class, BlockPlacementPolicy.class);

  // Faster heartbeats make the test run a bit quicker.
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

  // Use several separate files: with only one, the test could pass by
  // "good luck" even though the bug is present.
  int numFiles = 10;
  List<Path> paths = Lists.newArrayList();
  for (int fileNo = 0; fileNo < numFiles; fileNo++) {
    paths.add(new Path("/test" + fileNo));
  }

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
  try {
    List<FSDataOutputStream> openStreams = Lists.newArrayList();
    try {
      // Create every test file, write a first line and flush it.
      for (Path file : paths) {
        FSDataOutputStream stream = cluster.getFileSystem().create(file, (short) 2);
        openStreams.add(stream);
        stream.writeBytes("old gs data\n");
        stream.hflush();
      }
      for (Path file : paths) {
        DFSTestUtil.waitReplication(cluster.getFileSystem(), file, (short) 2);
      }

      // Take down one of the nodes in the pipeline.
      DataNodeProperties staleReplicaNode = cluster.stopDataNode(0);

      // Write more data and flush again; this data only reaches the replica
      // on the node that is still running, i.e. the later-genstamp copy.
      for (int idx = 0; idx < openStreams.size(); idx++) {
        FSDataOutputStream stream = openStreams.get(idx);
        Path file = paths.get(idx);
        stream.writeBytes("new gs data\n");
        stream.hflush();
        // Lower replication so a single node is enough for this block,
        // then close the file.
        cluster.getFileSystem().setReplication(file, (short) 1);
        stream.close();
      }
      for (Path file : paths) {
        DFSTestUtil.waitReplication(cluster.getFileSystem(), file, (short) 1);
      }

      // After the restart there are two replicas per block: one with the old
      // genstamp and one current. The old-genstamp copy is the one that must
      // be deleted.
      LOG.info("=========================== restarting cluster");
      DataNodeProperties currentReplicaNode = cluster.stopDataNode(0);
      cluster.restartNameNode();

      // Bring back the datanode holding the stale replicas first ...
      cluster.restartDataNode(staleReplicaNode);
      cluster.waitActive();
      // ... and only then the node holding the current replicas.
      cluster.restartDataNode(currentReplicaNode);
      cluster.waitActive();

      // Compute and send invalidations, then wait until they are fully processed.
      cluster.getNameNode().getNamesystem().getBlockManager().computeInvalidateWork(2);
      cluster.triggerHeartbeats();
      HATestUtil.waitForDNDeletions(cluster);
      cluster.triggerDeletionReports();

      waitForNumTotalBlocks(cluster, numFiles);

      // Every file must still be readable with both lines present.
      final String expectedContents = "old gs data\n" + "new gs data\n";
      for (Path file : paths) {
        assertEquals(expectedContents, DFSTestUtil.readFile(cluster.getFileSystem(), file));
      }
    } finally {
      IOUtils.cleanup(LOG, openStreams.toArray(new Closeable[0]));
    }
  } finally {
    cluster.shutdown();
  }
}
Aggregations