Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.
The class TestNameNodeReconfigure, method testReconfigureHearbeatCheck.
/**
 * Test to reconfigure interval of heart beat check and re-check.
 */
@Test
public void testReconfigureHearbeatCheck() throws ReconfigurationException {
  final NameNode nameNode = cluster.getNameNode();
  final DatanodeManager datanodeManager = nameNode.namesystem.getBlockManager()
      .getDatanodeManager();
  // change properties
  nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, "" + 6);
  nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
      "" + (10 * 60 * 1000));
  // try invalid values
  try {
    nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, "text");
    fail("ReconfigurationException expected");
  } catch (ReconfigurationException expected) {
    assertTrue(expected.getCause() instanceof NumberFormatException);
  }
  try {
    nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, "text");
    fail("ReconfigurationException expected");
  } catch (ReconfigurationException expected) {
    assertTrue(expected.getCause() instanceof NumberFormatException);
  }
  // verify change
  assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", 6,
      nameNode.getConf().getLong(DFS_HEARTBEAT_INTERVAL_KEY, DFS_HEARTBEAT_INTERVAL_DEFAULT));
  assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", 6,
      datanodeManager.getHeartbeatInterval());
  assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", 10 * 60 * 1000,
      nameNode.getConf().getInt(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
          DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT));
  assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", 10 * 60 * 1000,
      datanodeManager.getHeartbeatRecheckInterval());
  // revert to defaults
  nameNode.reconfigureProperty(DFS_HEARTBEAT_INTERVAL_KEY, null);
  nameNode.reconfigureProperty(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, null);
  // verify defaults
  assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", null,
      nameNode.getConf().get(DFS_HEARTBEAT_INTERVAL_KEY));
  assertEquals(DFS_HEARTBEAT_INTERVAL_KEY + " has wrong value", DFS_HEARTBEAT_INTERVAL_DEFAULT,
      datanodeManager.getHeartbeatInterval());
  assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value", null,
      nameNode.getConf().get(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY));
  assertEquals(DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY + " has wrong value",
      DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT,
      datanodeManager.getHeartbeatRecheckInterval());
}
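The same handling applies to any org.apache.hadoop.conf.Reconfigurable, not just the NameNode: apply the change through reconfigureProperty() and, if it is rejected, inspect the cause chain of the ReconfigurationException. Below is a minimal sketch of such a caller; the ReconfigureSketch class, the tryReconfigure helper, and its messages are illustrative and not part of Hadoop, while the null-reverts-to-default behavior is what the test above demonstrates.

import org.apache.hadoop.conf.Reconfigurable;
import org.apache.hadoop.conf.ReconfigurationException;

// Hypothetical helper, not part of Hadoop: apply a runtime property change
// and report why it was rejected. A null value reverts the property to its
// default, as the test above shows for the NameNode.
public class ReconfigureSketch {
  public static boolean tryReconfigure(Reconfigurable target, String key, String value) {
    try {
      target.reconfigureProperty(key, value);
      return true;
    } catch (ReconfigurationException e) {
      if (e.getCause() instanceof NumberFormatException) {
        System.err.println(key + ": '" + value + "' is not a valid number");
      } else {
        System.err.println(key + ": change rejected: " + e.getMessage());
      }
      return false;
    }
  }
}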
Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.
The class TestDataNodeHotSwapVolumes, method testAddVolumeFailures.
@Test
public void testAddVolumeFailures() throws IOException {
  startDFSCluster(1, 1);
  final String dataDir = cluster.getDataDirectory();
  DataNode dn = cluster.getDataNodes().get(0);
  List<String> newDirs = Lists.newArrayList();
  final int NUM_NEW_DIRS = 4;
  for (int i = 0; i < NUM_NEW_DIRS; i++) {
    File newVolume = new File(dataDir, "new_vol" + i);
    newDirs.add(newVolume.toString());
    if (i % 2 == 0) {
      // Make addVolume() fail.
      newVolume.createNewFile();
    }
  }
  String newValue = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY) + ","
      + Joiner.on(",").join(newDirs);
  try {
    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newValue);
    fail("Expect to throw IOException.");
  } catch (ReconfigurationException e) {
    String errorMessage = e.getCause().getMessage();
    String[] messages = errorMessage.split("\\r?\\n");
    assertEquals(2, messages.length);
    assertThat(messages[0], containsString("new_vol0"));
    assertThat(messages[1], containsString("new_vol2"));
  }
  // Make sure that vol0 and vol2's metadata are not left in memory.
  FsDatasetSpi<?> dataset = dn.getFSDataset();
  try (FsDatasetSpi.FsVolumeReferences volumes = dataset.getFsVolumeReferences()) {
    for (FsVolumeSpi volume : volumes) {
      assertThat(new File(volume.getStorageLocation().getUri()).toString(),
          is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
    }
  }
  DataStorage storage = dn.getStorage();
  for (int i = 0; i < storage.getNumStorageDirs(); i++) {
    Storage.StorageDirectory sd = storage.getStorageDir(i);
    assertThat(sd.getRoot().toString(),
        is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
  }
  // The newly effective conf does not have vol0 and vol2.
  String[] effectiveVolumes = dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY).split(",");
  assertEquals(4, effectiveVolumes.length);
  for (String ev : effectiveVolumes) {
    assertThat(new File(StorageLocation.parse(ev).getUri()).getCanonicalPath(),
        is(not(anyOf(is(newDirs.get(0)), is(newDirs.get(2))))));
  }
}
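The assertions above rely on the cause message carrying one failed volume per line. A small, self-contained sketch of that contract follows; the property name, paths, and the wrapped IOException are made up for illustration, and only the four-argument ReconfigurationException constructor and the \r?\n split used by the test are assumed.

import java.io.IOException;
import org.apache.hadoop.conf.ReconfigurationException;

// Self-contained sketch: when several volumes fail to add, the wrapped cause
// lists one failure per line, so callers can split on \r?\n to report each
// volume individually. Paths and property values here are illustrative only.
public class VolumeFailureMessageSketch {
  public static void main(String[] args) {
    ReconfigurationException e = new ReconfigurationException(
        "dfs.datanode.data.dir", "newValue", "oldValue",
        new IOException("/data/new_vol0 is not a directory\n"
            + "/data/new_vol2 is not a directory"));
    for (String failure : e.getCause().getMessage().split("\\r?\\n")) {
      System.out.println("Volume was not added: " + failure);
    }
  }
}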
Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.
The class TestDataNodeHotSwapVolumes, method testRemoveVolumeBeingWrittenForDatanode.
/**
 * Test the case that remove a data volume on a particular DataNode when the
 * volume is actively being written.
 * @param dataNodeIdx the index of the DataNode to remove a volume.
 */
private void testRemoveVolumeBeingWrittenForDatanode(int dataNodeIdx)
    throws IOException, ReconfigurationException, TimeoutException,
    InterruptedException, BrokenBarrierException {
  // Starts DFS cluster with 3 DataNodes to form a pipeline.
  startDFSCluster(1, 3);
  final short REPLICATION = 3;
  final DataNode dn = cluster.getDataNodes().get(dataNodeIdx);
  final FileSystem fs = cluster.getFileSystem();
  final Path testFile = new Path("/test");
  FSDataOutputStream out = fs.create(testFile, REPLICATION);
  Random rb = new Random(0);
  // half of the block.
  byte[] writeBuf = new byte[BLOCK_SIZE / 2];
  rb.nextBytes(writeBuf);
  out.write(writeBuf);
  out.hflush();
  // Make FsDatasetSpi#finalizeBlock a time-consuming operation. So if the
  // BlockReceiver releases volume reference before finalizeBlock(), the blocks
  // on the volume will be removed, and finalizeBlock() throws IOE.
  final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
  dn.data = Mockito.spy(data);
  doAnswer(new Answer<Object>() {
    public Object answer(InvocationOnMock invocation)
        throws IOException, InterruptedException {
      Thread.sleep(1000);
      // Bypass the argument to FsDatasetImpl#finalizeBlock to verify that
      // the block is not removed, since the volume reference should not
      // be released at this point.
      data.finalizeBlock((ExtendedBlock) invocation.getArguments()[0]);
      return null;
    }
  }).when(dn.data).finalizeBlock(any(ExtendedBlock.class));
  final CyclicBarrier barrier = new CyclicBarrier(2);
  List<String> oldDirs = getDataDirs(dn);
  // Remove the first volume.
  final String newDirs = oldDirs.get(1);
  final List<Exception> exceptions = new ArrayList<>();
  Thread reconfigThread = new Thread() {
    public void run() {
      try {
        barrier.await();
        assertThat("DN did not update its own config",
            dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newDirs),
            is(dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY)));
      } catch (ReconfigurationException | InterruptedException | BrokenBarrierException e) {
        exceptions.add(e);
      }
    }
  };
  reconfigThread.start();
  barrier.await();
  rb.nextBytes(writeBuf);
  out.write(writeBuf);
  out.hflush();
  out.close();
  reconfigThread.join();
  // Verify if the data directory reconfigure was successful
  FsDatasetSpi<? extends FsVolumeSpi> fsDatasetSpi = dn.getFSDataset();
  try (FsDatasetSpi.FsVolumeReferences fsVolumeReferences =
      fsDatasetSpi.getFsVolumeReferences()) {
    for (int i = 0; i < fsVolumeReferences.size(); i++) {
      System.out.println("Vol: " + fsVolumeReferences.get(i).getBaseURI().toString());
    }
    assertEquals("Volume remove wasn't successful.", 1, fsVolumeReferences.size());
  }
  // Verify the file has sufficient replications.
  DFSTestUtil.waitReplication(fs, testFile, REPLICATION);
  // Read the content back
  byte[] content = DFSTestUtil.readFileBuffer(fs, testFile);
  assertEquals(BLOCK_SIZE, content.length);
  if (!exceptions.isEmpty()) {
    throw new IOException(exceptions.get(0).getCause());
  }
}
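The interesting part of this test is the two-party CyclicBarrier: it releases the writer and the reconfiguration thread together, so the volume removal races with an in-flight write instead of running strictly before or after it. The following self-contained sketch isolates that handshake with plain JDK classes; the class name and printed messages are illustrative only.

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CyclicBarrier;

// Minimal sketch of the two-party handshake used above: both the "writer"
// (main thread) and the "reconfiguration" thread block on one barrier, so
// the reconfiguration starts while the write is still in flight. Exceptions
// from the background thread are collected for the main thread to rethrow,
// as the test does.
public class BarrierRaceSketch {
  public static void main(String[] args) throws Exception {
    final CyclicBarrier startSignal = new CyclicBarrier(2);
    final List<Exception> exceptions = new CopyOnWriteArrayList<>();

    Thread reconfigThread = new Thread(() -> {
      try {
        startSignal.await();              // rendezvous with the writer
        System.out.println("reconfiguring while the write is in flight");
      } catch (Exception e) {
        exceptions.add(e);                // surface failures to the main thread
      }
    });
    reconfigThread.start();

    startSignal.await();                  // release both threads together
    System.out.println("writing the second half of the block");
    reconfigThread.join();

    if (!exceptions.isEmpty()) {
      throw exceptions.get(0);
    }
  }
}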
Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.
The class TestDataNodeHotSwapVolumes, method testAddVolumesConcurrently.
@Test(timeout = 180000)
public void testAddVolumesConcurrently()
    throws IOException, InterruptedException, TimeoutException, ReconfigurationException {
  startDFSCluster(1, 1, 10);
  int numVolumes = cluster.getStoragesPerDatanode();
  String blockPoolId = cluster.getNamesystem().getBlockPoolId();
  Path testFile = new Path("/test");
  // Each volume has 2 blocks
  int initialBlockCount = numVolumes * 2;
  createFile(testFile, initialBlockCount);
  DataNode dn = cluster.getDataNodes().get(0);
  final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
  dn.data = Mockito.spy(data);
  final int newVolumeCount = 40;
  List<Thread> addVolumeDelayedThreads = new ArrayList<>();
  AtomicBoolean addVolumeError = new AtomicBoolean(false);
  AtomicBoolean listStorageError = new AtomicBoolean(false);
  CountDownLatch addVolumeCompletionLatch = new CountDownLatch(newVolumeCount);
  // Thread to list all storage available at DataNode,
  // when the volumes are being added in parallel.
  final Thread listStorageThread = new Thread(new Runnable() {
    @Override
    public void run() {
      while (addVolumeCompletionLatch.getCount() != newVolumeCount) {
        int i = 0;
        while (i++ < 1000) {
          try {
            dn.getStorage().listStorageDirectories();
          } catch (Exception e) {
            listStorageError.set(true);
            LOG.error("Error listing storage: " + e);
          }
        }
      }
    }
  });
  listStorageThread.start();
  // FsDatasetImpl addVolume mocked to perform the operation asynchronously
  doAnswer(new Answer<Object>() {
    @Override
    public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
      final Random r = new Random();
      Thread addVolThread = new Thread(new Runnable() {
        @Override
        public void run() {
          try {
            r.setSeed(Time.now());
            // start after an initial delay.
            if (r.nextInt(10) > 4) {
              int s = r.nextInt(10) + 1;
              Thread.sleep(s * 100);
            }
            invocationOnMock.callRealMethod();
          } catch (Throwable throwable) {
            addVolumeError.set(true);
            LOG.error("Error adding volume: " + throwable);
          } finally {
            addVolumeCompletionLatch.countDown();
          }
        }
      });
      addVolumeDelayedThreads.add(addVolThread);
      addVolThread.start();
      return null;
    }
  }).when(dn.data).addVolume(any(StorageLocation.class), any(List.class));
  addVolumes(newVolumeCount, addVolumeCompletionLatch);
  numVolumes += newVolumeCount;
  // Wait for all addVolume and listStorage Threads to complete
  for (Thread t : addVolumeDelayedThreads) {
    t.join();
  }
  listStorageThread.join();
  // Verify errors while adding volumes and listing storage directories
  Assert.assertEquals("Error adding volumes!", false, addVolumeError.get());
  Assert.assertEquals("Error listing storage!", false, listStorageError.get());
  int additionalBlockCount = 9;
  int totalBlockCount = initialBlockCount + additionalBlockCount;
  // Continue to write the same file, thus the new volumes will have blocks.
  DFSTestUtil.appendFile(cluster.getFileSystem(), testFile, BLOCK_SIZE * additionalBlockCount);
  verifyFileLength(cluster.getFileSystem(), testFile, totalBlockCount);
  List<Map<DatanodeStorage, BlockListAsLongs>> blockReports =
      cluster.getAllBlockReports(blockPoolId);
  assertEquals(1, blockReports.size());
  assertEquals(numVolumes, blockReports.get(0).size());
}
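The concurrency here comes from a Mockito spy whose addVolume() is rewired with doAnswer() to run the real call on a delayed background thread, while a CountDownLatch tells the test when every call has completed. The sketch below isolates that pattern with a made-up VolumeAdder class; it is not Hadoop code, and only the Mockito doAnswer/callRealMethod and CountDownLatch usage already shown in the test are assumed.

import static org.mockito.Mockito.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;

// Self-contained sketch: a spied collaborator's method is stubbed so the real
// call runs on a background thread, and a CountDownLatch signals when every
// delayed call has finished. VolumeAdder is illustrative, not part of Hadoop.
public class AsyncSpySketch {
  static class VolumeAdder {
    void addVolume(String location) {
      System.out.println("added " + location);
    }
  }

  public static void main(String[] args) throws InterruptedException {
    final int calls = 4;
    final CountDownLatch done = new CountDownLatch(calls);
    final AtomicBoolean failed = new AtomicBoolean(false);

    VolumeAdder adder = spy(new VolumeAdder());
    doAnswer(invocation -> {
      new Thread(() -> {
        try {
          invocation.callRealMethod();      // run the real addVolume later
        } catch (Throwable t) {
          failed.set(true);
        } finally {
          done.countDown();                 // signal completion to the test
        }
      }).start();
      return null;                          // the caller returns immediately
    }).when(adder).addVolume(anyString());

    for (int i = 0; i < calls; i++) {
      adder.addVolume("/data/vol" + i);
    }
    done.await();                           // wait for all async additions
    System.out.println("all additions finished, failed=" + failed.get());
  }
}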
Use of org.apache.hadoop.conf.ReconfigurationException in project hadoop by apache.
The class TestDataNodeVolumeFailureReporting, method testHotSwapOutFailedVolumeAndReporting.
/**
 * Verify DataNode NumFailedVolumes and FailedStorageLocations
 * after hot swap out of failed volume.
 */
@Test
public void testHotSwapOutFailedVolumeAndReporting() throws Exception {
  final File dn0Vol1 = new File(dataDir, "data" + (2 * 0 + 1));
  final File dn0Vol2 = new File(dataDir, "data" + (2 * 0 + 2));
  final DataNode dn0 = cluster.getDataNodes().get(0);
  final String oldDataDirs = dn0.getConf().get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
  MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
  ObjectName mxbeanName = new ObjectName(
      "Hadoop:service=DataNode,name=FSDatasetState-" + dn0.getDatanodeUuid());
  int numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
  Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
  checkFailuresAtDataNode(dn0, 0, false, new String[] {});
  // Fail dn0Vol1 first.
  // Verify NumFailedVolumes and FailedStorageLocations are updated.
  DataNodeTestUtils.injectDataDirFailure(dn0Vol1);
  DataNodeTestUtils.waitForDiskError(dn0, DataNodeTestUtils.getVolume(dn0, dn0Vol1));
  numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
  Assert.assertEquals(1, numFailedVolumes);
  Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
  checkFailuresAtDataNode(dn0, 1, true, new String[] { dn0Vol1.getAbsolutePath() });
  // Verify NumFailedVolumes and FailedStorageLocations haven't changed.
  try {
    dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs);
    fail("Reconfigure with failed disk should throw exception.");
  } catch (ReconfigurationException e) {
    Assert.assertTrue("Reconfigure exception doesn't have expected path!",
        e.getCause().getMessage().contains(dn0Vol1.getAbsolutePath()));
  }
  numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
  Assert.assertEquals(1, numFailedVolumes);
  Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
  checkFailuresAtDataNode(dn0, 1, true, new String[] { dn0Vol1.getAbsolutePath() });
  // Hot swap out the failed volume.
  // Verify NumFailedVolumes and FailedStorageLocations are reset.
  String dataDirs = dn0Vol2.getPath();
  dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDirs);
  numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
  Assert.assertEquals(0, numFailedVolumes);
  Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
  checkFailuresAtDataNode(dn0, 0, true, new String[] {});
  // Fix failure volume dn0Vol1 and remount it back.
  // Verify NumFailedVolumes and FailedStorageLocations are empty.
  DataNodeTestUtils.restoreDataDirFromFailure(dn0Vol1);
  dn0.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, oldDataDirs);
  numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
  Assert.assertEquals(0, numFailedVolumes);
  Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
  checkFailuresAtDataNode(dn0, 0, true, new String[] {});
  // Fail dn0Vol2.
  // Verify NumFailedVolumes and FailedStorageLocations are updated.
  DataNodeTestUtils.injectDataDirFailure(dn0Vol2);
  DataNodeTestUtils.waitForDiskError(dn0, DataNodeTestUtils.getVolume(dn0, dn0Vol2));
  numFailedVolumes = (int) mbs.getAttribute(mxbeanName, "NumFailedVolumes");
  Assert.assertEquals(1, numFailedVolumes);
  Assert.assertEquals(dn0.getFSDataset().getNumFailedVolumes(), numFailedVolumes);
  checkFailuresAtDataNode(dn0, 1, true, new String[] { dn0Vol2.getAbsolutePath() });
  // Verify DataNode tolerating one disk failure.
  assertTrue(dn0.shouldRun());
}
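The failure count is read through JMX rather than the FsDatasetSpi API: each DataNode registers an FSDatasetState MBean keyed by its UUID, and NumFailedVolumes is one of its attributes. A minimal sketch of that lookup, assuming only the MBean name pattern already used in the test, is shown below; the wrapper class and method names are illustrative.

import java.lang.management.ManagementFactory;
import javax.management.MBeanServer;
import javax.management.ObjectName;

// Sketch of the JMX lookup used above: the per-DataNode FSDatasetState MBean
// exposes NumFailedVolumes, so the failure count can be read without going
// through FsDatasetSpi. In the test, datanodeUuid comes from
// dn0.getDatanodeUuid(); here it is just a parameter.
public class FailedVolumesJmxSketch {
  public static int readNumFailedVolumes(String datanodeUuid) throws Exception {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName name = new ObjectName(
        "Hadoop:service=DataNode,name=FSDatasetState-" + datanodeUuid);
    return (int) mbs.getAttribute(name, "NumFailedVolumes");
  }
}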