Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestDatanodeHddsVolumeFailureToleration, method testDNCorrectlyHandlesVolumeFailureOnStartup.
@Test
public void testDNCorrectlyHandlesVolumeFailureOnStartup() throws Exception {
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  StorageVolume vol1 = volSet.getVolumesList().get(1);
  File volRootDir0 = vol0.getStorageDir();
  File volRootDir1 = vol1.getStorageDir();

  // Fail one volume, which is within the tolerated count, then restart;
  // a clean restart (no exception) is the expected outcome.
  DatanodeTestUtils.simulateBadRootDir(volRootDir0);
  cluster.restartHddsDatanode(0, true);

  // Fail a second volume, exceeding the tolerated count; the restart
  // should now fail with a descriptive RuntimeException.
  DatanodeTestUtils.simulateBadRootDir(volRootDir1);
  try {
    cluster.restartHddsDatanode(0, true);
    Assert.fail("Restart should fail once failed volumes exceed the tolerated count");
  } catch (RuntimeException e) {
    Assert.assertTrue(e.getMessage().contains("Can't start the HDDS datanode plugin"));
  }

  // Restore the bad volumes for cleanup.
  DatanodeTestUtils.restoreBadRootDir(volRootDir0);
  DatanodeTestUtils.restoreBadRootDir(volRootDir1);
}
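The failure injection above is delegated to DatanodeTestUtils, which is not shown on this page. As a minimal sketch, assuming simulateBadRootDir works by revoking permissions on the volume root directory (the helper below is illustrative, not the actual Ozone implementation):

import java.io.File;

final class VolumeRootFailureSketch {
  // Hypothetical stand-in for DatanodeTestUtils.simulateBadRootDir:
  // revoke read/write access so volume checks against this root fail.
  static void simulateBadRootDir(File rootDir) {
    if (!rootDir.setWritable(false) || !rootDir.setReadable(false)) {
      throw new IllegalStateException("could not revoke permissions on " + rootDir);
    }
  }

  // Hypothetical stand-in for DatanodeTestUtils.restoreBadRootDir:
  // grant access back so the test can clean up.
  static void restoreBadRootDir(File rootDir) {
    rootDir.setReadable(true);
    rootDir.setWritable(true);
  }
}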
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnChunkFileCorrupt.
@Test
public void testHddsVolumeFailureOnChunkFileCorrupt() throws Exception {
  // Write a key so that a chunk file exists on disk.
  String keyName = UUID.randomUUID().toString();
  String value = "sample value";
  OzoneOutputStream out = bucket.createKey(keyName,
      value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
  out.write(value.getBytes(UTF_8));
  out.close();
  OzoneKey key = bucket.getKey(keyName);
  Assert.assertEquals(keyName, key.getName());

  // Corrupt each chunk file by renaming it from a file to a directory.
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  Assert.assertTrue(vol0 instanceof HddsVolume);
  File clusterDir = DatanodeTestUtils.getHddsVolumeClusterDir((HddsVolume) vol0);
  File currentDir = new File(clusterDir, Storage.STORAGE_DIR_CURRENT);
  File containerTopDir = new File(currentDir, Storage.CONTAINER_DIR + "0");
  File containerDir = new File(containerTopDir, "1");
  File chunksDir = new File(containerDir, OzoneConsts.STORAGE_DIR_CHUNKS);
  File[] chunkFiles = chunksDir.listFiles();
  Assert.assertNotNull(chunkFiles);
  for (File chunkFile : chunkFiles) {
    DatanodeTestUtils.injectDataFileFailure(chunkFile);
  }

  // Simulate a bad volume by removing write permission on the root dir;
  // refer to HddsVolume.check().
  DatanodeTestUtils.simulateBadVolume(vol0);

  // Read the key back to trigger checkVolumeAsync; the read must fail.
  OzoneInputStream is = bucket.readKey(keyName);
  byte[] fileContent = new byte[value.getBytes(UTF_8).length];
  try {
    is.read(fileContent);
    Assert.fail("Read should fail on a corrupted chunk file");
  } catch (Exception e) {
    Assert.assertTrue(e instanceof IOException);
  } finally {
    is.close();
  }

  // The failed read should trigger checkVolumeAsync,
  // and one failed volume should be detected.
  DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
  DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);

  // Restore everything for cleanup.
  DatanodeTestUtils.restoreBadVolume(vol0);
  for (File chunkFile : chunkFiles) {
    DatanodeTestUtils.restoreDataFileFromFailure(chunkFile);
  }
}
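The comment in this test describes the chunk corruption as a file-to-directory rename. A minimal sketch of that idea, assuming injectDataFileFailure moves the file aside and shadows it with an empty directory (the helper and its ".orig" backup naming are hypothetical, not the actual DatanodeTestUtils code):

import java.io.File;
import java.io.IOException;

final class ChunkFileFailureSketch {
  // Move the chunk file aside and put an empty directory in its place,
  // so any read of the chunk fails with an IOException.
  static void injectDataFileFailure(File chunkFile) throws IOException {
    File backup = new File(chunkFile.getPath() + ".orig"); // assumed naming
    if (!chunkFile.renameTo(backup) || !chunkFile.mkdir()) {
      throw new IOException("failed to shadow file with dir: " + chunkFile);
    }
  }

  // Remove the shadow directory and move the original file back.
  static void restoreDataFileFromFailure(File chunkFile) throws IOException {
    File backup = new File(chunkFile.getPath() + ".orig");
    if (!chunkFile.delete() || !backup.renameTo(chunkFile)) {
      throw new IOException("failed to restore file: " + chunkFile);
    }
  }
}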
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnDbFileCorrupt.
@Test
public void testHddsVolumeFailureOnDbFileCorrupt() throws Exception {
  // Write a key; this creates container 1.
  String keyName = UUID.randomUUID().toString();
  String value = "sample value";
  OzoneOutputStream out = bucket.createKey(keyName,
      value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
  out.write(value.getBytes(UTF_8));
  out.close();
  OzoneKey key = bucket.getKey(keyName);
  Assert.assertEquals(keyName, key.getName());

  // Close container 1.
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  Container c1 = oc.getContainerSet().getContainer(1);
  c1.close();

  // Create container 2, which evicts container 1 from the cache.
  ContainerWithPipeline c2 = scmClient.createContainer(
      HddsProtos.ReplicationType.STAND_ALONE,
      HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
  Assert.assertEquals(HddsProtos.LifeCycleState.OPEN,
      c2.getContainerInfo().getState());

  // Corrupt the container DB by renaming its directory to a file.
  File metadataDir = new File(c1.getContainerFile().getParent());
  File dbDir = new File(metadataDir, "1" + OzoneConsts.DN_CONTAINER_DB);
  DatanodeTestUtils.injectDataDirFailure(dbDir);

  // Simulate a bad volume by removing write permission on the root dir;
  // refer to HddsVolume.check().
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  DatanodeTestUtils.simulateBadVolume(vol0);

  // Read the key back to trigger checkVolumeAsync; the read must fail.
  OzoneInputStream is = bucket.readKey(keyName);
  byte[] fileContent = new byte[value.getBytes(UTF_8).length];
  try {
    is.read(fileContent);
    Assert.fail("Read should fail on a corrupted container DB");
  } catch (Exception e) {
    Assert.assertTrue(e instanceof IOException);
  } finally {
    is.close();
  }

  // The failed read should trigger checkVolumeAsync,
  // and one failed volume should be detected.
  DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
  DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);

  // Restore everything for cleanup.
  DatanodeTestUtils.restoreBadVolume(vol0);
  DatanodeTestUtils.restoreDataDirFromFailure(dbDir);
}
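This test's comment describes the inverse trick: the container DB directory is replaced by a plain file. A sketch under the same assumptions as the chunk-file example above (hypothetical helper and backup naming, not the real DatanodeTestUtils):

import java.io.File;
import java.io.IOException;

final class DbDirFailureSketch {
  // Move the DB directory aside and shadow it with an empty file,
  // so any attempt to open the container DB fails.
  static void injectDataDirFailure(File dbDir) throws IOException {
    File backup = new File(dbDir.getPath() + ".orig"); // assumed naming
    if (!dbDir.renameTo(backup)) {
      throw new IOException("failed to move dir aside: " + dbDir);
    }
    if (!dbDir.createNewFile()) {
      throw new IOException("failed to shadow dir with file: " + dbDir);
    }
  }

  // Delete the shadow file and move the original directory back.
  static void restoreDataDirFromFailure(File dbDir) throws IOException {
    File backup = new File(dbDir.getPath() + ".orig");
    if (!dbDir.delete() || !backup.renameTo(dbDir)) {
      throw new IOException("failed to restore dir: " + dbDir);
    }
  }
}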
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnContainerFileCorrupt.
@Test
public void testHddsVolumeFailureOnContainerFileCorrupt() throws Exception {
  // Create a container.
  ContainerWithPipeline container = scmClient.createContainer(
      HddsProtos.ReplicationType.STAND_ALONE,
      HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);

  // Corrupt the container file by removing write permission on the
  // container metadata dir. Container updates use a create-temp-then-rename
  // scheme, so simply renaming the container file cannot simulate corruption.
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  Container c1 = oc.getContainerSet()
      .getContainer(container.getContainerInfo().getContainerID());
  File metadataDir = new File(c1.getContainerFile().getParent());
  DatanodeTestUtils.injectContainerMetaDirFailure(metadataDir);

  // Simulate a bad volume by removing write permission on the root dir;
  // refer to HddsVolume.check().
  DatanodeTestUtils.simulateBadVolume(vol0);

  // Close the container to trigger checkVolumeAsync; the close must fail.
  try {
    c1.close();
    Assert.fail("Close should fail on a corrupted container metadata dir");
  } catch (Exception e) {
    Assert.assertTrue(e instanceof IOException);
  }

  // The failed close should trigger checkVolumeAsync,
  // and one failed volume should be detected.
  DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
  DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);

  // Restore everything for cleanup.
  DatanodeTestUtils.restoreBadVolume(vol0);
  DatanodeTestUtils.restoreContainerMetaDirFromFailure(metadataDir);
}
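Because container updates write to a temp file and rename it into place, this test corrupts metadata via permissions rather than renames. A hypothetical sketch of that injection (illustrative only, not the actual helper):

import java.io.File;

final class ContainerMetaDirFailureSketch {
  // Revoke write permission on the container metadata dir so the
  // create-temp-and-rename update performed on close fails.
  static void injectContainerMetaDirFailure(File metadataDir) {
    if (!metadataDir.setWritable(false)) {
      throw new IllegalStateException("could not revoke write permission on " + metadataDir);
    }
  }

  // Grant write permission back for cleanup.
  static void restoreContainerMetaDirFromFailure(File metadataDir) {
    metadataDir.setWritable(true);
  }
}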
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
From the class TestDeleteContainerHandler, method testDeleteContainerRequestHandlerOnClosedContainer.
@Test(timeout = 60000)
public void testDeleteContainerRequestHandlerOnClosedContainer() throws Exception {
  // The easiest way to create an open container is to create a key.
  String keyName = UUID.randomUUID().toString();
  createKey(keyName);

  // Look up the container and pipeline for the key.
  ContainerID containerId = getContainerID(keyName);
  ContainerInfo container = cluster.getStorageContainerManager()
      .getContainerManager().getContainer(containerId);
  Pipeline pipeline = cluster.getStorageContainerManager()
      .getPipelineManager().getPipeline(container.getPipelineID());

  // The container must be closed first: a delete container command with the
  // force flag set to false only succeeds on closed containers.
  HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0);
  Assert.assertFalse(isContainerClosed(hddsDatanodeService, containerId.getId()));
  DatanodeDetails datanodeDetails = hddsDatanodeService.getDatanodeDetails();
  NodeManager nodeManager = cluster.getStorageContainerManager().getScmNodeManager();

  // Send the command to close the container.
  SCMCommand<?> command = new CloseContainerCommand(containerId.getId(), pipeline.getId());
  command.setTerm(cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
  nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
  GenericTestUtils.waitFor(
      () -> isContainerClosed(hddsDatanodeService, containerId.getId()), 500, 5 * 1000);
  // Double-check that it is really closed (waitFor also throws on timeout).
  Assert.assertTrue(isContainerClosed(hddsDatanodeService, containerId.getId()));

  // Check that the container exists before sending the delete command.
  Assert.assertFalse(isContainerDeleted(hddsDatanodeService, containerId.getId()));

  // Send the delete container command to the datanode.
  command = new DeleteContainerCommand(containerId.getId(), false);
  command.setTerm(cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
  nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
  GenericTestUtils.waitFor(
      () -> isContainerDeleted(hddsDatanodeService, containerId.getId()), 500, 5 * 1000);
  Assert.assertTrue(isContainerDeleted(hddsDatanodeService, containerId.getId()));
}
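The helpers isContainerClosed and isContainerDeleted are defined elsewhere in TestDeleteContainerHandler. A plausible sketch, reusing only the accessors already shown on this page; the getContainerData().isClosed() call is an assumption about the ContainerData API:

private boolean isContainerClosed(HddsDatanodeService dn, long containerID) {
  // Look the container up in the datanode's container set and check its state.
  Container container = dn.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(containerID);
  return container != null && container.getContainerData().isClosed();
}

private boolean isContainerDeleted(HddsDatanodeService dn, long containerID) {
  // Once the delete command has been handled, the container is gone from the set.
  return dn.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(containerID) == null;
}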