Example 21 with HddsDatanodeService

Use of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project.

From the class TestDatanodeHddsVolumeFailureToleration, the method testDNCorrectlyHandlesVolumeFailureOnStartup:

@Test
public void testDNCorrectlyHandlesVolumeFailureOnStartup() throws Exception {
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    StorageVolume vol1 = volSet.getVolumesList().get(1);
    File volRootDir0 = vol0.getStorageDir();
    File volRootDir1 = vol1.getStorageDir();
    // simulate one bad volume, still within the tolerated limit
    DatanodeTestUtils.simulateBadRootDir(volRootDir0);
    // restart datanode to test
    cluster.restartHddsDatanode(0, true);
    // a clean restart means the single volume failure was tolerated
    // now fail a second volume, exceeding the tolerated limit
    DatanodeTestUtils.simulateBadRootDir(volRootDir1);
    // restart datanode to test
    try {
        cluster.restartHddsDatanode(0, true);
        Assert.fail();
    } catch (RuntimeException e) {
        Assert.assertTrue(e.getMessage().contains("Can't start the HDDS datanode plugin"));
    }
    // restore bad volumes
    DatanodeTestUtils.restoreBadRootDir(volRootDir0);
    DatanodeTestUtils.restoreBadRootDir(volRootDir1);
}
Also used: StorageVolume (org.apache.hadoop.ozone.container.common.volume.StorageVolume), HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService), MutableVolumeSet (org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet), OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer), File (java.io.File), Test (org.junit.Test)
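The test above leans on DatanodeTestUtils helpers that are not shown. A minimal sketch of what simulateBadRootDir and restoreBadRootDir might look like, assuming they work by revoking and restoring permissions on the volume root (the method names are real, but this body is illustrative, not the actual Ozone source):

import java.io.File;

final class BadRootDirSketch {
    private BadRootDirSketch() { }

    // Make the volume root unusable so the datanode's startup
    // volume check counts it as failed.
    static void simulateBadRootDir(File rootDir) {
        if (!rootDir.setReadable(false) || !rootDir.setWritable(false)) {
            throw new IllegalStateException("could not revoke permissions on " + rootDir);
        }
    }

    // Undo the simulated failure so later tests can reuse the volume.
    static void restoreBadRootDir(File rootDir) {
        if (!rootDir.setReadable(true) || !rootDir.setWritable(true)) {
            throw new IllegalStateException("could not restore permissions on " + rootDir);
        }
    }
}

With helpers of this shape, the restart after the first simulateBadRootDir call succeeds because the failed-volume count stays within the toleration limit, while the second failure pushes it over and aborts startup.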

Example 22 with HddsDatanodeService

Use of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project.

From the class TestDatanodeHddsVolumeFailureDetection, the method testHddsVolumeFailureOnChunkFileCorrupt:

@Test
public void testHddsVolumeFailureOnChunkFileCorrupt() throws Exception {
    // write a file
    String keyName = UUID.randomUUID().toString();
    String value = "sample value";
    OzoneOutputStream out = bucket.createKey(keyName, value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
    out.write(value.getBytes(UTF_8));
    out.close();
    OzoneKey key = bucket.getKey(keyName);
    Assert.assertEquals(keyName, key.getName());
    // corrupt each chunk file by renaming the file to a directory
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    Assert.assertTrue(vol0 instanceof HddsVolume);
    File clusterDir = DatanodeTestUtils.getHddsVolumeClusterDir((HddsVolume) vol0);
    File currentDir = new File(clusterDir, Storage.STORAGE_DIR_CURRENT);
    File containerTopDir = new File(currentDir, Storage.CONTAINER_DIR + "0");
    File containerDir = new File(containerTopDir, "1");
    File chunksDir = new File(containerDir, OzoneConsts.STORAGE_DIR_CHUNKS);
    File[] chunkFiles = chunksDir.listFiles();
    Assert.assertNotNull(chunkFiles);
    for (File chunkFile : chunkFiles) {
        DatanodeTestUtils.injectDataFileFailure(chunkFile);
    }
    // simulate bad volume by removing write permission on root dir
    // refer to HddsVolume.check()
    DatanodeTestUtils.simulateBadVolume(vol0);
    // read written file to trigger checkVolumeAsync
    OzoneInputStream is = bucket.readKey(keyName);
    byte[] fileContent = new byte[value.getBytes(UTF_8).length];
    try {
        is.read(fileContent);
        Assert.fail();
    } catch (Exception e) {
        Assert.assertTrue(e instanceof IOException);
    } finally {
        is.close();
    }
    // should trigger checkVolumeAsync and
    // a failed volume should be detected
    DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
    DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
    // restore for cleanup
    DatanodeTestUtils.restoreBadVolume(vol0);
    for (File chunkFile : chunkFiles) {
        DatanodeTestUtils.restoreDataFileFromFailure(chunkFile);
    }
}
Also used: OzoneInputStream (org.apache.hadoop.ozone.client.io.OzoneInputStream), OzoneOutputStream (org.apache.hadoop.ozone.client.io.OzoneOutputStream), HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService), IOException (java.io.IOException), HddsVolume (org.apache.hadoop.ozone.container.common.volume.HddsVolume), StorageVolume (org.apache.hadoop.ozone.container.common.volume.StorageVolume), OzoneKey (org.apache.hadoop.ozone.client.OzoneKey), MutableVolumeSet (org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet), OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer), File (java.io.File), Test (org.junit.Test)
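The chunk-file corruption here is described in the in-test comment as a "rename file->dir" trick. A plausible sketch of injectDataFileFailure and restoreDataFileFromFailure under that reading (the backup suffix and error handling are hypothetical; the real helpers live in DatanodeTestUtils):

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

final class DataFileFailureSketch {
    // hypothetical suffix used to stash the original file
    private static final String FAILURE_SUFFIX = ".origin";

    private DataFileFailureSketch() { }

    // Move the chunk file aside and put an empty directory in its place,
    // so subsequent reads of the chunk fail with an IOException.
    static void injectDataFileFailure(File file) throws IOException {
        File backup = new File(file.getParent(), file.getName() + FAILURE_SUFFIX);
        Files.move(file.toPath(), backup.toPath());
        if (!file.mkdir()) {
            throw new IOException("could not create directory in place of " + file);
        }
    }

    // Remove the shadowing directory and move the original file back.
    static void restoreDataFileFromFailure(File file) throws IOException {
        Files.delete(file.toPath()); // deletes the empty placeholder directory
        File backup = new File(file.getParent(), file.getName() + FAILURE_SUFFIX);
        Files.move(backup.toPath(), file.toPath());
    }
}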

Example 23 with HddsDatanodeService

Use of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project.

From the class TestDatanodeHddsVolumeFailureDetection, the method testHddsVolumeFailureOnDbFileCorrupt:

@Test
public void testHddsVolumeFailureOnDbFileCorrupt() throws Exception {
    // write a file; this creates container1
    String keyName = UUID.randomUUID().toString();
    String value = "sample value";
    OzoneOutputStream out = bucket.createKey(keyName, value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
    out.write(value.getBytes(UTF_8));
    out.close();
    OzoneKey key = bucket.getKey(keyName);
    Assert.assertEquals(keyName, key.getName());
    // close container1
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    Container c1 = oc.getContainerSet().getContainer(1);
    c1.close();
    // create container2, which evicts container1 from the cache
    ContainerWithPipeline c2 = scmClient.createContainer(HddsProtos.ReplicationType.STAND_ALONE, HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
    Assert.assertEquals(HddsProtos.LifeCycleState.OPEN, c2.getContainerInfo().getState());
    // corrupt the container DB by renaming its directory to a file
    File metadataDir = new File(c1.getContainerFile().getParent());
    File dbDir = new File(metadataDir, "1" + OzoneConsts.DN_CONTAINER_DB);
    DatanodeTestUtils.injectDataDirFailure(dbDir);
    // simulate bad volume by removing write permission on root dir
    // refer to HddsVolume.check()
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    DatanodeTestUtils.simulateBadVolume(vol0);
    // read written file to trigger checkVolumeAsync
    OzoneInputStream is = bucket.readKey(keyName);
    byte[] fileContent = new byte[value.getBytes(UTF_8).length];
    try {
        is.read(fileContent);
        Assert.fail();
    } catch (Exception e) {
        Assert.assertTrue(e instanceof IOException);
    } finally {
        is.close();
    }
    // should trigger checkVolumeAsync and
    // a failed volume should be detected
    DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
    DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
    // restore all
    DatanodeTestUtils.restoreBadVolume(vol0);
    DatanodeTestUtils.restoreDataDirFromFailure(dbDir);
}
Also used: OzoneInputStream (org.apache.hadoop.ozone.client.io.OzoneInputStream), OzoneOutputStream (org.apache.hadoop.ozone.client.io.OzoneOutputStream), HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService), IOException (java.io.IOException), ContainerWithPipeline (org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline), StorageVolume (org.apache.hadoop.ozone.container.common.volume.StorageVolume), OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer), Container (org.apache.hadoop.ozone.container.common.interfaces.Container), OzoneKey (org.apache.hadoop.ozone.client.OzoneKey), MutableVolumeSet (org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet), File (java.io.File), Test (org.junit.Test)
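This test corrupts the container DB with the inverse trick, "rename dir->file". A hedged sketch of injectDataDirFailure and restoreDataDirFromFailure under that assumption (again illustrative, not the actual DatanodeTestUtils code):

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

final class DataDirFailureSketch {
    // hypothetical suffix used to stash the original directory
    private static final String FAILURE_SUFFIX = ".origin";

    private DataDirFailureSketch() { }

    // Move the DB directory aside and create an empty regular file with
    // the same name, so attempts to open the DB fail with an IOException.
    static void injectDataDirFailure(File dir) throws IOException {
        File backup = new File(dir.getParent(), dir.getName() + FAILURE_SUFFIX);
        Files.move(dir.toPath(), backup.toPath());
        if (!dir.createNewFile()) {
            throw new IOException("could not create file in place of " + dir);
        }
    }

    // Remove the placeholder file and move the original directory back.
    static void restoreDataDirFromFailure(File dir) throws IOException {
        Files.delete(dir.toPath()); // deletes the placeholder file
        File backup = new File(dir.getParent(), dir.getName() + FAILURE_SUFFIX);
        Files.move(backup.toPath(), dir.toPath());
    }
}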

Example 24 with HddsDatanodeService

Use of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project.

From the class TestDatanodeHddsVolumeFailureDetection, the method testHddsVolumeFailureOnContainerFileCorrupt:

@Test
public void testHddsVolumeFailureOnContainerFileCorrupt() throws Exception {
    // create a container
    ContainerWithPipeline container = scmClient.createContainer(HddsProtos.ReplicationType.STAND_ALONE, HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
    // corrupt the container file by removing write permission on the
    // container metadata dir; container updates use a create-temp-and-rename
    // scheme, so simply renaming the container file cannot simulate
    // this corruption
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    Container c1 = oc.getContainerSet().getContainer(container.getContainerInfo().getContainerID());
    File metadataDir = new File(c1.getContainerFile().getParent());
    DatanodeTestUtils.injectContainerMetaDirFailure(metadataDir);
    // simulate bad volume by removing write permission on root dir
    // refer to HddsVolume.check()
    DatanodeTestUtils.simulateBadVolume(vol0);
    // close container to trigger checkVolumeAsync
    try {
        c1.close();
        Assert.fail();
    } catch (Exception e) {
        Assert.assertTrue(e instanceof IOException);
    }
    // should trigger checkVolumeAsync and
    // a failed volume should be detected
    DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
    DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
    // restore for cleanup
    DatanodeTestUtils.restoreBadVolume(vol0);
    DatanodeTestUtils.restoreContainerMetaDirFromFailure(metadataDir);
}
Also used: StorageVolume (org.apache.hadoop.ozone.container.common.volume.StorageVolume), OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer), Container (org.apache.hadoop.ozone.container.common.interfaces.Container), HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService), MutableVolumeSet (org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet), IOException (java.io.IOException), ContainerWithPipeline (org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline), File (java.io.File), Test (org.junit.Test)
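The in-test comments say simulateBadVolume works "by removing write permission on root dir" so that HddsVolume.check() reports the volume as unhealthy. A minimal sketch of that idea, using the StorageVolume.getStorageDir() accessor already seen above (illustrative; the real helper in DatanodeTestUtils may do more, such as tracking state for later restoration):

import java.io.File;
import org.apache.hadoop.ozone.container.common.volume.StorageVolume;

final class BadVolumeSketch {
    private BadVolumeSketch() { }

    // Revoke write permission on the volume root so the next disk check
    // (HddsVolume.check()) flags the volume as failed.
    static void simulateBadVolume(StorageVolume vol) {
        File root = vol.getStorageDir();
        if (!root.setWritable(false)) {
            throw new IllegalStateException("could not revoke write permission on " + root);
        }
    }

    // Restore write permission so cleanup can proceed.
    static void restoreBadVolume(StorageVolume vol) {
        File root = vol.getStorageDir();
        if (!root.setWritable(true)) {
            throw new IllegalStateException("could not restore write permission on " + root);
        }
    }
}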

Example 25 with HddsDatanodeService

Use of org.apache.hadoop.ozone.HddsDatanodeService in the Apache Ozone project.

From the class TestDeleteContainerHandler, the method testDeleteContainerRequestHandlerOnClosedContainer:

@Test(timeout = 60000)
public void testDeleteContainerRequestHandlerOnClosedContainer() throws Exception {
    // the easiest way to create an open container is to create a key
    String keyName = UUID.randomUUID().toString();
    // create key
    createKey(keyName);
    // get containerID of the key
    ContainerID containerId = getContainerID(keyName);
    ContainerInfo container = cluster.getStorageContainerManager().getContainerManager().getContainer(containerId);
    Pipeline pipeline = cluster.getStorageContainerManager().getPipelineManager().getPipeline(container.getPipelineID());
    // We need to close the container first: with the force flag set to
    // false, delete container only succeeds on closed containers.
    HddsDatanodeService hddsDatanodeService = cluster.getHddsDatanodes().get(0);
    Assert.assertFalse(isContainerClosed(hddsDatanodeService, containerId.getId()));
    DatanodeDetails datanodeDetails = hddsDatanodeService.getDatanodeDetails();
    NodeManager nodeManager = cluster.getStorageContainerManager().getScmNodeManager();
    // send the order to close the container
    SCMCommand<?> command = new CloseContainerCommand(containerId.getId(), pipeline.getId());
    command.setTerm(cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
    nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
    GenericTestUtils.waitFor(() -> isContainerClosed(hddsDatanodeService, containerId.getId()), 500, 5 * 1000);
    // double-check that it is really closed (waitFor would also have thrown on timeout)
    Assert.assertTrue(isContainerClosed(hddsDatanodeService, containerId.getId()));
    // check the container still exists before sending the delete container command
    Assert.assertFalse(isContainerDeleted(hddsDatanodeService, containerId.getId()));
    // send delete container to the datanode
    command = new DeleteContainerCommand(containerId.getId(), false);
    command.setTerm(cluster.getStorageContainerManager().getScmContext().getTermOfLeader());
    nodeManager.addDatanodeCommand(datanodeDetails.getUuid(), command);
    GenericTestUtils.waitFor(() -> isContainerDeleted(hddsDatanodeService, containerId.getId()), 500, 5 * 1000);
    Assert.assertTrue(isContainerDeleted(hddsDatanodeService, containerId.getId()));
}
Also used: NodeManager (org.apache.hadoop.hdds.scm.node.NodeManager), ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID), CloseContainerCommand (org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand), DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails), ContainerInfo (org.apache.hadoop.hdds.scm.container.ContainerInfo), HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService), DeleteContainerCommand (org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand), Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline), Test (org.junit.Test)
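The predicates isContainerClosed and isContainerDeleted are defined elsewhere in TestDeleteContainerHandler. A plausible sketch, assuming they inspect the datanode's ContainerSet via the same accessors used in the earlier examples (illustrative, not the verbatim test code):

import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.container.common.interfaces.Container;

final class ContainerStateSketch {
    private ContainerStateSketch() { }

    // A container is closed once its on-datanode state is CLOSED.
    static boolean isContainerClosed(HddsDatanodeService dn, long containerId) {
        Container<?> container = dn.getDatanodeStateMachine()
            .getContainer().getContainerSet().getContainer(containerId);
        return container != null
            && container.getContainerData().getState()
                == ContainerProtos.ContainerDataProto.State.CLOSED;
    }

    // A deleted container no longer appears in the ContainerSet.
    static boolean isContainerDeleted(HddsDatanodeService dn, long containerId) {
        return dn.getDatanodeStateMachine().getContainer()
            .getContainerSet().getContainer(containerId) == null;
    }
}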

Aggregations

HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService): 41
Test (org.junit.Test): 32
OzoneOutputStream (org.apache.hadoop.ozone.client.io.OzoneOutputStream): 20
IOException (java.io.IOException): 17
Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline): 14
File (java.io.File): 12
OmKeyLocationInfo (org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo): 12
Container (org.apache.hadoop.ozone.container.common.interfaces.Container): 11
KeyOutputStream (org.apache.hadoop.ozone.client.io.KeyOutputStream): 10
DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails): 9
KeyValueContainerData (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData): 9
OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer): 9
OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration): 8
XceiverClientSpi (org.apache.hadoop.hdds.scm.XceiverClientSpi): 8
GenericTestUtils (org.apache.ozone.test.GenericTestUtils): 8
ArrayList (java.util.ArrayList): 7
OzoneKey (org.apache.hadoop.ozone.client.OzoneKey): 7
OzoneInputStream (org.apache.hadoop.ozone.client.io.OzoneInputStream): 7
ContainerData (org.apache.hadoop.ozone.container.common.impl.ContainerData): 7
Ignore (org.junit.Ignore): 7