Example 16 with MutableVolumeSet

use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.

the class TestDatanodeHddsVolumeFailureDetection method testHddsVolumeFailureOnChunkFileCorrupt.

@Test
public void testHddsVolumeFailureOnChunkFileCorrupt() throws Exception {
    // write a file
    String keyName = UUID.randomUUID().toString();
    String value = "sample value";
    OzoneOutputStream out = bucket.createKey(keyName, value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
    out.write(value.getBytes(UTF_8));
    out.close();
    OzoneKey key = bucket.getKey(keyName);
    Assert.assertEquals(keyName, key.getName());
    // corrupt chunk file by rename file->dir
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    Assert.assertTrue(vol0 instanceof HddsVolume);
    File clusterDir = DatanodeTestUtils.getHddsVolumeClusterDir((HddsVolume) vol0);
    File currentDir = new File(clusterDir, Storage.STORAGE_DIR_CURRENT);
    File containerTopDir = new File(currentDir, Storage.CONTAINER_DIR + "0");
    File containerDir = new File(containerTopDir, "1");
    File chunksDir = new File(containerDir, OzoneConsts.STORAGE_DIR_CHUNKS);
    File[] chunkFiles = chunksDir.listFiles();
    Assert.assertNotNull(chunkFiles);
    for (File chunkFile : chunkFiles) {
        DatanodeTestUtils.injectDataFileFailure(chunkFile);
    }
    // simulate bad volume by removing write permission on root dir
    // refer to HddsVolume.check()
    DatanodeTestUtils.simulateBadVolume(vol0);
    // read written file to trigger checkVolumeAsync
    OzoneInputStream is = bucket.readKey(keyName);
    byte[] fileContent = new byte[value.getBytes(UTF_8).length];
    try {
        is.read(fileContent);
        Assert.fail();
    } catch (Exception e) {
        Assert.assertTrue(e instanceof IOException);
    } finally {
        is.close();
    }
    // should trigger checkVolumeAsync and
    // a failed volume should be detected
    DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
    DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
    // restore for cleanup
    DatanodeTestUtils.restoreBadVolume(vol0);
    for (File chunkFile : chunkFiles) {
        DatanodeTestUtils.restoreDataFileFromFailure(chunkFile);
    }
}
Also used : OzoneInputStream(org.apache.hadoop.ozone.client.io.OzoneInputStream) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) HddsVolume(org.apache.hadoop.ozone.container.common.volume.HddsVolume) StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) OzoneKey(org.apache.hadoop.ozone.client.OzoneKey) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) File(java.io.File) Test(org.junit.Test)
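
The two DatanodeTestUtils helpers used above are described only by the comments in the test. A minimal sketch of what they plausibly do, grounded in those comments ("rename file->dir" and "removing write permission on root dir"); the simplified signatures and bodies here are assumptions for illustration, not the actual Ozone test-utility source:

import java.io.File;
import java.io.IOException;

// Illustrative sketch only -- not the real DatanodeTestUtils.
final class FailureInjectionSketch {
    // "corrupt chunk file by rename file->dir": move the chunk file aside
    // and create a directory under its original name, so any read of the
    // chunk hits an IOException.
    static void injectDataFileFailure(File dataFile) throws IOException {
        File backup = new File(dataFile.getPath() + ".bak");
        if (!dataFile.renameTo(backup) || !dataFile.mkdir()) {
            throw new IOException("failed to inject failure on " + dataFile);
        }
    }

    // "simulate bad volume by removing write permission on root dir":
    // HddsVolume.check() then reports the volume as failed.
    static void simulateBadVolume(File volumeRootDir) {
        volumeRootDir.setWritable(false);
    }
}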

Example 17 with MutableVolumeSet

use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.

the class TestDatanodeHddsVolumeFailureDetection method testHddsVolumeFailureOnDbFileCorrupt.

@Test
public void testHddsVolumeFailureOnDbFileCorrupt() throws Exception {
    // write a file, will create container1
    String keyName = UUID.randomUUID().toString();
    String value = "sample value";
    OzoneOutputStream out = bucket.createKey(keyName, value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
    out.write(value.getBytes(UTF_8));
    out.close();
    OzoneKey key = bucket.getKey(keyName);
    Assert.assertEquals(keyName, key.getName());
    // close container1
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    Container c1 = oc.getContainerSet().getContainer(1);
    c1.close();
    // create container2, and container1 is kicked out of cache
    ContainerWithPipeline c2 = scmClient.createContainer(HddsProtos.ReplicationType.STAND_ALONE, HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
    Assert.assertEquals(HddsProtos.LifeCycleState.OPEN, c2.getContainerInfo().getState());
    // corrupt db by rename dir->file
    File metadataDir = new File(c1.getContainerFile().getParent());
    File dbDir = new File(metadataDir, "1" + OzoneConsts.DN_CONTAINER_DB);
    DatanodeTestUtils.injectDataDirFailure(dbDir);
    // simulate bad volume by removing write permission on root dir
    // refer to HddsVolume.check()
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    DatanodeTestUtils.simulateBadVolume(vol0);
    // read written file to trigger checkVolumeAsync
    OzoneInputStream is = bucket.readKey(keyName);
    byte[] fileContent = new byte[value.getBytes(UTF_8).length];
    try {
        is.read(fileContent);
        Assert.fail();
    } catch (Exception e) {
        Assert.assertTrue(e instanceof IOException);
    } finally {
        is.close();
    }
    // should trigger checkVolumeAsync and
    // a failed volume should be detected
    DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
    DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
    // restore all
    DatanodeTestUtils.restoreBadVolume(vol0);
    DatanodeTestUtils.restoreDataDirFromFailure(dbDir);
}
Also used : OzoneInputStream(org.apache.hadoop.ozone.client.io.OzoneInputStream) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) Container(org.apache.hadoop.ozone.container.common.interfaces.Container) OzoneKey(org.apache.hadoop.ozone.client.OzoneKey) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) File(java.io.File) Test(org.junit.Test)
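
The waitForCheckVolume/waitForHandleFailedVolume calls block the test until the async volume checker has run and acted on the failure. A plausible shape for the second wait, assuming the standard Hadoop GenericTestUtils.waitFor polling utility and MutableVolumeSet.getFailedVolumesList(); the helper name waitForFailedVolumeCount is hypothetical:

import java.util.concurrent.TimeoutException;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
import org.apache.hadoop.test.GenericTestUtils;

// Illustrative sketch only -- the real helpers live in DatanodeTestUtils.
final class VolumeWaitSketch {
    static void waitForFailedVolumeCount(MutableVolumeSet volSet, int expected)
            throws TimeoutException, InterruptedException {
        // Poll every 100 ms, time out after 10 s.
        GenericTestUtils.waitFor(
            () -> volSet.getFailedVolumesList().size() >= expected, 100, 10_000);
    }
}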

Example 18 with MutableVolumeSet

use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.

the class TestDatanodeHddsVolumeFailureDetection method testHddsVolumeFailureOnContainerFileCorrupt.

@Test
public void testHddsVolumeFailureOnContainerFileCorrupt() throws Exception {
    // create a container
    ContainerWithPipeline container = scmClient.createContainer(HddsProtos.ReplicationType.STAND_ALONE, HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
    // corrupt the container file by removing write permission on the
    // container metadata dir; since the container update operation
    // uses a create-temp-then-rename approach, we can't simply rename
    // the container file to simulate corruption
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    Container c1 = oc.getContainerSet().getContainer(container.getContainerInfo().getContainerID());
    File metadataDir = new File(c1.getContainerFile().getParent());
    DatanodeTestUtils.injectContainerMetaDirFailure(metadataDir);
    // simulate bad volume by removing write permission on root dir
    // refer to HddsVolume.check()
    DatanodeTestUtils.simulateBadVolume(vol0);
    // close container to trigger checkVolumeAsync
    try {
        c1.close();
        Assert.fail();
    } catch (Exception e) {
        Assert.assertTrue(e instanceof IOException);
    }
    // should trigger checkVolumeAsync and
    // a failed volume should be detected
    DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
    DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
    // restore for cleanup
    DatanodeTestUtils.restoreBadVolume(vol0);
    DatanodeTestUtils.restoreContainerMetaDirFromFailure(metadataDir);
}
Also used : StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) Container(org.apache.hadoop.ozone.container.common.interfaces.Container) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) IOException(java.io.IOException) ContainerWithPipeline(org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline) File(java.io.File) Test(org.junit.Test)
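
This variant corrupts the container metadata directory itself. Because container updates write a temp file and rename it into that directory (per the comment in the test), removing the directory's write permission is enough to fail the close. A sketch under that assumption; the bodies are illustrative, not the real DatanodeTestUtils:

import java.io.File;

// Illustrative sketch only -- not the real DatanodeTestUtils.
final class MetaDirFailureSketch {
    // Container updates create a temp file and rename it into the metadata
    // dir, so a read-only dir makes the update (and thus close()) fail.
    static void injectContainerMetaDirFailure(File metadataDir) {
        metadataDir.setWritable(false);
    }

    static void restoreContainerMetaDirFromFailure(File metadataDir) {
        metadataDir.setWritable(true);
    }
}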

Example 19 with MutableVolumeSet

use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.

the class ClosedContainerReplicator method initializeReplicationSupervisor.

@NotNull
private void initializeReplicationSupervisor(ConfigurationSource conf) throws IOException {
    String fakeDatanodeUuid = datanode;
    if (fakeDatanodeUuid.isEmpty()) {
        fakeDatanodeUuid = UUID.randomUUID().toString();
    }
    ContainerSet containerSet = new ContainerSet();
    ContainerMetrics metrics = ContainerMetrics.create(conf);
    MutableVolumeSet volumeSet = new MutableVolumeSet(fakeDatanodeUuid, conf, null, StorageVolume.VolumeType.DATA_VOLUME, null);
    Map<ContainerType, Handler> handlers = new HashMap<>();
    for (ContainerType containerType : ContainerType.values()) {
        final Handler handler = Handler.getHandlerForContainerType(containerType, conf, fakeDatanodeUuid, containerSet, volumeSet, metrics, containerReplicaProto -> {
        });
        handler.setClusterID(UUID.randomUUID().toString());
        handlers.put(containerType, handler);
    }
    ContainerController controller = new ContainerController(containerSet, handlers);
    ContainerReplicator replicator = new DownloadAndImportReplicator(containerSet, controller, new SimpleContainerDownloader(conf, null), new TarContainerPacker());
    supervisor = new ReplicationSupervisor(containerSet, replicator, 10);
}
Also used : ReplicationSupervisor(org.apache.hadoop.ozone.container.replication.ReplicationSupervisor) HashMap(java.util.HashMap) DownloadAndImportReplicator(org.apache.hadoop.ozone.container.replication.DownloadAndImportReplicator) ContainerType(org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType) Handler(org.apache.hadoop.ozone.container.common.interfaces.Handler) ContainerController(org.apache.hadoop.ozone.container.ozoneimpl.ContainerController) SimpleContainerDownloader(org.apache.hadoop.ozone.container.replication.SimpleContainerDownloader) TarContainerPacker(org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker) ContainerSet(org.apache.hadoop.ozone.container.common.impl.ContainerSet) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) ContainerMetrics(org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics) ContainerReplicator(org.apache.hadoop.ozone.container.replication.ContainerReplicator) NotNull(org.jetbrains.annotations.NotNull)
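
Once initialized, the supervisor is driven by queueing replication tasks. A hedged usage sketch: ReplicationSupervisor.addTask and ReplicationTask exist in this package, but treat the exact ReplicationTask constructor signature as an assumption:

import java.util.List;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.ozone.container.replication.ReplicationSupervisor;
import org.apache.hadoop.ozone.container.replication.ReplicationTask;

// Illustrative usage sketch for the supervisor built above.
final class SupervisorUsageSketch {
    private final ReplicationSupervisor supervisor;

    SupervisorUsageSketch(ReplicationSupervisor supervisor) {
        this.supervisor = supervisor;
    }

    // Each task downloads the closed container from one of the source
    // datanodes and imports it through the configured ContainerController.
    void replicate(long containerId, List<DatanodeDetails> sources) {
        supervisor.addTask(new ReplicationTask(containerId, sources));
    }
}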

Example 20 with MutableVolumeSet

use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.

the class HddsDatanodeService method startRatisForTest.

/**
 * Initialize and start Ratis server.
 * <p>
 * In the normal case this initialization is done after the SCM registration.
 * It can be forced to make it possible to test a single, isolated
 * datanode.
 */
private void startRatisForTest() throws IOException {
    String clusterId = "clusterId";
    datanodeStateMachine.getContainer().start(clusterId);
    MutableVolumeSet volumeSet = getDatanodeStateMachine().getContainer().getVolumeSet();
    Map<String, StorageVolume> volumeMap = volumeSet.getVolumeMap();
    for (Map.Entry<String, StorageVolume> entry : volumeMap.entrySet()) {
        HddsVolume hddsVolume = (HddsVolume) entry.getValue();
        boolean result = HddsVolumeUtil.checkVolume(hddsVolume, clusterId, clusterId, conf, LOG);
        if (!result) {
            volumeSet.failVolume(hddsVolume.getHddsRootDir().getPath());
        }
    }
}
Also used : HddsVolume(org.apache.hadoop.ozone.container.common.volume.HddsVolume) StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) CertificateSignRequest.getEncodedString(org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap)
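
HddsVolumeUtil.checkVolume is the gate in the loop above: when it returns false, the volume is failed. A rough sketch of that contract; only the call site appears in the snippet, so the body below (including the HddsVolume.format and getClusterID calls) is an assumption for illustration:

import java.io.IOException;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;

// Rough sketch of the contract the startRatisForTest loop relies on;
// the real logic lives in HddsVolumeUtil.checkVolume.
final class CheckVolumeSketch {
    static boolean checkVolume(HddsVolume volume, String scmId, String clusterId) {
        try {
            volume.format(clusterId);  // assumed no-op if already formatted
            return clusterId.equals(volume.getClusterID());
        } catch (IOException e) {
            return false;              // caller then fails this volume
        }
    }
}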

Aggregations

MutableVolumeSet (org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet): 25
File (java.io.File): 12
Test (org.junit.Test): 12
OzoneConfiguration (org.apache.hadoop.hdds.conf.OzoneConfiguration): 10
StorageVolume (org.apache.hadoop.ozone.container.common.volume.StorageVolume): 10
ContainerMetrics (org.apache.hadoop.ozone.container.common.helpers.ContainerMetrics): 9
ContainerSet (org.apache.hadoop.ozone.container.common.impl.ContainerSet): 8
IOException (java.io.IOException): 7
HddsVolume (org.apache.hadoop.ozone.container.common.volume.HddsVolume): 7
RoundRobinVolumeChoosingPolicy (org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy): 7
Handler (org.apache.hadoop.ozone.container.common.interfaces.Handler): 6
VolumeSet (org.apache.hadoop.ozone.container.common.volume.VolumeSet): 6
DatanodeStateMachine (org.apache.hadoop.ozone.container.common.statemachine.DatanodeStateMachine): 5
StateContext (org.apache.hadoop.ozone.container.common.statemachine.StateContext): 5
Map (java.util.Map): 4
UUID (java.util.UUID): 4
DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails): 4
HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService): 4
KeyValueContainer (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer): 4
KeyValueContainerData (org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData): 4