Search in sources :

Example 1 with StorageVolume

use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in project ozone by apache.

The class ScmHAFinalizeUpgradeActionDatanode, method execute.

@Override
public void execute(DatanodeStateMachine dsm) throws Exception {
    LOG.info("Upgrading Datanode volume layout for SCM HA support.");
    MutableVolumeSet volumeSet = dsm.getContainer().getVolumeSet();
    // Upgrade each volume individually, holding the volume-set write lock
    // only for the duration of that volume's upgrade.
    for (StorageVolume volume : volumeSet.getVolumesList()) {
        volumeSet.writeLock();
        try {
            // Only HDDS data volumes carry the layout being upgraded;
            // other volume types are left untouched.
            if (!(volume instanceof HddsVolume)) {
                continue;
            }
            HddsVolume hddsVolume = (HddsVolume) volume;
            boolean upgraded = upgradeVolume(hddsVolume, hddsVolume.getClusterID());
            if (!upgraded) {
                // A volume that cannot be upgraded is taken out of service.
                volumeSet.failVolume(volume.getStorageDir().getAbsolutePath());
            }
        } finally {
            volumeSet.writeUnlock();
        }
    }
}
Also used : HddsVolume(org.apache.hadoop.ozone.container.common.volume.HddsVolume) StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet)

Example 2 with StorageVolume

use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in project ozone by apache.

The class OzoneContainer, method startContainerScrub.

/**
 * Start background daemon threads for performing container integrity checks:
 * one metadata scanner for the whole node, plus one data scanner per
 * HDDS volume (unless per-volume bandwidth is configured as 0).
 */
private void startContainerScrub() {
    ContainerScrubberConfiguration c = config.getObject(ContainerScrubberConfiguration.class);
    if (!c.isEnabled()) {
        LOG.info("Background container scanner has been disabled.");
        return;
    }
    // Metadata scanner is created lazily and reused across restarts of scrubbing.
    if (this.metadataScanner == null) {
        this.metadataScanner = new ContainerMetadataScanner(c, controller);
    }
    this.metadataScanner.start();
    if (c.getBandwidthPerVolume() == 0L) {
        // Zero bandwidth means data scanning is effectively disabled;
        // only the (cheaper) metadata scanner runs.
        LOG.warn(VOLUME_BYTES_PER_SECOND_KEY + " is set to 0, " + "so background container data scanner will not start.");
        return;
    }
    dataScanners = new ArrayList<>();
    for (StorageVolume v : volumeSet.getVolumesList()) {
        // Guard the cast: the volume set is typed as StorageVolume, and only
        // HDDS volumes hold container data to scan (matches the instanceof
        // check used elsewhere in this codebase).
        if (v instanceof HddsVolume) {
            ContainerDataScanner s = new ContainerDataScanner(c, controller, (HddsVolume) v);
            s.start();
            dataScanners.add(s);
        }
    }
}
Also used : StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume)

Example 3 with StorageVolume

use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in project ozone by apache.

The class VersionEndpointTask, method call.

/**
 * Computes a result, or throws an exception if unable to do so.
 *
 * Fetches the SCM version (scmId / clusterId) from the endpoint, verifies
 * every configured volume against it (failing inconsistent volumes), starts
 * the container services, and advances the endpoint state machine.
 *
 * @return computed result — the endpoint state after this attempt
 * @throws Exception if unable to compute a result
 */
@Override
public EndpointStateMachine.EndPointStates call() throws Exception {
    rpcEndPoint.lock();
    try {
        if (rpcEndPoint.getState().equals(EndpointStateMachine.EndPointStates.GETVERSION)) {
            SCMVersionResponseProto versionResponse = rpcEndPoint.getEndPoint().getVersion(null);
            VersionResponse response = VersionResponse.getFromProtobuf(versionResponse);
            rpcEndPoint.setVersion(response);
            if (!rpcEndPoint.isPassive()) {
                // If end point is passive, datanode does not need to check volumes.
                String scmId = response.getValue(OzoneConsts.SCM_ID);
                String clusterId = response.getValue(OzoneConsts.CLUSTER_ID);
                // Check volumes under the volume-set write lock so the map
                // cannot change while volumes are verified/failed.
                MutableVolumeSet volumeSet = ozoneContainer.getVolumeSet();
                volumeSet.writeLock();
                try {
                    Map<String, StorageVolume> volumeMap = volumeSet.getVolumeMap();
                    Preconditions.checkNotNull(scmId, "Reply from SCM: scmId cannot be null");
                    Preconditions.checkNotNull(clusterId, "Reply from SCM: clusterId cannot be null");
                    // create version file and also set scm ID or cluster ID.
                    for (Map.Entry<String, StorageVolume> entry : volumeMap.entrySet()) {
                        StorageVolume volume = entry.getValue();
                        boolean result = HddsVolumeUtil.checkVolume((HddsVolume) volume, scmId, clusterId, configuration, LOG);
                        if (!result) {
                            volumeSet.failVolume(volume.getStorageDir().getPath());
                        }
                    }
                    if (volumeSet.getVolumesList().isEmpty()) {
                        // All volumes are in inconsistent state
                        throw new DiskOutOfSpaceException("All configured Volumes are in Inconsistent State");
                    }
                } finally {
                    volumeSet.writeUnlock();
                }
                // Start the container services after getting the version information
                ozoneContainer.start(clusterId);
            }
            EndpointStateMachine.EndPointStates nextState = rpcEndPoint.getState().getNextState();
            rpcEndPoint.setState(nextState);
            rpcEndPoint.zeroMissedCount();
        } else {
            LOG.debug("Cannot execute GetVersion task as endpoint state machine " + "is in {} state", rpcEndPoint.getState());
        }
    } catch (DiskOutOfSpaceException ex) {
        // All volumes failed verification; log before shutting the endpoint
        // down so the cause is not silently swallowed.
        LOG.error("Shutting down endpoint: all volumes are inconsistent.", ex);
        rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN);
    } catch (IOException ex) {
        // Transient communication failure: log (rate-limited) and retry on
        // the next heartbeat cycle without changing state.
        rpcEndPoint.logIfNeeded(ex);
    } finally {
        rpcEndPoint.unlock();
    }
    return rpcEndPoint.getState();
}
Also used : VersionResponse(org.apache.hadoop.ozone.protocol.VersionResponse) IOException(java.io.IOException) DiskOutOfSpaceException(org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException) EndpointStateMachine(org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine) StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) Map(java.util.Map) SCMVersionResponseProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto)

Example 4 with StorageVolume

use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in project ozone by apache.

The class TestDatanodeHddsVolumeFailureToleration, method testDNCorrectlyHandlesVolumeFailureOnStartup.

@Test
public void testDNCorrectlyHandlesVolumeFailureOnStartup() throws Exception {
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    StorageVolume vol1 = volSet.getVolumesList().get(1);
    File volRootDir0 = vol0.getStorageDir();
    File volRootDir1 = vol1.getStorageDir();
    // simulate bad volumes <= tolerated
    DatanodeTestUtils.simulateBadRootDir(volRootDir0);
    try {
        // restart datanode to test; no exception is good
        cluster.restartHddsDatanode(0, true);
        // fail a second volume, exceeding the tolerated count
        DatanodeTestUtils.simulateBadRootDir(volRootDir1);
        // restart datanode to test: startup must now fail
        try {
            cluster.restartHddsDatanode(0, true);
            Assert.fail("Datanode restart should fail when failed volumes exceed toleration");
        } catch (RuntimeException e) {
            Assert.assertTrue(e.getMessage().contains("Can't start the HDDS datanode plugin"));
        }
    } finally {
        // restore bad volumes even if an assertion above failed, so later
        // tests on this cluster are not affected by lingering bad dirs
        DatanodeTestUtils.restoreBadRootDir(volRootDir0);
        DatanodeTestUtils.restoreBadRootDir(volRootDir1);
    }
}
Also used : StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) File(java.io.File) Test(org.junit.Test)

Example 5 with StorageVolume

use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in project ozone by apache.

The class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnChunkFileCorrupt.

@Test
public void testHddsVolumeFailureOnChunkFileCorrupt() throws Exception {
    // write a file
    String keyName = UUID.randomUUID().toString();
    String value = "sample value";
    OzoneOutputStream out = bucket.createKey(keyName, value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
    out.write(value.getBytes(UTF_8));
    out.close();
    OzoneKey key = bucket.getKey(keyName);
    Assert.assertEquals(keyName, key.getName());
    // corrupt chunk file by rename file->dir
    HddsDatanodeService dn = datanodes.get(0);
    OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
    MutableVolumeSet volSet = oc.getVolumeSet();
    StorageVolume vol0 = volSet.getVolumesList().get(0);
    Assert.assertTrue(vol0 instanceof HddsVolume);
    File clusterDir = DatanodeTestUtils.getHddsVolumeClusterDir((HddsVolume) vol0);
    File currentDir = new File(clusterDir, Storage.STORAGE_DIR_CURRENT);
    File containerTopDir = new File(currentDir, Storage.CONTAINER_DIR + "0");
    File containerDir = new File(containerTopDir, "1");
    File chunksDir = new File(containerDir, OzoneConsts.STORAGE_DIR_CHUNKS);
    File[] chunkFiles = chunksDir.listFiles();
    Assert.assertNotNull(chunkFiles);
    for (File chunkFile : chunkFiles) {
        DatanodeTestUtils.injectDataFileFailure(chunkFile);
    }
    // simulate bad volume by removing write permission on root dir
    // refer to HddsVolume.check()
    DatanodeTestUtils.simulateBadVolume(vol0);
    try {
        // read written file to trigger checkVolumeAsync
        OzoneInputStream is = bucket.readKey(keyName);
        byte[] fileContent = new byte[value.getBytes(UTF_8).length];
        try {
            is.read(fileContent);
            Assert.fail("Reading a corrupted chunk file should throw IOException");
        } catch (Exception e) {
            Assert.assertTrue(e instanceof IOException);
        } finally {
            is.close();
        }
        // should trigger checkVolumeAsync and
        // a failed volume should be detected
        DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
        DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
    } finally {
        // always restore the volume and chunk files, even on assertion
        // failure, so subsequent tests on this cluster start clean
        DatanodeTestUtils.restoreBadVolume(vol0);
        for (File chunkFile : chunkFiles) {
            DatanodeTestUtils.restoreDataFileFromFailure(chunkFile);
        }
    }
}
Also used : OzoneInputStream(org.apache.hadoop.ozone.client.io.OzoneInputStream) OzoneOutputStream(org.apache.hadoop.ozone.client.io.OzoneOutputStream) HddsDatanodeService(org.apache.hadoop.ozone.HddsDatanodeService) IOException(java.io.IOException) IOException(java.io.IOException) HddsVolume(org.apache.hadoop.ozone.container.common.volume.HddsVolume) StorageVolume(org.apache.hadoop.ozone.container.common.volume.StorageVolume) OzoneKey(org.apache.hadoop.ozone.client.OzoneKey) MutableVolumeSet(org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet) OzoneContainer(org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer) File(java.io.File) Test(org.junit.Test)

Aggregations

StorageVolume (org.apache.hadoop.ozone.container.common.volume.StorageVolume)10 MutableVolumeSet (org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet)8 File (java.io.File)5 HddsVolume (org.apache.hadoop.ozone.container.common.volume.HddsVolume)5 Test (org.junit.Test)5 IOException (java.io.IOException)4 HddsDatanodeService (org.apache.hadoop.ozone.HddsDatanodeService)4 OzoneContainer (org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer)4 Map (java.util.Map)2 ContainerWithPipeline (org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline)2 OzoneKey (org.apache.hadoop.ozone.client.OzoneKey)2 OzoneInputStream (org.apache.hadoop.ozone.client.io.OzoneInputStream)2 OzoneOutputStream (org.apache.hadoop.ozone.client.io.OzoneOutputStream)2 Container (org.apache.hadoop.ozone.container.common.interfaces.Container)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 SCMVersionResponseProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMVersionResponseProto)1 CertificateSignRequest.getEncodedString (org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString)1 EndpointStateMachine (org.apache.hadoop.ozone.container.common.statemachine.EndpointStateMachine)1