Use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in the Apache Ozone project.
Class ScmHAFinalizeUpgradeActionDatanode, method execute.
/**
 * Upgrades the layout of every HDDS data volume on this datanode so it can
 * support SCM HA. A volume whose upgrade fails is marked as failed.
 *
 * @param dsm the state machine owning the container and its volume set
 * @throws Exception if the upgrade cannot be carried out
 */
@Override
public void execute(DatanodeStateMachine dsm) throws Exception {
  LOG.info("Upgrading Datanode volume layout for SCM HA support.");
  MutableVolumeSet volumes = dsm.getContainer().getVolumeSet();
  for (StorageVolume vol : volumes.getVolumesList()) {
    // Hold the write lock per volume so failVolume can mutate the set safely.
    volumes.writeLock();
    try {
      // Only HDDS data volumes carry the layout being upgraded.
      if (!(vol instanceof HddsVolume)) {
        continue;
      }
      HddsVolume hddsVol = (HddsVolume) vol;
      boolean upgraded = upgradeVolume(hddsVol, hddsVol.getClusterID());
      if (!upgraded) {
        volumes.failVolume(vol.getStorageDir().getAbsolutePath());
      }
    } finally {
      volumes.writeUnlock();
    }
  }
}
Use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in the Apache Ozone project.
Class OzoneContainer, method startContainerScrub.
/**
* Start background daemon thread for performing container integrity checks.
*/
/**
 * Start background daemon threads for performing container integrity checks.
 *
 * Starts the (shared) metadata scanner, then one data scanner per HDDS
 * volume — unless scrubbing is disabled, or the per-volume bandwidth limit
 * is 0, in which case data scanners are not started.
 */
private void startContainerScrub() {
  ContainerScrubberConfiguration c =
      config.getObject(ContainerScrubberConfiguration.class);
  if (!c.isEnabled()) {
    LOG.info("Background container scanner has been disabled.");
    return;
  }
  // The metadata scanner is created lazily and reused across restarts.
  if (this.metadataScanner == null) {
    this.metadataScanner = new ContainerMetadataScanner(c, controller);
  }
  this.metadataScanner.start();
  if (c.getBandwidthPerVolume() == 0L) {
    LOG.warn(VOLUME_BYTES_PER_SECOND_KEY + " is set to 0, " + "so background container data scanner will not start.");
    return;
  }
  dataScanners = new ArrayList<>();
  for (StorageVolume v : volumeSet.getVolumesList()) {
    // The volume list is typed as StorageVolume; guard the cast so a
    // non-HDDS volume cannot throw ClassCastException here (matches the
    // instanceof check used elsewhere in this project for the same list).
    if (!(v instanceof HddsVolume)) {
      continue;
    }
    ContainerDataScanner s = new ContainerDataScanner(c, controller, (HddsVolume) v);
    s.start();
    dataScanners.add(s);
  }
}
Use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in the Apache Ozone project.
Class VersionEndpointTask, method call.
/**
* Computes a result, or throws an exception if unable to do so.
*
* @return computed result
* @throws Exception if unable to compute a result
*/
/**
 * Computes a result, or throws an exception if unable to do so.
 *
 * Asks SCM for its version, records it on the endpoint, and — unless the
 * endpoint is passive — verifies every configured volume against the
 * scmId/clusterId returned by SCM, failing volumes that do not match, and
 * finally starts the container services. On success the endpoint state
 * machine advances to its next state; if no consistent volume remains the
 * endpoint is moved to SHUTDOWN.
 *
 * @return computed result
 * @throws Exception if unable to compute a result
 */
@Override
public EndpointStateMachine.EndPointStates call() throws Exception {
  rpcEndPoint.lock();
  try {
    if (rpcEndPoint.getState().equals(EndpointStateMachine.EndPointStates.GETVERSION)) {
      SCMVersionResponseProto versionResponse = rpcEndPoint.getEndPoint().getVersion(null);
      VersionResponse response = VersionResponse.getFromProtobuf(versionResponse);
      rpcEndPoint.setVersion(response);
      if (!rpcEndPoint.isPassive()) {
        // If end point is passive, datanode does not need to check volumes.
        String scmId = response.getValue(OzoneConsts.SCM_ID);
        String clusterId = response.getValue(OzoneConsts.CLUSTER_ID);
        // Check volumes
        MutableVolumeSet volumeSet = ozoneContainer.getVolumeSet();
        volumeSet.writeLock();
        try {
          Map<String, StorageVolume> volumeMap = volumeSet.getVolumeMap();
          Preconditions.checkNotNull(scmId, "Reply from SCM: scmId cannot be null");
          Preconditions.checkNotNull(clusterId, "Reply from SCM: clusterId cannot be null");
          // create version file and also set scm ID or cluster ID.
          // NOTE(review): the cast below assumes every mapped volume is an
          // HddsVolume — confirm the volume set only holds HDDS volumes here.
          for (Map.Entry<String, StorageVolume> entry : volumeMap.entrySet()) {
            StorageVolume volume = entry.getValue();
            boolean result = HddsVolumeUtil.checkVolume((HddsVolume) volume, scmId, clusterId, configuration, LOG);
            if (!result) {
              volumeSet.failVolume(volume.getStorageDir().getPath());
            }
          }
          if (volumeSet.getVolumesList().size() == 0) {
            // All volumes are in inconsistent state
            throw new DiskOutOfSpaceException("All configured Volumes are in Inconsistent State");
          }
        } finally {
          volumeSet.writeUnlock();
        }
        // Start the container services after getting the version information
        ozoneContainer.start(clusterId);
      }
      EndpointStateMachine.EndPointStates nextState = rpcEndPoint.getState().getNextState();
      rpcEndPoint.setState(nextState);
      rpcEndPoint.zeroMissedCount();
    } else {
      LOG.debug("Cannot execute GetVersion task as endpoint state machine " + "is in {} state", rpcEndPoint.getState());
    }
  } catch (DiskOutOfSpaceException ex) {
    // Every volume failed verification; the datanode cannot store data.
    // Log the cause before shutting the endpoint down instead of silently
    // swallowing the exception (the original dropped 'ex' entirely).
    LOG.error("Shutting down endpoint: no consistent volume remains.", ex);
    rpcEndPoint.setState(EndpointStateMachine.EndPointStates.SHUTDOWN);
  } catch (IOException ex) {
    rpcEndPoint.logIfNeeded(ex);
  } finally {
    rpcEndPoint.unlock();
  }
  return rpcEndPoint.getState();
}
Use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in the Apache Ozone project.
Class TestDatanodeHddsVolumeFailureToleration, method testDNCorrectlyHandlesVolumeFailureOnStartup.
/**
 * Verifies that a datanode restarts fine with one failed volume (within the
 * configured toleration) but refuses to start once a second volume fails.
 */
@Test
public void testDNCorrectlyHandlesVolumeFailureOnStartup() throws Exception {
  HddsDatanodeService datanode = datanodes.get(0);
  OzoneContainer container = datanode.getDatanodeStateMachine().getContainer();
  MutableVolumeSet volumes = container.getVolumeSet();
  StorageVolume firstVol = volumes.getVolumesList().get(0);
  StorageVolume secondVol = volumes.getVolumesList().get(1);
  File firstRoot = firstVol.getStorageDir();
  File secondRoot = secondVol.getStorageDir();

  // One bad volume is within toleration: the restart must succeed
  // (no exception expected).
  DatanodeTestUtils.simulateBadRootDir(firstRoot);
  cluster.restartHddsDatanode(0, true);

  // A second bad volume exceeds toleration: the restart must fail.
  DatanodeTestUtils.simulateBadRootDir(secondRoot);
  try {
    cluster.restartHddsDatanode(0, true);
    Assert.fail();
  } catch (RuntimeException e) {
    Assert.assertTrue(e.getMessage().contains("Can't start the HDDS datanode plugin"));
  }

  // Undo the simulated failures so later tests see healthy volumes.
  DatanodeTestUtils.restoreBadRootDir(firstRoot);
  DatanodeTestUtils.restoreBadRootDir(secondRoot);
}
Use of org.apache.hadoop.ozone.container.common.volume.StorageVolume in the Apache Ozone project.
Class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnChunkFileCorrupt.
/**
 * Verifies that reading a key whose chunk files have been corrupted (and
 * whose volume root has lost write permission) triggers the async volume
 * check and results in exactly one detected failed volume.
 */
@Test
public void testHddsVolumeFailureOnChunkFileCorrupt() throws Exception {
  // write a file
  String keyName = UUID.randomUUID().toString();
  String value = "sample value";
  // try-with-resources: the stream is closed (committing the key) even if
  // the write throws, instead of leaking the stream.
  try (OzoneOutputStream out = bucket.createKey(keyName,
      value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>())) {
    out.write(value.getBytes(UTF_8));
  }
  OzoneKey key = bucket.getKey(keyName);
  Assert.assertEquals(keyName, key.getName());
  // corrupt chunk file by rename file->dir
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  Assert.assertTrue(vol0 instanceof HddsVolume);
  File clusterDir = DatanodeTestUtils.getHddsVolumeClusterDir((HddsVolume) vol0);
  File currentDir = new File(clusterDir, Storage.STORAGE_DIR_CURRENT);
  File containerTopDir = new File(currentDir, Storage.CONTAINER_DIR + "0");
  File containerDir = new File(containerTopDir, "1");
  File chunksDir = new File(containerDir, OzoneConsts.STORAGE_DIR_CHUNKS);
  File[] chunkFiles = chunksDir.listFiles();
  Assert.assertNotNull(chunkFiles);
  for (File chunkFile : chunkFiles) {
    DatanodeTestUtils.injectDataFileFailure(chunkFile);
  }
  // simulate bad volume by removing write permission on root dir
  // refer to HddsVolume.check()
  DatanodeTestUtils.simulateBadVolume(vol0);
  // read written file to trigger checkVolumeAsync; the read must fail with
  // an IOException because the chunk files were corrupted above.
  byte[] fileContent = new byte[value.getBytes(UTF_8).length];
  try (OzoneInputStream is = bucket.readKey(keyName)) {
    is.read(fileContent);
    Assert.fail();
  } catch (Exception e) {
    Assert.assertTrue(e instanceof IOException);
  }
  // should trigger checkVolumeAsync and
  // a failed volume should be detected
  DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
  DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);
  // restore for cleanup
  DatanodeTestUtils.restoreBadVolume(vol0);
  for (File chunkFile : chunkFiles) {
    DatanodeTestUtils.restoreDataFileFromFailure(chunkFile);
  }
}
Aggregations