Use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.
The class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnChunkFileCorrupt.
@Test
public void testHddsVolumeFailureOnChunkFileCorrupt() throws Exception {
  // write a file
  String keyName = UUID.randomUUID().toString();
  String value = "sample value";
  OzoneOutputStream out = bucket.createKey(keyName,
      value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
  out.write(value.getBytes(UTF_8));
  out.close();
  OzoneKey key = bucket.getKey(keyName);
  Assert.assertEquals(keyName, key.getName());

  // corrupt chunk file by rename file->dir
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  Assert.assertTrue(vol0 instanceof HddsVolume);
  File clusterDir = DatanodeTestUtils.getHddsVolumeClusterDir((HddsVolume) vol0);
  File currentDir = new File(clusterDir, Storage.STORAGE_DIR_CURRENT);
  File containerTopDir = new File(currentDir, Storage.CONTAINER_DIR + "0");
  File containerDir = new File(containerTopDir, "1");
  File chunksDir = new File(containerDir, OzoneConsts.STORAGE_DIR_CHUNKS);
  File[] chunkFiles = chunksDir.listFiles();
  Assert.assertNotNull(chunkFiles);
  for (File chunkFile : chunkFiles) {
    DatanodeTestUtils.injectDataFileFailure(chunkFile);
  }

  // simulate bad volume by removing write permission on root dir,
  // refer to HddsVolume.check()
  DatanodeTestUtils.simulateBadVolume(vol0);

  // read written file to trigger checkVolumeAsync
  OzoneInputStream is = bucket.readKey(keyName);
  byte[] fileContent = new byte[value.getBytes(UTF_8).length];
  try {
    is.read(fileContent);
    Assert.fail();
  } catch (Exception e) {
    Assert.assertTrue(e instanceof IOException);
  } finally {
    is.close();
  }

  // should trigger checkVolumeAsync and a failed volume should be detected
  DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
  DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);

  // restore for cleanup
  DatanodeTestUtils.restoreBadVolume(vol0);
  for (File chunkFile : chunkFiles) {
    DatanodeTestUtils.restoreDataFileFromFailure(chunkFile);
  }
}
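The DatanodeTestUtils.injectDataFileFailure call above corrupts each chunk file by replacing it with a directory of the same name, per the comment in the test. Below is a minimal sketch of how such a rename-based injection helper could be implemented; the backup-file naming and error handling are assumptions for illustration, not the actual DatanodeTestUtils code.

// Hypothetical rename-based fault injection: move the data file aside and put
// an empty directory in its place, so reads of the chunk fail with an
// IOException until the original file is restored.
import java.io.File;
import java.io.IOException;

public final class FileFailureInjector {
  private static final String BACKUP_SUFFIX = ".backup";   // assumed naming

  private FileFailureInjector() { }

  /** Replace a data file with a directory of the same name. */
  public static void inject(File dataFile) throws IOException {
    File backup = new File(dataFile.getParent(), dataFile.getName() + BACKUP_SUFFIX);
    if (!dataFile.renameTo(backup)) {
      throw new IOException("Failed to move " + dataFile + " aside");
    }
    if (!dataFile.mkdir()) {
      throw new IOException("Failed to create directory at " + dataFile);
    }
  }

  /** Undo the injection: drop the (empty) directory and move the original back. */
  public static void restore(File dataFile) throws IOException {
    File backup = new File(dataFile.getParent(), dataFile.getName() + BACKUP_SUFFIX);
    if (!dataFile.delete() || !backup.renameTo(dataFile)) {
      throw new IOException("Failed to restore " + dataFile);
    }
  }
}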
Use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.
The class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnDbFileCorrupt.
@Test
public void testHddsVolumeFailureOnDbFileCorrupt() throws Exception {
  // write a file, will create container1
  String keyName = UUID.randomUUID().toString();
  String value = "sample value";
  OzoneOutputStream out = bucket.createKey(keyName,
      value.getBytes(UTF_8).length, RATIS, ONE, new HashMap<>());
  out.write(value.getBytes(UTF_8));
  out.close();
  OzoneKey key = bucket.getKey(keyName);
  Assert.assertEquals(keyName, key.getName());

  // close container1
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  Container c1 = oc.getContainerSet().getContainer(1);
  c1.close();

  // create container2, and container1 is kicked out of cache
  ContainerWithPipeline c2 = scmClient.createContainer(
      HddsProtos.ReplicationType.STAND_ALONE,
      HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);
  Assert.assertTrue(c2.getContainerInfo().getState()
      .equals(HddsProtos.LifeCycleState.OPEN));

  // corrupt db by rename dir->file
  File metadataDir = new File(c1.getContainerFile().getParent());
  File dbDir = new File(metadataDir, "1" + OzoneConsts.DN_CONTAINER_DB);
  DatanodeTestUtils.injectDataDirFailure(dbDir);

  // simulate bad volume by removing write permission on root dir,
  // refer to HddsVolume.check()
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  DatanodeTestUtils.simulateBadVolume(vol0);

  // read written file to trigger checkVolumeAsync
  OzoneInputStream is = bucket.readKey(keyName);
  byte[] fileContent = new byte[value.getBytes(UTF_8).length];
  try {
    is.read(fileContent);
    Assert.fail();
  } catch (Exception e) {
    Assert.assertTrue(e instanceof IOException);
  } finally {
    is.close();
  }

  // should trigger checkVolumeAsync and a failed volume should be detected
  DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
  DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);

  // restore all
  DatanodeTestUtils.restoreBadVolume(vol0);
  DatanodeTestUtils.restoreDataDirFromFailure(dbDir);
}
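waitForCheckVolume and waitForHandleFailedVolume block until the asynchronous volume checker has recorded the failure. A simple polling loop along the following lines would serve the same purpose; it assumes MutableVolumeSet#getFailedVolumesList() reports the volumes that failed their health check, and it is not the actual DatanodeTestUtils implementation.

// Polling sketch only; the real DatanodeTestUtils wait helpers may differ.
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;

public final class VolumeFailureWait {
  private VolumeFailureWait() { }

  // Assumption: getFailedVolumesList() returns the volumes marked failed by
  // the async volume checker.
  public static void awaitFailedVolumes(MutableVolumeSet volumeSet, int expected,
      long timeoutMillis) throws InterruptedException, TimeoutException {
    long deadline = System.currentTimeMillis() + timeoutMillis;
    while (volumeSet.getFailedVolumesList().size() < expected) {
      if (System.currentTimeMillis() > deadline) {
        throw new TimeoutException("Still waiting for " + expected
            + " failed volume(s), got " + volumeSet.getFailedVolumesList().size());
      }
      Thread.sleep(100);
    }
  }
}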
Use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.
The class TestDatanodeHddsVolumeFailureDetection, method testHddsVolumeFailureOnContainerFileCorrupt.
@Test
public void testHddsVolumeFailureOnContainerFileCorrupt() throws Exception {
  // create a container
  ContainerWithPipeline container = scmClient.createContainer(
      HddsProtos.ReplicationType.STAND_ALONE,
      HddsProtos.ReplicationFactor.ONE, OzoneConsts.OZONE);

  // corrupt the container file by removing write permission on the container
  // metadata dir; the container update operation writes a temp file and then
  // renames it into place, so simply renaming the container file would not
  // simulate the corruption
  HddsDatanodeService dn = datanodes.get(0);
  OzoneContainer oc = dn.getDatanodeStateMachine().getContainer();
  MutableVolumeSet volSet = oc.getVolumeSet();
  StorageVolume vol0 = volSet.getVolumesList().get(0);
  Container c1 = oc.getContainerSet().getContainer(
      container.getContainerInfo().getContainerID());
  File metadataDir = new File(c1.getContainerFile().getParent());
  DatanodeTestUtils.injectContainerMetaDirFailure(metadataDir);

  // simulate bad volume by removing write permission on root dir,
  // refer to HddsVolume.check()
  DatanodeTestUtils.simulateBadVolume(vol0);

  // close the container to trigger checkVolumeAsync
  try {
    c1.close();
    Assert.fail();
  } catch (Exception e) {
    Assert.assertTrue(e instanceof IOException);
  }

  // should trigger checkVolumeAsync and a failed volume should be detected
  DatanodeTestUtils.waitForCheckVolume(volSet, 1L);
  DatanodeTestUtils.waitForHandleFailedVolume(volSet, 1);

  // restore for cleanup
  DatanodeTestUtils.restoreBadVolume(vol0);
  DatanodeTestUtils.restoreContainerMetaDirFromFailure(metadataDir);
}
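Per the comments, simulateBadVolume works by dropping the write permission on the volume root so that the next HddsVolume.check() fails. A hypothetical version of that pair of helpers might look like the following; the actual DatanodeTestUtils code may differ (the tests pass a StorageVolume that has already been verified to be an HddsVolume).

// Hypothetical permission-based volume failure simulation.
import java.io.File;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;

public final class BadVolumeSimulator {
  private BadVolumeSimulator() { }

  public static void simulateBadVolume(HddsVolume volume) {
    // HddsVolume.check() needs a writable root; drop the write bit so the
    // next health check fails.
    File root = volume.getHddsRootDir();
    if (!root.setWritable(false)) {
      throw new IllegalStateException("Could not remove write permission on " + root);
    }
  }

  public static void restoreBadVolume(HddsVolume volume) {
    // Give the write permission back so the test can clean up the volume.
    File root = volume.getHddsRootDir();
    if (!root.setWritable(true)) {
      throw new IllegalStateException("Could not restore write permission on " + root);
    }
  }
}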
Use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.
The class ClosedContainerReplicator, method initializeReplicationSupervisor.
@NotNull
private void initializeReplicationSupervisor(ConfigurationSource conf) throws IOException {
  String fakeDatanodeUuid = datanode;
  if (fakeDatanodeUuid.isEmpty()) {
    fakeDatanodeUuid = UUID.randomUUID().toString();
  }
  ContainerSet containerSet = new ContainerSet();
  ContainerMetrics metrics = ContainerMetrics.create(conf);
  MutableVolumeSet volumeSet = new MutableVolumeSet(fakeDatanodeUuid, conf, null,
      StorageVolume.VolumeType.DATA_VOLUME, null);
  Map<ContainerType, Handler> handlers = new HashMap<>();
  for (ContainerType containerType : ContainerType.values()) {
    final Handler handler = Handler.getHandlerForContainerType(
        containerType, conf, fakeDatanodeUuid, containerSet, volumeSet, metrics,
        containerReplicaProto -> {
        });
    handler.setClusterID(UUID.randomUUID().toString());
    handlers.put(containerType, handler);
  }
  ContainerController controller = new ContainerController(containerSet, handlers);
  ContainerReplicator replicator = new DownloadAndImportReplicator(
      containerSet, controller, new SimpleContainerDownloader(conf, null),
      new TarContainerPacker());
  supervisor = new ReplicationSupervisor(containerSet, replicator, 10);
}
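With the supervisor in place, the replicator tool queues one task per container to copy and waits for the queue to drain. The fragment below is a rough usage sketch of that step; the ReplicationTask constructor taking a container ID plus source datanodes, and the getInFlightReplications() accessor, are assumptions based on this code base rather than verbatim ClosedContainerReplicator code.

// Usage sketch, not verbatim ClosedContainerReplicator code. Assumed
// signatures: ReplicationTask(long containerId, List<DatanodeDetails> sources),
// ReplicationSupervisor#addTask and #getInFlightReplications().
private void replicateContainers(List<ContainerInfo> containers,
    Map<Long, List<DatanodeDetails>> sourcesByContainer) throws InterruptedException {
  for (ContainerInfo container : containers) {
    long containerId = container.getContainerID();
    supervisor.addTask(new ReplicationTask(containerId, sourcesByContainer.get(containerId)));
  }
  // wait until every queued replication has either finished or failed
  while (supervisor.getInFlightReplications() > 0) {
    Thread.sleep(1000);
  }
}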
Use of org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet in project ozone by apache.
The class HddsDatanodeService, method startRatisForTest.
/**
 * Initialize and start the Ratis server.
 * <p>
 * In the normal case this initialization is done after the SCM registration.
 * It can be forced, to make it possible to test a single, isolated datanode.
 */
private void startRatisForTest() throws IOException {
  String clusterId = "clusterId";
  datanodeStateMachine.getContainer().start(clusterId);
  MutableVolumeSet volumeSet = getDatanodeStateMachine().getContainer().getVolumeSet();
  Map<String, StorageVolume> volumeMap = volumeSet.getVolumeMap();
  for (Map.Entry<String, StorageVolume> entry : volumeMap.entrySet()) {
    HddsVolume hddsVolume = (HddsVolume) entry.getValue();
    boolean result = HddsVolumeUtil.checkVolume(hddsVolume, clusterId, clusterId, conf, LOG);
    if (!result) {
      volumeSet.failVolume(hddsVolume.getHddsRootDir().getPath());
    }
  }
}
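failVolume removes the volume from the active map and tracks it as failed. A quick sanity check of that bookkeeping, assuming MutableVolumeSet also exposes getFailedVolumesList() as used elsewhere in this code base, could look like this:

// sanity-check sketch after the loop above; getFailedVolumesList() is assumed
// to list the volumes that failVolume() removed from the active map
MutableVolumeSet volumeSet = getDatanodeStateMachine().getContainer().getVolumeSet();
LOG.info("Active volumes: {}, failed volumes: {}",
    volumeSet.getVolumesList().size(),
    volumeSet.getFailedVolumesList().size());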