Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
The class TestScmSafeMode, method testSCMSafeMode.
@Test(timeout = 300_000)
public void testSCMSafeMode() throws Exception {
  // Test1: Test safe mode when there are no containers in the system.
  cluster.stop();
  try {
    cluster = builder.build();
  } catch (IOException e) {
    Assert.fail("Cluster startup failed.");
  }
  assertTrue(cluster.getStorageContainerManager().isInSafeMode());
  cluster.startHddsDatanodes();
  cluster.waitForClusterToBeReady();
  cluster.waitTobeOutOfSafeMode();
  assertFalse(cluster.getStorageContainerManager().isInSafeMode());

  // Test2: Test safe mode when containers exist in the system.
  // Create 100 * 2 randomly named keys of 4096 bytes each.
  TestStorageContainerManagerHelper helper =
      new TestStorageContainerManagerHelper(cluster, conf);
  Map<String, OmKeyInfo> keyLocations = helper.createKeys(100 * 2, 4096);
  final List<ContainerInfo> containers = cluster.getStorageContainerManager()
      .getContainerManager().getContainers();
  GenericTestUtils.waitFor(() -> containers.size() >= 3, 100, 1000 * 30);

  // Remove some containers from the list so that they stay open.
  containers.remove(0);
  containers.remove(0);

  // Close the remaining containers.
  ContainerManager mapping =
      cluster.getStorageContainerManager().getContainerManager();
  containers.forEach(c -> {
    try {
      mapping.updateContainerState(c.containerID(),
          HddsProtos.LifeCycleEvent.FINALIZE);
      mapping.updateContainerState(c.containerID(), LifeCycleEvent.CLOSE);
    } catch (IOException | InvalidStateTransitionException e) {
      LOG.info("Failed to change state of open containers.", e);
    }
  });
  cluster.stop();

  GenericTestUtils.LogCapturer logCapturer =
      GenericTestUtils.LogCapturer.captureLogs(SCMSafeModeManager.getLogger());
  logCapturer.clearOutput();
  try {
    cluster = builder.build();
  } catch (IOException ex) {
    fail("Cluster restart failed.");
  }
  StorageContainerManager scm = cluster.getStorageContainerManager();
  assertTrue(scm.isInSafeMode());
  assertFalse(logCapturer.getOutput().contains("SCM exiting safe mode."));
  assertTrue(scm.getCurrentContainerThreshold() == 0);
  for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
    dn.start();
  }
  GenericTestUtils.waitFor(() -> scm.getCurrentContainerThreshold() == 1.0,
      100, 20000);
  EventQueue eventQueue =
      (EventQueue) cluster.getStorageContainerManager().getEventQueue();
  eventQueue.processAll(5000L);
  double safeModeCutoff = conf.getDouble(
      HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
      HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
  assertTrue(scm.getCurrentContainerThreshold() >= safeModeCutoff);
  assertTrue(logCapturer.getOutput().contains("SCM exiting safe mode."));
  assertFalse(scm.isInSafeMode());
}
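The cluster method waitTobeOutOfSafeMode used above is essentially a polling loop over the SCM's safe-mode flag. A minimal sketch of how it could be implemented inside MiniOzoneCluster, assuming the GenericTestUtils.waitFor signature used elsewhere in these tests; the poll interval and timeout are illustrative assumptions, not the project's actual values:

// Hypothetical sketch of MiniOzoneCluster#waitTobeOutOfSafeMode: poll the
// SCM until it reports that safe mode has been exited.
public void waitTobeOutOfSafeMode()
    throws TimeoutException, InterruptedException {
  GenericTestUtils.waitFor(
      () -> !getStorageContainerManager().isInSafeMode(),
      100,       // re-check every 100 ms
      45_000);   // assumed overall timeout in ms
}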
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
The class TestRatisPipelineCreateAndDestroy, method testPipelineCreationOnNodeRestart.
@Test(timeout = 180000)
public void testPipelineCreationOnNodeRestart() throws Exception {
  conf.setTimeDuration(OZONE_SCM_STALENODE_INTERVAL, 5, TimeUnit.SECONDS);
  init(3);
  // Make sure a pipeline is created.
  waitForPipelines(1);
  List<HddsDatanodeService> dns = new ArrayList<>(cluster.getHddsDatanodes());
  List<Pipeline> pipelines = pipelineManager.getPipelines(
      RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
  for (HddsDatanodeService dn : dns) {
    cluster.shutdownHddsDatanode(dn.getDatanodeDetails());
  }
  // Try creating another pipeline now.
  try {
    pipelineManager.createPipeline(
        RatisReplicationConfig.getInstance(ReplicationFactor.THREE));
    Assert.fail("Pipeline creation should fail after shutting down all datanodes.");
  } catch (IOException ioe) {
    // All datanodes are now shut down and move to the stale state, so there
    // are not enough datanodes left to create the pipeline.
    Assert.assertTrue(ioe instanceof SCMException);
    Assert.assertEquals(SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE,
        ((SCMException) ioe).getResult());
  }
  // Make sure the pipelines are destroyed.
  waitForPipelines(0);
  for (HddsDatanodeService dn : dns) {
    cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
  }
  // Destroy the existing pipelines.
  for (Pipeline pipeline : pipelines) {
    pipelineManager.closePipeline(pipeline, false);
  }
  if (cluster.getStorageContainerManager().getScmNodeManager()
      .getNodeCount(NodeStatus.inServiceHealthy())
      >= HddsProtos.ReplicationFactor.THREE.getNumber()) {
    // Make sure a pipeline is created after the nodes restart.
    cluster.getStorageContainerManager().getSCMServiceManager()
        .notifyEventTriggered(Event.PRE_CHECK_COMPLETED);
    waitForPipelines(1);
  }
}
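The waitForPipelines helper this test calls is not shown. A minimal sketch, assuming it polls the pipeline manager for open Ratis/THREE pipelines via the PipelineManager getPipelines overload that filters by state; the poll interval and timeout here are illustrative:

// Hypothetical sketch of the test class's waitForPipelines helper.
private void waitForPipelines(int numPipelines)
    throws TimeoutException, InterruptedException {
  GenericTestUtils.waitFor(() -> pipelineManager
      .getPipelines(RatisReplicationConfig.getInstance(ReplicationFactor.THREE),
          Pipeline.PipelineState.OPEN)
      .size() >= numPipelines, 100, 60_000);
}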
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
The class Test2WayCommitInRatis, method test2WayCommitForRetryfailure.
@Test
public void test2WayCommitForRetryfailure() throws Exception {
  OzoneConfiguration conf = new OzoneConfiguration();
  startCluster(conf);
  GenericTestUtils.LogCapturer logCapturer =
      GenericTestUtils.LogCapturer.captureLogs(XceiverClientRatis.LOG);
  XceiverClientManager clientManager = new XceiverClientManager(conf);
  ContainerWithPipeline container1 = storageContainerLocationClient
      .allocateContainer(HddsProtos.ReplicationType.RATIS,
          HddsProtos.ReplicationFactor.THREE, OzoneConsts.OZONE);
  XceiverClientSpi xceiverClient =
      clientManager.acquireClient(container1.getPipeline());
  Assert.assertEquals(1, xceiverClient.getRefcount());
  Assert.assertEquals(container1.getPipeline(), xceiverClient.getPipeline());
  Pipeline pipeline = xceiverClient.getPipeline();
  XceiverClientRatis ratisClient = (XceiverClientRatis) xceiverClient;
  XceiverClientReply reply = xceiverClient.sendCommandAsync(
      ContainerTestHelper.getCreateContainerRequest(
          container1.getContainerInfo().getContainerID(),
          xceiverClient.getPipeline()));
  reply.getResponse().get();
  Assert.assertEquals(3, ratisClient.getCommitInfoMap().size());
  // Wait for the container to be created on all the nodes.
  xceiverClient.watchForCommit(reply.getLogIndex());
  for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
    // Shut down the Ratis follower.
    if (RatisTestHelper.isRatisFollower(dn, pipeline)) {
      cluster.shutdownHddsDatanode(dn.getDatanodeDetails());
      break;
    }
  }
  reply = xceiverClient.sendCommandAsync(ContainerTestHelper.getCloseContainer(
      pipeline, container1.getContainerInfo().getContainerID()));
  reply.getResponse().get();
  xceiverClient.watchForCommit(reply.getLogIndex());
  // The commitInfoMap will be reduced to 2 entries here.
  Assert.assertEquals(2, ratisClient.getCommitInfoMap().size());
  clientManager.releaseClient(xceiverClient, false);
  Assert.assertTrue(logCapturer.getOutput().contains("3 way commit failed"));
  Assert.assertTrue(logCapturer.getOutput().contains("Committed by majority"));
  logCapturer.stopCapturing();
  shutdown();
}
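The RatisTestHelper.isRatisFollower check is the one piece not shown here. One way to approximate it, assuming the Pipeline object exposes its current leader; this is a sketch, not the project's actual helper, which may query the Ratis server state directly:

// Hypothetical approximation of RatisTestHelper.isRatisFollower: a follower
// is a pipeline member that is not the current leader.
static boolean isFollower(HddsDatanodeService dn, Pipeline pipeline)
    throws IOException {
  DatanodeDetails details = dn.getDatanodeDetails();
  return pipeline.getNodes().contains(details)
      && !details.equals(pipeline.getLeaderNode());
}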
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
The class TestContainerReplicationEndToEnd, method testContainerReplication.
/**
 * Simulates end-to-end container replication.
 */
@Test
public void testContainerReplication() throws Exception {
  String keyName = "testContainerReplication";
  OzoneOutputStream key = objectStore.getVolume(volumeName)
      .getBucket(bucketName)
      .createKey(keyName, 0, ReplicationType.RATIS, ReplicationFactor.THREE,
          new HashMap<>());
  byte[] testData = "ratis".getBytes(UTF_8);
  // The first write and flush creates a container in the datanode.
  key.write(testData);
  key.flush();
  KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
  List<OmKeyLocationInfo> locationInfoList =
      groupOutputStream.getLocationInfoList();
  Assert.assertEquals(1, locationInfoList.size());
  OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
  long containerID = omKeyLocationInfo.getContainerID();
  PipelineID pipelineID = cluster.getStorageContainerManager()
      .getContainerManager()
      .getContainer(ContainerID.valueOf(containerID)).getPipelineID();
  Pipeline pipeline = cluster.getStorageContainerManager()
      .getPipelineManager().getPipeline(pipelineID);
  key.close();
  HddsProtos.LifeCycleState containerState = cluster.getStorageContainerManager()
      .getContainerManager()
      .getContainer(ContainerID.valueOf(containerID)).getState();
  LoggerFactory.getLogger(TestContainerReplicationEndToEnd.class)
      .info("Current Container State is {}", containerState);
  if ((containerState != HddsProtos.LifeCycleState.CLOSING)
      && (containerState != HddsProtos.LifeCycleState.CLOSED)) {
    cluster.getStorageContainerManager().getContainerManager()
        .updateContainerState(ContainerID.valueOf(containerID),
            HddsProtos.LifeCycleEvent.FINALIZE);
  }
  // Wait for the container to move to the CLOSING state in SCM.
  Thread.sleep(2 * containerReportInterval);
  DatanodeDetails oldReplicaNode = pipeline.getFirstNode();
  // Now move the container to the CLOSED state on the datanode.
  XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
  ContainerProtos.ContainerCommandRequestProto.Builder request =
      ContainerProtos.ContainerCommandRequestProto.newBuilder();
  request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
  request.setCmdType(ContainerProtos.Type.CloseContainer);
  request.setContainerID(containerID);
  request.setCloseContainer(
      ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
  xceiverClient.sendCommand(request.build());
  // Wait for the container to move to the CLOSED state in SCM.
  Thread.sleep(2 * containerReportInterval);
  Assert.assertTrue(cluster.getStorageContainerManager()
      .getContainerInfo(containerID).getState()
      == HddsProtos.LifeCycleState.CLOSED);
  // Shut down the replica node.
  cluster.shutdownHddsDatanode(oldReplicaNode);
  // Now the container is under-replicated and will be moved to a different
  // datanode.
  HddsDatanodeService dnService = null;
  for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
    Predicate<DatanodeDetails> p =
        i -> i.getUuid().equals(dn.getDatanodeDetails().getUuid());
    if (!pipeline.getNodes().stream().anyMatch(p)) {
      dnService = dn;
    }
  }
  Assert.assertNotNull(dnService);
  final HddsDatanodeService newReplicaNode = dnService;
  // Wait for the container to get replicated.
  GenericTestUtils.waitFor(() -> {
    return newReplicaNode.getDatanodeStateMachine().getContainer()
        .getContainerSet().getContainer(containerID) != null;
  }, 500, 100000);
  Assert.assertTrue(newReplicaNode.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(containerID).getContainerData()
      .getBlockCommitSequenceId() > 0);
  // Wait for SCM to update the replica map.
  Thread.sleep(5 * containerReportInterval);
  // Shut down the original pipeline nodes, then read the key again.
  for (DatanodeDetails dn : pipeline.getNodes()) {
    cluster.shutdownHddsDatanode(dn);
  }
  // This reads the data from the datanode to which the container was
  // replicated after it was closed.
  TestHelper.validateData(keyName, testData, objectStore, volumeName, bucketName);
}
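The loop that picks a datanode outside the pipeline can be written more idiomatically with Stream#noneMatch. This sketch is behavior-equivalent to the loop in the test above:

// Pick a datanode that is not part of the pipeline (same semantics as the
// negated anyMatch loop in the test).
HddsDatanodeService newReplicaNode = cluster.getHddsDatanodes().stream()
    .filter(dn -> pipeline.getNodes().stream()
        .noneMatch(n -> n.getUuid().equals(dn.getDatanodeDetails().getUuid())))
    .findFirst()
    .orElseThrow(() -> new AssertionError("No datanode outside the pipeline"));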
Use of org.apache.hadoop.ozone.HddsDatanodeService in project ozone by apache.
The class TestContainerStateMachineFailures, method testApplyTransactionIdempotencyWithClosedContainer.
@Test
public void testApplyTransactionIdempotencyWithClosedContainer() throws Exception {
  OzoneOutputStream key = objectStore.getVolume(volumeName)
      .getBucket(bucketName)
      .createKey("ratis", 1024, ReplicationType.RATIS, ReplicationFactor.ONE,
          new HashMap<>());
  // The first write and flush creates a container in the datanode.
  key.write("ratis".getBytes(UTF_8));
  key.flush();
  key.write("ratis".getBytes(UTF_8));
  KeyOutputStream groupOutputStream = (KeyOutputStream) key.getOutputStream();
  List<OmKeyLocationInfo> locationInfoList =
      groupOutputStream.getLocationInfoList();
  Assert.assertEquals(1, locationInfoList.size());
  OmKeyLocationInfo omKeyLocationInfo = locationInfoList.get(0);
  HddsDatanodeService dn = TestHelper.getDatanodeService(omKeyLocationInfo, cluster);
  ContainerData containerData = dn.getDatanodeStateMachine().getContainer()
      .getContainerSet().getContainer(omKeyLocationInfo.getContainerID())
      .getContainerData();
  Assert.assertTrue(containerData instanceof KeyValueContainerData);
  key.close();
  ContainerStateMachine stateMachine = (ContainerStateMachine)
      TestHelper.getStateMachine(dn, omKeyLocationInfo.getPipeline());
  SimpleStateMachineStorage storage =
      (SimpleStateMachineStorage) stateMachine.getStateMachineStorage();
  Path parentPath = storage.findLatestSnapshot().getFile().getPath();
  stateMachine.takeSnapshot();
  Assert.assertTrue(parentPath.getParent().toFile().listFiles().length > 0);
  FileInfo snapshot = storage.findLatestSnapshot().getFile();
  Assert.assertNotNull(snapshot);
  long containerID = omKeyLocationInfo.getContainerID();
  Pipeline pipeline = cluster.getStorageContainerLocationClient()
      .getContainerWithPipeline(containerID).getPipeline();
  XceiverClientSpi xceiverClient = xceiverClientManager.acquireClient(pipeline);
  ContainerProtos.ContainerCommandRequestProto.Builder request =
      ContainerProtos.ContainerCommandRequestProto.newBuilder();
  request.setDatanodeUuid(pipeline.getFirstNode().getUuidString());
  request.setCmdType(ContainerProtos.Type.CloseContainer);
  request.setContainerID(containerID);
  request.setCloseContainer(
      ContainerProtos.CloseContainerRequestProto.getDefaultInstance());
  try {
    xceiverClient.sendCommand(request.build());
  } catch (IOException e) {
    Assert.fail("Exception should not be thrown");
  }
  Assert.assertTrue(TestHelper.getDatanodeService(omKeyLocationInfo, cluster)
      .getDatanodeStateMachine().getContainer().getContainerSet()
      .getContainer(containerID).getContainerState()
      == ContainerProtos.ContainerDataProto.State.CLOSED);
  Assert.assertTrue(stateMachine.isStateMachineHealthy());
  try {
    stateMachine.takeSnapshot();
  } catch (IOException ioe) {
    Assert.fail("Exception should not be thrown");
  }
  FileInfo latestSnapshot = storage.findLatestSnapshot().getFile();
  Assert.assertFalse(snapshot.getPath().equals(latestSnapshot.getPath()));
}
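Both this test and testContainerReplication build the same CloseContainer command by hand. A small factory method could remove that duplication; this is a hypothetical refactoring sketch that mirrors the builder calls shown above, not a helper that exists in the project:

// Hypothetical helper; same builder calls as used in both tests.
static ContainerProtos.ContainerCommandRequestProto closeContainerRequest(
    Pipeline pipeline, long containerID) {
  return ContainerProtos.ContainerCommandRequestProto.newBuilder()
      .setDatanodeUuid(pipeline.getFirstNode().getUuidString())
      .setCmdType(ContainerProtos.Type.CloseContainer)
      .setContainerID(containerID)
      .setCloseContainer(
          ContainerProtos.CloseContainerRequestProto.getDefaultInstance())
      .build();
}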