Use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.
The class TestScmSafeMode, method testSCMSafeMode:
@Test(timeout = 300_000)
public void testSCMSafeMode() throws Exception {
  // Test 1: Test safe mode when there are no containers in the system.
  cluster.stop();
  try {
    cluster = builder.build();
  } catch (IOException e) {
    Assert.fail("Cluster startup failed.");
  }
  assertTrue(cluster.getStorageContainerManager().isInSafeMode());
  cluster.startHddsDatanodes();
  cluster.waitForClusterToBeReady();
  cluster.waitTobeOutOfSafeMode();
  assertFalse(cluster.getStorageContainerManager().isInSafeMode());

  // Test 2: Test safe mode when containers are present in the system.
  // Create {numKeys} keys with random names.
  TestStorageContainerManagerHelper helper =
      new TestStorageContainerManagerHelper(cluster, conf);
  Map<String, OmKeyInfo> keyLocations = helper.createKeys(100 * 2, 4096);
  final List<ContainerInfo> containers = cluster
      .getStorageContainerManager().getContainerManager().getContainers();
  GenericTestUtils.waitFor(() -> containers.size() >= 3, 100, 1000 * 30);
  // Remove some containers from the list so that they stay open.
  containers.remove(0);
  containers.remove(0);
  // Close the remaining containers.
  ContainerManager mapping =
      cluster.getStorageContainerManager().getContainerManager();
  containers.forEach(c -> {
    try {
      mapping.updateContainerState(c.containerID(),
          HddsProtos.LifeCycleEvent.FINALIZE);
      mapping.updateContainerState(c.containerID(), LifeCycleEvent.CLOSE);
    } catch (IOException | InvalidStateTransitionException e) {
      LOG.info("Failed to change state of open containers.", e);
    }
  });
  cluster.stop();

  GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer
      .captureLogs(SCMSafeModeManager.getLogger());
  logCapturer.clearOutput();
  try {
    cluster = builder.build();
  } catch (IOException ex) {
    Assert.fail("Cluster restart failed.");
  }
  StorageContainerManager scm = cluster.getStorageContainerManager();
  assertTrue(scm.isInSafeMode());
  assertFalse(logCapturer.getOutput().contains("SCM exiting safe mode."));
  assertTrue(scm.getCurrentContainerThreshold() == 0);
  for (HddsDatanodeService dn : cluster.getHddsDatanodes()) {
    dn.start();
  }
  GenericTestUtils.waitFor(
      () -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);

  EventQueue eventQueue =
      (EventQueue) cluster.getStorageContainerManager().getEventQueue();
  eventQueue.processAll(5000L);

  double safeModeCutoff = conf.getDouble(
      HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
      HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
  assertTrue(scm.getCurrentContainerThreshold() >= safeModeCutoff);
  assertTrue(logCapturer.getOutput().contains("SCM exiting safe mode."));
  assertFalse(scm.isInSafeMode());
}
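The EventQueue call that matters here is processAll(5000L): it drains every queued event synchronously, so the safe-mode assertions that follow cannot race in-flight events. Below is a minimal, self-contained sketch of that fire-then-drain pattern; the event name "REPORT" and the Long payload are illustrative, not part of the test above.

import java.util.concurrent.atomic.AtomicLong;

import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.server.events.TypedEvent;

public class EventQueueDrainSketch {
  public static void main(String[] args) {
    EventQueue queue = new EventQueue();
    // A TypedEvent pairs a payload type with a name; this one is hypothetical.
    TypedEvent<Long> reportEvent = new TypedEvent<>(Long.class, "REPORT");
    AtomicLong handled = new AtomicLong();

    // Handlers receive the payload plus a publisher for firing follow-up events.
    queue.addHandler(reportEvent, (payload, publisher) ->
        handled.incrementAndGet());

    queue.fireEvent(reportEvent, 42L);
    // Blocks until the queued events are processed (or the timeout elapses),
    // which is what makes the assertions after it deterministic.
    queue.processAll(5000L);

    System.out.println("handled events: " + handled.get()); // expected: 1
    queue.close();
  }
}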
Use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.
The class TestPipelineClose, method testPipelineCloseWithLogFailure:
@Test
@Ignore("HDDS-5604")
public void testPipelineCloseWithLogFailure() throws IOException {
  EventQueue eventQ = (EventQueue) scm.getEventQueue();
  PipelineActionHandler pipelineActionTest =
      Mockito.mock(PipelineActionHandler.class);
  eventQ.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionTest);
  ArgumentCaptor<PipelineActionsFromDatanode> actionCaptor =
      ArgumentCaptor.forClass(PipelineActionsFromDatanode.class);

  ContainerInfo containerInfo = containerManager.allocateContainer(
      RatisReplicationConfig.getInstance(ReplicationFactor.THREE),
      "testOwner");
  ContainerWithPipeline containerWithPipeline = new ContainerWithPipeline(
      containerInfo,
      pipelineManager.getPipeline(containerInfo.getPipelineID()));
  Pipeline openPipeline = containerWithPipeline.getPipeline();
  RaftGroupId groupId = RaftGroupId.valueOf(openPipeline.getId().getId());

  try {
    pipelineManager.getPipeline(openPipeline.getId());
  } catch (PipelineNotFoundException e) {
    Assert.fail("pipeline should exist");
  }

  DatanodeDetails datanodeDetails = openPipeline.getNodes().get(0);
  int index = cluster.getHddsDatanodeIndex(datanodeDetails);
  XceiverServerRatis xceiverRatis = (XceiverServerRatis) cluster
      .getHddsDatanodes().get(index)
      .getDatanodeStateMachine().getContainer().getWriteChannel();

  // Notify the datanode Ratis server endpoint of a Ratis log failure.
  // This is expected to trigger an immediate pipeline actions report to SCM.
  xceiverRatis.handleNodeLogFailure(groupId, null);

  // Verify that SCM receives a pipeline action report "immediately".
  Mockito.verify(pipelineActionTest, Mockito.timeout(100))
      .onMessage(actionCaptor.capture(), Mockito.any(EventPublisher.class));
  PipelineActionsFromDatanode actionsFromDatanode = actionCaptor.getValue();
  // Match the pipeline id.
  verifyCloseForPipeline(openPipeline, actionsFromDatanode);
}
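The test wires a Mockito mock straight into the live SCM queue and relies on Mockito.timeout to wait out the asynchronous dispatch. The same pattern works against a standalone queue; the sketch below shows it in isolation, with the "ALERT" event and "disk-failure" payload as illustrative stand-ins.

import org.apache.hadoop.hdds.server.events.EventHandler;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.server.events.TypedEvent;
import org.mockito.Mockito;

public class MockHandlerSketch {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) {
    EventQueue queue = new EventQueue();
    TypedEvent<String> alert = new TypedEvent<>(String.class, "ALERT");
    EventHandler<String> handler = Mockito.mock(EventHandler.class);

    queue.addHandler(alert, handler);
    queue.fireEvent(alert, "disk-failure");

    // Events are dispatched on executor threads, so verify with a timeout
    // rather than immediately after fireEvent returns.
    Mockito.verify(handler, Mockito.timeout(1000))
        .onMessage(Mockito.eq("disk-failure"),
            Mockito.any(EventPublisher.class));
    queue.close();
  }
}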
Use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.
The class TestPipelineClose, method testPipelineCloseWithPipelineAction:
@Test
public void testPipelineCloseWithPipelineAction() throws Exception {
  List<DatanodeDetails> dns = ratisContainer.getPipeline().getNodes();
  PipelineActionsFromDatanode pipelineActionsFromDatanode =
      HddsTestUtils.getPipelineActionFromDatanode(
          dns.get(0), ratisContainer.getPipeline().getId());
  // Send a closing action for the pipeline.
  PipelineActionHandler pipelineActionHandler = new PipelineActionHandler(
      pipelineManager, SCMContext.emptyContext(), conf);
  pipelineActionHandler.onMessage(pipelineActionsFromDatanode,
      new EventQueue());
  Thread.sleep(5000);

  OzoneContainer ozoneContainer = cluster.getHddsDatanodes().get(0)
      .getDatanodeStateMachine().getContainer();
  List<PipelineReport> pipelineReports =
      ozoneContainer.getPipelineReport().getPipelineReportList();
  for (PipelineReport pipelineReport : pipelineReports) {
    // Ensure the pipeline is not reported by any datanode.
    Assert.assertNotEquals(
        PipelineID.getFromProtobuf(pipelineReport.getPipelineID()),
        ratisContainer.getPipeline().getId());
  }

  try {
    pipelineManager.getPipeline(ratisContainer.getPipeline().getId());
    Assert.fail("Pipeline should not exist in SCM");
  } catch (PipelineNotFoundException e) {
    // Expected: the pipeline has been removed from SCM.
  }
}
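Note how the handler is exercised here: onMessage is called by hand rather than through a running queue, and a fresh EventQueue is passed in only to satisfy the EventPublisher parameter. Stripped down to just that idea (the handler body and payload below are illustrative):

import org.apache.hadoop.hdds.server.events.EventHandler;
import org.apache.hadoop.hdds.server.events.EventQueue;

public class DirectHandlerSketch {
  public static void main(String[] args) {
    // Any EventHandler can be driven directly, without subscribing it
    // to a queue.
    EventHandler<String> handler = (payload, publisher) ->
        System.out.println("handling action: " + payload);

    // A throwaway EventQueue serves as the publisher, exactly like the
    // `new EventQueue()` argument passed to PipelineActionHandler above.
    handler.onMessage("close-pipeline", new EventQueue());
  }
}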
Use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.
The class TestReconNodeManager, method testUpdateNodeOperationalStateFromScm:
@Test
public void testUpdateNodeOperationalStateFromScm() throws Exception {
  ReconStorageConfig scmStorageConfig =
      new ReconStorageConfig(conf, new ReconUtils());
  EventQueue eventQueue = new EventQueue();
  NetworkTopology clusterMap = new NetworkTopologyImpl(conf);
  Table<UUID, DatanodeDetails> nodeTable =
      ReconSCMDBDefinition.NODES.getTable(store);
  ReconNodeManager reconNodeManager = new ReconNodeManager(conf,
      scmStorageConfig, eventQueue, clusterMap, nodeTable, versionManager);

  DatanodeDetails datanodeDetails = randomDatanodeDetails();
  HddsProtos.Node node = mock(HddsProtos.Node.class);
  // Updating the state of a node that has not registered should fail.
  LambdaTestUtils.intercept(NodeNotFoundException.class,
      () -> reconNodeManager.updateNodeOperationalStateFromScm(
          node, datanodeDetails));

  reconNodeManager.register(datanodeDetails, null, null);
  assertEquals(IN_SERVICE, reconNodeManager
      .getNodeByUuid(datanodeDetails.getUuidString()).getPersistedOpState());

  when(node.getNodeOperationalStates(eq(0))).thenReturn(DECOMMISSIONING);
  reconNodeManager.updateNodeOperationalStateFromScm(node, datanodeDetails);
  assertEquals(DECOMMISSIONING, reconNodeManager
      .getNodeByUuid(datanodeDetails.getUuidString()).getPersistedOpState());
  List<DatanodeDetails> nodes =
      reconNodeManager.getNodes(DECOMMISSIONING, null);
  assertEquals(1, nodes.size());
  assertEquals(datanodeDetails.getUuid(), nodes.get(0).getUuid());
}
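Here the EventQueue is purely a constructor dependency: ReconNodeManager receives it so it can publish events later, since EventQueue implements the EventPublisher interface. A hypothetical sketch of that wiring style; the NODE_STATE_CHANGED event and StateReporter class are invented for illustration (the real node events live in SCMEvents).

import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.server.events.TypedEvent;

public class PublisherDependencySketch {
  // Hypothetical event; not part of the Ozone codebase.
  static final TypedEvent<String> NODE_STATE_CHANGED =
      new TypedEvent<>(String.class, "NODE_STATE_CHANGED");

  // A component modeled on ReconNodeManager's constructor wiring: it only
  // needs the EventPublisher side of the queue to announce changes.
  static final class StateReporter {
    private final EventPublisher publisher;

    StateReporter(EventPublisher publisher) {
      this.publisher = publisher;
    }

    void report(String nodeUuid) {
      publisher.fireEvent(NODE_STATE_CHANGED, nodeUuid);
    }
  }

  public static void main(String[] args) {
    EventQueue queue = new EventQueue();
    queue.addHandler(NODE_STATE_CHANGED, (uuid, publisher) ->
        System.out.println("node state changed: " + uuid));
    new StateReporter(queue).report("datanode-1");
    queue.processAll(1000L);
    queue.close();
  }
}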
Use of org.apache.hadoop.hdds.server.events.EventQueue in project ozone by apache.
The class TestBlockDeletion, method testContainerStatisticsAfterDelete:
@Test
public void testContainerStatisticsAfterDelete() throws Exception {
  String volumeName = UUID.randomUUID().toString();
  String bucketName = UUID.randomUUID().toString();
  String value = RandomStringUtils.random(1024 * 1024);
  store.createVolume(volumeName);
  OzoneVolume volume = store.getVolume(volumeName);
  volume.createBucket(bucketName);
  OzoneBucket bucket = volume.getBucket(bucketName);

  String keyName = UUID.randomUUID().toString();
  OzoneOutputStream out = bucket.createKey(keyName,
      value.getBytes(UTF_8).length, ReplicationType.RATIS,
      ReplicationFactor.THREE, new HashMap<>());
  out.write(value.getBytes(UTF_8));
  out.close();

  OmKeyArgs keyArgs = new OmKeyArgs.Builder()
      .setVolumeName(volumeName)
      .setBucketName(bucketName)
      .setKeyName(keyName)
      .setDataSize(0)
      .setReplicationConfig(RatisReplicationConfig.getInstance(
          HddsProtos.ReplicationFactor.THREE))
      .setRefreshPipeline(true)
      .build();
  List<OmKeyLocationInfoGroup> omKeyLocationInfoGroupList =
      om.lookupKey(keyArgs).getKeyLocationVersions();
  Thread.sleep(5000);

  List<ContainerInfo> containerInfos =
      scm.getContainerManager().getContainers();
  final int valueSize = value.getBytes(UTF_8).length;
  final int keyCount = 1;
  containerInfos.stream().forEach(container -> {
    Assert.assertEquals(valueSize, container.getUsedBytes());
    Assert.assertEquals(keyCount, container.getNumberOfKeys());
  });

  OzoneTestUtils.closeAllContainers(scm.getEventQueue(), scm);
  // Wait for the containers to close.
  Thread.sleep(2000);
  // Make sure the containers are closed on the datanode.
  omKeyLocationInfoGroupList.forEach((group) -> {
    List<OmKeyLocationInfo> locationInfo = group.getLocationList();
    locationInfo.forEach((info) -> cluster.getHddsDatanodes().get(0)
        .getDatanodeStateMachine().getContainer().getContainerSet()
        .getContainer(info.getContainerID()).getContainerData()
        .setState(ContainerProtos.ContainerDataProto.State.CLOSED));
  });

  writeClient.deleteKey(keyArgs);
  // Wait for blocks to be deleted and container reports to be processed.
  Thread.sleep(5000);
  containerInfos = scm.getContainerManager().getContainers();
  containerInfos.stream().forEach(container -> {
    Assert.assertEquals(0, container.getUsedBytes());
    Assert.assertEquals(0, container.getNumberOfKeys());
  });
  // Verify that the pending block delete counts are as expected after the
  // resent commands.
  cluster.getHddsDatanodes().forEach(dn -> {
    Map<Long, Container<?>> containerMap = dn.getDatanodeStateMachine()
        .getContainer().getContainerSet().getContainerMap();
    containerMap.values().forEach(container -> {
      KeyValueContainerData containerData =
          (KeyValueContainerData) container.getContainerData();
      Assert.assertEquals(0, containerData.getNumPendingDeletionBlocks());
    });
  });

  cluster.shutdownHddsDatanode(0);
  scm.getReplicationManager().processAll();
  ((EventQueue) scm.getEventQueue()).processAll(1000);
  containerInfos = scm.getContainerManager().getContainers();
  containerInfos.stream().forEach(container ->
      Assert.assertEquals(HddsProtos.LifeCycleState.DELETING,
          container.getState()));

  LogCapturer logCapturer = LogCapturer.captureLogs(ReplicationManager.LOG);
  logCapturer.clearOutput();
  scm.getReplicationManager().processAll();
  ((EventQueue) scm.getEventQueue()).processAll(1000);
  GenericTestUtils.waitFor(
      () -> logCapturer.getOutput().contains("Resend delete Container"),
      500, 3000);

  cluster.restartHddsDatanode(0, true);
  Thread.sleep(1000);
  scm.getReplicationManager().processAll();
  ((EventQueue) scm.getEventQueue()).processAll(1000);
  GenericTestUtils.waitFor(() -> {
    List<ContainerInfo> infos = scm.getContainerManager().getContainers();
    try {
      infos.stream().forEach(container -> {
        Assert.assertEquals(HddsProtos.LifeCycleState.DELETED,
            container.getState());
        try {
          Assert.assertTrue(scm.getScmMetadataStore().getContainerTable()
              .get(container.containerID()).getState()
              == HddsProtos.LifeCycleState.DELETED);
        } catch (IOException e) {
          Assert.fail("Container from SCM DB should be marked as DELETED");
        }
      });
    } catch (Throwable e) {
      return false;
    }
    return true;
  }, 500, 5000);
  LOG.info(metrics.toString());
}
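The recurring idiom in this test is: trigger a ReplicationManager pass, drain the SCM event queue synchronously with processAll(1000), then poll for the expected state with GenericTestUtils.waitFor. A self-contained sketch of the drain-then-poll half, with the "DELETE" event and boolean flag as illustrative stand-ins for the container state checks above:

import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.server.events.TypedEvent;
import org.apache.hadoop.test.GenericTestUtils;

public class DrainThenPollSketch {
  public static void main(String[] args) throws Exception {
    EventQueue queue = new EventQueue();
    TypedEvent<Long> deleteEvent = new TypedEvent<>(Long.class, "DELETE");
    AtomicBoolean deleted = new AtomicBoolean(false);

    queue.addHandler(deleteEvent, (id, publisher) -> deleted.set(true));
    queue.fireEvent(deleteEvent, 1L);

    // Drain the queue so in-flight events cannot race the assertion...
    queue.processAll(1000L);
    // ...then poll for the state change, as the test does for DELETED
    // containers (check every 500 ms, give up after 5 s).
    GenericTestUtils.waitFor(deleted::get, 500, 5000);
    queue.close();
  }
}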