use of org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException in project ozone by apache.
the class TestScmSafeMode method testSCMSafeMode.
/**
 * Exercises SCM safe mode over two cluster restarts: first with no containers
 * in the system, then after keys have been written and all but two of the
 * listed containers have been driven through FINALIZE and CLOSE.
 */
@Test(timeout = 300_000)
public void testSCMSafeMode() throws Exception {
  // Phase 1: restart with no containers; SCM must start in safe mode and
  // leave it once the datanodes are up.
  cluster.stop();
  try {
    cluster = builder.build();
  } catch (IOException e) {
    Assert.fail("Cluster startup failed.");
  }
  assertTrue(cluster.getStorageContainerManager().isInSafeMode());
  cluster.startHddsDatanodes();
  cluster.waitForClusterToBeReady();
  cluster.waitTobeOutOfSafeMode();
  assertFalse(cluster.getStorageContainerManager().isInSafeMode());

  // Phase 2: restart with containers present.
  // Write keys so that containers get allocated.
  TestStorageContainerManagerHelper keyHelper =
      new TestStorageContainerManagerHelper(cluster, conf);
  Map<String, OmKeyInfo> createdKeys = keyHelper.createKeys(200, 4096);
  final List<ContainerInfo> containerList = cluster
      .getStorageContainerManager()
      .getContainerManager()
      .getContainers();
  GenericTestUtils.waitFor(() -> containerList.size() >= 3, 100, 30_000);

  // Drop the first two entries from the list so they are not closed below.
  containerList.remove(0);
  containerList.remove(0);

  // Finalize and close every container still in the list.
  ContainerManager containerManager =
      cluster.getStorageContainerManager().getContainerManager();
  for (ContainerInfo info : containerList) {
    try {
      containerManager.updateContainerState(
          info.containerID(), HddsProtos.LifeCycleEvent.FINALIZE);
      containerManager.updateContainerState(
          info.containerID(), LifeCycleEvent.CLOSE);
    } catch (IOException | InvalidStateTransitionException e) {
      LOG.info("Failed to change state of open containers.", e);
    }
  }
  cluster.stop();

  // Capture safe mode manager logs across the restart.
  GenericTestUtils.LogCapturer safeModeLogs =
      GenericTestUtils.LogCapturer.captureLogs(SCMSafeModeManager.getLogger());
  safeModeLogs.clearOutput();
  try {
    cluster = builder.build();
  } catch (IOException ex) {
    fail("failed");
  }

  // Before any datanode starts: still in safe mode, no exit message logged,
  // and the container threshold is zero.
  final StorageContainerManager scm = cluster.getStorageContainerManager();
  assertTrue(scm.isInSafeMode());
  assertFalse(safeModeLogs.getOutput().contains("SCM exiting safe mode."));
  assertTrue(scm.getCurrentContainerThreshold() == 0);

  for (HddsDatanodeService datanode : cluster.getHddsDatanodes()) {
    datanode.start();
  }
  GenericTestUtils.waitFor(
      () -> scm.getCurrentContainerThreshold() == 1.0, 100, 20000);

  // Drain pending events before checking the exit conditions.
  EventQueue scmEvents =
      (EventQueue) cluster.getStorageContainerManager().getEventQueue();
  scmEvents.processAll(5000L);

  double configuredCutoff = conf.getDouble(
      HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
      HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
  assertTrue(scm.getCurrentContainerThreshold() >= configuredCutoff);
  assertTrue(safeModeLogs.getOutput().contains("SCM exiting safe mode."));
  assertFalse(scm.isInSafeMode());
}
use of org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException in project ozone by apache.
the class PipelineManagerImpl method closeContainersForPipeline.
/**
 * Applies the FINALIZE life cycle event to every container of the pipeline
 * that is currently OPEN, and fires a CLOSE_CONTAINER event for every
 * container of the pipeline regardless of its state.
 *
 * @param pipelineId - ID of the pipeline.
 * @throws IOException if a container lookup or state update fails; an
 *         InvalidStateTransitionException from the state update is wrapped
 *         in an IOException.
 */
protected void closeContainersForPipeline(final PipelineID pipelineId) throws IOException {
  final Set<ContainerID> pipelineContainers = stateManager.getContainers(pipelineId);
  final ContainerManager manager = scmContext.getScm().getContainerManager();

  for (ContainerID id : pipelineContainers) {
    final boolean isOpen =
        manager.getContainer(id).getState() == HddsProtos.LifeCycleState.OPEN;
    if (isOpen) {
      try {
        manager.updateContainerState(id, HddsProtos.LifeCycleEvent.FINALIZE);
      } catch (InvalidStateTransitionException invalidTransition) {
        // Surface the failure through the declared exception type.
        throw new IOException(invalidTransition);
      }
    }

    // The close event is published for open and non-open containers alike.
    eventPublisher.fireEvent(SCMEvents.CLOSE_CONTAINER, id);
    LOG.info("Container {} closed for pipeline={}", id, pipelineId);
  }
}
use of org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException in project ozone by apache.
the class NodeStateManager method updateNodeLayoutVersionState.
/**
 * Applies a life cycle event to a node's health state, but only when the
 * given condition holds for the node's last known layout version. On a
 * successful update a health state event is fired for the node.
 *
 * <p>An invalid state transition is logged at warn level and not propagated.
 *
 * @param node DatanodeInfo
 * @param condition condition to check against the node's last known layout
 *                  version
 * @param status current state of node
 * @param lifeCycleEvent NodeLifeCycleEvent to be applied if condition
 *                       matches
 *
 * @throws NodeNotFoundException if the node is not present
 */
private void updateNodeLayoutVersionState(DatanodeInfo node, Predicate<LayoutVersionProto> condition, NodeStatus status, NodeLifeCycleEvent lifeCycleEvent) throws NodeNotFoundException {
  try {
    if (!condition.test(node.getLastKnownLayoutVersion())) {
      return;
    }
    final NodeState nextHealth =
        nodeHealthSM.getNextState(status.getHealth(), lifeCycleEvent);
    final NodeStatus updatedStatus =
        nodeStateMap.updateNodeHealthState(node.getUuid(), nextHealth);
    fireHealthStateEvent(updatedStatus.getHealth(), node);
  } catch (InvalidStateTransitionException e) {
    LOG.warn(
        "Invalid state transition of node {}. Current state: {}, life cycle event: {}",
        node, status, lifeCycleEvent);
  }
}
use of org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException in project ozone by apache.
the class ContainerStateManagerImpl method updateContainerState.
/**
 * Applies a life cycle event to a container while holding the write lock.
 *
 * <p>Nothing happens when the container is unknown, or when the state the
 * state machine yields for the event does not have a higher number than the
 * current state. Otherwise the in-memory state is updated and the container
 * info is added to the transaction buffer; if that fails, the old info is
 * re-buffered and the in-memory state is reverted. Finally the action
 * registered for the event, if any, is executed with the old container info.
 */
@Override
public void updateContainerState(final HddsProtos.ContainerID containerID, final LifeCycleEvent event) throws IOException, InvalidStateTransitionException {
  // TODO: Remove the protobuf conversion after fixing ContainerStateMap.
  final ContainerID id = ContainerID.getFromProtobuf(containerID);
  lock.writeLock().lock();
  try {
    if (!containers.contains(id)) {
      return;
    }

    final ContainerInfo previousInfo = containers.getContainerInfo(id);
    final LifeCycleState fromState = previousInfo.getState();
    final LifeCycleState toState =
        stateMachine.getNextState(previousInfo.getState(), event);

    // Only transitions to a higher-numbered state are applied.
    if (toState.getNumber() <= fromState.getNumber()) {
      return;
    }

    ExecutionUtil.create(() -> {
      containers.updateState(id, fromState, toState);
      transactionBuffer.addToBuffer(containerStore, id, containers.getContainerInfo(id));
    }).onException(() -> {
      // Roll back: restore the buffered entry and the in-memory state.
      transactionBuffer.addToBuffer(containerStore, id, previousInfo);
      containers.updateState(id, toState, fromState);
    }).execute();

    // Run the per-event action, defaulting to a no-op.
    containerStateChangeActions.getOrDefault(event, info -> {
    }).execute(previousInfo);
  } finally {
    lock.writeLock().unlock();
  }
}
use of org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException in project ozone by apache.
the class ReconIncrementalContainerReportHandler method onMessage.
/**
 * Handles an incremental container report (ICR) sent by a datanode.
 *
 * <p>The reporting datanode is looked up by UUID; reports from datanodes the
 * node manager does not know are logged and dropped. For each replica in the
 * report the container is checked/added through the Recon container manager,
 * registered against the datanode, and the replica is processed. Failures on
 * individual replicas are logged and mark the overall run as unsuccessful,
 * which is reported via {@code notifyContainerReportProcessing}.
 *
 * @param report the incremental container report and its originating datanode
 * @param publisher event publisher handed to replica processing
 */
@Override
public void onMessage(final IncrementalContainerReportFromDatanode report, final EventPublisher publisher) {
  final DatanodeDetails dnFromReport = report.getDatanodeDetails();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Processing incremental container report from data node {}", dnFromReport);
  }
  DatanodeDetails dd = getNodeManager().getNodeByUuid(dnFromReport.getUuidString());
  if (dd == null) {
    LOG.warn("Received container report from unknown datanode {}", dnFromReport);
    return;
  }
  ReconContainerManager containerManager = (ReconContainerManager) getContainerManager();
  boolean success = true;
  for (ContainerReplicaProto replicaProto : report.getReport().getReportList()) {
    try {
      final ContainerID id = ContainerID.valueOf(replicaProto.getContainerID());
      try {
        containerManager.checkAndAddNewContainer(id, replicaProto.getState(), report.getDatanodeDetails());
      } catch (Exception ioEx) {
        LOG.error("Exception while checking and adding new container.", ioEx);
        // NOTE(review): this aborts the whole report and skips
        // notifyContainerReportProcessing below — confirm that is intended.
        return;
      }
      getNodeManager().addContainer(dd, id);
      processContainerReplica(dd, replicaProto, publisher);
    } catch (ContainerNotFoundException e) {
      success = false;
      LOG.warn("Container {} not found!", replicaProto.getContainerID());
    } catch (NodeNotFoundException ex) {
      success = false;
      LOG.error("Received ICR from unknown datanode {}.", report.getDatanodeDetails(), ex);
    } catch (IOException | InvalidStateTransitionException e) {
      success = false;
      // Pass the exception as the trailing argument so the cause and stack
      // trace are logged, as the NodeNotFoundException branch already does.
      LOG.error("Exception while processing ICR for container {}", replicaProto.getContainerID(), e);
    }
  }
  containerManager.notifyContainerReportProcessing(false, success);
}
Aggregations