Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.
From the class NodeStateManager, the method updateNodeLayoutVersionState.
/**
 * Updates the node state if the condition is satisfied.
*
* @param node DatanodeInfo
* @param condition condition to check
* @param status current state of node
* @param lifeCycleEvent NodeLifeCycleEvent to be applied if condition
* matches
*
* @throws NodeNotFoundException if the node is not present
*/
private void updateNodeLayoutVersionState(DatanodeInfo node,
    Predicate<LayoutVersionProto> condition, NodeStatus status,
    NodeLifeCycleEvent lifeCycleEvent) throws NodeNotFoundException {
  try {
    if (condition.test(node.getLastKnownLayoutVersion())) {
      NodeState newHealthState =
          nodeHealthSM.getNextState(status.getHealth(), lifeCycleEvent);
      NodeStatus newStatus =
          nodeStateMap.updateNodeHealthState(node.getUuid(), newHealthState);
      fireHealthStateEvent(newStatus.getHealth(), node);
    }
  } catch (InvalidStateTransitionException e) {
    LOG.warn("Invalid state transition of node {}. "
        + "Current state: {}, life cycle event: {}",
        node, status, lifeCycleEvent);
  }
}
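For orientation, a hypothetical caller of this helper might look like the sketch below. Only updateNodeLayoutVersionState and the LayoutVersionProto accessor come from the snippet above; the maybeMarkLayoutMatched name, the scmLayoutVersionManager field, and the LAYOUT_MATCH event are assumptions made for illustration.

// Illustrative caller: promote the node's health state once its metadata
// layout version has caught up with the SCM's. Names other than
// updateNodeLayoutVersionState are assumed, not taken from the Ozone source.
private void maybeMarkLayoutMatched(DatanodeInfo node, NodeStatus status)
    throws NodeNotFoundException {
  updateNodeLayoutVersionState(node,
      // condition evaluated against the datanode's last known layout version
      layout -> layout.getMetadataLayoutVersion()
          >= scmLayoutVersionManager.getMetadataLayoutVersion(),
      status,
      NodeLifeCycleEvent.LAYOUT_MATCH);
}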
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.
From the class ReplicationManager, the method move.
/**
 * Add a move action for a given container.
 *
 * @param cid Container to move
 * @param mp MoveDataNodePair which contains source and target datanodes
 * @return a future that completes with the MoveResult of the request
 * @throws ContainerNotFoundException if the container does not exist
 * @throws NodeNotFoundException if the source or target datanode is unknown
 */
public CompletableFuture<MoveResult> move(ContainerID cid,
    MoveDataNodePair mp)
    throws ContainerNotFoundException, NodeNotFoundException {
  CompletableFuture<MoveResult> ret = new CompletableFuture<>();
  if (!isRunning()) {
    ret.complete(MoveResult.FAIL_NOT_RUNNING);
    return ret;
  }
  if (!scmContext.isLeader()) {
    ret.complete(MoveResult.FAIL_NOT_LEADER);
    return ret;
  }
  /*
   * Make sure the following conditions are met:
   *  1. the given two datanodes are in a healthy state
   *  2. the given container exists on the given source datanode
   *  3. the given container does not exist on the given target datanode
   *  4. the given container is in CLOSED state
   *  5. the given container is not taking any inflight action
   *  6. the given two datanodes are in IN_SERVICE state
   *  7. {existing replicas + target_dn - source_dn} satisfies
   *     the placement policy
   *
   * Move is a combination of two steps: replication and deletion.
   * If the conditions above are all met, we take a conservative
   * strategy here: replication can always be executed, but the execution
   * of deletion always depends on the placement policy.
   */
  DatanodeDetails srcDn = mp.getSrc();
  DatanodeDetails targetDn = mp.getTgt();
  NodeStatus currentNodeStat = nodeManager.getNodeStatus(srcDn);
  NodeState healthStat = currentNodeStat.getHealth();
  NodeOperationalState operationalState =
      currentNodeStat.getOperationalState();
  if (healthStat != NodeState.HEALTHY) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
    return ret;
  }
  if (operationalState != NodeOperationalState.IN_SERVICE) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
    return ret;
  }
  currentNodeStat = nodeManager.getNodeStatus(targetDn);
  healthStat = currentNodeStat.getHealth();
  operationalState = currentNodeStat.getOperationalState();
  if (healthStat != NodeState.HEALTHY) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
    return ret;
  }
  if (operationalState != NodeOperationalState.IN_SERVICE) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
    return ret;
  }
  // we need to synchronize on ContainerInfo, since it is
  // shared by ICR/FCR handler and this.processContainer
  // TODO: use a Read lock after introducing a RW lock into ContainerInfo
  ContainerInfo cif = containerManager.getContainer(cid);
  synchronized (cif) {
    final Set<ContainerReplica> currentReplicas =
        containerManager.getContainerReplicas(cid);
    final Set<DatanodeDetails> replicas = currentReplicas.stream()
        .map(ContainerReplica::getDatanodeDetails)
        .collect(Collectors.toSet());
    if (replicas.contains(targetDn)) {
      ret.complete(MoveResult.REPLICATION_FAIL_EXIST_IN_TARGET);
      return ret;
    }
    if (!replicas.contains(srcDn)) {
      ret.complete(MoveResult.REPLICATION_FAIL_NOT_EXIST_IN_SOURCE);
      return ret;
    }
    if (inflightReplication.containsKey(cid)) {
      ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_REPLICATION);
      return ret;
    }
    if (inflightDeletion.containsKey(cid)) {
      ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_DELETION);
      return ret;
    }
    /*
     * Here there is no need to check whether cid is in inflightMove,
     * because these three maps are all synchronized on ContainerInfo:
     * if cid is in inflightMove, it is currently being replicated or
     * deleted, so it must be in inflightReplication or inflightDeletion.
     * Thus, if we cannot find cid in either of them, this cid must not
     * be in inflightMove.
     */
    LifeCycleState currentContainerStat = cif.getState();
    if (currentContainerStat != LifeCycleState.CLOSED) {
      ret.complete(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED);
      return ret;
    }
    // satisfies current placement policy
    if (!isPolicySatisfiedAfterMove(cif, srcDn, targetDn,
        currentReplicas.stream().collect(Collectors.toList()))) {
      ret.complete(MoveResult.PLACEMENT_POLICY_NOT_SATISFIED);
      return ret;
    }
    try {
      moveScheduler.startMove(cid.getProtobuf(),
          mp.getProtobufMessage(CURRENT_VERSION));
    } catch (IOException e) {
      LOG.warn("Exception while starting move {}", cid);
      ret.complete(MoveResult.FAIL_CAN_NOT_RECORD_TO_DB);
      return ret;
    }
    inflightMoveFuture.putIfAbsent(cid, ret);
    sendReplicateCommand(cif, targetDn, Collections.singletonList(srcDn));
  }
  LOG.info("Received a move request for container {}, from {} to {}",
      cid, srcDn.getUuid(), targetDn.getUuid());
  return ret;
}
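A minimal sketch of how a caller might schedule a move and react to the eventual MoveResult. The requestMove helper, its parameter names, and the MoveDataNodePair constructor shape are assumptions made for illustration; only move() and the result future come from the snippet above.

void requestMove(ReplicationManager replicationManager, ContainerID cid,
    DatanodeDetails source, DatanodeDetails target) {
  try {
    // The returned future completes only after the replication to the target
    // (and, if the placement policy allows it, the deletion on the source)
    // has been resolved.
    replicationManager.move(cid, new MoveDataNodePair(source, target))
        .thenAccept(result ->
            LOG.info("Move of container {} finished with result {}",
                cid, result));
  } catch (ContainerNotFoundException | NodeNotFoundException e) {
    LOG.warn("Could not schedule move of container {}", cid, e);
  }
}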
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.
From the class TestHDDSUpgrade, the method testDataNodesStateOnSCM.
/*
 * Helper function to test DataNode state on the SCM. Note that due to
 * timing constraints, sometimes the node state can transition to the next
 * state. This function expects the DataNode to be in NodeState "state" or
 * "alternateState". Some tests can enforce a unique NodeState check by
 * setting "alternateState = null".
 */
private void testDataNodesStateOnSCM(NodeState state,
    NodeState alternateState) {
  int countNodes = 0;
  for (DatanodeDetails dn : scm.getScmNodeManager().getAllNodes()) {
    try {
      NodeState dnState =
          scm.getScmNodeManager().getNodeStatus(dn).getHealth();
      Assert.assertTrue((dnState == state)
          || (alternateState != null && dnState == alternateState));
    } catch (NodeNotFoundException e) {
      e.printStackTrace();
      Assert.fail("Node not found");
    }
    ++countNodes;
  }
  Assert.assertEquals(NUM_DATA_NODES, countNodes);
}
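As a usage sketch (the surrounding upgrade steps are assumed, not shown here), a test could tolerate the transient pre-finalization state and then require a unique final one:

// While upgrade finalization is still in progress, a datanode may be reported
// as HEALTHY_READONLY or already HEALTHY, so both are accepted.
testDataNodesStateOnSCM(NodeState.HEALTHY_READONLY, NodeState.HEALTHY);
// Once finalization has completed, enforce a single expected state.
testDataNodesStateOnSCM(NodeState.HEALTHY, null);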
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.
From the class NodeEndpoint, the method getDatanodes.
/**
* Return the list of datanodes with detailed information about each datanode.
* @return {@link Response}
*/
@GET
public Response getDatanodes() {
  List<DatanodeMetadata> datanodes = new ArrayList<>();
  List<DatanodeDetails> datanodeDetails = nodeManager.getAllNodes();
  datanodeDetails.forEach(datanode -> {
    DatanodeStorageReport storageReport = getStorageReport(datanode);
    NodeState nodeState = null;
    try {
      nodeState = nodeManager.getNodeStatus(datanode).getHealth();
    } catch (NodeNotFoundException e) {
      LOG.warn("Cannot get nodeState for datanode {}", datanode, e);
    }
    final NodeOperationalState nodeOpState = datanode.getPersistedOpState();
    String hostname = datanode.getHostName();
    Set<PipelineID> pipelineIDs = nodeManager.getPipelines(datanode);
    List<DatanodePipeline> pipelines = new ArrayList<>();
    AtomicInteger leaderCount = new AtomicInteger();
    AtomicInteger openContainers = new AtomicInteger();
    DatanodeMetadata.Builder builder = DatanodeMetadata.newBuilder();
    pipelineIDs.forEach(pipelineID -> {
      try {
        Pipeline pipeline = pipelineManager.getPipeline(pipelineID);
        String leaderNode = pipeline.getLeaderNode().getHostName();
        DatanodePipeline datanodePipeline = new DatanodePipeline(
            pipelineID.getId(),
            pipeline.getReplicationConfig().getReplicationType().toString(),
            ReplicationConfig.getLegacyFactor(
                pipeline.getReplicationConfig()).getNumber(),
            leaderNode);
        pipelines.add(datanodePipeline);
        if (datanode.getUuid().equals(pipeline.getLeaderId())) {
          leaderCount.getAndIncrement();
        }
        int openContainerPerPipeline =
            reconContainerManager.getPipelineToOpenContainer()
                .getOrDefault(pipelineID, 0);
        openContainers.getAndAdd(openContainerPerPipeline);
      } catch (PipelineNotFoundException ex) {
        LOG.warn("Cannot get pipeline {} for datanode {}, pipeline not found",
            pipelineID.getId(), hostname, ex);
      } catch (IOException ioEx) {
        LOG.warn("Cannot get leader node of pipeline with id {}.",
            pipelineID.getId(), ioEx);
      }
    });
    try {
      Set<ContainerID> allContainers = nodeManager.getContainers(datanode);
      builder.withContainers(allContainers.size());
      builder.withOpenContainers(openContainers.get());
    } catch (NodeNotFoundException ex) {
      LOG.warn("Cannot get containers, datanode {} not found.",
          datanode.getUuid(), ex);
    }
    DatanodeInfo dnInfo = (DatanodeInfo) datanode;
    datanodes.add(builder.withHostname(nodeManager.getHostName(datanode))
        .withDatanodeStorageReport(storageReport)
        .withLastHeartbeat(nodeManager.getLastHeartbeat(datanode))
        .withState(nodeState)
        .withOperationalState(nodeOpState)
        .withPipelines(pipelines)
        .withLeaderCount(leaderCount.get())
        .withUUid(datanode.getUuidString())
        .withVersion(nodeManager.getVersion(datanode))
        .withSetupTime(nodeManager.getSetupTime(datanode))
        .withRevision(nodeManager.getRevision(datanode))
        .withBuildDate(nodeManager.getBuildDate(datanode))
        .withLayoutVersion(
            dnInfo.getLastKnownLayoutVersion().getMetadataLayoutVersion())
        .build());
  });
  DatanodesResponse datanodesResponse =
      new DatanodesResponse(datanodes.size(), datanodes);
  return Response.ok(datanodesResponse).build();
}
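A rough sketch of how the endpoint could be exercised in a test. The getter names on DatanodesResponse and DatanodeMetadata, and the nodeEndpoint field, are assumptions made for illustration, not confirmed against the Recon source.

Response response = nodeEndpoint.getDatanodes();
DatanodesResponse body = (DatanodesResponse) response.getEntity();
// Every known datanode is listed, even if its health lookup failed;
// in that case the state field is simply null.
Assert.assertEquals(nodeManager.getAllNodes().size(), body.getTotalCount());
body.getDatanodes().forEach(dn ->
    LOG.info("{} -> health {}, operational state {}",
        dn.getHostname(), dn.getState(), dn.getOperationalState()));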
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.
From the class TestNodeStateMap, the method testGetNodeMethodsReturnCorrectCountsAndStates.
@Test
public void testGetNodeMethodsReturnCorrectCountsAndStates()
    throws NodeAlreadyExistsException {
  // Add one node for every possible (operational state, health) combination
  int nodeCount = 0;
  for (NodeOperationalState op : NodeOperationalState.values()) {
    for (NodeState health : NodeState.values()) {
      addRandomNodeWithState(op, health);
      nodeCount++;
    }
  }
  NodeStatus requestedState = NodeStatus.inServiceStale();
  List<UUID> nodes = map.getNodes(requestedState);
  assertEquals(1, nodes.size());
  assertEquals(1, map.getNodeCount(requestedState));
  assertEquals(nodeCount, map.getTotalNodeCount());
  assertEquals(nodeCount, map.getAllNodes().size());
  assertEquals(nodeCount, map.getAllDatanodeInfos().size());
  // Checks for the getNodeCount(opState, health) method; null acts as a
  // wildcard. Since one node was added per combination, filtering on a single
  // health state matches one node per operational state, and vice versa.
  assertEquals(nodeCount, map.getNodeCount(null, null));
  assertEquals(1, map.getNodeCount(NodeOperationalState.DECOMMISSIONING,
      NodeState.STALE));
  assertEquals(5, map.getNodeCount(null, NodeState.HEALTHY));
  assertEquals(4, map.getNodeCount(NodeOperationalState.DECOMMISSIONING,
      null));
}
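The literal counts 5 and 4 in the last assertions follow from the enum sizes, since exactly one node was added per combination. A purely illustrative restatement of the same checks written against the enums themselves:

int combinations =
    NodeOperationalState.values().length * NodeState.values().length;
assertEquals(combinations, map.getTotalNodeCount());
// Filtering on health only matches one node per operational state ...
assertEquals(NodeOperationalState.values().length,
    map.getNodeCount(null, NodeState.HEALTHY));
// ... and filtering on operational state only matches one node per health state.
assertEquals(NodeState.values().length,
    map.getNodeCount(NodeOperationalState.DECOMMISSIONING, null));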