Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.
The class ReplicationManager, method move.
/**
 * Add a move action for a given container.
 *
 * @param cid Container to move
 * @param mp MoveDataNodePair containing the source and target datanodes
 */
public CompletableFuture<MoveResult> move(ContainerID cid,
    MoveDataNodePair mp)
    throws ContainerNotFoundException, NodeNotFoundException {
  CompletableFuture<MoveResult> ret = new CompletableFuture<>();
  if (!isRunning()) {
    ret.complete(MoveResult.FAIL_NOT_RUNNING);
    return ret;
  }
  if (!scmContext.isLeader()) {
    ret.complete(MoveResult.FAIL_NOT_LEADER);
    return ret;
  }

  /*
   * Make sure the following conditions are met:
   * 1. The two given datanodes are in a healthy state.
   * 2. The given container exists on the given source datanode.
   * 3. The given container does not exist on the given target datanode.
   * 4. The given container is in CLOSED state.
   * 5. The given container is not taking any inflight action.
   * 6. The two given datanodes are in IN_SERVICE state.
   * 7. {existing replicas + target_dn - source_dn} satisfies
   *    the placement policy.
   *
   * A move is a combination of two steps: replication and deletion.
   * If the conditions above are all met, we take a conservative
   * strategy here: replication can always be executed, but the
   * execution of deletion always depends on the placement policy.
   */
  DatanodeDetails srcDn = mp.getSrc();
  DatanodeDetails targetDn = mp.getTgt();
  NodeStatus currentNodeStat = nodeManager.getNodeStatus(srcDn);
  NodeState healthStat = currentNodeStat.getHealth();
  NodeOperationalState operationalState =
      currentNodeStat.getOperationalState();
  if (healthStat != NodeState.HEALTHY) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
    return ret;
  }
  if (operationalState != NodeOperationalState.IN_SERVICE) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
    return ret;
  }

  currentNodeStat = nodeManager.getNodeStatus(targetDn);
  healthStat = currentNodeStat.getHealth();
  operationalState = currentNodeStat.getOperationalState();
  if (healthStat != NodeState.HEALTHY) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
    return ret;
  }
  if (operationalState != NodeOperationalState.IN_SERVICE) {
    ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
    return ret;
  }

  // We need to synchronize on ContainerInfo, since it is shared by
  // the ICR/FCR handlers and this.processContainer.
  // TODO: use a read lock after introducing a RW lock into ContainerInfo
  ContainerInfo cif = containerManager.getContainer(cid);
  synchronized (cif) {
    final Set<ContainerReplica> currentReplicas =
        containerManager.getContainerReplicas(cid);
    final Set<DatanodeDetails> replicas = currentReplicas.stream()
        .map(ContainerReplica::getDatanodeDetails)
        .collect(Collectors.toSet());
    if (replicas.contains(targetDn)) {
      ret.complete(MoveResult.REPLICATION_FAIL_EXIST_IN_TARGET);
      return ret;
    }
    if (!replicas.contains(srcDn)) {
      ret.complete(MoveResult.REPLICATION_FAIL_NOT_EXIST_IN_SOURCE);
      return ret;
    }
    if (inflightReplication.containsKey(cid)) {
      ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_REPLICATION);
      return ret;
    }
    if (inflightDeletion.containsKey(cid)) {
      ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_DELETION);
      return ret;
    }

    /*
     * There is no need to check whether cid is in inflightMove, because
     * these three maps are all synchronized on ContainerInfo: if cid
     * is in inflightMove, it must currently be being replicated or
     * deleted, so it must be in inflightReplication or inflightDeletion.
     * Thus, if we cannot find cid in either of those, it cannot be in
     * inflightMove.
     */
    LifeCycleState currentContainerStat = cif.getState();
    if (currentContainerStat != LifeCycleState.CLOSED) {
      ret.complete(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED);
      return ret;
    }

    // Check that the replica set after the move still satisfies the
    // current placement policy.
    if (!isPolicySatisfiedAfterMove(cif, srcDn, targetDn,
        currentReplicas.stream().collect(Collectors.toList()))) {
      ret.complete(MoveResult.PLACEMENT_POLICY_NOT_SATISFIED);
      return ret;
    }

    try {
      moveScheduler.startMove(cid.getProtobuf(),
          mp.getProtobufMessage(CURRENT_VERSION));
    } catch (IOException e) {
      LOG.warn("Exception while starting move for container {}", cid, e);
      ret.complete(MoveResult.FAIL_CAN_NOT_RECORD_TO_DB);
      return ret;
    }
    inflightMoveFuture.putIfAbsent(cid, ret);
    sendReplicateCommand(cif, targetDn, Collections.singletonList(srcDn));
  }
  LOG.info("Received a move request for container {}, from {} to {}",
      cid, srcDn.getUuid(), targetDn.getUuid());
  return ret;
}
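For orientation, here is a minimal, hypothetical caller sketch (not taken from the Ozone sources) showing how the returned future might be consumed. The replicationManager, src, and tgt references, the container ID value, and the MoveResult.COMPLETED constant are assumptions about the surrounding code:

// Hypothetical usage sketch: schedule a move and react to the eventual
// MoveResult. Assumes replicationManager, src and tgt are in scope.
ContainerID cid = ContainerID.valueOf(1L);
MoveDataNodePair pair = new MoveDataNodePair(src, tgt);
try {
  replicationManager.move(cid, pair)
      .whenComplete((result, throwable) -> {
        if (throwable != null) {
          LOG.error("Move of container {} failed", cid, throwable);
        } else if (result == MoveResult.COMPLETED) {
          LOG.info("Container {} moved from {} to {}", cid, src, tgt);
        } else {
          LOG.warn("Move of container {} did not complete: {}", cid, result);
        }
      });
} catch (ContainerNotFoundException | NodeNotFoundException e) {
  LOG.error("Cannot schedule move of container {}", cid, e);
}

Note that the future completes with a MoveResult even on precondition failures (the method returns an already-completed future), so callers only see a throwable for unexpected errors.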
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.
The class NodeEndpoint, method getDatanodes.
/**
 * Return the list of datanodes with detailed information about each datanode.
 * @return {@link Response}
 */
@GET
public Response getDatanodes() {
  List<DatanodeMetadata> datanodes = new ArrayList<>();
  List<DatanodeDetails> datanodeDetails = nodeManager.getAllNodes();
  datanodeDetails.forEach(datanode -> {
    DatanodeStorageReport storageReport = getStorageReport(datanode);
    NodeState nodeState = null;
    try {
      nodeState = nodeManager.getNodeStatus(datanode).getHealth();
    } catch (NodeNotFoundException e) {
      LOG.warn("Cannot get nodeState for datanode {}", datanode, e);
    }
    final NodeOperationalState nodeOpState = datanode.getPersistedOpState();
    String hostname = datanode.getHostName();
    Set<PipelineID> pipelineIDs = nodeManager.getPipelines(datanode);
    List<DatanodePipeline> pipelines = new ArrayList<>();
    AtomicInteger leaderCount = new AtomicInteger();
    AtomicInteger openContainers = new AtomicInteger();
    DatanodeMetadata.Builder builder = DatanodeMetadata.newBuilder();
    pipelineIDs.forEach(pipelineID -> {
      try {
        Pipeline pipeline = pipelineManager.getPipeline(pipelineID);
        String leaderNode = pipeline.getLeaderNode().getHostName();
        DatanodePipeline datanodePipeline = new DatanodePipeline(
            pipelineID.getId(),
            pipeline.getReplicationConfig().getReplicationType().toString(),
            ReplicationConfig.getLegacyFactor(
                pipeline.getReplicationConfig()).getNumber(),
            leaderNode);
        pipelines.add(datanodePipeline);
        if (datanode.getUuid().equals(pipeline.getLeaderId())) {
          leaderCount.getAndIncrement();
        }
        int openContainerPerPipeline =
            reconContainerManager.getPipelineToOpenContainer()
                .getOrDefault(pipelineID, 0);
        openContainers.getAndAdd(openContainerPerPipeline);
      } catch (PipelineNotFoundException ex) {
        LOG.warn("Cannot get pipeline {} for datanode {}, pipeline not found",
            pipelineID.getId(), hostname, ex);
      } catch (IOException ioEx) {
        LOG.warn("Cannot get leader node of pipeline with id {}.",
            pipelineID.getId(), ioEx);
      }
    });
    try {
      Set<ContainerID> allContainers = nodeManager.getContainers(datanode);
      builder.withContainers(allContainers.size());
      builder.withOpenContainers(openContainers.get());
    } catch (NodeNotFoundException ex) {
      LOG.warn("Cannot get containers, datanode {} not found.",
          datanode.getUuid(), ex);
    }
    DatanodeInfo dnInfo = (DatanodeInfo) datanode;
    datanodes.add(builder.withHostname(nodeManager.getHostName(datanode))
        .withDatanodeStorageReport(storageReport)
        .withLastHeartbeat(nodeManager.getLastHeartbeat(datanode))
        .withState(nodeState)
        .withOperationalState(nodeOpState)
        .withPipelines(pipelines)
        .withLeaderCount(leaderCount.get())
        .withUUid(datanode.getUuidString())
        .withVersion(nodeManager.getVersion(datanode))
        .withSetupTime(nodeManager.getSetupTime(datanode))
        .withRevision(nodeManager.getRevision(datanode))
        .withBuildDate(nodeManager.getBuildDate(datanode))
        .withLayoutVersion(
            dnInfo.getLastKnownLayoutVersion().getMetadataLayoutVersion())
        .build());
  });
  DatanodesResponse datanodesResponse =
      new DatanodesResponse(datanodes.size(), datanodes);
  return Response.ok(datanodesResponse).build();
}
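For illustration only, a client-side sketch of querying this JAX-RS endpoint over HTTP with java.net.http (Java 11+). The Recon host, the port 9888, and the /api/v1/datanodes mount point are assumptions about how the resource is deployed, not taken from this snippet; client.send also throws IOException and InterruptedException, which a real caller must handle:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

// Hypothetical client sketch: fetch the datanode list and print the raw JSON.
HttpClient client = HttpClient.newHttpClient();
HttpRequest request = HttpRequest.newBuilder()
    .uri(URI.create("http://localhost:9888/api/v1/datanodes")) // assumed URL
    .GET()
    .build();
HttpResponse<String> response =
    client.send(request, HttpResponse.BodyHandlers.ofString());
System.out.println(response.body()); // JSON rendering of DatanodesResponse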
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.
The class TestNodeStateMap, method testGetNodeMethodsReturnCorrectCountsAndStates.
@Test
public void testGetNodeMethodsReturnCorrectCountsAndStates()
    throws NodeAlreadyExistsException {
  // Add one node for each possible (operational state, health) combination
  int nodeCount = 0;
  for (NodeOperationalState op : NodeOperationalState.values()) {
    for (NodeState health : NodeState.values()) {
      addRandomNodeWithState(op, health);
      nodeCount++;
    }
  }
  NodeStatus requestedState = NodeStatus.inServiceStale();
  List<UUID> nodes = map.getNodes(requestedState);
  assertEquals(1, nodes.size());
  assertEquals(1, map.getNodeCount(requestedState));
  assertEquals(nodeCount, map.getTotalNodeCount());
  assertEquals(nodeCount, map.getAllNodes().size());
  assertEquals(nodeCount, map.getAllDatanodeInfos().size());

  // Checks for the getNodeCount(opState, health) method
  assertEquals(nodeCount, map.getNodeCount(null, null));
  assertEquals(1, map.getNodeCount(NodeOperationalState.DECOMMISSIONING,
      NodeState.STALE));
  assertEquals(5, map.getNodeCount(null, NodeState.HEALTHY));
  assertEquals(4,
      map.getNodeCount(NodeOperationalState.DECOMMISSIONING, null));
}
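The expected numbers in the assertions follow directly from the grid of nodes the loop creates, one per (operational state, health) pair. A short sketch of the arithmetic, assuming five operational states and four health states in this version of HddsProtos:

// One node per (opState, health) pair:
int opStates = NodeOperationalState.values().length;   // 5 (assumed)
int healthStates = NodeState.values().length;          // 4 (assumed)
int total = opStates * healthStates;                   // 20 == nodeCount
// (IN_SERVICE, STALE) matches exactly one node         -> count 1
// one HEALTHY node exists per operational state        -> count 5
// one DECOMMISSIONING node exists per health state     -> count 4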
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.
The class MockNodeManager, method getNodeCount.
@Override
public Map<String, Map<String, Integer>> getNodeCount() {
  Map<String, Map<String, Integer>> nodes = new HashMap<>();
  for (NodeOperationalState opState : NodeOperationalState.values()) {
    Map<String, Integer> states = new HashMap<>();
    for (HddsProtos.NodeState health : HddsProtos.NodeState.values()) {
      states.put(health.name(), 0);
    }
    nodes.put(opState.name(), states);
  }
  // This mock assumes all nodes are IN_SERVICE.
  // This will be fixed as part of HDDS-2673.
  for (HddsProtos.NodeState state : HddsProtos.NodeState.values()) {
    nodes.get(NodeOperationalState.IN_SERVICE.name())
        .compute(state.name(), (k, v) -> v + 1);
  }
  return nodes;
}
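A brief, hypothetical snippet showing how a caller might read the nested map this mock returns (the nodeManager handle is an assumption):

// Outer key: operational state name; inner key: health state name.
Map<String, Map<String, Integer>> counts = nodeManager.getNodeCount();
int healthyInService = counts
    .get(NodeOperationalState.IN_SERVICE.name())
    .get(HddsProtos.NodeState.HEALTHY.name());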
Use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.
The class TestEndpoints, method testGetDatanodes.
@Test
public void testGetDatanodes() throws Exception {
  Response response = nodeEndpoint.getDatanodes();
  DatanodesResponse datanodesResponse =
      (DatanodesResponse) response.getEntity();
  Assert.assertEquals(2, datanodesResponse.getTotalCount());
  Assert.assertEquals(2, datanodesResponse.getDatanodes().size());
  datanodesResponse.getDatanodes().forEach(datanodeMetadata -> {
    try {
      testDatanodeResponse(datanodeMetadata);
    } catch (IOException e) {
      Assert.fail(e.getMessage());
    }
  });

  waitAndCheckConditionAfterHeartbeat(() -> {
    Response response1 = nodeEndpoint.getDatanodes();
    DatanodesResponse datanodesResponse1 =
        (DatanodesResponse) response1.getEntity();
    DatanodeMetadata datanodeMetadata1 = datanodesResponse1.getDatanodes()
        .stream()
        .filter(metadata -> metadata.getHostname().equals("host1.datanode"))
        .findFirst()
        .orElse(null);
    return (datanodeMetadata1 != null
        && datanodeMetadata1.getContainers() == 1
        && datanodeMetadata1.getOpenContainers() == 1
        && reconScm.getPipelineManager()
            .getContainersInPipeline(pipeline.getId()).size() == 1);
  });

  // Change the node's operational state through the NodeManager
  final NodeManager nodeManager = reconScm.getScmNodeManager();
  final DatanodeDetails dnDetailsInternal =
      nodeManager.getNodeByUuid(datanodeDetails.getUuidString());
  // Back up the existing state and sanity-check it
  final NodeStatus nStatus = nodeManager.getNodeStatus(dnDetailsInternal);
  final NodeOperationalState backupOpState =
      dnDetailsInternal.getPersistedOpState();
  final long backupOpStateExpiry =
      dnDetailsInternal.getPersistedOpStateExpiryEpochSec();
  assertEquals(backupOpState, nStatus.getOperationalState());
  assertEquals(backupOpStateExpiry, nStatus.getOpStateExpiryEpochSeconds());

  dnDetailsInternal.setPersistedOpState(
      NodeOperationalState.DECOMMISSIONING);
  dnDetailsInternal.setPersistedOpStateExpiryEpochSec(666L);
  nodeManager.setNodeOperationalState(dnDetailsInternal,
      NodeOperationalState.DECOMMISSIONING, 666L);

  // Check that the endpoint response reflects the change
  response = nodeEndpoint.getDatanodes();
  datanodesResponse = (DatanodesResponse) response.getEntity();
  // The order of datanodes in the response is random
  AtomicInteger count = new AtomicInteger();
  datanodesResponse.getDatanodes().forEach(metadata -> {
    if (metadata.getUuid().equals(dnDetailsInternal.getUuidString())) {
      count.incrementAndGet();
      assertEquals(NodeOperationalState.DECOMMISSIONING,
          metadata.getOperationalState());
    }
  });
  assertEquals(1, count.get());

  // Restore the original state
  dnDetailsInternal.setPersistedOpState(backupOpState);
  dnDetailsInternal.setPersistedOpStateExpiryEpochSec(backupOpStateExpiry);
  nodeManager.setNodeOperationalState(dnDetailsInternal,
      backupOpState, backupOpStateExpiry);
}