Search in sources :

Example 1 with NodeOperationalState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.

the class ReplicationManager method move.

/**
 * Add a move action for a given container.
 *
 * <p>The returned future is completed immediately with a failure result when
 * this manager is not running, is not the leader, or any of the
 * preconditions listed in the method body is not met. Otherwise the move is
 * recorded through {@code moveScheduler}, the future is registered in
 * {@code inflightMoveFuture}, a replicate command is sent for the target
 * datanode, and the future is returned without being completed here.
 *
 * @param cid Container to move
 * @param mp MoveDataNodePair which contains source and target datanodes
 * @return a future carrying the {@link MoveResult} of the requested move
 * @throws ContainerNotFoundException declared by this method; presumably
 *         raised by the container manager lookups when {@code cid} is unknown
 * @throws NodeNotFoundException declared by this method; presumably raised
 *         by the node manager when a datanode of {@code mp} is unknown
 */
public CompletableFuture<MoveResult> move(ContainerID cid, MoveDataNodePair mp) throws ContainerNotFoundException, NodeNotFoundException {
    CompletableFuture<MoveResult> ret = new CompletableFuture<>();
    if (!isRunning()) {
        ret.complete(MoveResult.FAIL_NOT_RUNNING);
        return ret;
    }
    if (!scmContext.isLeader()) {
        ret.complete(MoveResult.FAIL_NOT_LEADER);
        return ret;
    }
    /*
     * make sure the following conditions are met:
     *  1 the given two datanodes are in healthy state
     *  2 the given container exists on the given source datanode
     *  3 the given container does not exist on the given target datanode
     *  4 the given container is in closed state
     *  5 the given container is not taking any inflight action
     *  6 the given two datanodes are in IN_SERVICE state
     *  7 {Existing replicas + Target_Dn - Source_Dn} satisfies
     *     the placement policy
     *
     * move is a combination of two steps : replication and deletion.
     * if the conditions above are all met, then we take a conservative
     * strategy here : replication can always be executed, but the execution
     * of deletion always depends on placement policy
     */
    DatanodeDetails srcDn = mp.getSrc();
    DatanodeDetails targetDn = mp.getTgt();
    // Source datanode must be HEALTHY and IN_SERVICE.
    NodeStatus currentNodeStat = nodeManager.getNodeStatus(srcDn);
    NodeState healthStat = currentNodeStat.getHealth();
    NodeOperationalState operationalState = currentNodeStat.getOperationalState();
    if (healthStat != NodeState.HEALTHY) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
        return ret;
    }
    if (operationalState != NodeOperationalState.IN_SERVICE) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
        return ret;
    }
    // Target datanode must be HEALTHY and IN_SERVICE as well.
    currentNodeStat = nodeManager.getNodeStatus(targetDn);
    healthStat = currentNodeStat.getHealth();
    operationalState = currentNodeStat.getOperationalState();
    if (healthStat != NodeState.HEALTHY) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
        return ret;
    }
    if (operationalState != NodeOperationalState.IN_SERVICE) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
        return ret;
    }
    // we need to synchronize on ContainerInfo, since it is
    // shared by ICR/FCR handler and this.processContainer
    // TODO: use a Read lock after introducing a RW lock into ContainerInfo
    ContainerInfo cif = containerManager.getContainer(cid);
    synchronized (cif) {
        final Set<ContainerReplica> currentReplicas = containerManager.getContainerReplicas(cid);
        final Set<DatanodeDetails> replicas = currentReplicas.stream().map(ContainerReplica::getDatanodeDetails).collect(Collectors.toSet());
        if (replicas.contains(targetDn)) {
            ret.complete(MoveResult.REPLICATION_FAIL_EXIST_IN_TARGET);
            return ret;
        }
        if (!replicas.contains(srcDn)) {
            ret.complete(MoveResult.REPLICATION_FAIL_NOT_EXIST_IN_SOURCE);
            return ret;
        }
        if (inflightReplication.containsKey(cid)) {
            ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_REPLICATION);
            return ret;
        }
        if (inflightDeletion.containsKey(cid)) {
            ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_DELETION);
            return ret;
        }
        /*
         * here, no need to see whether cid is in inflightMove, because
         * these three maps are all synchronized on ContainerInfo. if cid
         * is in inflightMove, it must now be being replicated or deleted,
         * so it must be in inflightReplication or in inflightDeletion.
         * thus, if we can not find cid in either of them, this cid must
         * not be in inflightMove.
         */
        LifeCycleState currentContainerStat = cif.getState();
        if (currentContainerStat != LifeCycleState.CLOSED) {
            ret.complete(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED);
            return ret;
        }
        // {Existing replicas + Target_Dn - Source_Dn} must satisfy the
        // current placement policy
        if (!isPolicySatisfiedAfterMove(cif, srcDn, targetDn, currentReplicas.stream().collect(Collectors.toList()))) {
            ret.complete(MoveResult.PLACEMENT_POLICY_NOT_SATISFIED);
            return ret;
        }
        try {
            moveScheduler.startMove(cid.getProtobuf(), mp.getProtobufMessage(CURRENT_VERSION));
        } catch (IOException e) {
            // Pass the exception as the last argument so the cause and its
            // stack trace are not lost from the log.
            LOG.warn("Exception while starting move {}", cid, e);
            ret.complete(MoveResult.FAIL_CAN_NOT_RECORD_TO_DB);
            return ret;
        }
        // The future is left uncompleted here; it is only registered so that
        // it can be looked up by container id later.
        inflightMoveFuture.putIfAbsent(cid, ret);
        sendReplicateCommand(cif, targetDn, Collections.singletonList(srcDn));
    }
    LOG.info("receive a move request about container {} , from {} to {}", cid, srcDn.getUuid(), targetDn.getUuid());
    return ret;
}
Also used : NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) IOException(java.io.IOException) CompletableFuture(java.util.concurrent.CompletableFuture) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus)

Example 2 with NodeOperationalState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.

the class NodeEndpoint method getDatanodes.

/**
 * Return the list of datanodes with detailed information about each datanode.
 *
 * <p>For every node returned by the node manager this collects its storage
 * report, health, persisted operational state, pipelines, leader count and
 * container counts. Lookup failures for an individual node or pipeline are
 * logged and the affected value is left at its default.
 *
 * @return {@link Response}
 */
@GET
public Response getDatanodes() {
    final List<DatanodeMetadata> result = new ArrayList<>();
    final List<DatanodeDetails> allNodes = nodeManager.getAllNodes();
    for (DatanodeDetails datanode : allNodes) {
        final DatanodeStorageReport storageReport = getStorageReport(datanode);

        // Health stays null when the node manager does not know the node.
        NodeState health = null;
        try {
            health = nodeManager.getNodeStatus(datanode).getHealth();
        } catch (NodeNotFoundException e) {
            LOG.warn("Cannot get nodeState for datanode {}", datanode, e);
        }

        final NodeOperationalState opState = datanode.getPersistedOpState();
        final String hostname = datanode.getHostName();
        final Set<PipelineID> pipelineIDs = nodeManager.getPipelines(datanode);
        final List<DatanodePipeline> pipelines = new ArrayList<>();
        int leaderCount = 0;
        int openContainers = 0;
        final DatanodeMetadata.Builder builder = DatanodeMetadata.newBuilder();

        for (PipelineID pipelineID : pipelineIDs) {
            try {
                final Pipeline pipeline = pipelineManager.getPipeline(pipelineID);
                final String leaderNode = pipeline.getLeaderNode().getHostName();
                pipelines.add(new DatanodePipeline(
                        pipelineID.getId(),
                        pipeline.getReplicationConfig().getReplicationType().toString(),
                        ReplicationConfig.getLegacyFactor(pipeline.getReplicationConfig()).getNumber(),
                        leaderNode));
                // Count the pipelines for which this datanode is the leader.
                if (datanode.getUuid().equals(pipeline.getLeaderId())) {
                    leaderCount++;
                }
                final int openInPipeline = reconContainerManager.getPipelineToOpenContainer().getOrDefault(pipelineID, 0);
                openContainers += openInPipeline;
            } catch (PipelineNotFoundException ex) {
                LOG.warn("Cannot get pipeline {} for datanode {}, pipeline not found", pipelineID.getId(), hostname, ex);
            } catch (IOException ioEx) {
                LOG.warn("Cannot get leader node of pipeline with id {}.", pipelineID.getId(), ioEx);
            }
        }

        // Container counts are only set when the node's container set is available.
        try {
            final Set<ContainerID> allContainers = nodeManager.getContainers(datanode);
            builder.withContainers(allContainers.size());
            builder.withOpenContainers(openContainers);
        } catch (NodeNotFoundException ex) {
            LOG.warn("Cannot get containers, datanode {} not found.", datanode.getUuid(), ex);
        }

        final DatanodeInfo dnInfo = (DatanodeInfo) datanode;
        final DatanodeMetadata metadata = builder
                .withHostname(nodeManager.getHostName(datanode))
                .withDatanodeStorageReport(storageReport)
                .withLastHeartbeat(nodeManager.getLastHeartbeat(datanode))
                .withState(health)
                .withOperationalState(opState)
                .withPipelines(pipelines)
                .withLeaderCount(leaderCount)
                .withUUid(datanode.getUuidString())
                .withVersion(nodeManager.getVersion(datanode))
                .withSetupTime(nodeManager.getSetupTime(datanode))
                .withRevision(nodeManager.getRevision(datanode))
                .withBuildDate(nodeManager.getBuildDate(datanode))
                .withLayoutVersion(dnInfo.getLastKnownLayoutVersion().getMetadataLayoutVersion())
                .build();
        result.add(metadata);
    }
    return Response.ok(new DatanodesResponse(result.size(), result)).build();
}
Also used : DatanodeInfo(org.apache.hadoop.hdds.scm.node.DatanodeInfo) NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) DatanodesResponse(org.apache.hadoop.ozone.recon.api.types.DatanodesResponse) DatanodeStorageReport(org.apache.hadoop.ozone.recon.api.types.DatanodeStorageReport) DatanodeMetadata(org.apache.hadoop.ozone.recon.api.types.DatanodeMetadata) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DatanodePipeline(org.apache.hadoop.ozone.recon.api.types.DatanodePipeline) DatanodePipeline(org.apache.hadoop.ozone.recon.api.types.DatanodePipeline) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) PipelineNotFoundException(org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException) GET(javax.ws.rs.GET)

Example 3 with NodeOperationalState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.

the class TestNodeStateMap method testGetNodeMethodsReturnCorrectCountsAndStates.

/**
 * Adds one node for every (operational state, health) combination and checks
 * that the lookup and counting methods of the map report the expected sizes.
 */
@Test
public void testGetNodeMethodsReturnCorrectCountsAndStates() throws NodeAlreadyExistsException {
    // Populate the map with exactly one node per (operational state, health) pair.
    final NodeOperationalState[] opStates = NodeOperationalState.values();
    final NodeState[] healthStates = NodeState.values();
    for (NodeOperationalState opState : opStates) {
        for (NodeState healthState : healthStates) {
            addRandomNodeWithState(opState, healthState);
        }
    }
    final int expectedTotal = opStates.length * healthStates.length;

    // A fully specified status matches exactly one of the added nodes.
    final NodeStatus inServiceStale = NodeStatus.inServiceStale();
    final List<UUID> matching = map.getNodes(inServiceStale);
    assertEquals(1, matching.size());
    assertEquals(1, map.getNodeCount(inServiceStale));

    // Every accessor over the whole map sees all added nodes.
    assertEquals(expectedTotal, map.getTotalNodeCount());
    assertEquals(expectedTotal, map.getAllNodes().size());
    assertEquals(expectedTotal, map.getAllDatanodeInfos().size());

    // getNodeCount(opstate, health): the assertions below pass null for a
    // dimension that should not be filtered on.
    assertEquals(expectedTotal, map.getNodeCount(null, null));
    assertEquals(1, map.getNodeCount(NodeOperationalState.DECOMMISSIONING, NodeState.STALE));
    // One HEALTHY node was added per operational state.
    assertEquals(5, map.getNodeCount(null, NodeState.HEALTHY));
    // One DECOMMISSIONING node was added per health state.
    assertEquals(4, map.getNodeCount(NodeOperationalState.DECOMMISSIONING, null));
}
Also used : NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) UUID(java.util.UUID) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)

Example 4 with NodeOperationalState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.

the class MockNodeManager method getNodeCount.

/**
 * Builds a nested count table keyed by operational state name and then by
 * health state name. Every cell starts at 0; afterwards each health state in
 * the IN_SERVICE row is incremented by one.
 *
 * @return map of operational state name to (health state name to count)
 */
@Override
public Map<String, Map<String, Integer>> getNodeCount() {
    final HddsProtos.NodeState[] healthStates = HddsProtos.NodeState.values();
    final Map<String, Map<String, Integer>> countsByOpState = new HashMap<>();

    // Seed a zeroed row for every operational state.
    for (NodeOperationalState opState : NodeOperationalState.values()) {
        final Map<String, Integer> countsByHealth = new HashMap<>();
        for (HddsProtos.NodeState health : healthStates) {
            countsByHealth.put(health.name(), 0);
        }
        countsByOpState.put(opState.name(), countsByHealth);
    }

    // Only the IN_SERVICE row is populated: nodes are treated as if they
    // are IN_SERVICE. This will be fixed as part of HDDS-2673
    final Map<String, Integer> inService = countsByOpState.get(NodeOperationalState.IN_SERVICE.name());
    for (HddsProtos.NodeState health : healthStates) {
        inService.merge(health.name(), 1, Integer::sum);
    }
    return countsByOpState;
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) HddsProtos(org.apache.hadoop.hdds.protocol.proto.HddsProtos) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Node2ContainerMap(org.apache.hadoop.hdds.scm.node.states.Node2ContainerMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Node2PipelineMap(org.apache.hadoop.hdds.scm.node.states.Node2PipelineMap)

Example 5 with NodeOperationalState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState in project ozone by apache.

the class TestEndpoints method testGetDatanodes.

/**
 * Verifies the datanodes endpoint: the initial response content, the
 * container counts observed after a heartbeat, and that a change of a node's
 * operational state made through the node manager shows up in the response.
 *
 * <p>The node's operational state is restored in a {@code finally} block so
 * that a failing assertion does not leave the node in DECOMMISSIONING.
 */
@Test
public void testGetDatanodes() throws Exception {
    Response response = nodeEndpoint.getDatanodes();
    DatanodesResponse datanodesResponse = (DatanodesResponse) response.getEntity();
    Assert.assertEquals(2, datanodesResponse.getTotalCount());
    Assert.assertEquals(2, datanodesResponse.getDatanodes().size());
    datanodesResponse.getDatanodes().forEach(datanodeMetadata -> {
        try {
            testDatanodeResponse(datanodeMetadata);
        } catch (IOException e) {
            // Fail the test but keep the original exception as the cause so
            // its stack trace is visible in the report.
            throw new AssertionError(e.getMessage(), e);
        }
    });
    waitAndCheckConditionAfterHeartbeat(() -> {
        Response response1 = nodeEndpoint.getDatanodes();
        DatanodesResponse datanodesResponse1 = (DatanodesResponse) response1.getEntity();
        DatanodeMetadata datanodeMetadata1 = datanodesResponse1.getDatanodes().stream().filter(datanodeMetadata -> datanodeMetadata.getHostname().equals("host1.datanode")).findFirst().orElse(null);
        return (datanodeMetadata1 != null && datanodeMetadata1.getContainers() == 1 && datanodeMetadata1.getOpenContainers() == 1 && reconScm.getPipelineManager().getContainersInPipeline(pipeline.getId()).size() == 1);
    });
    // Change Node OperationalState with NodeManager
    final NodeManager nodeManager = reconScm.getScmNodeManager();
    final DatanodeDetails dnDetailsInternal = nodeManager.getNodeByUuid(datanodeDetails.getUuidString());
    // Backup existing state and sanity check
    final NodeStatus nStatus = nodeManager.getNodeStatus(dnDetailsInternal);
    final NodeOperationalState backupOpState = dnDetailsInternal.getPersistedOpState();
    final long backupOpStateExpiry = dnDetailsInternal.getPersistedOpStateExpiryEpochSec();
    assertEquals(backupOpState, nStatus.getOperationalState());
    assertEquals(backupOpStateExpiry, nStatus.getOpStateExpiryEpochSeconds());
    try {
        dnDetailsInternal.setPersistedOpState(NodeOperationalState.DECOMMISSIONING);
        dnDetailsInternal.setPersistedOpStateExpiryEpochSec(666L);
        nodeManager.setNodeOperationalState(dnDetailsInternal, NodeOperationalState.DECOMMISSIONING, 666L);
        // Check if the endpoint response reflects the change
        response = nodeEndpoint.getDatanodes();
        datanodesResponse = (DatanodesResponse) response.getEntity();
        // Order of datanodes in the response is random
        AtomicInteger count = new AtomicInteger();
        datanodesResponse.getDatanodes().forEach(metadata -> {
            if (metadata.getUuid().equals(dnDetailsInternal.getUuidString())) {
                count.incrementAndGet();
                assertEquals(NodeOperationalState.DECOMMISSIONING, metadata.getOperationalState());
            }
        });
        assertEquals(1, count.get());
    } finally {
        // Restore state, even when one of the checks above failed
        dnDetailsInternal.setPersistedOpState(backupOpState);
        dnDetailsInternal.setPersistedOpStateExpiryEpochSec(backupOpStateExpiry);
        nodeManager.setNodeOperationalState(dnDetailsInternal, backupOpState, backupOpStateExpiry);
    }
}
Also used : ClusterStateResponse(org.apache.hadoop.ozone.recon.api.types.ClusterStateResponse) DatanodesResponse(org.apache.hadoop.ozone.recon.api.types.DatanodesResponse) Response(javax.ws.rs.core.Response) HttpServletResponse(javax.servlet.http.HttpServletResponse) PipelinesResponse(org.apache.hadoop.ozone.recon.api.types.PipelinesResponse) NodeManager(org.apache.hadoop.hdds.scm.node.NodeManager) DatanodesResponse(org.apache.hadoop.ozone.recon.api.types.DatanodesResponse) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DatanodeMetadata(org.apache.hadoop.ozone.recon.api.types.DatanodeMetadata) MockDatanodeDetails.randomDatanodeDetails(org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) IOException(java.io.IOException) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test) AbstractReconSqlDBTest(org.apache.hadoop.ozone.recon.persistence.AbstractReconSqlDBTest)

Aggregations

NodeOperationalState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState)11 NodeState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState)5 IOException (java.io.IOException)3 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)3 NodeStatus (org.apache.hadoop.hdds.scm.node.NodeStatus)3 HashMap (java.util.HashMap)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)2 DatanodeMetadata (org.apache.hadoop.ozone.recon.api.types.DatanodeMetadata)2 DatanodesResponse (org.apache.hadoop.ozone.recon.api.types.DatanodesResponse)2 Test (org.junit.Test)2 ArrayList (java.util.ArrayList)1 UUID (java.util.UUID)1 CompletableFuture (java.util.concurrent.CompletableFuture)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 HttpServletResponse (javax.servlet.http.HttpServletResponse)1 GET (javax.ws.rs.GET)1 Response (javax.ws.rs.core.Response)1