Example 1 with NodeState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.

the class NodeStateManager method updateNodeLayoutVersionState.

/**
 * Updates the node state if the condition is satisfied.
 *
 * @param node DatanodeInfo
 * @param condition condition to check against the node's last known layout version
 * @param status current state of node
 * @param lifeCycleEvent NodeLifeCycleEvent to be applied if condition
 *                       matches
 *
 * @throws NodeNotFoundException if the node is not present
 */
private void updateNodeLayoutVersionState(DatanodeInfo node, Predicate<LayoutVersionProto> condition, NodeStatus status, NodeLifeCycleEvent lifeCycleEvent) throws NodeNotFoundException {
    try {
        if (condition.test(node.getLastKnownLayoutVersion())) {
            NodeState newHealthState = nodeHealthSM.getNextState(status.getHealth(), lifeCycleEvent);
            NodeStatus newStatus = nodeStateMap.updateNodeHealthState(node.getUuid(), newHealthState);
            fireHealthStateEvent(newStatus.getHealth(), node);
        }
    } catch (InvalidStateTransitionException e) {
        LOG.warn("Invalid state transition of node {}." + " Current state: {}, life cycle event: {}", node, status, lifeCycleEvent);
    }
}
Also used : NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException)
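For context, here is a hedged sketch of how a helper like this might be invoked from inside NodeStateManager. It only illustrates the predicate-driven pattern above: the checkLayoutOnHeartbeat name, the scmMetadataLayoutVersion field and the LAYOUT_MISMATCH event constant are assumptions for illustration, not code taken from the project.

// Illustrative sketch only; names flagged in the lead-in are assumptions.
private void checkLayoutOnHeartbeat(DatanodeInfo node, NodeStatus status) throws NodeNotFoundException {
    // Fire the (assumed) layout-mismatch life cycle event only when the node's
    // last reported metadata layout version differs from the one SCM expects.
    Predicate<LayoutVersionProto> layoutMismatch =
        layout -> layout.getMetadataLayoutVersion() != scmMetadataLayoutVersion;
    updateNodeLayoutVersionState(node, layoutMismatch, status, NodeLifeCycleEvent.LAYOUT_MISMATCH);
}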

Example 2 with NodeState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.

the class ReplicationManager method move.

/**
 * Adds a move action for a given container.
 *
 * @param cid Container to move
 * @param mp MoveDataNodePair which contains the source and target datanodes
 * @return a CompletableFuture that completes with the MoveResult once the
 *         request has been validated and, if accepted, scheduled
 * @throws ContainerNotFoundException if the container does not exist
 * @throws NodeNotFoundException if the source or target datanode is not known
 */
public CompletableFuture<MoveResult> move(ContainerID cid, MoveDataNodePair mp) throws ContainerNotFoundException, NodeNotFoundException {
    CompletableFuture<MoveResult> ret = new CompletableFuture<>();
    if (!isRunning()) {
        ret.complete(MoveResult.FAIL_NOT_RUNNING);
        return ret;
    }
    if (!scmContext.isLeader()) {
        ret.complete(MoveResult.FAIL_NOT_LEADER);
        return ret;
    }
    /*
     * Make sure the following conditions are met:
     *  1. the two given datanodes are in a healthy state
     *  2. the given container exists on the given source datanode
     *  3. the given container does not exist on the given target datanode
     *  4. the given container is in CLOSED state
     *  5. the given container is not taking any inflight action
     *  6. the two given datanodes are in IN_SERVICE state
     *  7. {Existing replicas + Target_Dn - Source_Dn} satisfies
     *     the placement policy
     *
     * A move is a combination of two steps: replication and deletion.
     * If all the conditions above are met, we take a conservative
     * strategy here: replication can always be executed, but the execution
     * of deletion always depends on the placement policy.
     */
    DatanodeDetails srcDn = mp.getSrc();
    DatanodeDetails targetDn = mp.getTgt();
    NodeStatus currentNodeStat = nodeManager.getNodeStatus(srcDn);
    NodeState healthStat = currentNodeStat.getHealth();
    NodeOperationalState operationalState = currentNodeStat.getOperationalState();
    if (healthStat != NodeState.HEALTHY) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
        return ret;
    }
    if (operationalState != NodeOperationalState.IN_SERVICE) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
        return ret;
    }
    currentNodeStat = nodeManager.getNodeStatus(targetDn);
    healthStat = currentNodeStat.getHealth();
    operationalState = currentNodeStat.getOperationalState();
    if (healthStat != NodeState.HEALTHY) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY);
        return ret;
    }
    if (operationalState != NodeOperationalState.IN_SERVICE) {
        ret.complete(MoveResult.REPLICATION_FAIL_NODE_NOT_IN_SERVICE);
        return ret;
    }
    // we need to synchronize on ContainerInfo, since it is
    // shared by ICR/FCR handler and this.processContainer
    // TODO: use a Read lock after introducing a RW lock into ContainerInfo
    ContainerInfo cif = containerManager.getContainer(cid);
    synchronized (cif) {
        final Set<ContainerReplica> currentReplicas = containerManager.getContainerReplicas(cid);
        final Set<DatanodeDetails> replicas = currentReplicas.stream().map(ContainerReplica::getDatanodeDetails).collect(Collectors.toSet());
        if (replicas.contains(targetDn)) {
            ret.complete(MoveResult.REPLICATION_FAIL_EXIST_IN_TARGET);
            return ret;
        }
        if (!replicas.contains(srcDn)) {
            ret.complete(MoveResult.REPLICATION_FAIL_NOT_EXIST_IN_SOURCE);
            return ret;
        }
        if (inflightReplication.containsKey(cid)) {
            ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_REPLICATION);
            return ret;
        }
        if (inflightDeletion.containsKey(cid)) {
            ret.complete(MoveResult.REPLICATION_FAIL_INFLIGHT_DELETION);
            return ret;
        }
        /*
         * Here, there is no need to check whether cid is in inflightMove:
         * these three maps are all synchronized on ContainerInfo, so if cid
         * is in inflightMove, it must currently be being replicated or
         * deleted, and hence must be in inflightReplication or
         * inflightDeletion. Thus, if cid cannot be found in either of them,
         * it cannot be in inflightMove.
         */
        LifeCycleState currentContainerStat = cif.getState();
        if (currentContainerStat != LifeCycleState.CLOSED) {
            ret.complete(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED);
            return ret;
        }
        // satisfies current placement policy
        if (!isPolicySatisfiedAfterMove(cif, srcDn, targetDn, currentReplicas.stream().collect(Collectors.toList()))) {
            ret.complete(MoveResult.PLACEMENT_POLICY_NOT_SATISFIED);
            return ret;
        }
        try {
            moveScheduler.startMove(cid.getProtobuf(), mp.getProtobufMessage(CURRENT_VERSION));
        } catch (IOException e) {
            LOG.warn("Exception while starting move {}", cid);
            ret.complete(MoveResult.FAIL_CAN_NOT_RECORD_TO_DB);
            return ret;
        }
        inflightMoveFuture.putIfAbsent(cid, ret);
        sendReplicateCommand(cif, targetDn, Collections.singletonList(srcDn));
    }
    LOG.info("receive a move request about container {} , from {} to {}", cid, srcDn.getUuid(), targetDn.getUuid());
    return ret;
}
Also used : NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) IOException(java.io.IOException) CompletableFuture(java.util.concurrent.CompletableFuture) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) LifeCycleState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus)
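For readers wiring this into a caller, here is a hedged sketch of consuming the returned future. The MoveResult.COMPLETED constant and the MoveDataNodePair(src, tgt) constructor are assumptions beyond what the snippet above shows, and replicationManager, containerID, sourceDn and targetDn stand in for objects from the caller's scope.

// Illustrative caller only; see the assumptions noted in the lead-in.
try {
    CompletableFuture<MoveResult> moveFuture =
        replicationManager.move(containerID, new MoveDataNodePair(sourceDn, targetDn));
    moveFuture.thenAccept(result -> {
        if (result == MoveResult.COMPLETED) {
            LOG.info("Container {} moved from {} to {}", containerID, sourceDn, targetDn);
        } else {
            // Any of the FAIL_* / REPLICATION_FAIL_* values returned above lands here.
            LOG.warn("Move of container {} was rejected or failed: {}", containerID, result);
        }
    });
} catch (ContainerNotFoundException | NodeNotFoundException e) {
    LOG.warn("Could not schedule move of container {}", containerID, e);
}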

Example 3 with NodeState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.

the class TestHDDSUpgrade method testDataNodesStateOnSCM.

/*
 * Helper function to test the DataNode state on the SCM. Note that, due to
 * timing constraints, the node state can sometimes transition to the next
 * state before the check runs. This function therefore expects the DataNode
 * to be in NodeState "state" or "alternateState". Tests can enforce a single
 * expected NodeState by passing "alternateState = null".
 */
private void testDataNodesStateOnSCM(NodeState state, NodeState alternateState) {
    int countNodes = 0;
    for (DatanodeDetails dn : scm.getScmNodeManager().getAllNodes()) {
        try {
            NodeState dnState = scm.getScmNodeManager().getNodeStatus(dn).getHealth();
            Assert.assertTrue((dnState == state) || (alternateState != null && dnState == alternateState));
        } catch (NodeNotFoundException e) {
            e.printStackTrace();
            Assert.fail("Node not found");
        }
        ++countNodes;
    }
    Assert.assertEquals(NUM_DATA_NODES, countNodes);
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails)
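To show how the two-state check is meant to be used, here are two hedged example invocations; the HEALTHY_READONLY constant and the upgrade scenario are assumptions about the surrounding test, not taken from the snippet above.

// Strict check: every datanode must be HEALTHY.
testDataNodesStateOnSCM(NodeState.HEALTHY, null);
// Relaxed check during an upgrade window: a node may already have moved on,
// so either of the two states is accepted (HEALTHY_READONLY is assumed here).
testDataNodesStateOnSCM(NodeState.HEALTHY_READONLY, NodeState.HEALTHY);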

Example 4 with NodeState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.

the class NodeEndpoint method getDatanodes.

/**
 * Return the list of datanodes with detailed information about each datanode.
 * @return {@link Response}
 */
@GET
public Response getDatanodes() {
    List<DatanodeMetadata> datanodes = new ArrayList<>();
    List<DatanodeDetails> datanodeDetails = nodeManager.getAllNodes();
    datanodeDetails.forEach(datanode -> {
        DatanodeStorageReport storageReport = getStorageReport(datanode);
        NodeState nodeState = null;
        try {
            nodeState = nodeManager.getNodeStatus(datanode).getHealth();
        } catch (NodeNotFoundException e) {
            LOG.warn("Cannot get nodeState for datanode {}", datanode, e);
        }
        final NodeOperationalState nodeOpState = datanode.getPersistedOpState();
        String hostname = datanode.getHostName();
        Set<PipelineID> pipelineIDs = nodeManager.getPipelines(datanode);
        List<DatanodePipeline> pipelines = new ArrayList<>();
        AtomicInteger leaderCount = new AtomicInteger();
        AtomicInteger openContainers = new AtomicInteger();
        DatanodeMetadata.Builder builder = DatanodeMetadata.newBuilder();
        pipelineIDs.forEach(pipelineID -> {
            try {
                Pipeline pipeline = pipelineManager.getPipeline(pipelineID);
                String leaderNode = pipeline.getLeaderNode().getHostName();
                DatanodePipeline datanodePipeline = new DatanodePipeline(pipelineID.getId(), pipeline.getReplicationConfig().getReplicationType().toString(), ReplicationConfig.getLegacyFactor(pipeline.getReplicationConfig()).getNumber(), leaderNode);
                pipelines.add(datanodePipeline);
                if (datanode.getUuid().equals(pipeline.getLeaderId())) {
                    leaderCount.getAndIncrement();
                }
                int openContainerPerPipeline = reconContainerManager.getPipelineToOpenContainer().getOrDefault(pipelineID, 0);
                openContainers.getAndAdd(openContainerPerPipeline);
            } catch (PipelineNotFoundException ex) {
                LOG.warn("Cannot get pipeline {} for datanode {}, pipeline not found", pipelineID.getId(), hostname, ex);
            } catch (IOException ioEx) {
                LOG.warn("Cannot get leader node of pipeline with id {}.", pipelineID.getId(), ioEx);
            }
        });
        try {
            Set<ContainerID> allContainers = nodeManager.getContainers(datanode);
            builder.withContainers(allContainers.size());
            builder.withOpenContainers(openContainers.get());
        } catch (NodeNotFoundException ex) {
            LOG.warn("Cannot get containers, datanode {} not found.", datanode.getUuid(), ex);
        }
        DatanodeInfo dnInfo = (DatanodeInfo) datanode;
        datanodes.add(builder.withHostname(nodeManager.getHostName(datanode))
            .withDatanodeStorageReport(storageReport)
            .withLastHeartbeat(nodeManager.getLastHeartbeat(datanode))
            .withState(nodeState)
            .withOperationalState(nodeOpState)
            .withPipelines(pipelines)
            .withLeaderCount(leaderCount.get())
            .withUUid(datanode.getUuidString())
            .withVersion(nodeManager.getVersion(datanode))
            .withSetupTime(nodeManager.getSetupTime(datanode))
            .withRevision(nodeManager.getRevision(datanode))
            .withBuildDate(nodeManager.getBuildDate(datanode))
            .withLayoutVersion(dnInfo.getLastKnownLayoutVersion().getMetadataLayoutVersion())
            .build());
    });
    DatanodesResponse datanodesResponse = new DatanodesResponse(datanodes.size(), datanodes);
    return Response.ok(datanodesResponse).build();
}
Also used : DatanodeInfo(org.apache.hadoop.hdds.scm.node.DatanodeInfo) NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) DatanodesResponse(org.apache.hadoop.ozone.recon.api.types.DatanodesResponse) DatanodeStorageReport(org.apache.hadoop.ozone.recon.api.types.DatanodeStorageReport) DatanodeMetadata(org.apache.hadoop.ozone.recon.api.types.DatanodeMetadata) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DatanodePipeline(org.apache.hadoop.ozone.recon.api.types.DatanodePipeline) DatanodePipeline(org.apache.hadoop.ozone.recon.api.types.DatanodePipeline) Pipeline(org.apache.hadoop.hdds.scm.pipeline.Pipeline) NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) PipelineID(org.apache.hadoop.hdds.scm.pipeline.PipelineID) PipelineNotFoundException(org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException) GET(javax.ws.rs.GET)
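A minimal JAX-RS client sketch for exercising this endpoint follows. The Recon address localhost:9888 and the /api/v1/datanodes path are assumptions about how NodeEndpoint is mounted in a default Recon deployment, not something shown in the code above.

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.core.MediaType;

public final class DatanodesEndpointClient {
    public static void main(String[] args) {
        Client client = ClientBuilder.newClient();
        try {
            // Fetch the raw JSON rendering of DatanodesResponse.
            String json = client
                .target("http://localhost:9888")   // assumed Recon HTTP address
                .path("/api/v1/datanodes")         // assumed path for NodeEndpoint
                .request(MediaType.APPLICATION_JSON)
                .get(String.class);
            System.out.println(json);
        } finally {
            client.close();
        }
    }
}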

Example 5 with NodeState

use of org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState in project ozone by apache.

the class TestNodeStateMap method testGetNodeMethodsReturnCorrectCountsAndStates.

@Test
public void testGetNodeMethodsReturnCorrectCountsAndStates() throws NodeAlreadyExistsException {
    // Add one node for each combination of operational state and health state
    int nodeCount = 0;
    for (NodeOperationalState op : NodeOperationalState.values()) {
        for (NodeState health : NodeState.values()) {
            addRandomNodeWithState(op, health);
            nodeCount++;
        }
    }
    NodeStatus requestedState = NodeStatus.inServiceStale();
    List<UUID> nodes = map.getNodes(requestedState);
    assertEquals(1, nodes.size());
    assertEquals(1, map.getNodeCount(requestedState));
    assertEquals(nodeCount, map.getTotalNodeCount());
    assertEquals(nodeCount, map.getAllNodes().size());
    assertEquals(nodeCount, map.getAllDatanodeInfos().size());
    // Checks for the getNodeCount(opstate, health) method
    assertEquals(nodeCount, map.getNodeCount(null, null));
    assertEquals(1, map.getNodeCount(NodeOperationalState.DECOMMISSIONING, NodeState.STALE));
    assertEquals(5, map.getNodeCount(null, NodeState.HEALTHY));
    assertEquals(4, map.getNodeCount(NodeOperationalState.DECOMMISSIONING, null));
}
Also used : NodeState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState) NodeOperationalState(org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState) UUID(java.util.UUID) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus) Test(org.junit.Test)
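As a hedged aside on where the literal expectations come from: assuming the enum cardinalities implied by the assertions above (five operational states, four health states), the counts can be written in terms of the enums themselves.

// Sketch only; the 5/4 cardinalities are inferred from the assertions above.
int opStates = NodeOperationalState.values().length;   // 5 per the asserts above
int healthStates = NodeState.values().length;          // 4 per the asserts above
// One node was added per (operational, health) pair, so:
assertEquals(opStates * healthStates, map.getTotalNodeCount());
// Fixing only the health leaves one matching node per operational state.
assertEquals(opStates, map.getNodeCount(null, NodeState.HEALTHY));
// Fixing only the operational state leaves one per health state.
assertEquals(healthStates, map.getNodeCount(NodeOperationalState.DECOMMISSIONING, null));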

Aggregations

NodeState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState): 8
NodeOperationalState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState): 5
DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails): 3
IOException (java.io.IOException): 2
NodeStatus (org.apache.hadoop.hdds.scm.node.NodeStatus): 2
NodeNotFoundException (org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException): 2
InvalidStateTransitionException (org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException): 2
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
UUID (java.util.UUID): 1
CompletableFuture (java.util.concurrent.CompletableFuture): 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 1
GET (javax.ws.rs.GET): 1
HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos): 1
LifeCycleState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState): 1
ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID): 1
DatanodeInfo (org.apache.hadoop.hdds.scm.node.DatanodeInfo): 1
Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline): 1