Search in sources :

Example 6 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class NodeDecommissionManager method decommissionNodes.

/**
 * Resolves the given host strings to datanodes and processes each one for
 * decommissioning, collecting a per-host error instead of failing the whole
 * request.
 *
 * NOTE(review): in this excerpt the body of the try block is empty and the
 * closing braces are missing; the call that actually starts decommissioning
 * is presumably elided - confirm against the full source.
 *
 * @param nodes host strings identifying the datanodes to decommission
 * @return one DatanodeAdminError per host that could not be processed; empty
 *         if no host failed
 * @throws InvalidHostStringException declared by this method; presumably
 *         raised while mapping host strings to datanodes - confirm
 */
public synchronized List<DatanodeAdminError> decommissionNodes(List<String> nodes) throws InvalidHostStringException {
    List<DatanodeDetails> dns = mapHostnamesToDatanodes(nodes);
    // Errors are accumulated per host so that one bad node does not abort the rest.
    List<DatanodeAdminError> errors = new ArrayList<>();
    for (DatanodeDetails dn : dns) {
        try {
        } catch (NodeNotFoundException e) {
            // We already validated the host strings and retrieved the
            // DatanodeDetails object from the node manager. Therefore we should
            // never get a NodeNotFoundException here, except if the node is
            // removed in the very short window between validation and starting
            // decommission. Log a warning and report an error for this host
            // rather than propagating the exception.
            LOG.warn("The host {} was not found in SCM. Ignoring the request to " + "decommission it", dn.getHostName());
            errors.add(new DatanodeAdminError(dn.getHostName(), "The host was not found in SCM"));
        } catch (InvalidNodeStateException e) {
            // Report the exception's message as the error for this host.
            errors.add(new DatanodeAdminError(dn.getHostName(), e.getMessage()));
    return errors;
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ArrayList(java.util.ArrayList) DatanodeAdminError(org.apache.hadoop.hdds.scm.DatanodeAdminError)

Example 7 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class DeadNodeHandler method onMessage.

/**
 * Handles a dead-node event for the given datanode.
 *
 * As far as this excerpt shows, it closes the containers on the datanode,
 * consults the node's maintenance status, and inspects the cluster network
 * topology. A NodeNotFoundException raised inside the try block is logged as
 * an error.
 *
 * NOTE(review): several pieces are missing from this excerpt (the opener of
 * the block comment, the start of the log call, and the bodies and closing
 * braces of the if blocks) - consult the full source before editing.
 *
 * @param datanodeDetails the datanode reported as dead
 * @param publisher event publisher, passed through to closeContainers
 */
public void onMessage(final DatanodeDetails datanodeDetails, final EventPublisher publisher) {
    try {
       * We should have already destroyed all the pipelines on this datanode
       * when it was marked as stale. Destroy pipeline should also have closed
       * all the containers on this datanode.
       * Ideally we should not have any pipeline or OPEN containers now.
       * To be on a safer side, we double check here and take appropriate
       * action.
       */"A dead datanode is detected. {}", datanodeDetails);
        closeContainers(datanodeDetails, publisher);
        // NOTE(review): the beginning of the following comment is missing from
        // this excerpt; only its tail survives.
        // is IN_MAINTENANCE
        if (!nodeManager.getNodeStatus(datanodeDetails).isInMaintenance()) {
        // move dead datanode out of ClusterNetworkTopology
        NetworkTopology nt = nodeManager.getClusterNetworkTopologyMap();
        if (nt.contains(datanodeDetails)) {
            // make sure after DN is removed from topology,
            // DatanodeDetails instance returned from nodeStateManager has no parent.
            Preconditions.checkState(nodeManager.getNodeByUuid(datanodeDetails.getUuidString()).getParent() == null);
    } catch (NodeNotFoundException ex) {
        // This should not happen, we cannot get a dead node event for an
        // unregistered datanode!
        LOG.error("DeadNode event for a unregistered node: {}!", datanodeDetails);
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) NetworkTopology(

Example 8 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class NodeStateManager method addNode.

/**
 * Adds a new node to the state manager.
 *
 * @param datanodeDetails DatanodeDetails
 * @param layoutInfo LayoutVersionProto
 * @throws NodeAlreadyExistsException if the node is already present
 */
public void addNode(DatanodeDetails datanodeDetails, LayoutVersionProto layoutInfo) throws NodeAlreadyExistsException {
    // Build the initial status for the node and register it in the node state map.
    NodeStatus newNodeStatus = newNodeStatus(datanodeDetails, layoutInfo);
    nodeStateMap.addNode(datanodeDetails, newNodeStatus, layoutInfo);
    // Captured for use in the error message below.
    UUID dnID = datanodeDetails.getUuid();
    try {
        updateLastKnownLayoutVersion(datanodeDetails, layoutInfo);
    } catch (NodeNotFoundException ex) {
        // The node was added just above, so not finding it here indicates an
        // inconsistency in nodeStateMap; the condition is logged as an error.
        // NOTE(review): the rest of this method is missing from the excerpt.
        LOG.error("Inconsistent NodeStateMap! Datanode with ID {} was " + "added but not found in  map: {}", dnID, nodeStateMap);
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) UUID(java.util.UUID)

Example 9 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class DatanodeAdminMonitorImpl method processCancelledNodes.

/**
 * Drains the cancelledNodes queue, handling one datanode per iteration until
 * the queue is empty. A NodeNotFoundException for a node is logged as a
 * warning.
 *
 * NOTE(review): the body of the try block is garbled in this excerpt (only
 * the tail of a log call survives), so the actual cancel handling is not
 * visible here - consult the full source.
 */
private void processCancelledNodes() {
    while (!cancelledNodes.isEmpty()) {
        // poll() removes the head of the queue, so each node is handled once.
        DatanodeDetails dn = cancelledNodes.poll();
        try {
  "Recommissioned node {}", dn);
        } catch (NodeNotFoundException e) {
            // The exception is passed as the last argument so it is logged too.
            LOG.warn("Failed processing the cancel admin request for {}", dn, e);
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails)

Example 10 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class ReplicationManager method updateInflightAction.

/**
 * Reconciles the InflightActions for a given container.
 *
 * @param container Container to update
 * @param inflightActions inflightReplication (or) inflightDeletion
 * @param filter filter to check if the operation is completed
 * @param timeoutCounter update timeout metrics
 * @param completedCounter update completed metrics
 */
private void updateInflightAction(final ContainerInfo container, final Map<ContainerID, List<InflightAction>> inflightActions, final Predicate<InflightAction> filter, final Runnable timeoutCounter, final Consumer<InflightAction> completedCounter) {
    final ContainerID id = container.containerID();
    // Actions whose timestamp is older than (now - event timeout) count as timed out.
    final long deadline = clock.millis() - rmConf.getEventTimeout();
    if (inflightActions.containsKey(id)) {
        final List<InflightAction> actions = inflightActions.get(id);
        Iterator<InflightAction> iter = actions.iterator();
        while (iter.hasNext()) {
            try {
                // NOTE(review): the right-hand side of the next assignment is
                // missing from this excerpt.
                InflightAction a =;
                NodeStatus status = nodeManager.getNodeStatus(a.datanode);
                // Classify the action: target node not healthy, operation
                // finished according to the filter, action older than the
                // deadline, or target node not IN_SERVICE.
                boolean isUnhealthy = status.getHealth() != NodeState.HEALTHY;
                boolean isCompleted = filter.test(a);
                boolean isTimeout = a.time < deadline;
                boolean isNotInService = status.getOperationalState() != NodeOperationalState.IN_SERVICE;
                // Any one of the four conditions triggers the handling below.
                if (isCompleted || isUnhealthy || isTimeout || isNotInService) {
                    // NOTE(review): the bodies of the two branches below are
                    // missing from this excerpt.
                    if (isTimeout) {
                    } else if (isCompleted) {
                    updateMoveIfNeeded(isUnhealthy, isCompleted, isTimeout, isNotInService, container, a.datanode, inflightActions);
            } catch (NodeNotFoundException | ContainerNotFoundException e) {
                // Should not happen, but if it does, just remove the action as the
                // node somehow does not exist;
        // NOTE(review): the body of this check is missing from the excerpt.
        if (actions.isEmpty()) {
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus)


NodeNotFoundException (org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException)25 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)16 ArrayList (java.util.ArrayList)7 IOException ( ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID)4 NodeStatus (org.apache.hadoop.hdds.scm.node.NodeStatus)4 UUID (java.util.UUID)3 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)3 DatanodeAdminError (org.apache.hadoop.hdds.scm.DatanodeAdminError)3 InvalidStateTransitionException (org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException)3 List (java.util.List)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Collectors ( NodeState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState)2 ContainerReplicaProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto)2 ContainerNotFoundException (org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)2 DatanodeInfo (org.apache.hadoop.hdds.scm.node.DatanodeInfo)2 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)2 PipelineID (org.apache.hadoop.hdds.scm.pipeline.PipelineID)2 Longs (