Search in sources :

Example 6 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class NodeDecommissionManager method decommissionNodes.

/**
 * Resolves the given host strings to datanodes and starts decommissioning
 * each of them, collecting one error entry per datanode that could not be
 * decommissioned.
 *
 * @param nodes host strings passed to {@code mapHostnamesToDatanodes}
 * @return the per-datanode errors; empty when every datanode was started
 * @throws InvalidHostStringException declared by this method; presumably
 *         raised while resolving the host strings (confirm against
 *         {@code mapHostnamesToDatanodes})
 */
public synchronized List<DatanodeAdminError> decommissionNodes(List<String> nodes) throws InvalidHostStringException {
    final List<DatanodeAdminError> failures = new ArrayList<>();
    final List<DatanodeDetails> targets = mapHostnamesToDatanodes(nodes);
    for (final DatanodeDetails target : targets) {
        try {
            startDecommission(target);
        } catch (NodeNotFoundException notFound) {
            // The host strings were already validated and the DatanodeDetails
            // came from the node manager, so this can only happen if the node
            // is removed in the short window between validation and starting
            // decommission. Log a warning, record the error and carry on with
            // the remaining nodes.
            LOG.warn("The host {} was not found in SCM. Ignoring the request to decommission it", target.getHostName());
            failures.add(new DatanodeAdminError(target.getHostName(), "The host was not found in SCM"));
        } catch (InvalidNodeStateException invalidState) {
            // Report the reason back to the caller instead of failing the batch.
            failures.add(new DatanodeAdminError(target.getHostName(), invalidState.getMessage()));
        }
    }
    return failures;
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ArrayList(java.util.ArrayList) DatanodeAdminError(org.apache.hadoop.hdds.scm.DatanodeAdminError)

Example 7 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class DeadNodeHandler method onMessage.

/**
 * Handles a dead-datanode event: destroys the node's pipelines, closes its
 * containers, removes its container replicas unless the node is in
 * maintenance, and finally takes the node out of the cluster network
 * topology.
 *
 * @param datanodeDetails the datanode reported as dead
 * @param publisher       event publisher handed to {@code closeContainers}
 */
@Override
public void onMessage(final DatanodeDetails datanodeDetails, final EventPublisher publisher) {
    try {
        // Pipelines on this datanode should already have been destroyed when
        // it was marked stale, which should also have closed its containers,
        // so ideally nothing is left to clean up. We double check here anyway
        // and take the appropriate action to be on the safe side.
        LOG.info("A dead datanode is detected. {}", datanodeDetails);
        destroyPipelines(datanodeDetails);
        closeContainers(datanodeDetails, publisher);

        // Container replicas are only removed when the node is not
        // IN_MAINTENANCE.
        final boolean inMaintenance = nodeManager.getNodeStatus(datanodeDetails).isInMaintenance();
        if (!inMaintenance) {
            removeContainerReplicas(datanodeDetails);
        }

        // Move the dead datanode out of the cluster network topology.
        final NetworkTopology topology = nodeManager.getClusterNetworkTopologyMap();
        if (!topology.contains(datanodeDetails)) {
            return;
        }
        topology.remove(datanodeDetails);
        // Once the datanode is out of the topology, the instance returned by
        // the node manager must no longer have a parent.
        final boolean detached = nodeManager.getNodeByUuid(datanodeDetails.getUuidString()).getParent() == null;
        Preconditions.checkState(detached);
    } catch (NodeNotFoundException ex) {
        // Should not happen: a dead node event cannot be raised for a
        // datanode that was never registered.
        LOG.error("DeadNode event for a unregistered node: {}!", datanodeDetails);
    }
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) NetworkTopology(org.apache.hadoop.hdds.scm.net.NetworkTopology)

Example 8 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class NodeStateManager method addNode.

/**
 * Registers a new datanode with the state manager: computes its initial
 * status, stores it in the node state map and then records its last known
 * layout version.
 *
 * @param datanodeDetails the datanode to add
 * @param layoutInfo      layout version information reported for the node
 *
 * @throws NodeAlreadyExistsException if the node is already present
 */
public void addNode(DatanodeDetails datanodeDetails, LayoutVersionProto layoutInfo) throws NodeAlreadyExistsException {
    final NodeStatus initialStatus = newNodeStatus(datanodeDetails, layoutInfo);
    nodeStateMap.addNode(datanodeDetails, initialStatus, layoutInfo);
    final UUID datanodeId = datanodeDetails.getUuid();
    try {
        updateLastKnownLayoutVersion(datanodeDetails, layoutInfo);
    } catch (NodeNotFoundException notFound) {
        // The node was inserted just above, so failing to find it means the
        // map is inconsistent. Log it; the add itself is not rolled back.
        LOG.error("Inconsistent NodeStateMap! Datanode with ID {} was added but not found in  map: {}", datanodeId, nodeStateMap);
    }
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) UUID(java.util.UUID)

Example 9 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class DatanodeAdminMonitorImpl method processCancelledNodes.

/**
 * Drains {@code cancelledNodes}: each polled datanode is no longer tracked
 * and is put back in service. A datanode that cannot be found is logged
 * with a warning and skipped, so the remaining entries are still processed.
 */
private void processCancelledNodes() {
    while (!cancelledNodes.isEmpty()) {
        final DatanodeDetails cancelled = cancelledNodes.poll();
        try {
            stopTrackingNode(cancelled);
            putNodeBackInService(cancelled);
            LOG.info("Recommissioned node {}", cancelled);
        } catch (NodeNotFoundException notFound) {
            // Keep draining the queue; one missing node must not block the rest.
            LOG.warn("Failed processing the cancel admin request for {}", cancelled, notFound);
        }
    }
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails)

Example 10 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class ReplicationManager method updateInflightAction.

/**
 * Reconciles the InflightActions for a given container.
 *
 * <p>An action is dropped from the container's list when the filter reports
 * it as completed, when its datanode is not HEALTHY, when it is older than
 * the configured event timeout, or when its datanode is not IN_SERVICE. An
 * action whose processing raises NodeNotFoundException or
 * ContainerNotFoundException is dropped as well. When the list becomes
 * empty, the container's entry is removed from the map.
 *
 * @param container Container to update
 * @param inflightActions inflightReplication (or) inflightDeletion
 * @param filter filter to check if the operation is completed
 * @param timeoutCounter update timeout metrics
 * @param completedCounter update completed metrics
 */
private void updateInflightAction(final ContainerInfo container, final Map<ContainerID, List<InflightAction>> inflightActions, final Predicate<InflightAction> filter, final Runnable timeoutCounter, final Consumer<InflightAction> completedCounter) {
    final ContainerID id = container.containerID();
    final long deadline = clock.millis() - rmConf.getEventTimeout();
    if (inflightActions.containsKey(id)) {
        final List<InflightAction> actions = inflightActions.get(id);
        Iterator<InflightAction> iter = actions.iterator();
        while (iter.hasNext()) {
            // Tracks whether the current element was already removed, so the
            // catch block below never calls Iterator.remove() a second time
            // (which would throw IllegalStateException).
            boolean removed = false;
            try {
                InflightAction a = iter.next();
                NodeStatus status = nodeManager.getNodeStatus(a.datanode);
                boolean isUnhealthy = status.getHealth() != NodeState.HEALTHY;
                boolean isCompleted = filter.test(a);
                boolean isTimeout = a.time < deadline;
                boolean isNotInService = status.getOperationalState() != NodeOperationalState.IN_SERVICE;
                if (isCompleted || isUnhealthy || isTimeout || isNotInService) {
                    iter.remove();
                    removed = true;
                    // A timed-out action only counts as a timeout, even if
                    // the filter also reports it as completed.
                    if (isTimeout) {
                        timeoutCounter.run();
                    } else if (isCompleted) {
                        completedCounter.accept(a);
                    }
                    updateMoveIfNeeded(isUnhealthy, isCompleted, isTimeout, isNotInService, container, a.datanode, inflightActions);
                }
            } catch (NodeNotFoundException | ContainerNotFoundException e) {
                // Should not happen, but if it does, just remove the action as
                // the node somehow does not exist. The failure may surface
                // after the action was already removed above; in that case
                // there is nothing left to remove.
                if (!removed) {
                    iter.remove();
                }
            }
        }
        if (actions.isEmpty()) {
            inflightActions.remove(id);
        }
    }
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) NodeStatus(org.apache.hadoop.hdds.scm.node.NodeStatus)

Aggregations

NodeNotFoundException (org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException)25 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)16 ArrayList (java.util.ArrayList)7 IOException (java.io.IOException)6 ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID)4 NodeStatus (org.apache.hadoop.hdds.scm.node.NodeStatus)4 UUID (java.util.UUID)3 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)3 DatanodeAdminError (org.apache.hadoop.hdds.scm.DatanodeAdminError)3 InvalidStateTransitionException (org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException)3 List (java.util.List)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Collectors (java.util.stream.Collectors)2 NodeState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState)2 ContainerReplicaProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto)2 ContainerNotFoundException (org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)2 DatanodeInfo (org.apache.hadoop.hdds.scm.node.DatanodeInfo)2 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)2 PipelineID (org.apache.hadoop.hdds.scm.pipeline.PipelineID)2 Longs (com.google.common.primitives.Longs)1