Search in sources :

Example 21 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class ContainerBalancer method moveContainer.

/**
 * Asks {@link ReplicationManager} to move the specified container from
 * source to target, and records the resulting future in
 * {@code moveSelectionToFutureMap} unless the move already failed.
 *
 * @param source the source datanode
 * @param moveSelection the selected container to move and target datanode
 * @return false if an exception occurred while requesting the move, the
 * move completed exceptionally, or the move completed with a result other
 * than ReplicationManager.MoveResult.COMPLETED. Returns true if the move
 * completed with MoveResult.COMPLETED or the move is not yet done
 */
private boolean moveContainer(DatanodeDetails source, ContainerMoveSelection moveSelection) {
    ContainerID container = moveSelection.getContainerID();
    CompletableFuture<ReplicationManager.MoveResult> future;
    try {
        future = replicationManager.move(container, source, moveSelection.getTargetNode());
    } catch (ContainerNotFoundException e) {
        LOG.warn("Could not find Container {} for container move", container, e);
        return false;
    } catch (NodeNotFoundException e) {
        LOG.warn("Container move failed for container {}", container, e);
        return false;
    }

    // Still in flight: remember the future so its outcome can be inspected
    // later, and report the move as (so far) successful.
    if (!future.isDone()) {
        moveSelectionToFutureMap.put(moveSelection, future);
        return true;
    }

    // Already failed: nothing is tracked for this selection.
    if (future.isCompletedExceptionally()) {
        // The leading space in the second literal keeps the target UUID from
        // running into the word "completed" in the rendered message.
        LOG.info("Container move for container {} from source {} to target {}" + " completed exceptionally", container, source.getUuidString(), moveSelection.getTargetNode().getUuidString());
        return false;
    }

    // Already finished normally: join() returns immediately here. The future
    // is tracked regardless of which MoveResult it carries.
    ReplicationManager.MoveResult result = future.join();
    moveSelectionToFutureMap.put(moveSelection, future);
    return result == ReplicationManager.MoveResult.COMPLETED;
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) ReplicationManager(org.apache.hadoop.hdds.scm.container.ReplicationManager) ContainerID(org.apache.hadoop.hdds.scm.container.ContainerID) ContainerNotFoundException(org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)

Example 22 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class IncrementalContainerReportHandler method onMessage.

/**
 * Handles an incremental container report (ICR) sent by a datanode.
 *
 * <p>Reports from datanodes that the node manager does not know are logged
 * and dropped. Otherwise every replica entry in the report is processed
 * while holding the monitor of the datanode's {@link DatanodeDetails}: the
 * container is looked up, the container is recorded against the datanode in
 * the node manager (unless the replica state is DELETED), and the replica is
 * handed to {@code processContainerReplica}. A failure on one replica is
 * logged and does not stop the remaining replicas from being processed.
 * Finally the container manager is notified whether all replicas succeeded.
 *
 * @param report the incremental container report and its originating datanode
 * @param publisher event publisher passed through to
 *                  {@code processContainerReplica}
 */
@Override
public void onMessage(final IncrementalContainerReportFromDatanode report, final EventPublisher publisher) {
    final DatanodeDetails dnFromReport = report.getDatanodeDetails();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Processing incremental container report from data node {}", dnFromReport.getUuid());
    }
    // Resolve the node manager's own instance for this datanode; that
    // instance (not the one carried by the report) is used from here on.
    DatanodeDetails dd = nodeManager.getNodeByUuid(dnFromReport.getUuidString());
    if (dd == null) {
        LOG.warn("Received container report from unknown datanode {}", dnFromReport);
        return;
    }
    // Becomes false as soon as any replica in the report fails to process.
    boolean success = true;
    // Process the whole report while holding this datanode's monitor.
    // NOTE(review): presumably this keeps other reports for the same
    // datanode from interleaving with this one, so the container list in
    // NodeManager stays consistent with the replicas in ContainerManager
    // - confirm against the other report handlers.
    synchronized (dd) {
        for (ContainerReplicaProto replicaProto : report.getReport().getReportList()) {
            ContainerID id = ContainerID.valueOf(replicaProto.getContainerID());
            ContainerInfo container = null;
            try {
                try {
                    container = getContainerManager().getContainer(id);
                    // Ensure we reuse the same ContainerID instance in containerInfo
                    id = container.containerID();
                } finally {
                    // Runs even when the lookup above throws; in that case id
                    // is still the instance built from the report. Replicas
                    // reported as DELETED are not recorded against the node.
                    if (!replicaProto.getState().equals(ContainerReplicaProto.State.DELETED)) {
                        nodeManager.addContainer(dd, id);
                    }
                }
                // Only reached when the container lookup succeeded.
                processContainerReplica(dd, container, replicaProto, publisher);
            } catch (ContainerNotFoundException e) {
                success = false;
                LOG.warn("Container {} not found!", replicaProto.getContainerID());
            } catch (NodeNotFoundException ex) {
                success = false;
                LOG.error("Received ICR from unknown datanode {}", report.getDatanodeDetails(), ex);
            } catch (ContainerReplicaNotFoundException e) {
                success = false;
                LOG.warn("Container {} replica not found!", replicaProto.getContainerID());
            } catch (IOException | InvalidStateTransitionException e) {
                success = false;
                LOG.error("Exception while processing ICR for container {}", replicaProto.getContainerID(), e);
            }
        }
    }
    // NOTE(review): the first argument is presumably an "is full report"
    // flag (false for an incremental report) - confirm against
    // notifyContainerReportProcessing's signature.
    getContainerManager().notifyContainerReportProcessing(false, success);
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) InvalidStateTransitionException(org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) IOException(java.io.IOException) ContainerReplicaProto(org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto)

Example 23 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class NodeStateManager method forceNodesToHealthyReadOnly.

/**
 * Switches every node that {@code nodeStateMap} returns for the HEALTHY
 * health state to HEALTHY_READONLY, firing the event mapped to
 * HEALTHY_READONLY in {@code state2EventMap} (if any) for each node.
 *
 * <p>If a node cannot be found while iterating, the error is logged together
 * with the exception and the remaining nodes are left untouched, because the
 * whole loop sits inside a single try block.
 */
public void forceNodesToHealthyReadOnly() {
    try {
        // NOTE(review): the null first argument presumably means "any
        // operational state" - confirm against NodeStateMap#getNodes.
        List<UUID> nodes = nodeStateMap.getNodes(null, HEALTHY);
        for (UUID id : nodes) {
            DatanodeInfo node = nodeStateMap.getNodeInfo(id);
            nodeStateMap.updateNodeHealthState(node.getUuid(), HEALTHY_READONLY);
            if (state2EventMap.containsKey(HEALTHY_READONLY)) {
                eventPublisher.fireEvent(state2EventMap.get(HEALTHY_READONLY), node);
            }
        }
    } catch (NodeNotFoundException ex) {
        // Pass the exception as the trailing argument so the log records which
        // lookup failed instead of silently dropping the cause.
        LOG.error("Inconsistent NodeStateMap! {}", nodeStateMap, ex);
    }
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) UUID(java.util.UUID)

Example 24 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class NodeDecommissionManager method recommissionNodes.

/**
 * Recommissions each datanode resolved from the given host strings.
 *
 * @param nodes host strings identifying the datanodes to recommission
 * @return one {@link DatanodeAdminError} per datanode that could not be
 *         found while recommissioning; empty when every node was handled
 * @throws InvalidHostStringException declared by this method; the only call
 *         visible here that can raise it is {@code mapHostnamesToDatanodes}
 */
public synchronized List<DatanodeAdminError> recommissionNodes(List<String> nodes) throws InvalidHostStringException {
    final List<DatanodeDetails> datanodes = mapHostnamesToDatanodes(nodes);
    final List<DatanodeAdminError> failures = new ArrayList<>();
    for (final DatanodeDetails datanode : datanodes) {
        try {
            recommission(datanode);
        } catch (NodeNotFoundException notFound) {
            // The host strings were already validated and resolved to
            // DatanodeDetails, so this should only happen if the node was
            // removed in the short window between that validation and the
            // recommission call. Log a warning, report the host back to the
            // caller and carry on with the remaining nodes.
            final String hostName = datanode.getHostName();
            LOG.warn("Host {} was not found in SCM. Ignoring the request to recommission it.", hostName);
            failures.add(new DatanodeAdminError(hostName, "The host was not found in SCM"));
        }
    }
    return failures;
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails) ArrayList(java.util.ArrayList) DatanodeAdminError(org.apache.hadoop.hdds.scm.DatanodeAdminError)

Example 25 with NodeNotFoundException

use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.

the class DatanodeAdminMonitorImpl method processTransitioningNodes.

/**
 * Walks over {@code trackedNodes} and advances the admin workflow of each
 * datanode by at most one step, removing the node from the tracked set once
 * its workflow is aborted, completed, or the node is put back in service.
 *
 * <p>A {@link NodeNotFoundException} raised while handling a node aborts that
 * node's workflow and stops tracking it; the other nodes are still processed.
 */
private void processTransitioningNodes() {
    for (Iterator<DatanodeDetails> tracked = trackedNodes.iterator(); tracked.hasNext();) {
        final DatanodeDetails dn = tracked.next();
        try {
            NodeStatus status = getNodeStatus(dn);

            // The workflow is no longer valid for this node: abort and forget it.
            if (!shouldContinueWorkflow(dn, status)) {
                abortWorkflow(dn);
                tracked.remove();
                continue;
            }

            // A node in maintenance whose operational state has expired is done.
            if (status.isMaintenance() && status.operationalStateExpired()) {
                completeMaintenance(dn);
                tracked.remove();
                continue;
            }

            // Only nodes that are decommissioning or entering maintenance have
            // further work to do in this pass.
            if (!(status.isDecommissioning() || status.isEnteringMaintenance())) {
                continue;
            }

            // All three conditions must hold, evaluated in this order: the
            // pipelines on the node are closed, the datanode's persisted
            // operational state matches the current operational state, and
            // the containers on the node are replicated.
            final boolean readyToFinish = checkPipelinesClosedOnNode(dn)
                    && status.getOperationalState() == dn.getPersistedOpState()
                    && checkContainersReplicatedOnNode(dn);
            if (!readyToFinish) {
                continue;
            }

            // checkContainersReplicatedOnNode may take a short time to run,
            // so fetch the node status again to make sure the node is still
            // in a state where the workflow can be finished.
            status = getNodeStatus(dn);
            if (status.isDead()) {
                LOG.warn("Datanode {} is dead and the admin workflow cannot continue. The node will be put back to IN_SERVICE and handled as a dead node", dn);
                putNodeBackInService(dn);
                tracked.remove();
            } else if (status.isDecommissioning()) {
                completeDecommission(dn);
                tracked.remove();
            } else if (status.isEnteringMaintenance()) {
                // The node stays tracked after entering maintenance.
                putIntoMaintenance(dn);
            }
        } catch (NodeNotFoundException e) {
            LOG.error("An unexpected error occurred processing datanode {}. Aborting the admin workflow", dn, e);
            abortWorkflow(dn);
            tracked.remove();
        }
    }
}
Also used : NodeNotFoundException(org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException) DatanodeDetails(org.apache.hadoop.hdds.protocol.DatanodeDetails)

Aggregations

NodeNotFoundException (org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException)25 DatanodeDetails (org.apache.hadoop.hdds.protocol.DatanodeDetails)16 ArrayList (java.util.ArrayList)7 IOException (java.io.IOException)6 ContainerID (org.apache.hadoop.hdds.scm.container.ContainerID)4 NodeStatus (org.apache.hadoop.hdds.scm.node.NodeStatus)4 UUID (java.util.UUID)3 HddsProtos (org.apache.hadoop.hdds.protocol.proto.HddsProtos)3 DatanodeAdminError (org.apache.hadoop.hdds.scm.DatanodeAdminError)3 InvalidStateTransitionException (org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException)3 List (java.util.List)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Collectors (java.util.stream.Collectors)2 NodeState (org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState)2 ContainerReplicaProto (org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto)2 ContainerNotFoundException (org.apache.hadoop.hdds.scm.container.ContainerNotFoundException)2 DatanodeInfo (org.apache.hadoop.hdds.scm.node.DatanodeInfo)2 Pipeline (org.apache.hadoop.hdds.scm.pipeline.Pipeline)2 PipelineID (org.apache.hadoop.hdds.scm.pipeline.PipelineID)2 Longs (com.google.common.primitives.Longs)1