use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.
the class ContainerBalancer method moveContainer.
/**
 * Asks {@link ReplicationManager} to move the specified container from
 * source to target.
 *
 * <p>The returned future is recorded in {@code moveSelectionToFutureMap}
 * when the move is still in progress or has already completed normally.
 * Moves that could not be started, or that completed exceptionally, are
 * not recorded.
 *
 * @param source the source datanode
 * @param moveSelection the selected container to move and target datanode
 * @return false if an exception occurred, the move completed
 * exceptionally, or the move completed with a result other than
 * ReplicationManager.MoveResult.COMPLETED. Returns true if the move
 * completed with MoveResult.COMPLETED or move is not yet done
 */
private boolean moveContainer(DatanodeDetails source, ContainerMoveSelection moveSelection) {
  ContainerID container = moveSelection.getContainerID();
  CompletableFuture<ReplicationManager.MoveResult> future;
  try {
    future = replicationManager.move(container, source, moveSelection.getTargetNode());
  } catch (ContainerNotFoundException e) {
    LOG.warn("Could not find Container {} for container move", container, e);
    return false;
  } catch (NodeNotFoundException e) {
    LOG.warn("Container move failed for container {}", container, e);
    return false;
  }

  // Still running: track the future so its outcome can be inspected later,
  // and report the move as successfully scheduled.
  if (!future.isDone()) {
    moveSelectionToFutureMap.put(moveSelection, future);
    return true;
  }

  if (future.isCompletedExceptionally()) {
    // The two message fragments were previously concatenated without a
    // separating space, producing "...to target {}completed exceptionally".
    LOG.info("Container move for container {} from source {} to target {} "
        + "completed exceptionally", container, source.getUuidString(),
        moveSelection.getTargetNode().getUuidString());
    return false;
  }

  // Already done and not exceptional, so join() returns immediately
  // without throwing.
  ReplicationManager.MoveResult result = future.join();
  moveSelectionToFutureMap.put(moveSelection, future);
  return result == ReplicationManager.MoveResult.COMPLETED;
}
use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.
the class IncrementalContainerReportHandler method onMessage.
/**
 * Handles an incremental container report (ICR) sent by a datanode.
 *
 * <p>The report is ignored when the sending datanode cannot be resolved
 * through the node manager. Otherwise every replica in the report is
 * processed while holding the monitor of the resolved datanode object, and
 * the container manager is finally notified whether all replicas were
 * processed without a handled failure.
 *
 * @param report the incremental container report and its sender
 * @param publisher publisher handed on to the replica processing
 */
@Override
public void onMessage(final IncrementalContainerReportFromDatanode report, final EventPublisher publisher) {
  final DatanodeDetails reportedNode = report.getDatanodeDetails();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Processing incremental container report from data node {}", reportedNode.getUuid());
  }

  // Resolve the datanode instance held by the node manager; everything
  // below works on that instance rather than the one carried by the report.
  final DatanodeDetails knownNode = nodeManager.getNodeByUuid(reportedNode.getUuidString());
  if (knownNode == null) {
    LOG.warn("Received container report from unknown datanode {}", reportedNode);
    return;
  }

  boolean allReplicasHandled = true;
  // NOTE(review): locking on the datanode object presumably serializes this
  // handler with other report processing for the same node - confirm.
  synchronized (knownNode) {
    for (ContainerReplicaProto replica : report.getReport().getReportList()) {
      ContainerID containerId = ContainerID.valueOf(replica.getContainerID());
      ContainerInfo containerInfo = null;
      // Flipped to true only if the replica is processed without any of
      // the handled exceptions below being raised.
      boolean handled = false;
      try {
        try {
          containerInfo = getContainerManager().getContainer(containerId);
          // Ensure we reuse the same ContainerID instance in containerInfo
          containerId = containerInfo.containerID();
        } finally {
          // Runs even when the container lookup above throws: unless the
          // replica is reported as DELETED, the container is still
          // registered against the datanode.
          if (!replica.getState().equals(ContainerReplicaProto.State.DELETED)) {
            nodeManager.addContainer(knownNode, containerId);
          }
        }
        processContainerReplica(knownNode, containerInfo, replica, publisher);
        handled = true;
      } catch (ContainerNotFoundException notFound) {
        LOG.warn("Container {} not found!", replica.getContainerID());
      } catch (NodeNotFoundException unknownNode) {
        LOG.error("Received ICR from unknown datanode {}", report.getDatanodeDetails(), unknownNode);
      } catch (ContainerReplicaNotFoundException replicaMissing) {
        LOG.warn("Container {} replica not found!", replica.getContainerID());
      } catch (IOException | InvalidStateTransitionException failure) {
        LOG.error("Exception while processing ICR for container {}", replica.getContainerID(), failure);
      }
      if (!handled) {
        allReplicasHandled = false;
      }
    }
  }
  getContainerManager().notifyContainerReportProcessing(false, allReplicasHandled);
}
use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.
the class NodeStateManager method forceNodesToHealthyReadOnly.
/**
 * Switches every node that the node state map currently lists as HEALTHY
 * to HEALTHY_READONLY and, when an event is registered for
 * HEALTHY_READONLY, fires that event for the node.
 *
 * <p>If a listed node cannot be found while it is being updated, the
 * failure is logged together with its cause and the remaining nodes are
 * not processed, because the lookup failure leaves the loop.
 */
public void forceNodesToHealthyReadOnly() {
  try {
    // NOTE(review): the null first argument presumably means "any
    // operational state" - confirm against NodeStateMap#getNodes.
    List<UUID> nodes = nodeStateMap.getNodes(null, HEALTHY);
    for (UUID id : nodes) {
      DatanodeInfo node = nodeStateMap.getNodeInfo(id);
      nodeStateMap.updateNodeHealthState(node.getUuid(), HEALTHY_READONLY);
      if (state2EventMap.containsKey(HEALTHY_READONLY)) {
        eventPublisher.fireEvent(state2EventMap.get(HEALTHY_READONLY), node);
      }
    }
  } catch (NodeNotFoundException ex) {
    // Pass the exception as the trailing argument so the cause and stack
    // trace are logged instead of being silently dropped.
    LOG.error("Inconsistent NodeStateMap! {}", nodeStateMap, ex);
  }
}
use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.
the class NodeDecommissionManager method recommissionNodes.
/**
 * Recommissions each of the given hosts and reports the ones that could
 * not be recommissioned.
 *
 * @param nodes host strings identifying the datanodes to recommission
 * @return one {@link DatanodeAdminError} per datanode that was not found
 *     while recommissioning; empty when every datanode was processed
 * @throws InvalidHostStringException declared by this method; raised while
 *     mapping the host strings to datanodes
 */
public synchronized List<DatanodeAdminError> recommissionNodes(List<String> nodes) throws InvalidHostStringException {
  final List<DatanodeDetails> datanodes = mapHostnamesToDatanodes(nodes);
  final List<DatanodeAdminError> failures = new ArrayList<>();
  for (DatanodeDetails datanode : datanodes) {
    try {
      recommission(datanode);
    } catch (NodeNotFoundException e) {
      // The host strings were already validated and resolved to datanode
      // objects, so this should only happen if the node is removed in the
      // short window between that validation and the recommission call.
      // Log a warning, record an error for the host and carry on with the
      // remaining nodes.
      final String host = datanode.getHostName();
      LOG.warn("Host {} was not found in SCM. Ignoring the request to "
          + "recommission it.", host);
      failures.add(new DatanodeAdminError(host, "The host was not found in SCM"));
    }
  }
  return failures;
}
use of org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException in project ozone by apache.
the class DatanodeAdminMonitorImpl method processTransitioningNodes.
/**
 * Walks over all tracked datanodes and advances, completes or aborts the
 * admin workflow of each one based on its current node status.
 *
 * <p>A node is removed from the tracked set when its workflow is aborted,
 * its maintenance has expired, it is found dead, or its decommission
 * completes. A node that moves into maintenance stays tracked.
 */
private void processTransitioningNodes() {
  for (Iterator<DatanodeDetails> tracked = trackedNodes.iterator(); tracked.hasNext();) {
    final DatanodeDetails dn = tracked.next();
    try {
      NodeStatus nodeStatus = getNodeStatus(dn);

      if (!shouldContinueWorkflow(dn, nodeStatus)) {
        abortWorkflow(dn);
        tracked.remove();
        continue;
      }

      // Maintenance window is over: finish maintenance and stop tracking.
      if (nodeStatus.isMaintenance() && nodeStatus.operationalStateExpired()) {
        completeMaintenance(dn);
        tracked.remove();
        continue;
      }

      // Only nodes that are decommissioning or entering maintenance have
      // further work to do in this pass.
      if (!(nodeStatus.isDecommissioning() || nodeStatus.isEnteringMaintenance())) {
        continue;
      }

      // Evaluated left to right with short-circuiting: pipelines closed,
      // then the operational state in the node status equals the op state
      // persisted on the datanode, then containers replicated.
      final boolean readyToTransition = checkPipelinesClosedOnNode(dn)
          && nodeStatus.getOperationalState() == dn.getPersistedOpState()
          && checkContainersReplicatedOnNode(dn);
      if (!readyToTransition) {
        continue;
      }

      // CheckContainersReplicatedOnNode may take a short time to run, so
      // fetch the node status again and re-check health before completing.
      nodeStatus = getNodeStatus(dn);
      if (nodeStatus.isDead()) {
        LOG.warn("Datanode {} is dead and the admin workflow cannot " + "continue. The node will be put back to IN_SERVICE and " + "handled as a dead node", dn);
        putNodeBackInService(dn);
        tracked.remove();
      } else if (nodeStatus.isDecommissioning()) {
        completeDecommission(dn);
        tracked.remove();
      } else if (nodeStatus.isEnteringMaintenance()) {
        putIntoMaintenance(dn);
      }
    } catch (NodeNotFoundException e) {
      LOG.error("An unexpected error occurred processing datanode {}. " + "Aborting the admin workflow", dn, e);
      abortWorkflow(dn);
      tracked.remove();
    }
  }
}
Aggregations