use of org.opensearch.cluster.NotMasterException in project OpenSearch by opensearch-project.
the class JoinTaskExecutor method execute.
@Override
public ClusterTasksResult<Task> execute(ClusterState currentState, List<Task> joiningNodes) throws Exception {
final ClusterTasksResult.Builder<Task> results = ClusterTasksResult.builder();
final DiscoveryNodes currentNodes = currentState.nodes();
boolean nodesChanged = false;
ClusterState.Builder newState;
if (joiningNodes.size() == 1 && joiningNodes.get(0).isFinishElectionTask()) {
return results.successes(joiningNodes).build(currentState);
} else if (currentNodes.getMasterNode() == null && joiningNodes.stream().anyMatch(Task::isBecomeMasterTask)) {
assert joiningNodes.stream().anyMatch(Task::isFinishElectionTask) : "becoming a master but election is not finished " + joiningNodes;
// use these joins to try and become the master.
// Note that we don't have to do any validation of the amount of joining nodes - the commit
// during the cluster state publishing guarantees that we have enough
newState = becomeMasterAndTrimConflictingNodes(currentState, joiningNodes);
nodesChanged = true;
} else if (currentNodes.isLocalNodeElectedMaster() == false) {
logger.trace("processing node joins, but we are not the master. current master: {}", currentNodes.getMasterNode());
throw new NotMasterException("Node [" + currentNodes.getLocalNode() + "] not master for join request");
} else {
newState = ClusterState.builder(currentState);
}
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(newState.nodes());
assert nodesBuilder.isLocalNodeElectedMaster();
Version minClusterNodeVersion = newState.nodes().getMinNodeVersion();
Version maxClusterNodeVersion = newState.nodes().getMaxNodeVersion();
// we only enforce major version transitions on a fully formed clusters
final boolean enforceMajorVersion = currentState.getBlocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false;
// processing any joins
Map<String, String> joiniedNodeNameIds = new HashMap<>();
for (final Task joinTask : joiningNodes) {
if (joinTask.isBecomeMasterTask() || joinTask.isFinishElectionTask()) {
// noop
} else if (currentNodes.nodeExistsWithSameRoles(joinTask.node()) && !currentNodes.nodeExistsWithBWCVersion(joinTask.node())) {
logger.debug("received a join request for an existing node [{}]", joinTask.node());
} else {
final DiscoveryNode node = joinTask.node();
try {
if (enforceMajorVersion) {
ensureMajorVersionBarrier(node.getVersion(), minClusterNodeVersion);
}
ensureNodesCompatibility(node.getVersion(), minClusterNodeVersion, maxClusterNodeVersion);
// we do this validation quite late to prevent race conditions between nodes joining and importing dangling indices
// we have to reject nodes that don't support all indices we have in this cluster
ensureIndexCompatibility(node.getVersion(), currentState.getMetadata());
nodesBuilder.add(node);
nodesChanged = true;
minClusterNodeVersion = Version.min(minClusterNodeVersion, node.getVersion());
maxClusterNodeVersion = Version.max(maxClusterNodeVersion, node.getVersion());
if (node.isMasterNode()) {
joiniedNodeNameIds.put(node.getName(), node.getId());
}
} catch (IllegalArgumentException | IllegalStateException e) {
results.failure(joinTask, e);
continue;
}
}
results.success(joinTask);
}
if (nodesChanged) {
rerouteService.reroute("post-join reroute", Priority.HIGH, ActionListener.wrap(r -> logger.trace("post-join reroute completed"), e -> logger.debug("post-join reroute failed", e)));
if (joiniedNodeNameIds.isEmpty() == false) {
Set<CoordinationMetadata.VotingConfigExclusion> currentVotingConfigExclusions = currentState.getVotingConfigExclusions();
Set<CoordinationMetadata.VotingConfigExclusion> newVotingConfigExclusions = currentVotingConfigExclusions.stream().map(e -> {
// Update nodeId in VotingConfigExclusion when a new node with excluded node name joins
if (CoordinationMetadata.VotingConfigExclusion.MISSING_VALUE_MARKER.equals(e.getNodeId()) && joiniedNodeNameIds.containsKey(e.getNodeName())) {
return new CoordinationMetadata.VotingConfigExclusion(joiniedNodeNameIds.get(e.getNodeName()), e.getNodeName());
} else {
return e;
}
}).collect(Collectors.toSet());
// if VotingConfigExclusions did get updated
if (newVotingConfigExclusions.equals(currentVotingConfigExclusions) == false) {
CoordinationMetadata.Builder coordMetadataBuilder = CoordinationMetadata.builder(currentState.coordinationMetadata()).clearVotingConfigExclusions();
newVotingConfigExclusions.forEach(coordMetadataBuilder::addVotingConfigExclusion);
Metadata newMetadata = Metadata.builder(currentState.metadata()).coordinationMetadata(coordMetadataBuilder.build()).build();
return results.build(allocationService.adaptAutoExpandReplicas(newState.nodes(nodesBuilder).metadata(newMetadata).build()));
}
}
return results.build(allocationService.adaptAutoExpandReplicas(newState.nodes(nodesBuilder).build()));
} else {
// for the joining node to finalize its join and set us as a master
return results.build(newState.build());
}
}
use of org.opensearch.cluster.NotMasterException in project OpenSearch by opensearch-project.
the class BatchedRerouteService method reroute.
/**
* Initiates a reroute.
*/
@Override
public final void reroute(String reason, Priority priority, ActionListener<ClusterState> listener) {
final List<ActionListener<ClusterState>> currentListeners;
synchronized (mutex) {
if (pendingRerouteListeners != null) {
if (priority.sameOrAfter(pendingTaskPriority)) {
logger.trace("already has pending reroute at priority [{}], adding [{}] with priority [{}] to batch", pendingTaskPriority, reason, priority);
pendingRerouteListeners.add(listener);
return;
} else {
logger.trace("already has pending reroute at priority [{}], promoting batch to [{}] and adding [{}]", pendingTaskPriority, priority, reason);
currentListeners = new ArrayList<>(1 + pendingRerouteListeners.size());
currentListeners.add(listener);
currentListeners.addAll(pendingRerouteListeners);
pendingRerouteListeners.clear();
pendingRerouteListeners = currentListeners;
pendingTaskPriority = priority;
}
} else {
logger.trace("no pending reroute, scheduling reroute [{}] at priority [{}]", reason, priority);
currentListeners = new ArrayList<>(1);
currentListeners.add(listener);
pendingRerouteListeners = currentListeners;
pendingTaskPriority = priority;
}
}
try {
clusterService.submitStateUpdateTask(CLUSTER_UPDATE_TASK_SOURCE + "(" + reason + ")", new ClusterStateUpdateTask(priority) {
@Override
public ClusterState execute(ClusterState currentState) {
final boolean currentListenersArePending;
synchronized (mutex) {
assert currentListeners.isEmpty() == (pendingRerouteListeners != currentListeners) : "currentListeners=" + currentListeners + ", pendingRerouteListeners=" + pendingRerouteListeners;
currentListenersArePending = pendingRerouteListeners == currentListeners;
if (currentListenersArePending) {
pendingRerouteListeners = null;
}
}
if (currentListenersArePending) {
logger.trace("performing batched reroute [{}]", reason);
return reroute.apply(currentState, reason);
} else {
logger.trace("batched reroute [{}] was promoted", reason);
return currentState;
}
}
@Override
public void onNoLongerMaster(String source) {
synchronized (mutex) {
if (pendingRerouteListeners == currentListeners) {
pendingRerouteListeners = null;
}
}
ActionListener.onFailure(currentListeners, new NotMasterException("delayed reroute [" + reason + "] cancelled"));
// no big deal, the new master will reroute again
}
@Override
public void onFailure(String source, Exception e) {
synchronized (mutex) {
if (pendingRerouteListeners == currentListeners) {
pendingRerouteListeners = null;
}
}
final ClusterState state = clusterService.state();
if (logger.isTraceEnabled()) {
logger.error(() -> new ParameterizedMessage("unexpected failure during [{}], current state:\n{}", source, state), e);
} else {
logger.error(() -> new ParameterizedMessage("unexpected failure during [{}], current state version [{}]", source, state.version()), e);
}
ActionListener.onFailure(currentListeners, new OpenSearchException("delayed reroute [" + reason + "] failed", e));
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
ActionListener.onResponse(currentListeners, newState);
}
});
} catch (Exception e) {
synchronized (mutex) {
assert currentListeners.isEmpty() == (pendingRerouteListeners != currentListeners);
if (pendingRerouteListeners == currentListeners) {
pendingRerouteListeners = null;
}
}
ClusterState state = clusterService.state();
logger.warn(() -> new ParameterizedMessage("failed to reroute routing table, current state:\n{}", state), e);
ActionListener.onFailure(currentListeners, new OpenSearchException("delayed reroute [" + reason + "] could not be submitted", e));
}
}
use of org.opensearch.cluster.NotMasterException in project OpenSearch by opensearch-project.
the class TransportClusterHealthAction method waitForEventsAndExecuteHealth.
private void waitForEventsAndExecuteHealth(final ClusterHealthRequest request, final ActionListener<ClusterHealthResponse> listener, final int waitCount, final long endTimeRelativeMillis) {
assert request.waitForEvents() != null;
if (request.local()) {
clusterService.submitStateUpdateTask("cluster_health (wait_for_events [" + request.waitForEvents() + "])", new LocalClusterUpdateTask(request.waitForEvents()) {
@Override
public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) {
return unchanged();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
final long timeoutInMillis = Math.max(0, endTimeRelativeMillis - threadPool.relativeTimeInMillis());
final TimeValue newTimeout = TimeValue.timeValueMillis(timeoutInMillis);
request.timeout(newTimeout);
executeHealth(request, clusterService.state(), listener, waitCount, observedState -> waitForEventsAndExecuteHealth(request, listener, waitCount, endTimeRelativeMillis));
}
@Override
public void onFailure(String source, Exception e) {
logger.error(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
listener.onFailure(e);
}
});
} else {
final TimeValue taskTimeout = TimeValue.timeValueMillis(Math.max(0, endTimeRelativeMillis - threadPool.relativeTimeInMillis()));
clusterService.submitStateUpdateTask("cluster_health (wait_for_events [" + request.waitForEvents() + "])", new ClusterStateUpdateTask(request.waitForEvents()) {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public TimeValue timeout() {
return taskTimeout;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
final long timeoutInMillis = Math.max(0, endTimeRelativeMillis - threadPool.relativeTimeInMillis());
final TimeValue newTimeout = TimeValue.timeValueMillis(timeoutInMillis);
request.timeout(newTimeout);
// we must use the state from the applier service, because if the state-not-recovered block is in place then the
// applier service has a different view of the cluster state from the one supplied here
final ClusterState appliedState = clusterService.state();
assert newState.stateUUID().equals(appliedState.stateUUID()) : newState.stateUUID() + " vs " + appliedState.stateUUID();
executeHealth(request, appliedState, listener, waitCount, observedState -> waitForEventsAndExecuteHealth(request, listener, waitCount, endTimeRelativeMillis));
}
@Override
public void onNoLongerMaster(String source) {
logger.trace("stopped being master while waiting for events with priority [{}]. retrying.", request.waitForEvents());
// TransportMasterNodeAction implements the retry logic, which is triggered by passing a NotMasterException
listener.onFailure(new NotMasterException("no longer master. source: [" + source + "]"));
}
@Override
public void onFailure(String source, Exception e) {
if (e instanceof ProcessClusterEventTimeoutException) {
listener.onResponse(getResponse(request, clusterService.state(), waitCount, TimeoutState.TIMED_OUT));
} else {
logger.error(() -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
listener.onFailure(e);
}
}
});
}
}
use of org.opensearch.cluster.NotMasterException in project OpenSearch by opensearch-project.
the class TransportClusterStateAction method masterOperation.
@Override
protected void masterOperation(final ClusterStateRequest request, final ClusterState state, final ActionListener<ClusterStateResponse> listener) throws IOException {
final Predicate<ClusterState> acceptableClusterStatePredicate = request.waitForMetadataVersion() == null ? clusterState -> true : clusterState -> clusterState.metadata().version() >= request.waitForMetadataVersion();
final Predicate<ClusterState> acceptableClusterStateOrNotMasterPredicate = request.local() ? acceptableClusterStatePredicate : acceptableClusterStatePredicate.or(clusterState -> clusterState.nodes().isLocalNodeElectedMaster() == false);
if (acceptableClusterStatePredicate.test(state)) {
ActionListener.completeWith(listener, () -> buildResponse(request, state));
} else {
assert acceptableClusterStateOrNotMasterPredicate.test(state) == false;
new ClusterStateObserver(state, clusterService, request.waitForTimeout(), logger, threadPool.getThreadContext()).waitForNextChange(new ClusterStateObserver.Listener() {
@Override
public void onNewClusterState(ClusterState newState) {
if (acceptableClusterStatePredicate.test(newState)) {
ActionListener.completeWith(listener, () -> buildResponse(request, newState));
} else {
listener.onFailure(new NotMasterException("master stepped down waiting for metadata version " + request.waitForMetadataVersion()));
}
}
@Override
public void onClusterServiceClose() {
listener.onFailure(new NodeClosedException(clusterService.localNode()));
}
@Override
public void onTimeout(TimeValue timeout) {
try {
listener.onResponse(new ClusterStateResponse(state.getClusterName(), null, true));
} catch (Exception e) {
listener.onFailure(e);
}
}
}, acceptableClusterStateOrNotMasterPredicate);
}
}
use of org.opensearch.cluster.NotMasterException in project OpenSearch by opensearch-project.
the class TransportMasterNodeActionTests method testMasterFailoverAfterStepDown.
public void testMasterFailoverAfterStepDown() throws ExecutionException, InterruptedException {
Request request = new Request().masterNodeTimeout(TimeValue.timeValueHours(1));
PlainActionFuture<Response> listener = new PlainActionFuture<>();
final Response response = new Response();
setState(clusterService, ClusterStateCreationUtils.state(localNode, localNode, allNodes));
new Action("internal:testAction", transportService, clusterService, threadPool) {
@Override
protected void masterOperation(Request request, ClusterState state, ActionListener<Response> listener) throws Exception {
// The other node has become master, simulate failures of this node while publishing cluster state through ZenDiscovery
setState(clusterService, ClusterStateCreationUtils.state(localNode, remoteNode, allNodes));
Exception failure = randomBoolean() ? new FailedToCommitClusterStateException("Fake error") : new NotMasterException("Fake error");
listener.onFailure(failure);
}
}.execute(request, listener);
assertThat(transport.capturedRequests().length, equalTo(1));
CapturingTransport.CapturedRequest capturedRequest = transport.capturedRequests()[0];
assertTrue(capturedRequest.node.isMasterNode());
assertThat(capturedRequest.request, equalTo(request));
assertThat(capturedRequest.action, equalTo("internal:testAction"));
transport.handleResponse(capturedRequest.requestId, response);
assertTrue(listener.isDone());
assertThat(listener.get(), equalTo(response));
}
Aggregations