Use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
The class Coordinator, method publish.
/**
 * Starts publishing the cluster state carried by {@code clusterChangedEvent}.
 *
 * <p>All work happens while holding {@code mutex}. The publication is rejected (by failing
 * {@code publishListener} with a {@link FailedToCommitClusterStateException}) when this node is not in
 * {@code Mode.LEADER}, when its current term differs from the term of the state to publish, or when
 * another publication is already registered in {@code currentPublication}. Any exception thrown while
 * setting up or starting the publication is logged at debug level and also reported to
 * {@code publishListener} as a {@link FailedToCommitClusterStateException}.
 *
 * @param clusterChangedEvent event holding the cluster state to publish and the source of the change
 * @param publishListener     listener that is failed on the rejection and error paths shown here; it is also
 *                            handed to the created {@code CoordinatorPublication}
 * @param ackListener         listener handed to the created {@code CoordinatorPublication}
 */
@Override
public void publish(ClusterChangedEvent clusterChangedEvent, ActionListener<Void> publishListener, AckListener ackListener) {
try {
synchronized (mutex) {
// Only a leader whose term matches the term of the state being published may publish.
if (mode != Mode.LEADER || getCurrentTerm() != clusterChangedEvent.state().term()) {
LOGGER.debug(() -> new ParameterizedMessage("[{}] failed publication as node is no longer master for term {}", clusterChangedEvent.source(), clusterChangedEvent.state().term()));
publishListener.onFailure(new FailedToCommitClusterStateException("node is no longer master for term " + clusterChangedEvent.state().term() + " while handling publication"));
return;
}
// A publication is already registered: this trips the assertion when assertions are enabled,
// otherwise the new publication is rejected with a warning.
if (currentPublication.isPresent()) {
assert false : "[" + currentPublication.get() + "] in progress, cannot start new publication";
LOGGER.warn(() -> new ParameterizedMessage("[{}] failed publication as already publication in progress", clusterChangedEvent.source()));
publishListener.onFailure(new FailedToCommitClusterStateException("publication " + currentPublication.get() + " already in progress"));
return;
}
assert assertPreviousStateConsistency(clusterChangedEvent);
final ClusterState clusterState = clusterChangedEvent.state();
// The state being published is expected to contain the local node.
assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())) : getLocalNode() + " should be in published " + clusterState;
final PublicationTransportHandler.PublicationContext publicationContext = publicationHandler.newPublicationContext(clusterChangedEvent);
final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState);
final CoordinatorPublication publication = new CoordinatorPublication(publishRequest, publicationContext, new ListenableFuture<>(), ackListener, publishListener);
// Register the publication before starting it so that the in-progress check above sees it.
currentPublication = Optional.of(publication);
// Point the leader checker, followers checker and lag detector at the node set of the state being published.
final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes();
leaderChecker.setCurrentNodes(publishNodes);
followersChecker.setCurrentNodes(publishNodes);
lagDetector.setTrackedNodes(publishNodes);
// The nodes currently reported as faulty by the followers checker are passed to the publication at start.
publication.start(followersChecker.getFaultyNodes());
}
} catch (Exception e) {
// Any failure while preparing or starting the publication is surfaced to the caller as a failed commit.
LOGGER.debug(() -> new ParameterizedMessage("[{}] publishing failed", clusterChangedEvent.source()), e);
publishListener.onFailure(new FailedToCommitClusterStateException("publishing failed", e));
}
}
Use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
The class Coordinator, method clusterStateWithNoMasterBlock.
/**
 * Derives a cluster state that has no master node id and carries the "no master" global block.
 *
 * <p>If {@code clusterState} already has no master node id it is returned unchanged. Otherwise a new
 * state is built from it with the block supplied by {@code noMasterBlockService} added as a global
 * block and the master node id cleared.
 *
 * @param clusterState the state to derive from
 * @return {@code clusterState} itself when it has no master node id, otherwise the rebuilt state
 */
private ClusterState clusterStateWithNoMasterBlock(ClusterState clusterState) {
    final boolean masterKnown = clusterState.nodes().getMasterNodeId() != null;
    if (!masterKnown) {
        return clusterState;
    }

    // The block is not removed first: it is expected to be absent here, which the assertion checks.
    assert clusterState.blocks().hasGlobalBlockWithId(NO_MASTER_BLOCK_ID) == false : "NO_MASTER_BLOCK should only be added by Coordinator";

    final ClusterBlocks blocksWithNoMasterBlock = ClusterBlocks.builder()
        .blocks(clusterState.blocks())
        .addGlobalBlock(noMasterBlockService.getNoMasterBlock())
        .build();
    final DiscoveryNodes nodesWithoutMaster = new DiscoveryNodes.Builder(clusterState.nodes())
        .masterNodeId(null)
        .build();

    return ClusterState.builder(clusterState)
        .blocks(blocksWithNoMasterBlock)
        .nodes(nodesWithoutMaster)
        .build();
}
Use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
The class VerifyNodeRepositoryAction, method verify.
/**
 * Verifies a repository on the master and data nodes of the current cluster state and completes
 * {@code listener} once every targeted node has answered.
 *
 * <p>The local node is verified in-process via {@code doVerify}; every other node receives a
 * {@code VerifyNodeRepositoryRequest} over the transport service. Failures are collected per node and
 * handed, together with the list of targeted nodes, to {@code finishVerification}.
 *
 * <p>If no node is targeted (empty node set, or every node filtered out), the verification is finished
 * immediately. Without this the pending-response counter would start at zero, never be decremented,
 * and {@code listener} would never be completed.
 *
 * @param repository        name of the repository to verify
 * @param readOnly          when {@code true}, nodes with a version before {@code Version.V_4_2_0} are skipped
 * @param verificationToken token forwarded to each node's verification
 * @param listener          completed through {@code finishVerification} after all targeted nodes have responded
 */
public void verify(String repository, boolean readOnly, String verificationToken, final ActionListener<List<DiscoveryNode>> listener) {
    final DiscoveryNodes discoNodes = clusterService.state().nodes();
    final DiscoveryNode localNode = discoNodes.getLocalNode();
    final ObjectContainer<DiscoveryNode> masterAndDataNodes = discoNodes.getMasterAndDataNodes().values();

    final List<DiscoveryNode> nodes = new ArrayList<>();
    for (ObjectCursor<DiscoveryNode> cursor : masterAndDataNodes) {
        final DiscoveryNode node = cursor.value;
        // NOTE(review): presumably older nodes cannot verify read-only repositories - confirm the reason for this cut-off.
        if (readOnly && node.getVersion().before(Version.V_4_2_0)) {
            continue;
        }
        nodes.add(node);
    }

    final CopyOnWriteArrayList<VerificationFailure> errors = new CopyOnWriteArrayList<>();
    if (nodes.isEmpty()) {
        // Nothing to wait for: finish right away so the listener is always completed.
        finishVerification(repository, listener, nodes, errors);
        return;
    }

    final AtomicInteger counter = new AtomicInteger(nodes.size());
    // Shared completion step: whichever node response arrives last triggers the final callback.
    final Runnable onNodeDone = () -> {
        if (counter.decrementAndGet() == 0) {
            finishVerification(repository, listener, nodes, errors);
        }
    };

    for (final DiscoveryNode node : nodes) {
        if (node.equals(localNode)) {
            try {
                doVerify(repository, verificationToken, localNode);
            } catch (Exception e) {
                LOGGER.warn(() -> new ParameterizedMessage("[{}] failed to verify repository", repository), e);
                errors.add(new VerificationFailure(node.getId(), e));
            }
            onNodeDone.run();
        } else {
            transportService.sendRequest(node, ACTION_NAME, new VerifyNodeRepositoryRequest(repository, verificationToken), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
                @Override
                public void handleResponse(TransportResponse.Empty response) {
                    onNodeDone.run();
                }

                @Override
                public void handleException(TransportException exp) {
                    // Record the failure before counting down so it is visible to the final callback.
                    errors.add(new VerificationFailure(node.getId(), exp));
                    onNodeDone.run();
                }
            });
        }
    }
}
Use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
The class SnapshotsService, method processSnapshotsOnRemovedNodes.
/**
 * Cleans up shard snapshots that were running on removed nodes.
 *
 * <p>Submits a cluster state update task that walks all in-progress snapshot entries:
 * <ul>
 *   <li>For entries in {@code STARTED} or {@code ABORTED} state, every shard that is not completed and is
 *       assigned to a node no longer present in the cluster state is marked {@code FAILED} with the
 *       reason "node shutdown". If that leaves all shards completed, the entry moves to {@code SUCCESS}.</li>
 *   <li>Entries in {@code INIT} state that are not tracked in {@code initializingSnapshots} are marked
 *       {@code ABORTED} and a delete of the snapshot is triggered.</li>
 * </ul>
 * The cluster state is only rebuilt when at least one entry changed. Failures of the update task and of
 * the triggered delete are logged together with their cause.
 */
private void processSnapshotsOnRemovedNodes() {
    clusterService.submitStateUpdateTask("update snapshot state after node removal", new ClusterStateUpdateTask() {
        @Override
        public ClusterState execute(ClusterState currentState) {
            DiscoveryNodes nodes = currentState.nodes();
            SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
            if (snapshots == null) {
                // No snapshots in progress, nothing to clean up.
                return currentState;
            }
            boolean changed = false;
            ArrayList<SnapshotsInProgress.Entry> entries = new ArrayList<>();
            for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) {
                SnapshotsInProgress.Entry updatedSnapshot = snapshot;
                if (snapshot.state() == State.STARTED || snapshot.state() == State.ABORTED) {
                    ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
                    boolean snapshotChanged = false;
                    for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshot.shards()) {
                        final ShardSnapshotStatus shardStatus = shardEntry.value;
                        final ShardId shardId = shardEntry.key;
                        if (!shardStatus.state().completed() && shardStatus.nodeId() != null) {
                            if (nodes.nodeExists(shardStatus.nodeId())) {
                                shards.put(shardId, shardStatus);
                            } else {
                                // TODO: Restart snapshot on another node?
                                snapshotChanged = true;
                                LOGGER.warn("failing snapshot of shard [{}] on closed node [{}]", shardId, shardStatus.nodeId());
                                shards.put(shardId, new ShardSnapshotStatus(shardStatus.nodeId(), ShardState.FAILED, "node shutdown", shardStatus.generation()));
                            }
                        } else {
                            // Completed or unassigned shards are carried over untouched.
                            shards.put(shardId, shardStatus);
                        }
                    }
                    if (snapshotChanged) {
                        changed = true;
                        ImmutableOpenMap<ShardId, ShardSnapshotStatus> shardsMap = shards.build();
                        if (!snapshot.state().completed() && completed(shardsMap.values())) {
                            // Failing the orphaned shards completed the last outstanding work of this snapshot.
                            updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.SUCCESS, shardsMap);
                        } else {
                            updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, snapshot.state(), shardsMap);
                        }
                    }
                    entries.add(updatedSnapshot);
                } else if (snapshot.state() == State.INIT && initializingSnapshots.contains(snapshot.snapshot()) == false) {
                    changed = true;
                    // Mark the snapshot as aborted as it failed to start from the previous master
                    updatedSnapshot = new SnapshotsInProgress.Entry(snapshot, State.ABORTED, snapshot.shards());
                    entries.add(updatedSnapshot);
                    // Clean up the snapshot that failed to start from the old master
                    deleteSnapshot(snapshot.snapshot(), new ActionListener<Void>() {
                        @Override
                        public void onResponse(Void aVoid) {
                            LOGGER.debug("cleaned up abandoned snapshot {} in INIT state", snapshot.snapshot());
                        }

                        @Override
                        public void onFailure(Exception e) {
                            // Trailing throwable is logged as the cause; the message text is unchanged.
                            LOGGER.warn("failed to clean up abandoned snapshot {} in INIT state", snapshot.snapshot(), e);
                        }
                    }, updatedSnapshot.repositoryStateId(), false);
                }
                // NOTE(review): entries matching neither branch above are not added to `entries`, so they
                // would be absent from the rebuilt SnapshotsInProgress when `changed` is true - confirm
                // this is intended.
                assert updatedSnapshot.shards().size() == snapshot.shards().size() : "Shard count changed during snapshot status update from [" + snapshot + "] to [" + updatedSnapshot + "]";
            }
            if (changed) {
                return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, new SnapshotsInProgress(unmodifiableList(entries))).build();
            }
            return currentState;
        }

        @Override
        public void onFailure(String source, Exception e) {
            // Keep the cause in the log so failed state updates can be diagnosed.
            LOGGER.warn("failed to update snapshot state after node removal", e);
        }
    });
}
Use of org.elasticsearch.cluster.node.DiscoveryNodes in project crate by crate.
The class GatewayService, method clusterChanged.
/**
 * Reacts to a cluster state change by deciding whether state recovery should be attempted.
 *
 * <p>Nothing happens when the service is stopped or closed, when the local node is not the elected
 * master, or when the state no longer carries {@code STATE_NOT_RECOVERED_BLOCK}. Recovery is also
 * skipped (with a debug log) while no master id is set or while any configured
 * {@code recover_after_*} node threshold is not met. Otherwise {@code performStateRecovery} is called
 * with a flag saying whether the recover-after-time setting must be honoured, and the reason why.
 *
 * @param event the cluster change event whose state is inspected
 */
@Override
public void clusterChanged(final ClusterChangedEvent event) {
    if (lifecycle.stoppedOrClosed()) {
        return;
    }

    final ClusterState state = event.state();
    if (!state.nodes().isLocalNodeElectedMaster()) {
        // only the elected master drives recovery
        return;
    }
    if (!state.blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) {
        // the not-recovered block is gone, so recovery already happened
        return;
    }

    final DiscoveryNodes nodes = state.nodes();

    // Preconditions: each unmet one is logged and ends the handling of this event.
    if (state.nodes().getMasterNodeId() == null) {
        LOGGER.debug("not recovering from gateway, no master elected yet");
        return;
    }
    if (recoverAfterNodes != -1 && nodes.getMasterAndDataNodes().size() < recoverAfterNodes) {
        LOGGER.debug("not recovering from gateway, nodes_size (data+master) [{}] < recover_after_nodes [{}]", nodes.getMasterAndDataNodes().size(), recoverAfterNodes);
        return;
    }
    if (recoverAfterDataNodes != -1 && nodes.getDataNodes().size() < recoverAfterDataNodes) {
        LOGGER.debug("not recovering from gateway, nodes_size (data) [{}] < recover_after_data_nodes [{}]", nodes.getDataNodes().size(), recoverAfterDataNodes);
        return;
    }
    if (recoverAfterMasterNodes != -1 && nodes.getMasterNodes().size() < recoverAfterMasterNodes) {
        LOGGER.debug("not recovering from gateway, nodes_size (master) [{}] < recover_after_master_nodes [{}]", nodes.getMasterNodes().size(), recoverAfterMasterNodes);
        return;
    }

    // Decide whether the recover-after-time delay applies, and record why.
    final boolean enforceRecoverAfterTime;
    final String reason;
    if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
        // no expectation configured at all: the timeout setting is honoured
        enforceRecoverAfterTime = true;
        reason = "recover_after_time was set to [" + recoverAfterTime + "]";
    } else if (expectedNodes != -1 && nodes.getMasterAndDataNodes().size() < expectedNodes) {
        // expected total node count not reached yet
        enforceRecoverAfterTime = true;
        reason = "expecting [" + expectedNodes + "] nodes, but only have [" + nodes.getMasterAndDataNodes().size() + "]";
    } else if (expectedDataNodes != -1 && nodes.getDataNodes().size() < expectedDataNodes) {
        // expected data node count not reached yet
        enforceRecoverAfterTime = true;
        reason = "expecting [" + expectedDataNodes + "] data nodes, but only have [" + nodes.getDataNodes().size() + "]";
    } else if (expectedMasterNodes != -1 && nodes.getMasterNodes().size() < expectedMasterNodes) {
        // expected master node count not reached yet
        enforceRecoverAfterTime = true;
        reason = "expecting [" + expectedMasterNodes + "] master nodes, but only have [" + nodes.getMasterNodes().size() + "]";
    } else {
        // every configured expectation is met: the timeout is ignored
        enforceRecoverAfterTime = false;
        reason = "";
    }

    performStateRecovery(enforceRecoverAfterTime, reason);
}
Aggregations