Use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
The class UnicastZenPing, method resolveHostsLists.
/**
* Resolves a list of hosts to a list of discovery nodes. Each host is resolved into a transport address (or a collection of addresses
* if the number of ports is greater than one) and the transport addresses are used to create discovery nodes. Host lookups are done
* in parallel using the specified executor service, up to the specified resolve timeout.
*
* @param executorService the executor service used to parallelize hostname lookups
* @param logger logger used for logging messages regarding hostname lookups
* @param hosts the hosts to resolve
* @param limitPortCounts the number of ports to resolve (should be 1 for non-local transport)
* @param transportService the transport service
* @param nodeId_prefix a prefix to use for node ids
* @param resolveTimeout the timeout before returning from hostname lookups
* @return a list of discovery nodes with resolved transport addresses
*/
public static List<DiscoveryNode> resolveHostsLists(
        final ExecutorService executorService,
        final Logger logger,
        final List<String> hosts,
        final int limitPortCounts,
        final TransportService transportService,
        final String nodeId_prefix,
        final TimeValue resolveTimeout) throws InterruptedException {
    Objects.requireNonNull(executorService);
    Objects.requireNonNull(logger);
    Objects.requireNonNull(hosts);
    Objects.requireNonNull(transportService);
    Objects.requireNonNull(nodeId_prefix);
    Objects.requireNonNull(resolveTimeout);
    if (resolveTimeout.nanos() < 0) {
        throw new IllegalArgumentException("resolve timeout must be non-negative but was [" + resolveTimeout + "]");
    }
    // create tasks to submit to the executor service; we will wait up to resolveTimeout for these tasks to complete
    final List<Callable<TransportAddress[]>> callables = hosts
        .stream()
        .map(hn -> (Callable<TransportAddress[]>) () -> transportService.addressesFromString(hn, limitPortCounts))
        .collect(Collectors.toList());
    final List<Future<TransportAddress[]>> futures =
        executorService.invokeAll(callables, resolveTimeout.nanos(), TimeUnit.NANOSECONDS);
    final List<DiscoveryNode> discoveryNodes = new ArrayList<>();
    final Set<TransportAddress> localAddresses = new HashSet<>();
    localAddresses.add(transportService.boundAddress().publishAddress());
    localAddresses.addAll(Arrays.asList(transportService.boundAddress().boundAddresses()));
    // ExecutorService#invokeAll guarantees that the futures are returned in the iteration order of the tasks so we
    // can associate the hostname with the corresponding task by iterating together
    final Iterator<String> it = hosts.iterator();
    for (final Future<TransportAddress[]> future : futures) {
        final String hostname = it.next();
        if (!future.isCancelled()) {
            assert future.isDone();
            try {
                final TransportAddress[] addresses = future.get();
                logger.trace("resolved host [{}] to {}", hostname, addresses);
                for (int addressId = 0; addressId < addresses.length; addressId++) {
                    final TransportAddress address = addresses[addressId];
                    // no point in pinging ourselves
                    if (localAddresses.contains(address) == false) {
                        discoveryNodes.add(
                            new DiscoveryNode(
                                nodeId_prefix + hostname + "_" + addressId + "#",
                                address,
                                emptyMap(),
                                emptySet(),
                                Version.CURRENT.minimumCompatibilityVersion()));
                    }
                }
            } catch (final ExecutionException e) {
                assert e.getCause() != null;
                final String message = "failed to resolve host [" + hostname + "]";
                logger.warn(message, e.getCause());
            }
        } else {
            logger.warn("timed out after [{}] resolving host [{}]", resolveTimeout, hostname);
        }
    }
    return discoveryNodes;
}
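The pattern above, fanning blocking lookups out with ExecutorService#invokeAll and pairing each future back to its input by iteration order, is useful well beyond Elasticsearch. Below is a minimal, self-contained sketch of the same idea using plain InetAddress lookups in place of TransportService#addressesFromString; ParallelResolve and resolveAll are illustrative names, not Elasticsearch API.

import java.net.InetAddress;
import java.util.*;
import java.util.concurrent.*;

public final class ParallelResolve {

    // Resolve each hostname in parallel, waiting at most timeoutMillis overall.
    // ExecutorService#invokeAll returns futures in task order, so each future
    // can be paired with its hostname by iterating both lists together.
    static Map<String, List<InetAddress>> resolveAll(ExecutorService executor,
                                                     List<String> hosts,
                                                     long timeoutMillis) throws InterruptedException {
        List<Callable<InetAddress[]>> tasks = new ArrayList<>();
        for (String host : hosts) {
            tasks.add(() -> InetAddress.getAllByName(host));
        }
        List<Future<InetAddress[]>> futures =
            executor.invokeAll(tasks, timeoutMillis, TimeUnit.MILLISECONDS);
        Map<String, List<InetAddress>> resolved = new LinkedHashMap<>();
        Iterator<String> it = hosts.iterator();
        for (Future<InetAddress[]> future : futures) {
            String host = it.next();
            if (future.isCancelled()) {
                continue; // lookup timed out; mirror the warn-and-skip behavior above
            }
            try {
                resolved.put(host, Arrays.asList(future.get()));
            } catch (ExecutionException e) {
                // lookup failed (e.g. an unknown host); skip it, as above
            }
        }
        return resolved;
    }

    public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        try {
            System.out.println(resolveAll(executor, List.of("localhost"), 5_000));
        } finally {
            executor.shutdown();
        }
    }
}

As in resolveHostsLists, invokeAll cancels any task that has not finished within the timeout, so a cancelled future cleanly marks a host that could not be resolved in time.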
Use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
The class ZenDiscovery, method doStop.
@Override
protected void doStop() {
    joinThreadControl.stop();
    masterFD.stop("zen disco stop");
    nodesFD.stop();
    // stop any ongoing pinging
    Releasables.close(zenPing);
    DiscoveryNodes nodes = nodes();
    if (sendLeaveRequest) {
        if (nodes.getMasterNode() == null) {
            // if we don't know who the master is, there is nothing to do here
        } else if (!nodes.isLocalNodeElectedMaster()) {
            try {
                membership.sendLeaveRequestBlocking(nodes.getMasterNode(), nodes.getLocalNode(), TimeValue.timeValueSeconds(1));
            } catch (Exception e) {
                logger.debug(
                    (Supplier<?>) () -> new ParameterizedMessage(
                        "failed to send leave request to master [{}]", nodes.getMasterNode()), e);
            }
        } else {
            // we're the master -> let the other potential masters know that we left, so a master election starts
            // now rather than waiting for masterFD to time out
            DiscoveryNode[] possibleMasters = electMaster.nextPossibleMasters(nodes.getNodes().values(), 5);
            for (DiscoveryNode possibleMaster : possibleMasters) {
                if (nodes.getLocalNode().equals(possibleMaster)) {
                    continue;
                }
                try {
                    membership.sendLeaveRequest(nodes.getLocalNode(), possibleMaster);
                } catch (Exception e) {
                    logger.debug(
                        (Supplier<?>) () -> new ParameterizedMessage(
                            "failed to send leave request from master [{}] to possible master [{}]",
                            nodes.getMasterNode(), possibleMaster), e);
                }
            }
        }
    }
}
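The nextPossibleMasters call above lets a departing master notify only a handful of likely successors rather than the whole cluster. A simplified, self-contained sketch of such a selection is shown below; the Node record and the sorting by node id are illustrative assumptions, not the actual ElectMasterService implementation, which uses its own candidate comparator.

import java.util.*;
import java.util.stream.Collectors;

final class NextMasters {

    record Node(String id, boolean masterEligible) {}

    // pick up to n master-eligible nodes, in a deterministic order, as plausible next masters
    static List<Node> nextPossibleMasters(Collection<Node> nodes, int n) {
        return nodes.stream()
            .filter(Node::masterEligible)
            .sorted(Comparator.comparing(Node::id))
            .limit(n)
            .collect(Collectors.toList());
    }
}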
Use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
The class ZenDiscovery, method innerJoinCluster.
/**
* the main function of a join thread. This function is guaranteed to join the cluster
* or spawn a new join thread upon failure to do so.
*/
private void innerJoinCluster() {
    DiscoveryNode masterNode = null;
    final Thread currentThread = Thread.currentThread();
    nodeJoinController.startElectionContext();
    while (masterNode == null && joinThreadControl.joinThreadActive(currentThread)) {
        masterNode = findMaster();
    }
    if (!joinThreadControl.joinThreadActive(currentThread)) {
        logger.trace("thread is no longer in currentJoinThread. Stopping.");
        return;
    }
    if (clusterService.localNode().equals(masterNode)) {
        // we count as one
        final int requiredJoins = Math.max(0, electMaster.minimumMasterNodes() - 1);
        logger.debug("elected as master, waiting for incoming joins ([{}] needed)", requiredJoins);
        nodeJoinController.waitToBeElectedAsMaster(requiredJoins, masterElectionWaitForJoinsTimeout,
            new NodeJoinController.ElectionCallback() {
                @Override
                public void onElectedAsMaster(ClusterState state) {
                    joinThreadControl.markThreadAsDone(currentThread);
                    // we only start nodesFD if we are the master (it may be that we received a cluster state while pinging)
                    nodesFD.updateNodesAndPing(state);
                }

                @Override
                public void onFailure(Throwable t) {
                    logger.trace("failed while waiting for nodes to join, rejoining", t);
                    joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
                }
            });
    } else {
        // process any incoming joins (they will fail because we are not the master)
        nodeJoinController.stopElectionContext(masterNode + " elected");
        // send join request
        final boolean success = joinElectedMaster(masterNode);
        // finalize join through the cluster state update thread
        final DiscoveryNode finalMasterNode = masterNode;
        clusterService.submitStateUpdateTask("finalize_join (" + masterNode + ")", new LocalClusterUpdateTask() {
            @Override
            public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) throws Exception {
                if (!success) {
                    // failed to join. Try again...
                    joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
                    return unchanged();
                }
                if (currentState.getNodes().getMasterNode() == null) {
                    // post 1.3.0, the master should publish a new cluster state before acking our join request,
                    // so by now we should have a valid master.
                    logger.debug("no master node is set, despite join request completing. retrying pings.");
                    joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
                    return unchanged();
                }
                if (!currentState.getNodes().getMasterNode().equals(finalMasterNode)) {
                    return joinThreadControl.stopRunningThreadAndRejoin(currentState, "master_switched_while_finalizing_join");
                }
                // note: we do not have to start master fault detection here because it's set at
                // {@link #processNextPendingClusterState} when the first cluster state arrives.
                joinThreadControl.markThreadAsDone(currentThread);
                return unchanged();
            }

            @Override
            public void onFailure(String source, @Nullable Exception e) {
                logger.error("unexpected error while trying to finalize cluster join", e);
                joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
            }
        });
    }
}
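The while loop at the top of innerJoinCluster retries findMaster until it either succeeds or the thread is deposed. The sketch below isolates that thread-control idea; JoinControl is an illustrative stand-in for ZenDiscovery's JoinThreadControl, which additionally coordinates with the cluster state thread.

import java.util.concurrent.atomic.AtomicReference;

final class JoinControl {

    // the one thread currently allowed to run the join loop
    private final AtomicReference<Thread> currentJoinThread = new AtomicReference<>();

    boolean joinThreadActive(Thread t) {
        return t.equals(currentJoinThread.get());
    }

    void startNewThread(Runnable joinLoop) {
        Thread t = new Thread(joinLoop, "join-thread");
        currentJoinThread.set(t); // any previously active join thread will notice and exit
        t.start();
    }

    void stop() {
        currentJoinThread.set(null);
    }
}

A join loop checks joinThreadActive(Thread.currentThread()) on every iteration, so replacing or stopping the control makes a deposed thread fall out of its loop cleanly instead of being interrupted.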
Use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
The class ZenDiscovery, method findMaster.
private DiscoveryNode findMaster() {
    logger.trace("starting to ping");
    List<ZenPing.PingResponse> fullPingResponses = pingAndWait(pingTimeout).toList();
    if (fullPingResponses == null) {
        logger.trace("No full ping responses");
        return null;
    }
    if (logger.isTraceEnabled()) {
        StringBuilder sb = new StringBuilder();
        if (fullPingResponses.size() == 0) {
            sb.append(" {none}");
        } else {
            for (ZenPing.PingResponse pingResponse : fullPingResponses) {
                sb.append("\n\t--> ").append(pingResponse);
            }
        }
        logger.trace("full ping responses:{}", sb);
    }
    final DiscoveryNode localNode = clusterService.localNode();
    // add ourselves
    assert fullPingResponses.stream().map(ZenPing.PingResponse::node)
        .filter(n -> n.equals(localNode)).findAny().isPresent() == false;
    fullPingResponses.add(new ZenPing.PingResponse(localNode, null, clusterService.state()));
    // filter responses
    final List<ZenPing.PingResponse> pingResponses = filterPingResponses(fullPingResponses, masterElectionIgnoreNonMasters, logger);
    List<DiscoveryNode> activeMasters = new ArrayList<>();
    for (ZenPing.PingResponse pingResponse : pingResponses) {
        // we can't include the local node as an active master, otherwise we may end up electing ourselves without
        // any check / verification from other nodes in ZenDiscovery#innerJoinCluster()
        if (pingResponse.master() != null && !localNode.equals(pingResponse.master())) {
            activeMasters.add(pingResponse.master());
        }
    }
    // master-eligible nodes discovered during pinging
    List<ElectMasterService.MasterCandidate> masterCandidates = new ArrayList<>();
    for (ZenPing.PingResponse pingResponse : pingResponses) {
        if (pingResponse.node().isMasterNode()) {
            masterCandidates.add(new ElectMasterService.MasterCandidate(pingResponse.node(), pingResponse.getClusterStateVersion()));
        }
    }
    if (activeMasters.isEmpty()) {
        if (electMaster.hasEnoughCandidates(masterCandidates)) {
            final ElectMasterService.MasterCandidate winner = electMaster.electMaster(masterCandidates);
            logger.trace("candidate {} won election", winner);
            return winner.getNode();
        } else {
            // if we don't have enough master-eligible nodes, we bail, because there are not enough masters to elect from
            logger.trace("not enough master nodes [{}]", masterCandidates);
            return null;
        }
    } else {
        assert !activeMasters.contains(localNode) : "local node should never be elected as master when other nodes indicate an active master";
        // let's tie-break between the discovered active masters
        return electMaster.tieBreakActiveMasters(activeMasters);
    }
}
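The branching at the end of findMaster reduces to a small rule: if no active master was seen, elect one only when a quorum of master-eligible candidates responded, preferring the candidate with the newest cluster state; otherwise defer to an already-active master. A self-contained sketch of the quorum-plus-best-candidate step follows; Candidate and SimpleElection are hypothetical stand-ins for ElectMasterService.MasterCandidate and its election logic.

import java.util.*;

record Candidate(String nodeId, long clusterStateVersion) {}

final class SimpleElection {

    // prefer the highest cluster state version; break ties with the lowest node id
    static final Comparator<Candidate> BEST_FIRST =
        Comparator.comparingLong(Candidate::clusterStateVersion).reversed()
                  .thenComparing(Candidate::nodeId);

    static Optional<Candidate> electMaster(List<Candidate> candidates, int minimumMasterNodes) {
        if (candidates.size() < minimumMasterNodes) {
            return Optional.empty(); // no quorum: bail, like hasEnoughCandidates above
        }
        return candidates.stream().min(BEST_FIRST);
    }
}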
Use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
The class PendingClusterStatesQueue, method markAsProcessed.
/**
 * Indicates that a cluster state was successfully processed. Any committed state that is
 * {@link ClusterState#supersedes(ClusterState)}-ed by the processed state will be marked as processed as well.
 * <p>
 * NOTE: successfully processing a state indicates that we are following the master it came from. Any committed state
 * from another master will be failed by this method.
 */
public synchronized void markAsProcessed(ClusterState state) {
    if (findState(state.stateUUID()) == null) {
        throw new IllegalStateException("can't resolve processed cluster state with uuid [" + state.stateUUID() + "], version [" + state.version() + "]");
    }
    final DiscoveryNode currentMaster = state.nodes().getMasterNode();
    assert currentMaster != null : "processed cluster state must have a master. " + state;
    // fail or remove any incoming state from a different master
    // respond to any committed state from the same master with same or lower version (we processed a higher version)
    ArrayList<ClusterStateContext> contextsToRemove = new ArrayList<>();
    for (int index = 0; index < pendingStates.size(); index++) {
        final ClusterStateContext pendingContext = pendingStates.get(index);
        final ClusterState pendingState = pendingContext.state;
        final DiscoveryNode pendingMasterNode = pendingState.nodes().getMasterNode();
        if (Objects.equals(currentMaster, pendingMasterNode) == false) {
            contextsToRemove.add(pendingContext);
            if (pendingContext.committed()) {
                // this is a committed state, warn
                logger.warn("received a cluster state (uuid[{}]/v[{}]) from a different master than the current one,"
                        + " rejecting (received {}, current {})",
                    pendingState.stateUUID(), pendingState.version(), pendingMasterNode, currentMaster);
                pendingContext.listener.onNewClusterStateFailed(
                    new IllegalStateException("cluster state from a different master than the current one,"
                        + " rejecting (received " + pendingMasterNode + ", current " + currentMaster + ")"));
            } else {
                logger.trace("removing non-committed state with uuid[{}]/v[{}] from [{}] - a state from"
                        + " [{}] was successfully processed",
                    pendingState.stateUUID(), pendingState.version(), pendingMasterNode, currentMaster);
            }
        } else if (pendingState.stateUUID().equals(state.stateUUID())) {
            assert pendingContext.committed() : "processed cluster state is not committed " + state;
            contextsToRemove.add(pendingContext);
            pendingContext.listener.onNewClusterStateProcessed();
        } else if (state.version() >= pendingState.version()) {
            logger.trace("processing pending state uuid[{}]/v[{}] together with state uuid[{}]/v[{}]",
                pendingState.stateUUID(), pendingState.version(), state.stateUUID(), state.version());
            contextsToRemove.add(pendingContext);
            if (pendingContext.committed()) {
                pendingContext.listener.onNewClusterStateProcessed();
            }
        }
    }
    // now ack the processed states
    pendingStates.removeAll(contextsToRemove);
    assert findState(state.stateUUID()) == null : "state was marked as processed but can still be found in pending list " + state;
}
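The second and third branches above implement the superseding rule the javadoc refers to: a pending state is resolved together with the processed state when both come from the same master and the processed version is the same or higher. Below is a tiny sketch of just that predicate; PendingState is a hypothetical stand-in for ClusterStateContext.

import java.util.Objects;

record PendingState(String masterNodeId, String stateUUID, long version, boolean committed) {}

final class SupersedeCheck {

    // a processed state resolves a pending one when both come from the same
    // master and the processed version is the same or higher
    static boolean supersedes(PendingState processed, PendingState pending) {
        return Objects.equals(processed.masterNodeId(), pending.masterNodeId())
            && processed.version() >= pending.version();
    }
}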