use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
the class AsyncShardFetch method fetchData.
/**
 * Returns the fetched data for this shard, starting asynchronous fetches for every cache entry
 * that {@code findNodesToFetch} reports as still needing one. As long as any fetch is in flight
 * (whether started earlier or by this very call) the returned result carries no data.
 * <p>
 * Because fetching is asynchronous, the supplied {@code ignoreNodes} are accumulated across calls
 * and are only handed back (and then cleared) once no fetch is outstanding any more.
 *
 * @param nodes       the current nodes; used to refresh the cache and to resolve node ids
 * @param ignoreNodes ids of nodes that should be ignored for this round
 * @return a result with {@code null} data while fetches are pending, otherwise the collected
 *         per-node data together with the failed and the ignored node ids
 * @throws IllegalStateException if this instance has been closed
 */
public synchronized FetchResult<T> fetchData(DiscoveryNodes nodes, Set<String> ignoreNodes) {
    if (closed) {
        throw new IllegalStateException(shardId + ": can't fetch data on closed async fetch");
    }
    nodesToIgnore.addAll(ignoreNodes);
    fillShardCacheWithDataNodes(cache, nodes);

    final Set<NodeEntry<T>> pending = findNodesToFetch(cache);
    if (!pending.isEmpty()) {
        // flag every entry as fetching before the single async request is dispatched
        for (NodeEntry<T> entry : pending) {
            entry.markAsFetching();
        }
        final DiscoveryNode[] targets = new DiscoveryNode[pending.size()];
        int slot = 0;
        for (NodeEntry<T> entry : pending) {
            targets[slot++] = nodes.get(entry.getNodeId());
        }
        asyncFetch(shardId, targets);
    }

    if (hasAnyNodeFetching(cache)) {
        // something is still in flight: a null payload tells the caller to come back later
        return new FetchResult<>(shardId, null, emptySet(), emptySet());
    }

    // nothing outstanding, so assemble the result from the cache
    final Map<DiscoveryNode, T> dataByNode = new HashMap<>();
    final Set<String> failed = new HashSet<>();
    final Iterator<Map.Entry<String, NodeEntry<T>>> cursor = cache.entrySet().iterator();
    while (cursor.hasNext()) {
        final Map.Entry<String, NodeEntry<T>> cached = cursor.next();
        final NodeEntry<T> entry = cached.getValue();
        final DiscoveryNode node = nodes.get(cached.getKey());
        if (node == null) {
            continue;
        }
        if (entry.isFailed()) {
            // forget failed entries so that a later round fetches them afresh
            cursor.remove();
            failed.add(entry.getNodeId());
        } else if (entry.getValue() != null) {
            dataByNode.put(node, entry.getValue());
        }
    }

    // snapshot the ignore list, then reset it: a complete round has finished and the
    // ignored nodes must be considered again if another full round is needed
    final Set<String> ignored = unmodifiableSet(new HashSet<>(nodesToIgnore));
    nodesToIgnore.clear();

    // this round may turn out to be insufficient, so request a retry when anything was skipped
    if (!failed.isEmpty() || !ignored.isEmpty()) {
        reroute(shardId, "nodes failed [" + failed.size() + "], ignored [" + ignored.size() + "]");
    }
    return new FetchResult<>(shardId, dataByNode, failed, ignored);
}
use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
the class ElectMasterService method sortedMasterNodes.
/**
 * Collects the master-eligible nodes out of {@code nodes} and sorts them with
 * {@code ElectMasterService::compareNodes}.
 *
 * @param nodes the candidate nodes, master-eligible or not
 * @return {@code null} when {@code nodes} yields no element at all; otherwise the sorted list of
 *         master-eligible nodes, which is empty when no candidate is master-eligible
 */
private List<DiscoveryNode> sortedMasterNodes(Iterable<DiscoveryNode> nodes) {
    List<DiscoveryNode> possibleNodes = CollectionUtils.iterableAsArrayList(nodes);
    if (possibleNodes.isEmpty()) {
        // deliberately null rather than an empty list: existing callers may depend on it
        return null;
    }
    // Drop non-master nodes in one bulk pass. Removing element by element through an iterator
    // shifts the tail of an array-backed list on every removal, which is quadratic in the worst case.
    possibleNodes.removeIf(node -> node.isMasterNode() == false);
    CollectionUtil.introSort(possibleNodes, ElectMasterService::compareNodes);
    return possibleNodes;
}
use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
the class IndicesClusterStateService method removeShards.
/**
 * Walks every shard currently held by {@code indicesService} and removes those that the new
 * cluster state no longer assigns to the local node in their present form:
 * <ul>
 * <li>shards without a routing entry on the local routing node,</li>
 * <li>shards whose routing entry is not the same allocation as the current one,</li>
 * <li>active shards whose new routing entry is initializing,</li>
 * <li>peer-recovering shards whose recovery source node has changed and whose recovery could
 * still be cancelled.</li>
 * </ul>
 * Removal here goes through {@code removeShard} only; per the inline notes, cleaning up shard data
 * is left to other components.
 *
 * @param state new cluster state
 */
private void removeShards(final ClusterState state) {
    final RoutingTable routingTable = state.routingTable();
    final DiscoveryNodes nodes = state.nodes();
    final String localNodeId = state.nodes().getLocalNodeId();
    assert localNodeId != null;
    // decisions are driven by the routing node of the local node (no deletion of data)
    final RoutingNode localRoutingNode = state.getRoutingNodes().node(localNodeId);
    for (AllocatedIndex<? extends Shard> indexService : indicesService) {
        for (Shard shard : indexService) {
            final ShardRouting existingRouting = shard.routingEntry();
            final ShardId shardId = existingRouting.shardId();
            final ShardRouting incomingRouting = localRoutingNode == null ? null : localRoutingNode.getByShardId(shardId);

            if (incomingRouting == null) {
                // no local cleanup needed here: IndicesStore cleans up once all shards are allocated
                logger.debug("{} removing shard (not allocated)", shardId);
                indexService.removeShard(shardId.id(), "removing shard (not allocated)");
                continue;
            }

            if (incomingRouting.isSameAllocation(existingRouting) == false) {
                logger.debug("{} removing shard (stale allocation id, stale {}, new {})", shardId, existingRouting, incomingRouting);
                indexService.removeShard(shardId.id(), "removing shard (stale copy)");
                continue;
            }

            if (incomingRouting.initializing() && existingRouting.active()) {
                // Possible when the node was isolated/gc-ed and rejoined, and a new shard with the same
                // allocation id got assigned to it. Batched cluster state processing, or shard fetching
                // that completes before the node receives a new cluster state, can lead to a new shard
                // being initialized under the allocation id of the currently started shard.
                logger.debug("{} removing shard (not active, current {}, new {})", shardId, existingRouting, incomingRouting);
                indexService.removeShard(shardId.id(), "removing shard (stale copy)");
                continue;
            }

            // Remaining case: same allocation. Only peer recoveries are of interest; a shard removed
            // here is re-initialized later in createOrUpdateShards.
            final boolean peerRecovery = incomingRouting.recoverySource() != null
                && incomingRouting.recoverySource().getType() == Type.PEER;
            if (peerRecovery == false) {
                continue;
            }
            final RecoveryState recoveryState = shard.recoveryState();
            final DiscoveryNode sourceNode = findSourceNodeForPeerRecovery(logger, routingTable, nodes, incomingRouting);
            // a successful cancel means that the shard was still recovering
            if (recoveryState.getSourceNode().equals(sourceNode) == false
                && recoveryTargetService.cancelRecoveriesForShard(shardId, "recovery source node changed")) {
                logger.debug("{} removing shard (recovery source changed), current [{}], global [{}], shard [{}])", shardId, recoveryState.getSourceNode(), sourceNode, incomingRouting);
                indexService.removeShard(shardId.id(), "removing shard (recovery source node changed)");
            }
        }
    }
}
use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
the class IndicesClusterStateService method updateFailedShardsCache.
/**
 * Reconciles the failed shards cache with the routing of the local node in the given state.
 * <p>
 * When the local node has no routing node the cache is cleared. Otherwise each cached entry whose
 * shard is no longer routed to this node, or is routed under a different allocation, is evicted.
 * For every entry that remains, the shard failure is sent again, provided the state knows a master.
 *
 * @param state new cluster state
 */
private void updateFailedShardsCache(final ClusterState state) {
    final RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
    if (localRoutingNode == null) {
        failedShardsCache.clear();
        return;
    }
    final DiscoveryNode masterNode = state.nodes().getMasterNode();
    // evict entries that left our routing table and resend failures the master has not applied yet
    final Iterator<Map.Entry<ShardId, ShardRouting>> cursor = failedShardsCache.entrySet().iterator();
    while (cursor.hasNext()) {
        final ShardRouting cachedFailure = cursor.next().getValue();
        final ShardRouting liveRouting = localRoutingNode.getByShardId(cachedFailure.shardId());
        final boolean stillAssigned = liveRouting != null && liveRouting.isSameAllocation(cachedFailure);
        if (stillAssigned == false) {
            cursor.remove();
        } else if (masterNode != null) {
            // TODO: can we remove this? Is resending shard failures the responsibility of shardStateAction?
            final String message = "master " + masterNode + " has not removed previously failed shard. resending shard failure";
            logger.trace("[{}] re-sending failed shard [{}], reason [{}]", liveRouting.shardId(), liveRouting, message);
            shardStateAction.localShardFailed(liveRouting, message, null, SHARD_STATE_ACTION_LISTENER, state);
        }
    }
}
use of org.elasticsearch.cluster.node.DiscoveryNode in project elasticsearch by elastic.
the class ElasticsearchExceptionTests method testFailureToAndFromXContentWithDetails.
/**
 * Picks one of seven failure scenarios at random, renders the failure with
 * {@code ElasticsearchException.generateFailureXContent} (last argument {@code true}), shuffles
 * the rendered content, parses it back with {@code ElasticsearchException.failureFromXContent}
 * and compares the outcome with a hand-built expected exception.
 */
public void testFailureToAndFromXContentWithDetails() throws IOException {
    final XContent xContent = randomFrom(XContentType.values()).xContent();

    Exception thrown;
    Throwable thrownCause;
    ElasticsearchException expectedRoot;
    ElasticsearchException expectedNested;
    ElasticsearchException expectedSuppressed;

    switch (randomIntBetween(0, 6)) {
        case 0:
            // Simple elasticsearch exception without cause
            thrown = new NoNodeAvailableException("A");
            expectedRoot = new ElasticsearchException("Elasticsearch exception [type=no_node_available_exception, reason=A]");
            expectedSuppressed = new ElasticsearchException("Elasticsearch exception [type=no_node_available_exception, reason=A]");
            expectedRoot.addSuppressed(expectedSuppressed);
            break;

        case 1:
            // Simple elasticsearch exception with headers (other metadata of type number are not parsed)
            thrown = new CircuitBreakingException("B", 5_000, 2_000);
            ((ElasticsearchException) thrown).addHeader("header_name", "0", "1");
            expectedRoot = new ElasticsearchException("Elasticsearch exception [type=circuit_breaking_exception, reason=B]");
            expectedRoot.addHeader("header_name", "0", "1");
            expectedSuppressed = new ElasticsearchException("Elasticsearch exception [type=circuit_breaking_exception, reason=B]");
            expectedSuppressed.addHeader("header_name", "0", "1");
            expectedRoot.addSuppressed(expectedSuppressed);
            break;

        case 2:
            // Elasticsearch exception with a cause, headers and parsable metadata
            thrownCause = new NullPointerException("var is null");
            thrown = new ScriptException("C", thrownCause, singletonList("stack"), "test", "painless");
            ((ElasticsearchException) thrown).addHeader("script_name", "my_script");
            expectedNested = new ElasticsearchException("Elasticsearch exception [type=null_pointer_exception, reason=var is null]");
            expectedRoot = new ElasticsearchException("Elasticsearch exception [type=script_exception, reason=C]", expectedNested);
            expectedRoot.addHeader("script_name", "my_script");
            expectedRoot.addMetadata("es.lang", "painless");
            expectedRoot.addMetadata("es.script", "test");
            expectedRoot.addMetadata("es.script_stack", "stack");
            expectedSuppressed = new ElasticsearchException("Elasticsearch exception [type=script_exception, reason=C]");
            expectedSuppressed.addHeader("script_name", "my_script");
            expectedSuppressed.addMetadata("es.lang", "painless");
            expectedSuppressed.addMetadata("es.script", "test");
            expectedSuppressed.addMetadata("es.script_stack", "stack");
            expectedRoot.addSuppressed(expectedSuppressed);
            break;

        case 3:
            // JDK exception without cause
            thrown = new IllegalStateException("D");
            expectedRoot = new ElasticsearchException("Elasticsearch exception [type=illegal_state_exception, reason=D]");
            expectedSuppressed = new ElasticsearchException("Elasticsearch exception [type=illegal_state_exception, reason=D]");
            expectedRoot.addSuppressed(expectedSuppressed);
            break;

        case 4:
            // JDK exception with cause
            thrownCause = new RoutingMissingException("idx", "type", "id");
            thrown = new RuntimeException("E", thrownCause);
            expectedNested = new ElasticsearchException("Elasticsearch exception [type=routing_missing_exception, " + "reason=routing is required for [idx]/[type]/[id]]");
            expectedNested.addMetadata("es.index", "idx");
            expectedNested.addMetadata("es.index_uuid", "_na_");
            expectedRoot = new ElasticsearchException("Elasticsearch exception [type=runtime_exception, reason=E]", expectedNested);
            expectedSuppressed = new ElasticsearchException("Elasticsearch exception [type=runtime_exception, reason=E]");
            expectedRoot.addSuppressed(expectedSuppressed);
            break;

        case 5:
            // Wrapped exception with cause
            thrownCause = new FileAlreadyExistsException("File exists");
            thrown = new BroadcastShardOperationFailedException(new ShardId("_index", "_uuid", 5), "F", thrownCause);
            expectedRoot = new ElasticsearchException("Elasticsearch exception [type=file_already_exists_exception, reason=File exists]");
            // oddly enough, it is the wrapped exception that shows up as the root cause...
            expectedSuppressed = new ElasticsearchException("Elasticsearch exception [type=broadcast_shard_operation_failed_exception, " + "reason=F]");
            expectedRoot.addSuppressed(expectedSuppressed);
            break;

        case 6: {
            // SearchPhaseExecutionException with cause and multiple failures
            DiscoveryNode node = new DiscoveryNode("node_g", buildNewFakeTransportAddress(), Version.CURRENT);
            thrownCause = new NodeClosedException(node);
            thrownCause = new NoShardAvailableActionException(new ShardId("_index_g", "_uuid_g", 6), "node_g", thrownCause);
            ShardSearchFailure[] shardFailures = new ShardSearchFailure[] {
                new ShardSearchFailure(
                    new ParsingException(0, 0, "Parsing g", null),
                    new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 61))),
                new ShardSearchFailure(
                    new RepositoryException("repository_g", "Repo"),
                    new SearchShardTarget("node_g", new ShardId(new Index("_index_g", "_uuid_g"), 62))),
                new ShardSearchFailure(new SearchContextMissingException(0L), null)
            };
            thrown = new SearchPhaseExecutionException("phase_g", "G", thrownCause, shardFailures);

            expectedNested = new ElasticsearchException("Elasticsearch exception [type=node_closed_exception, " + "reason=node closed " + node + "]");
            expectedNested = new ElasticsearchException("Elasticsearch exception [type=no_shard_available_action_exception, " + "reason=node_g]", expectedNested);
            expectedNested.addMetadata("es.index", "_index_g");
            expectedNested.addMetadata("es.index_uuid", "_uuid_g");
            expectedNested.addMetadata("es.shard", "6");
            expectedRoot = new ElasticsearchException("Elasticsearch exception [type=search_phase_execution_exception, " + "reason=G]", expectedNested);
            expectedRoot.addMetadata("es.phase", "phase_g");
            expectedRoot.addSuppressed(new ElasticsearchException("Elasticsearch exception [type=parsing_exception, reason=Parsing g]"));
            expectedRoot.addSuppressed(new ElasticsearchException("Elasticsearch exception [type=repository_exception, " + "reason=[repository_g] Repo]"));
            expectedRoot.addSuppressed(new ElasticsearchException("Elasticsearch exception [type=search_context_missing_exception, " + "reason=No search context found for id [0]]"));
            break;
        }

        default:
            throw new UnsupportedOperationException("Failed to generate randomized failure");
    }

    // the lambda below needs an effectively final reference to the chosen failure
    final Exception captured = thrown;
    BytesReference rendered = XContentHelper.toXContent((builder, params) -> {
        ElasticsearchException.generateFailureXContent(builder, params, captured, true);
        return builder;
    }, xContent.type(), randomBoolean());

    // shuffle the rendered content before it is parsed back
    try (XContentParser shuffleParser = createParser(xContent, rendered)) {
        rendered = shuffleXContent(shuffleParser, randomBoolean()).bytes();
    }

    final ElasticsearchException roundTripped;
    try (XContentParser readParser = createParser(xContent, rendered)) {
        assertEquals(XContentParser.Token.START_OBJECT, readParser.nextToken());
        assertEquals(XContentParser.Token.FIELD_NAME, readParser.nextToken());
        roundTripped = ElasticsearchException.failureFromXContent(readParser);
        assertEquals(XContentParser.Token.END_OBJECT, readParser.nextToken());
        assertNull(readParser.nextToken());
    }

    assertDeepEquals(expectedRoot, roundTripped);
}
Aggregations