use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class IndicesClusterStateService method updateShard.
private void updateShard(DiscoveryNodes nodes, ShardRouting shardRouting, Shard shard, RoutingTable routingTable, ClusterState clusterState) {
final ShardRouting currentRoutingEntry = shard.routingEntry();
assert currentRoutingEntry.isSameAllocation(shardRouting) : "local shard has a different allocation id but wasn't cleaned by removeShards. " + "cluster state: " + shardRouting + " local: " + currentRoutingEntry;
final long primaryTerm;
try {
final IndexMetadata indexMetadata = clusterState.metadata().index(shard.shardId().getIndex());
primaryTerm = indexMetadata.primaryTerm(shard.shardId().id());
final Set<String> inSyncIds = indexMetadata.inSyncAllocationIds(shard.shardId().id());
final IndexShardRoutingTable indexShardRoutingTable = routingTable.shardRoutingTable(shardRouting.shardId());
shard.updateShardState(shardRouting, primaryTerm, primaryReplicaSyncer::resync, clusterState.version(), inSyncIds, indexShardRoutingTable);
} catch (Exception e) {
failAndRemoveShard(shardRouting, true, "failed updating shard routing entry", e, clusterState);
return;
}
final IndexShardState state = shard.state();
if (shardRouting.initializing() && (state == IndexShardState.STARTED || state == IndexShardState.POST_RECOVERY)) {
// we managed to tell the master we started), mark us as started
if (logger.isTraceEnabled()) {
logger.trace("{} master marked shard as initializing, but shard has state [{}], resending shard started to {}", shardRouting.shardId(), state, nodes.getMasterNode());
}
if (nodes.getMasterNode() != null) {
shardStateAction.shardStarted(shardRouting, primaryTerm, "master " + nodes.getMasterNode() + " marked shard as initializing, but shard state is [" + state + "], mark shard as started", SHARD_STATE_ACTION_LISTENER, clusterState);
}
}
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class IndicesClusterStateService method createIndices.
private void createIndices(final ClusterState state) {
// we only create indices for shards that are allocated
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
if (localRoutingNode == null) {
return;
}
// create map of indices to create with shards to fail if index creation fails
final Map<Index, List<ShardRouting>> indicesToCreate = new HashMap<>();
for (ShardRouting shardRouting : localRoutingNode) {
if (failedShardsCache.containsKey(shardRouting.shardId()) == false) {
final Index index = shardRouting.index();
if (indicesService.indexService(index) == null) {
indicesToCreate.computeIfAbsent(index, k -> new ArrayList<>()).add(shardRouting);
}
}
}
for (Map.Entry<Index, List<ShardRouting>> entry : indicesToCreate.entrySet()) {
final Index index = entry.getKey();
final IndexMetadata indexMetadata = state.metadata().index(index);
logger.debug("[{}] creating index", index);
AllocatedIndex<? extends Shard> indexService = null;
try {
indexService = indicesService.createIndex(indexMetadata, buildInIndexListener, true);
if (indexService.updateMapping(null, indexMetadata) && sendRefreshMapping) {
nodeMappingRefreshAction.nodeMappingRefresh(state.nodes().getMasterNode(), new NodeMappingRefreshAction.NodeMappingRefreshRequest(indexMetadata.getIndex().getName(), indexMetadata.getIndexUUID(), state.nodes().getLocalNodeId()));
}
} catch (Exception e) {
final String failShardReason;
if (indexService == null) {
failShardReason = "failed to create index";
} else {
failShardReason = "failed to update mapping for index";
indicesService.removeIndex(index, FAILURE, "removing index (mapping update failed)");
}
for (ShardRouting shardRouting : entry.getValue()) {
sendFailShard(shardRouting, failShardReason, e, state);
}
}
}
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class IndicesClusterStateService method removeShards.
/**
* Removes shards that are currently loaded by indicesService but have disappeared from the routing table of the current node.
* This method does not delete the shard data.
*
* @param state new cluster state
*/
private void removeShards(final ClusterState state) {
final String localNodeId = state.nodes().getLocalNodeId();
assert localNodeId != null;
// remove shards based on routing nodes (no deletion of data)
RoutingNode localRoutingNode = state.getRoutingNodes().node(localNodeId);
for (AllocatedIndex<? extends Shard> indexService : indicesService) {
for (Shard shard : indexService) {
ShardRouting currentRoutingEntry = shard.routingEntry();
ShardId shardId = currentRoutingEntry.shardId();
ShardRouting newShardRouting = localRoutingNode == null ? null : localRoutingNode.getByShardId(shardId);
if (newShardRouting == null) {
// we can just remove the shard without cleaning it locally, since we will clean it in IndicesStore
// once all shards are allocated
logger.debug("{} removing shard (not allocated)", shardId);
indexService.removeShard(shardId.id(), "removing shard (not allocated)");
} else if (newShardRouting.isSameAllocation(currentRoutingEntry) == false) {
logger.debug("{} removing shard (stale allocation id, stale {}, new {})", shardId, currentRoutingEntry, newShardRouting);
indexService.removeShard(shardId.id(), "removing shard (stale copy)");
} else if (newShardRouting.initializing() && currentRoutingEntry.active()) {
// this can happen if the node was isolated/gc-ed, rejoins the cluster and a new shard with the same allocation id
// is assigned to it. Batch cluster state processing or if shard fetching completes before the node gets a new cluster
// state may result in a new shard being initialized while having the same allocation id as the currently started shard.
logger.debug("{} removing shard (not active, current {}, new {})", shardId, currentRoutingEntry, newShardRouting);
indexService.removeShard(shardId.id(), "removing shard (stale copy)");
} else if (newShardRouting.primary() && currentRoutingEntry.primary() == false && newShardRouting.initializing()) {
// see above if clause
assert currentRoutingEntry.initializing() : currentRoutingEntry;
// this can happen when cluster state batching batches activation of the shard, closing an index, reopening it
// and assigning an initializing primary to this node
logger.debug("{} removing shard (not active, current {}, new {})", shardId, currentRoutingEntry, newShardRouting);
indexService.removeShard(shardId.id(), "removing shard (stale copy)");
}
}
}
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class IndicesClusterStateService method createOrUpdateShards.
private void createOrUpdateShards(final ClusterState state) {
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
if (localRoutingNode == null) {
return;
}
DiscoveryNodes nodes = state.nodes();
RoutingTable routingTable = state.routingTable();
for (final ShardRouting shardRouting : localRoutingNode) {
ShardId shardId = shardRouting.shardId();
if (failedShardsCache.containsKey(shardId) == false) {
AllocatedIndex<? extends Shard> indexService = indicesService.indexService(shardId.getIndex());
assert indexService != null : "index " + shardId.getIndex() + " should have been created by createIndices";
Shard shard = indexService.getShardOrNull(shardId.id());
if (shard == null) {
assert shardRouting.initializing() : shardRouting + " should have been removed by failMissingShards";
createShard(nodes, routingTable, shardRouting, state);
} else {
updateShard(nodes, shardRouting, shard, routingTable, state);
}
}
}
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class IndicesClusterStateService method updateFailedShardsCache.
/**
* Removes shard entries from the failed shards cache that are no longer allocated to this node by the master.
* Sends shard failures for shards that are marked as actively allocated to this node but don't actually exist on the node.
* Resends shard failures for shards that are still marked as allocated to this node but previously failed.
*
* @param state new cluster state
*/
private void updateFailedShardsCache(final ClusterState state) {
RoutingNode localRoutingNode = state.getRoutingNodes().node(state.nodes().getLocalNodeId());
if (localRoutingNode == null) {
failedShardsCache.clear();
return;
}
DiscoveryNode masterNode = state.nodes().getMasterNode();
// remove items from cache which are not in our routing table anymore and resend failures that have not executed on master yet
for (Iterator<Map.Entry<ShardId, ShardRouting>> iterator = failedShardsCache.entrySet().iterator(); iterator.hasNext(); ) {
ShardRouting failedShardRouting = iterator.next().getValue();
ShardRouting matchedRouting = localRoutingNode.getByShardId(failedShardRouting.shardId());
if (matchedRouting == null || matchedRouting.isSameAllocation(failedShardRouting) == false) {
iterator.remove();
} else {
if (masterNode != null) {
// TODO: can we remove this? Is resending shard failures the responsibility of shardStateAction?
String message = "master " + masterNode + " has not removed previously failed shard. resending shard failure";
logger.trace("[{}] re-sending failed shard [{}], reason [{}]", matchedRouting.shardId(), matchedRouting, message);
shardStateAction.localShardFailed(matchedRouting, message, null, SHARD_STATE_ACTION_LISTENER, state);
}
}
}
}
Aggregations