use of org.elasticsearch.cluster.routing.RoutingNodes in project elasticsearch by elastic.
the class DiskThresholdDeciderTests method logShardStates.
public void logShardStates(ClusterState state) {
RoutingNodes rn = state.getRoutingNodes();
logger.info("--> counts: total: {}, unassigned: {}, initializing: {}, relocating: {}, started: {}", rn.shards(shard -> true).size(), rn.shardsWithState(UNASSIGNED).size(), rn.shardsWithState(INITIALIZING).size(), rn.shardsWithState(RELOCATING).size(), rn.shardsWithState(STARTED).size());
logger.info("--> unassigned: {}, initializing: {}, relocating: {}, started: {}", rn.shardsWithState(UNASSIGNED), rn.shardsWithState(INITIALIZING), rn.shardsWithState(RELOCATING), rn.shardsWithState(STARTED));
}
use of org.elasticsearch.cluster.routing.RoutingNodes in project elasticsearch by elastic.
the class ReplicaShardAllocator method makeAllocationDecision.
@Override
public AllocateUnassignedDecision makeAllocationDecision(final ShardRouting unassignedShard, final RoutingAllocation allocation, final Logger logger) {
if (isResponsibleFor(unassignedShard) == false) {
// this allocator is not responsible for deciding on this shard
return AllocateUnassignedDecision.NOT_TAKEN;
}
final RoutingNodes routingNodes = allocation.routingNodes();
final boolean explain = allocation.debugDecision();
// pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
Decision allocateDecision = result.v1();
if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
// only return early if we are not in explain mode, or we are in explain mode but we have not
// yet attempted to fetch any shard data
logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), result.v2() != null ? new ArrayList<>(result.v2().values()) : null);
}
AsyncShardFetch.FetchResult<NodeStoreFilesMetaData> shardStores = fetchData(unassignedShard, allocation);
if (shardStores.hasData() == false) {
logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
allocation.setHasPendingAsyncFetch();
List<NodeAllocationResult> nodeDecisions = null;
if (explain) {
nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
}
return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
}
ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
if (primaryShard == null) {
assert explain : "primary should only be null here if we are in explain mode, so we didn't " + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), new ArrayList<>(result.v2().values()));
}
TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore = findStore(primaryShard, allocation, shardStores);
if (primaryStore == null) {
// if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
// we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
// will try and recover from
// Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
return AllocateUnassignedDecision.NOT_TAKEN;
}
MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, primaryStore, shardStores, explain);
assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
if (allocateDecision.type() != Decision.Type.YES) {
return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
} else if (matchingNodes.getNodeWithHighestMatch() != null) {
RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
// we only check on THROTTLE since we checked before before on NO
Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
if (decision.type() == Decision.Type.THROTTLE) {
logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
// we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
return AllocateUnassignedDecision.throttle(nodeDecisions);
} else {
logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
// we found a match
return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
}
} else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
// if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
// unassigned due to a node leaving, so we delay allocation of this replica to see if the
// node with the shard copy will rejoin so we can re-use the copy it has
logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
long remainingDelayMillis = 0L;
long totalDelayMillis = 0L;
if (explain) {
UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
MetaData metadata = allocation.metaData();
IndexMetaData indexMetaData = metadata.index(unassignedShard.index());
totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetaData.getSettings()).getMillis();
long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetaData.getSettings());
remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
}
return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
}
return AllocateUnassignedDecision.NOT_TAKEN;
}
use of org.elasticsearch.cluster.routing.RoutingNodes in project elasticsearch by elastic.
the class ReplicaShardAllocator method processExistingRecoveries.
/**
* Process existing recoveries of replicas and see if we need to cancel them if we find a better
* match. Today, a better match is one that has full sync id match compared to not having one in
* the previous recovery.
*/
public void processExistingRecoveries(RoutingAllocation allocation) {
MetaData metaData = allocation.metaData();
RoutingNodes routingNodes = allocation.routingNodes();
List<Runnable> shardCancellationActions = new ArrayList<>();
for (RoutingNode routingNode : routingNodes) {
for (ShardRouting shard : routingNode) {
if (shard.primary()) {
continue;
}
if (shard.initializing() == false) {
continue;
}
if (shard.relocatingNodeId() != null) {
continue;
}
// if we are allocating a replica because of index creation, no need to go and find a copy, there isn't one...
if (shard.unassignedInfo() != null && shard.unassignedInfo().getReason() == UnassignedInfo.Reason.INDEX_CREATED) {
continue;
}
AsyncShardFetch.FetchResult<NodeStoreFilesMetaData> shardStores = fetchData(shard, allocation);
if (shardStores.hasData() == false) {
logger.trace("{}: fetching new stores for initializing shard", shard);
// still fetching
continue;
}
ShardRouting primaryShard = allocation.routingNodes().activePrimary(shard.shardId());
assert primaryShard != null : "the replica shard can be allocated on at least one node, so there must be an active primary";
TransportNodesListShardStoreMetaData.StoreFilesMetaData primaryStore = findStore(primaryShard, allocation, shardStores);
if (primaryStore == null) {
// if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
// just let the recovery find it out, no need to do anything about it for the initializing shard
logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", shard);
continue;
}
MatchingNodes matchingNodes = findMatchingNodes(shard, allocation, primaryStore, shardStores, false);
if (matchingNodes.getNodeWithHighestMatch() != null) {
DiscoveryNode currentNode = allocation.nodes().get(shard.currentNodeId());
DiscoveryNode nodeWithHighestMatch = matchingNodes.getNodeWithHighestMatch();
// current node will not be in matchingNodes as it is filtered away by SameShardAllocationDecider
final String currentSyncId;
if (shardStores.getData().containsKey(currentNode)) {
currentSyncId = shardStores.getData().get(currentNode).storeFilesMetaData().syncId();
} else {
currentSyncId = null;
}
if (currentNode.equals(nodeWithHighestMatch) == false && Objects.equals(currentSyncId, primaryStore.syncId()) == false && matchingNodes.isNodeMatchBySyncID(nodeWithHighestMatch)) {
// we found a better match that has a full sync id match, the existing allocation is not fully synced
// so we found a better one, cancel this one
logger.debug("cancelling allocation of replica on [{}], sync id match found on node [{}]", currentNode, nodeWithHighestMatch);
UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node [" + nodeWithHighestMatch + "]", null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), false, UnassignedInfo.AllocationStatus.NO_ATTEMPT);
// don't cancel shard in the loop as it will cause a ConcurrentModificationException
shardCancellationActions.add(() -> routingNodes.failShard(logger, shard, unassignedInfo, metaData.getIndexSafe(shard.index()), allocation.changes()));
}
}
}
}
for (Runnable action : shardCancellationActions) {
action.run();
}
}
use of org.elasticsearch.cluster.routing.RoutingNodes in project crate by crate.
the class AllocateStalePrimaryAllocationCommand method execute.
@Override
public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) {
final DiscoveryNode discoNode;
try {
discoNode = allocation.nodes().resolveNode(node);
} catch (IllegalArgumentException e) {
return explainOrThrowRejectedCommand(explain, allocation, e);
}
final RoutingNodes routingNodes = allocation.routingNodes();
RoutingNode routingNode = routingNodes.node(discoNode.getId());
if (routingNode == null) {
return explainOrThrowMissingRoutingNode(allocation, explain, discoNode);
}
try {
allocation.routingTable().shardRoutingTable(index, shardId).primaryShard();
} catch (IndexNotFoundException | ShardNotFoundException e) {
return explainOrThrowRejectedCommand(explain, allocation, e);
}
ShardRouting shardRouting = null;
for (ShardRouting shard : allocation.routingNodes().unassigned()) {
if (shard.getIndexName().equals(index) && shard.getId() == shardId && shard.primary()) {
shardRouting = shard;
break;
}
}
if (shardRouting == null) {
return explainOrThrowRejectedCommand(explain, allocation, "primary [" + index + "][" + shardId + "] is already assigned");
}
if (acceptDataLoss == false) {
String dataLossWarning = "allocating an empty primary for [" + index + "][" + shardId + "] can result in data loss. Please " + "confirm by setting the accept_data_loss parameter to true";
return explainOrThrowRejectedCommand(explain, allocation, dataLossWarning);
}
if (shardRouting.recoverySource().getType() != RecoverySource.Type.EXISTING_STORE) {
return explainOrThrowRejectedCommand(explain, allocation, "trying to allocate an existing primary shard [" + index + "][" + shardId + "], while no such shard has ever been active");
}
initializeUnassignedShard(allocation, routingNodes, routingNode, shardRouting, null, RecoverySource.ExistingStoreRecoverySource.FORCE_STALE_PRIMARY_INSTANCE);
return new RerouteExplanation(this, allocation.decision(Decision.YES, name() + " (allocation command)", "ignore deciders"));
}
use of org.elasticsearch.cluster.routing.RoutingNodes in project crate by crate.
the class CancelAllocationCommand method execute.
@Override
public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) {
DiscoveryNode discoNode = allocation.nodes().resolveNode(node);
ShardRouting shardRouting = null;
RoutingNodes routingNodes = allocation.routingNodes();
RoutingNode routingNode = routingNodes.node(discoNode.getId());
IndexMetadata indexMetadata = null;
if (routingNode != null) {
indexMetadata = allocation.metadata().index(index());
if (indexMetadata == null) {
throw new IndexNotFoundException(index());
}
ShardId shardId = new ShardId(indexMetadata.getIndex(), shardId());
shardRouting = routingNode.getByShardId(shardId);
}
if (shardRouting == null) {
if (explain) {
return new RerouteExplanation(this, allocation.decision(Decision.NO, "cancel_allocation_command", "can't cancel " + shardId + ", failed to find it on node " + discoNode));
}
throw new IllegalArgumentException("[cancel_allocation] can't cancel " + shardId + ", failed to find it on node " + discoNode);
}
if (shardRouting.primary() && allowPrimary == false) {
if ((shardRouting.initializing() && shardRouting.relocatingNodeId() != null) == false) {
// only allow cancelling initializing shard of primary relocation without allowPrimary flag
if (explain) {
return new RerouteExplanation(this, allocation.decision(Decision.NO, "cancel_allocation_command", "can't cancel " + shardId + " on node " + discoNode + ", shard is primary and " + shardRouting.state().name().toLowerCase(Locale.ROOT)));
}
throw new IllegalArgumentException("[cancel_allocation] can't cancel " + shardId + " on node " + discoNode + ", shard is primary and " + shardRouting.state().name().toLowerCase(Locale.ROOT));
}
}
routingNodes.failShard(LogManager.getLogger(CancelAllocationCommand.class), shardRouting, new UnassignedInfo(UnassignedInfo.Reason.REROUTE_CANCELLED, null), indexMetadata, allocation.changes());
// TODO: We don't have to remove a cancelled shard from in-sync set once we have a strict resync implementation.
allocation.removeAllocationId(shardRouting);
return new RerouteExplanation(this, allocation.decision(Decision.YES, "cancel_allocation_command", "shard " + shardId + " on node " + discoNode + " can be cancelled"));
}
Aggregations