Use of org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata in the project crate, by crate.
From the class ReplicaShardAllocator, method processExistingRecoveries.
/**
 * Re-examines replica recoveries that are already in flight and cancels those for which a
 * better target node exists. A target counts as better when it can perform a no-op recovery
 * while the node currently recovering the replica cannot perform an operation-based recovery.
 *
 * <p>Cancellations are only collected while walking the routing nodes and are executed once the
 * walk has finished.
 *
 * @param allocation the allocation round providing routing nodes, metadata, nodes and change tracking
 */
public void processExistingRecoveries(RoutingAllocation allocation) {
    final Metadata metadata = allocation.metadata();
    final RoutingNodes routingNodes = allocation.routingNodes();
    final List<Runnable> deferredCancellations = new ArrayList<>();

    for (RoutingNode routingNode : routingNodes) {
        for (ShardRouting shard : routingNode) {
            // only replicas that are initializing and are not the target of a relocation are of interest
            if (shard.primary() || shard.initializing() == false || shard.relocatingNodeId() != null) {
                continue;
            }
            // a replica that exists because its index was just created has no earlier copy anywhere,
            // so there is nothing to look for
            if (shard.unassignedInfo() != null && shard.unassignedInfo().getReason() == UnassignedInfo.Reason.INDEX_CREATED) {
                continue;
            }

            final AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(shard, allocation);
            if (shardStores.hasData() == false) {
                // the store listing has not come back yet
                logger.trace("{}: fetching new stores for initializing shard", shard);
                continue;
            }

            final ShardRouting primaryShard = allocation.routingNodes().activePrimary(shard.shardId());
            assert primaryShard != null : "the replica shard can be allocated on at least one node, so there must be an active primary";
            assert primaryShard.currentNodeId() != null;
            final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
            final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
            if (primaryStore == null) {
                // without the primary's store data (probably a corrupted primary whose listing failed)
                // there is nothing to compare against; leave it to the running recovery to discover
                logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", shard);
                continue;
            }

            final MatchingNodes matchingNodes = findMatchingNodes(shard, allocation, true, primaryNode, primaryStore, shardStores, false);
            final DiscoveryNode bestMatch = matchingNodes.getNodeWithHighestMatch();
            if (bestMatch == null) {
                continue;
            }

            final DiscoveryNode currentNode = allocation.nodes().get(shard.currentNodeId());
            // current node will not be in matchingNodes as it is filtered away by SameShardAllocationDecider
            final boolean betterCopyElsewhere = currentNode.equals(bestMatch) == false
                && matchingNodes.canPerformNoopRecovery(bestMatch)
                && canPerformOperationBasedRecovery(primaryStore, shardStores, currentNode) == false;
            if (betterCopyElsewhere == false) {
                continue;
            }

            // a different node can recover without copying anything: give up the ongoing recovery
            logger.debug("cancelling allocation of replica on [{}], can perform a noop recovery on node [{}]", currentNode, bestMatch);

            final Set<String> previouslyFailedNodeIds;
            if (shard.unassignedInfo() != null) {
                previouslyFailedNodeIds = shard.unassignedInfo().getFailedNodeIds();
            } else {
                previouslyFailedNodeIds = Collections.emptySet();
            }
            final String cancellationMessage =
                "existing allocation of replica to [" + currentNode + "] cancelled, can perform a noop recovery on [" + bestMatch + "]";
            final UnassignedInfo unassignedInfo = new UnassignedInfo(
                UnassignedInfo.Reason.REALLOCATED_REPLICA,
                cancellationMessage,
                null,
                0,
                allocation.getCurrentNanoTime(),
                System.currentTimeMillis(),
                false,
                UnassignedInfo.AllocationStatus.NO_ATTEMPT,
                previouslyFailedNodeIds
            );

            // failing the shard right here would modify the collection being iterated and cause a
            // ConcurrentModificationException, so the action is queued instead
            deferredCancellations.add(
                () -> routingNodes.failShard(logger, shard, unassignedInfo, metadata.getIndexSafe(shard.index()), allocation.changes())
            );
        }
    }

    deferredCancellations.forEach(Runnable::run);
}
Use of org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata in the project crate, by crate.
From the class ReplicaShardAllocator, method findMatchingNodes.
/**
 * Builds the set of nodes whose fetched store data can be matched against the primary's store.
 *
 * <p>A node is left out when it is listed among the shard's failed node ids (only if
 * {@code noMatchFailedNodes} is set), when its store metadata is empty, when it has no routing
 * node, or when the allocation deciders answer {@code NO} for it. With {@code explain} set, a
 * per-node allocation result is recorded for every node that reaches the decider check,
 * including those the deciders reject.
 *
 * @param shard              the replica shard being considered
 * @param allocation         the current allocation round
 * @param noMatchFailedNodes whether nodes in the shard's failed-node set are skipped
 * @param primaryNode        the node holding the active primary
 * @param primaryStore       the primary's store file metadata
 * @param data               the fetched per-node store metadata
 * @param explain            whether per-node decisions are collected
 * @return the matching nodes together with the per-node decisions ({@code null} when not explaining)
 */
private MatchingNodes findMatchingNodes(ShardRouting shard, RoutingAllocation allocation, boolean noMatchFailedNodes, DiscoveryNode primaryNode, TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore, AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> data, boolean explain) {
    final Map<DiscoveryNode, MatchingNode> candidates = new HashMap<>();
    final Map<String, NodeAllocationResult> explanations = explain ? new HashMap<>() : null;

    for (Map.Entry<DiscoveryNode, NodeStoreFilesMetadata> entry : data.getData().entrySet()) {
        final DiscoveryNode candidateNode = entry.getKey();
        if (noMatchFailedNodes
            && shard.unassignedInfo() != null
            && shard.unassignedInfo().getFailedNodeIds().contains(candidateNode.getId())) {
            continue;
        }

        final TransportNodesListShardStoreMetadata.StoreFilesMetadata candidateStore = entry.getValue().storeFilesMetadata();
        if (candidateStore.isEmpty()) {
            // no files at all on that node: it is an empty index
            continue;
        }

        final RoutingNode node = allocation.routingNodes().node(candidateNode.getId());
        if (node == null) {
            continue;
        }

        // Only an outright NO rules a node out. A THROTTLING node that holds enough "same data"
        // stays in the result so that assignment can be attempted again next time.
        final Decision decision = allocation.deciders().canAllocate(shard, node, allocation);

        final MatchingNode match;
        if (explain) {
            // when explaining, the match is needed for the report even if the node is rejected below
            match = computeMatchingNode(primaryNode, primaryStore, candidateNode, candidateStore);
            final ShardStoreInfo shardStoreInfo = new ShardStoreInfo(match.matchingBytes);
            explanations.put(node.nodeId(), new NodeAllocationResult(candidateNode, shardStoreInfo, decision));
            if (decision.type() == Decision.Type.NO) {
                continue;
            }
        } else {
            if (decision.type() == Decision.Type.NO) {
                continue;
            }
            match = computeMatchingNode(primaryNode, primaryStore, candidateNode, candidateStore);
        }

        candidates.put(candidateNode, match);

        if (logger.isTraceEnabled() == false) {
            continue;
        }
        if (match.isNoopRecovery) {
            logger.trace("{}: node [{}] can perform a noop recovery", shard, candidateNode.getName());
        } else if (match.retainingSeqNo >= 0) {
            logger.trace("{}: node [{}] can perform operation-based recovery with retaining sequence number [{}]", shard, candidateNode.getName(), match.retainingSeqNo);
        } else {
            logger.trace("{}: node [{}] has [{}/{}] bytes of re-usable data", shard, candidateNode.getName(), new ByteSizeValue(match.matchingBytes), match.matchingBytes);
        }
    }

    return new MatchingNodes(candidates, explanations);
}
Aggregations