Search in sources :

Example 6 with NodeStoreFilesMetadata

use of org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata in project crate by crate.

From the class ReplicaShardAllocator, method processExistingRecoveries:

/**
 * Re-examines replica recoveries that are already in flight and cancels those for which another
 * node offers a better starting point. A "better" node is one that can perform a no-op recovery
 * while the recovery on the current node is not even operation-based and therefore has to copy
 * segment files.
 *
 * @param allocation the allocation round supplying routing nodes, metadata and discovery nodes
 */
public void processExistingRecoveries(RoutingAllocation allocation) {
    Metadata metadata = allocation.metadata();
    RoutingNodes routingNodes = allocation.routingNodes();
    // Cancellations are queued and executed only after the scan below has finished: failing a
    // shard while iterating the routing nodes would cause a ConcurrentModificationException.
    List<Runnable> deferredCancellations = new ArrayList<>();
    for (RoutingNode routingNode : routingNodes) {
        for (ShardRouting shard : routingNode) {
            // only initializing replicas without a relocating node id are candidates
            if (shard.primary() || shard.initializing() == false || shard.relocatingNodeId() != null) {
                continue;
            }
            // a replica allocated because of index creation has no existing copy to look for
            if (shard.unassignedInfo() != null && shard.unassignedInfo().getReason() == UnassignedInfo.Reason.INDEX_CREATED) {
                continue;
            }

            AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(shard, allocation);
            if (shardStores.hasData() == false) {
                // the store listing is still being fetched
                logger.trace("{}: fetching new stores for initializing shard", shard);
                continue;
            }

            ShardRouting primaryShard = allocation.routingNodes().activePrimary(shard.shardId());
            assert primaryShard != null : "the replica shard can be allocated on at least one node, so there must be an active primary";
            assert primaryShard.currentNodeId() != null;
            final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
            final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
            if (primaryStore == null) {
                // No primary store data is available, probably because the primary shard is
                // corrupted (and listing failed). The recovery itself will discover that, so
                // nothing needs to be done here for the initializing shard.
                logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", shard);
                continue;
            }

            MatchingNodes matchingNodes = findMatchingNodes(shard, allocation, true, primaryNode, primaryStore, shardStores, false);
            if (matchingNodes.getNodeWithHighestMatch() == null) {
                continue;
            }
            DiscoveryNode currentNode = allocation.nodes().get(shard.currentNodeId());
            DiscoveryNode betterNode = matchingNodes.getNodeWithHighestMatch();
            // current node will not be in matchingNodes as it is filtered away by SameShardAllocationDecider
            if (currentNode.equals(betterNode)
                || matchingNodes.canPerformNoopRecovery(betterNode) == false
                || canPerformOperationBasedRecovery(primaryStore, shardStores, currentNode)) {
                // the ongoing recovery is already good enough, or the alternative is not a noop recovery
                continue;
            }

            // a node that can do a noop recovery exists: schedule cancellation of the current allocation
            logger.debug("cancelling allocation of replica on [{}], can perform a noop recovery on node [{}]", currentNode, betterNode);
            final Set<String> failedNodeIds;
            if (shard.unassignedInfo() == null) {
                failedNodeIds = Collections.emptySet();
            } else {
                failedNodeIds = shard.unassignedInfo().getFailedNodeIds();
            }
            final String cancellationMessage = "existing allocation of replica to [" + currentNode + "] cancelled, can perform a noop recovery on [" + betterNode + "]";
            UnassignedInfo cancellationInfo = new UnassignedInfo(
                UnassignedInfo.Reason.REALLOCATED_REPLICA,
                cancellationMessage,
                null,
                0,
                allocation.getCurrentNanoTime(),
                System.currentTimeMillis(),
                false,
                UnassignedInfo.AllocationStatus.NO_ATTEMPT,
                failedNodeIds
            );
            deferredCancellations.add(
                () -> routingNodes.failShard(logger, shard, cancellationInfo, metadata.getIndexSafe(shard.index()), allocation.changes())
            );
        }
    }
    deferredCancellations.forEach(Runnable::run);
}
Also used : NodeStoreFilesMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) UnassignedInfo(org.elasticsearch.cluster.routing.UnassignedInfo) IndexMetadata(org.elasticsearch.cluster.metadata.IndexMetadata) StoreFileMetadata(org.elasticsearch.index.store.StoreFileMetadata) Metadata(org.elasticsearch.cluster.metadata.Metadata) TransportNodesListShardStoreMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata) ArrayList(java.util.ArrayList) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting)

Example 7 with NodeStoreFilesMetadata

use of org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata in project crate by crate.

From the class ReplicaShardAllocator, method findMatchingNodes:

/**
 * Builds the set of nodes that hold store data for the shard and on which the allocation deciders
 * do not answer NO, together with a {@code MatchingNode} describing how well each copy matches
 * the primary store.
 *
 * @param shard              the shard a matching copy is searched for
 * @param allocation         the current allocation round (routing nodes and deciders are read from it)
 * @param noMatchFailedNodes when {@code true}, nodes listed in the shard's failed node ids are skipped
 * @param primaryNode        node holding the primary, passed through to {@code computeMatchingNode}
 * @param primaryStore       store metadata of the primary, passed through to {@code computeMatchingNode}
 * @param data               fetched per-node store metadata
 * @param explain            when {@code true}, a per-node decision is recorded for every examined node,
 *                           including those the deciders reject
 * @return the matching nodes plus the per-node decisions ({@code null} decisions unless {@code explain})
 */
private MatchingNodes findMatchingNodes(ShardRouting shard, RoutingAllocation allocation, boolean noMatchFailedNodes, DiscoveryNode primaryNode, TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore, AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> data, boolean explain) {
    final Map<DiscoveryNode, MatchingNode> matchesByNode = new HashMap<>();
    final Map<String, NodeAllocationResult> decisionsByNodeId;
    if (explain) {
        decisionsByNodeId = new HashMap<>();
    } else {
        decisionsByNodeId = null;
    }

    for (Map.Entry<DiscoveryNode, NodeStoreFilesMetadata> entry : data.getData().entrySet()) {
        final DiscoveryNode candidate = entry.getKey();
        final boolean skipAsFailedNode = noMatchFailedNodes
            && shard.unassignedInfo() != null
            && shard.unassignedInfo().getFailedNodeIds().contains(candidate.getId());
        if (skipAsFailedNode) {
            continue;
        }

        final TransportNodesListShardStoreMetadata.StoreFilesMetadata candidateStore = entry.getValue().storeFilesMetadata();
        if (candidateStore.isEmpty()) {
            // no files at all on this node: it is an empty index, nothing to reuse
            continue;
        }

        final RoutingNode routingNode = allocation.routingNodes().node(candidate.getId());
        if (routingNode == null) {
            continue;
        }

        // Ask the deciders whether the shard may go to this node. Only NO excludes the node:
        // a THROTTLING node with enough "same data" is kept so it can be tried next time.
        final Decision decision = allocation.deciders().canAllocate(shard, routingNode, allocation);

        final MatchingNode match;
        if (explain) {
            // in explain mode the match is computed up front so that rejected nodes are reported too
            match = computeMatchingNode(primaryNode, primaryStore, candidate, candidateStore);
            decisionsByNodeId.put(
                routingNode.nodeId(),
                new NodeAllocationResult(candidate, new ShardStoreInfo(match.matchingBytes), decision)
            );
            if (decision.type() == Decision.Type.NO) {
                continue;
            }
        } else {
            if (decision.type() == Decision.Type.NO) {
                continue;
            }
            match = computeMatchingNode(primaryNode, primaryStore, candidate, candidateStore);
        }

        matchesByNode.put(candidate, match);

        if (logger.isTraceEnabled() == false) {
            continue;
        }
        if (match.isNoopRecovery) {
            logger.trace("{}: node [{}] can perform a noop recovery", shard, candidate.getName());
        } else if (match.retainingSeqNo >= 0) {
            logger.trace("{}: node [{}] can perform operation-based recovery with retaining sequence number [{}]", shard, candidate.getName(), match.retainingSeqNo);
        } else {
            logger.trace("{}: node [{}] has [{}/{}] bytes of re-usable data", shard, candidate.getName(), new ByteSizeValue(match.matchingBytes), match.matchingBytes);
        }
    }

    return new MatchingNodes(matchesByNode, decisionsByNodeId);
}
Also used : NodeStoreFilesMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) HashMap(java.util.HashMap) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) AllocateUnassignedDecision(org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision) Decision(org.elasticsearch.cluster.routing.allocation.decider.Decision) ShardStoreInfo(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult.ShardStoreInfo) RoutingNode(org.elasticsearch.cluster.routing.RoutingNode) TransportNodesListShardStoreMetadata(org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata) Map(java.util.Map) NodeAllocationResult(org.elasticsearch.cluster.routing.allocation.NodeAllocationResult)

Aggregations

DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)6 RoutingNode (org.elasticsearch.cluster.routing.RoutingNode)6 ArrayList (java.util.ArrayList)4 HashMap (java.util.HashMap)4 Map (java.util.Map)4 RoutingNodes (org.elasticsearch.cluster.routing.RoutingNodes)4 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)4 UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo)4 AllocateUnassignedDecision (org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision)4 NodeAllocationResult (org.elasticsearch.cluster.routing.allocation.NodeAllocationResult)4 Decision (org.elasticsearch.cluster.routing.allocation.decider.Decision)4 NodeStoreFilesMetaData (org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData)4 TransportNodesListShardStoreMetaData (org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData)3 TransportNodesListShardStoreMetadata (org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata)3 NodeStoreFilesMetadata (org.elasticsearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata)3 ObjectLongHashMap (com.carrotsearch.hppc.ObjectLongHashMap)2 ObjectLongMap (com.carrotsearch.hppc.ObjectLongMap)2 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)2 IndexMetadata (org.elasticsearch.cluster.metadata.IndexMetadata)2 MetaData (org.elasticsearch.cluster.metadata.MetaData)2