Use of org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision in the elasticsearch project by elastic.
From the class ClusterAllocationExplanationTests, method randomClusterAllocationExplanation.
/**
 * Builds a {@link ClusterAllocationExplanation} fixture for shard 0 of index {@code idx}.
 *
 * @param assignedShard when {@code true} the shard is STARTED on {@code node-0} and the
 *                      explanation carries a move decision; when {@code false} the shard is
 *                      UNASSIGNED, has no current node and carries an allocate decision
 * @return the explanation wrapping the routing entry, the (possibly null) node and the decision
 */
private static ClusterAllocationExplanation randomClusterAllocationExplanation(boolean assignedShard) {
    final ShardId shardId = new ShardId(new Index("idx", "123"), 0);

    // Resolve the routing inputs up front instead of repeating the ternary per argument.
    final String currentNodeId;
    final ShardRoutingState routingState;
    if (assignedShard) {
        currentNodeId = "node-0";
        routingState = ShardRoutingState.STARTED;
    } else {
        currentNodeId = null;
        routingState = ShardRoutingState.UNASSIGNED;
    }
    final ShardRouting routing = TestShardRouting.newShardRouting(shardId, currentNodeId, true, routingState);

    final DiscoveryNode currentNode;
    final ShardAllocationDecision decision;
    if (assignedShard) {
        currentNode = new DiscoveryNode("node-0", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT);
        // An assigned shard only gets a move decision; the allocate side is left as NOT_TAKEN.
        final MoveDecision stayPut = MoveDecision.cannotRebalance(Decision.YES, AllocationDecision.NO, 3, null)
            .withRemainDecision(Decision.YES);
        decision = new ShardAllocationDecision(AllocateUnassignedDecision.NOT_TAKEN, stayPut);
    } else {
        currentNode = null;
        // An unassigned shard only gets an allocate decision; the move side is left as NOT_TAKEN.
        final AllocateUnassignedDecision rejected =
            AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.DECIDERS_NO, null);
        decision = new ShardAllocationDecision(rejected, MoveDecision.NOT_TAKEN);
    }
    return new ClusterAllocationExplanation(routing, currentNode, null, null, decision);
}
Use of org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision in the elasticsearch project by elastic.
From the class ClusterAllocationExplainIT, method testCannotAllocateStaleReplicaExplanation.
/**
 * Checks the allocation explanation for the primary of {@code idx} when the only node that comes
 * back holds a stale copy.
 *
 * Scenario: three nodes are started and an index with one primary and one replica is created;
 * the replica's node is stopped, more data is indexed, the primary's node is stopped, and a
 * data-only node is started. The test then expects {@code NO_VALID_SHARD_COPY} both on the
 * explanation object and in its serialized form.
 */
public void testCannotAllocateStaleReplicaExplanation() throws Exception {
    logger.info("--> starting 3 nodes");
    final String masterNode = internalCluster().startNode();
    // start replica node first, so its path will be used first when we start a node after
    // stopping all of them at end of test.
    final String replicaNode = internalCluster().startNode();
    final String primaryNode = internalCluster().startNode();

    logger.info("--> creating an index with 1 primary and 1 replica");
    final Settings initialRouting = Settings.builder()
        .put("index.routing.allocation.include._name", primaryNode)
        .put("index.routing.allocation.exclude._name", masterNode)
        .build();
    createIndexAndIndexData(1, 1, initialRouting, ActiveShardCount.ONE);
    // clear the include filter again now that the index exists
    client().admin()
        .indices()
        .prepareUpdateSettings("idx")
        .setSettings(Settings.builder().put("index.routing.allocation.include._name", (String) null))
        .get();
    ensureGreen();
    assertThat(replicaNode().getName(), equalTo(replicaNode));
    assertThat(primaryNodeName(), equalTo(primaryNode));

    logger.info("--> stop node with the replica shard");
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(replicaNode));

    logger.info("--> index more data, now the replica is stale");
    indexData();

    logger.info("--> stop the node with the primary");
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNode));

    logger.info("--> restart the node with the stale replica");
    final String restartedNode = internalCluster().startDataOnlyNode();
    // wait for the master to finish processing join.
    ensureClusterSizeConsistency();

    // wait until the system has fetched shard data and we know there is no valid shard copy
    assertBusy(() -> {
        final AllocateUnassignedDecision primaryDecision = client().admin()
            .cluster()
            .prepareAllocationExplain()
            .setIndex("idx")
            .setShard(0)
            .setPrimary(true)
            .get()
            .getExplanation()
            .getShardAllocationDecision()
            .getAllocateDecision();
        assertTrue(primaryDecision.isDecisionTaken());
        assertEquals(AllocationDecision.NO_VALID_SHARD_COPY, primaryDecision.getAllocationDecision());
    });

    final boolean includeYesDecisions = randomBoolean();
    final boolean includeDiskInfo = randomBoolean();
    final ClusterAllocationExplanation explanation = runExplain(true, includeYesDecisions, includeDiskInfo);
    final AllocateUnassignedDecision allocateDecision = explanation.getShardAllocationDecision().getAllocateDecision();
    final MoveDecision moveDecision = explanation.getShardAllocationDecision().getMoveDecision();

    // verify shard info
    assertEquals("idx", explanation.getShard().getIndexName());
    assertEquals(0, explanation.getShard().getId());
    assertTrue(explanation.isPrimary());

    // verify current node info
    assertEquals(ShardRoutingState.UNASSIGNED, explanation.getShardState());
    assertNull(explanation.getCurrentNode());

    // verify unassigned info
    assertNotNull(explanation.getUnassignedInfo());

    // verify decision object
    assertTrue(allocateDecision.isDecisionTaken());
    assertFalse(moveDecision.isDecisionTaken());
    assertEquals(AllocationDecision.NO_VALID_SHARD_COPY, allocateDecision.getAllocationDecision());
    assertEquals(2, allocateDecision.getNodeDecisions().size());
    for (NodeAllocationResult nodeResult : allocateDecision.getNodeDecisions()) {
        final boolean onRestartedNode = nodeResult.getNode().getName().equals(restartedNode);
        final NodeAllocationResult.ShardStoreInfo storeInfo = nodeResult.getShardStoreInfo();
        assertNotNull(storeInfo);
        // only the restarted node is expected to report an allocation id for its copy
        if (onRestartedNode) {
            assertNotNull(storeInfo.getAllocationId());
        } else {
            assertNull(storeInfo.getAllocationId());
        }
        assertFalse(storeInfo.isInSync());
        assertNull(storeInfo.getStoreException());
    }

    // verify JSON output
    try (XContentParser parser = getParser(explanation)) {
        verifyShardInfo(parser, true, includeDiskInfo, ShardRoutingState.UNASSIGNED);

        parser.nextToken();
        assertEquals("can_allocate", parser.currentName());
        parser.nextToken();
        assertEquals(AllocationDecision.NO_VALID_SHARD_COPY.toString(), parser.text());

        parser.nextToken();
        assertEquals("allocate_explanation", parser.currentName());
        parser.nextToken();
        assertEquals("cannot allocate because all found copies of the shard are either stale or corrupt", parser.text());

        verifyStaleShardCopyNodeDecisions(parser, 2, Collections.singleton(restartedNode));
    }
}
Use of org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision in the crate project by crate.
From the class ReplicaShardAllocator, method makeAllocationDecision.
/**
 * Decides how the given unassigned shard should be handled by this allocator.
 *
 * The possible outcomes, in the order they are checked:
 * <ul>
 *   <li>{@code NOT_TAKEN} when this allocator is not responsible for the shard;</li>
 *   <li>a "no" decision when no node can take the shard, or while shard store data is still
 *       being fetched;</li>
 *   <li>{@code NOT_TAKEN} when no store is found for the primary's node;</li>
 *   <li>a throttle or yes decision for the node with the highest match;</li>
 *   <li>a delayed decision when no data was found and the shard's unassigned info is delayed;</li>
 *   <li>{@code NOT_TAKEN} otherwise.</li>
 * </ul>
 *
 * @param unassignedShard the shard to decide on
 * @param allocation      the current allocation; its debug flag enables per-node decisions
 * @param logger          logger used for trace and debug output
 * @return the allocation decision for the shard
 */
@Override
public AllocateUnassignedDecision makeAllocationDecision(final ShardRouting unassignedShard, final RoutingAllocation allocation, final Logger logger) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }

    final RoutingNodes routingNodes = allocation.routingNodes();
    final boolean explain = allocation.debugDecision();

    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    final Tuple<Decision, Map<String, NodeAllocationResult>> preCheck = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    final Decision allocateDecision = preCheck.v1();
    final boolean allocatableSomewhere = allocateDecision.type() == Decision.Type.YES;

    // only return early if we are not in explain mode, or we are in explain mode but we have not
    // yet attempted to fetch any shard data
    if (allocatableSomewhere == false && (explain && hasInitiatedFetching(unassignedShard)) == false) {
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        final AllocationStatus status = UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type());
        final List<NodeAllocationResult> preCheckDecisions;
        if (preCheck.v2() != null) {
            preCheckDecisions = new ArrayList<>(preCheck.v2().values());
        } else {
            preCheckDecisions = null;
        }
        return AllocateUnassignedDecision.no(status, preCheckDecisions);
    }

    final AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        allocation.setHasPendingAsyncFetch();
        // per-node decisions are only built when explaining
        final List<NodeAllocationResult> pendingDecisions = explain ? buildDecisionsForAllNodes(unassignedShard, allocation) : null;
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, pendingDecisions);
    }

    final ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't " + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        final AllocationStatus status = UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type());
        return AllocateUnassignedDecision.no(status, new ArrayList<>(preCheck.v2().values()));
    }
    assert primaryShard.currentNodeId() != null;

    final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
    final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }

    final MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, false, primaryNode, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    final List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(preCheck.v2(), matchingNodes.nodeDecisions);

    if (allocatableSomewhere == false) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    }

    if (matchingNodes.getNodeWithHighestMatch() != null) {
        final RoutingNode nodeWithHighestMatch = routingNodes.node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before on NO
        final Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        }
        logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
        // we found a match
        return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
    }

    if (matchingNodes.hasAnyData() || unassignedShard.unassignedInfo().isDelayed() == false) {
        return AllocateUnassignedDecision.NOT_TAKEN;
    }

    // if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
    // unassigned due to a node leaving, so we delay allocation of this replica to see if the
    // node with the shard copy will rejoin so we can re-use the copy it has
    logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
    if (explain == false) {
        // delay figures are only computed when explaining
        return AllocateUnassignedDecision.delayed(0L, 0L, nodeDecisions);
    }
    final UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
    final Metadata metadata = allocation.metadata();
    final IndexMetadata indexMetadata = metadata.index(unassignedShard.index());
    final long totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).getMillis();
    final long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetadata.getSettings());
    final long remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
    return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
}
Use of org.elasticsearch.cluster.routing.allocation.AllocateUnassignedDecision in the crate project by crate.
From the class SysAllocations, method createSysAllocations.
/**
 * Creates the {@link SysAllocation} entry for one shard routing.
 *
 * The allocation is switched to {@code EXCLUDE_YES_DECISIONS} debug mode immediately. The shard
 * decision itself is wrapped in a {@link Supplier}, so nothing in the lambda runs in this method.
 *
 * @param allocation   the routing allocation used when the decision is computed
 * @param shardRouting the shard routing entry to describe
 * @return the entry carrying the shard id, state, decision supplier, current node id and primary flag
 */
private SysAllocation createSysAllocations(RoutingAllocation allocation, ShardRouting shardRouting) {
    allocation.setDebugMode(RoutingAllocation.DebugMode.EXCLUDE_YES_DECISIONS);
    Supplier<ShardAllocationDecision> shardDecision = () -> {
        // no decision is computed for shards that are initializing or relocating
        if (shardRouting.initializing() || shardRouting.relocating()) {
            return ShardAllocationDecision.NOT_TAKEN;
        }

        final AllocateUnassignedDecision unassignedDecision;
        if (shardRouting.unassigned()) {
            unassignedDecision = gatewayAllocator.decideUnassignedShardAllocation(shardRouting, allocation);
        } else {
            unassignedDecision = AllocateUnassignedDecision.NOT_TAKEN;
        }

        if (unassignedDecision.isDecisionTaken()) {
            return new ShardAllocationDecision(unassignedDecision, MoveDecision.NOT_TAKEN);
        }
        // fall back to the shard allocator when no allocate decision was taken
        return shardAllocator.decideShardAllocation(shardRouting, allocation);
    };
    return new SysAllocation(
        shardRouting.shardId(),
        shardRouting.state(),
        shardDecision,
        shardRouting.currentNodeId(),
        shardRouting.primary()
    );
}
Aggregations