Use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
From the class RareClusterStateIT, the method testDelayedMappingPropagationOnReplica:
public void testDelayedMappingPropagationOnReplica() throws Exception {
    // This is essentially the same thing as testDelayedMappingPropagationOnPrimary,
    // but for replicas.
    // Here we want to test that everything goes well if the mappings that
    // are needed for a document are not available on the replica at the
    // time of indexing it.
    final List<String> nodeNames = internalCluster().startNodes(2);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
    final String master = internalCluster().getMasterName();
    assertThat(nodeNames, hasItem(master));
    String otherNode = null;
    for (String node : nodeNames) {
        if (node.equals(master) == false) {
            otherNode = node;
            break;
        }
    }
    assertNotNull(otherNode);
    // Force allocation of the primary on the master node by first only allocating on the master,
    // and then allowing all nodes so that the replica gets allocated on the other node.
    prepareCreate("index").setSettings(
        Settings.builder()
            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
            .put("index.routing.allocation.include._name", master)
    ).get();
    client().admin().indices().prepareUpdateSettings("index").setSettings(
        Settings.builder().put("index.routing.allocation.include._name", "")
    ).get();
    ensureGreen();
    // Check routing tables
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(2));
    for (ShardRouting shard : shards) {
        if (shard.primary()) {
            // the primary must be on the master
            assertEquals(state.nodes().getMasterNodeId(), shard.currentNodeId());
        } else {
            assertTrue(shard.active());
        }
    }
    // Block cluster state processing on the replica
    BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();
    final ActionFuture<AcknowledgedResponse> putMappingResponse = executeAndCancelCommittedPublication(
        client().admin().indices().preparePutMapping("index").setSource("field", "type=long")
    );
    final Index index = resolveIndex("index");
    // Wait for the mappings to be available on the master
    assertBusy(() -> {
        final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
        final IndexService indexService = indicesService.indexServiceSafe(index);
        assertNotNull(indexService);
        final MapperService mapperService = indexService.mapperService();
        DocumentMapper mapper = mapperService.documentMapper(MapperService.SINGLE_MAPPING_NAME);
        assertNotNull(mapper);
        assertNotNull(mapper.mappers().getMapper("field"));
    });
    final ActionFuture<IndexResponse> docIndexResponse =
        client().prepareIndex("index").setId("1").setSource("field", 42).execute();
    assertBusy(() -> assertTrue(client().prepareGet("index", "1").get().isExists()));
    // Index another document, this time using dynamic mappings.
    // The ack timeout of 0 on dynamic mapping updates makes it possible for the document to be
    // indexed on the primary, even if the dynamic mapping update is not applied on the replica yet.
    // Since the mapping is dynamic, this request itself triggers a cluster state update,
    // so we need to await and cancel the committed publication.
    ActionFuture<IndexResponse> dynamicMappingsFut = executeAndCancelCommittedPublication(
        client().prepareIndex("index").setId("2").setSource("field2", 42)
    );
    // ...and wait for the second mapping to be available on the master
    assertBusy(() -> {
        final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
        final IndexService indexService = indicesService.indexServiceSafe(index);
        assertNotNull(indexService);
        final MapperService mapperService = indexService.mapperService();
        DocumentMapper mapper = mapperService.documentMapper(MapperService.SINGLE_MAPPING_NAME);
        assertNotNull(mapper);
        assertNotNull(mapper.mappers().getMapper("field2"));
    });
    assertBusy(() -> assertTrue(client().prepareGet("index", "2").get().isExists()));
    // The mappings have not been propagated to the replica yet; as a consequence the document
    // cannot be indexed there. We wait on purpose to make sure that the document is not indexed
    // because the shard operation is stalled, and not just because it takes time to replicate
    // the indexing request to the replica.
    Thread.sleep(100);
    assertFalse(putMappingResponse.isDone());
    assertFalse(docIndexResponse.isDone());
    // Now make sure the indexing request finishes successfully
    disruption.stopDisrupting();
    assertTrue(putMappingResponse.get(10, TimeUnit.SECONDS).isAcknowledged());
    assertThat(docIndexResponse.get(10, TimeUnit.SECONDS), instanceOf(IndexResponse.class));
    // both shard copies should have succeeded
    assertEquals(2, docIndexResponse.get(10, TimeUnit.SECONDS).getShardInfo().getTotal());
    assertThat(dynamicMappingsFut.get(10, TimeUnit.SECONDS).getResult(), equalTo(CREATED));
}
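The routing-table checks in the test are a general pattern for locating a shard copy via ShardRouting. Below is a minimal sketch, assuming only the ClusterState and ShardRouting accessors already used above; findPrimaryNodeName is a hypothetical helper, not part of RareClusterStateIT:

// Hypothetical helper (not from the test class): resolve which node currently
// holds the primary of the given index, or null if no primary is assigned.
private static String findPrimaryNodeName(ClusterState state, String indexName) {
    for (ShardRouting shard : state.routingTable().allShards(indexName)) {
        if (shard.primary() && shard.currentNodeId() != null) {
            return state.nodes().get(shard.currentNodeId()).getName();
        }
    }
    return null;
}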
Use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
From the class SearchRedStateIndexIT, the method buildRedIndex:
private void buildRedIndex(int numShards) throws Exception {
    assertAcked(prepareCreate("test").setSettings(
        Settings.builder()
            .put("index.number_of_shards", numShards)
            .put("index.number_of_replicas", 0)
    ));
    ensureGreen();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test").setId("" + i).setSource("field1", "value1").get();
    }
    refresh();
    // With zero replicas, stopping a data node leaves its shards with no copy at all,
    // so the index turns red.
    internalCluster().stopRandomDataNode();
    client().admin().cluster().prepareHealth().setWaitForStatus(ClusterHealthStatus.RED).get();
    assertBusy(() -> {
        ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
        List<ShardRouting> unassigneds = clusterState.getRoutingTable().shardsWithState(ShardRoutingState.UNASSIGNED);
        assertThat(unassigneds.size(), greaterThan(0));
    });
}
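buildRedIndex only prepares the cluster; a test in this class would then search the red index. The following is an illustrative sketch, not an actual test from SearchRedStateIndexIT, assuming the standard SearchRequestBuilder#setAllowPartialSearchResults API:

// Illustrative sketch: search the red index while tolerating unassigned shards.
public void testPartialResultsOnRedIndex() throws Exception {
    buildRedIndex(4);
    SearchResponse response = client().prepareSearch("test")
        .setAllowPartialSearchResults(true)  // do not fail the whole search on missing shards
        .get();
    // some shards failed: their only copy lived on the stopped data node
    assertThat(response.getFailedShards(), greaterThan(0));
}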
Use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
From the class TransportClusterAllocationExplainAction, the method masterOperation:
@Override
protected void masterOperation(
    final ClusterAllocationExplainRequest request,
    final ClusterState state,
    final ActionListener<ClusterAllocationExplainResponse> listener
) {
    final RoutingNodes routingNodes = state.getRoutingNodes();
    final ClusterInfo clusterInfo = clusterInfoService.getClusterInfo();
    final RoutingAllocation allocation = new RoutingAllocation(
        allocationDeciders,
        routingNodes,
        state,
        clusterInfo,
        snapshotsInfoService.snapshotShardSizes(),
        System.nanoTime()
    );
    ShardRouting shardRouting = findShardToExplain(request, allocation);
    logger.debug("explaining the allocation for [{}], found shard [{}]", request, shardRouting);
    ClusterAllocationExplanation cae = explainShard(
        shardRouting,
        allocation,
        request.includeDiskInfo() ? clusterInfo : null,
        request.includeYesDecisions(),
        allocationService
    );
    listener.onResponse(new ClusterAllocationExplainResponse(cae));
}
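For context, this masterOperation is what ultimately serves a client-side allocation-explain call. Below is a hedged sketch of how such a request might be issued, assuming the usual admin-client builder methods; the index name, shard id, and flag values are illustrative:

// Hedged sketch of the corresponding client-side request.
ClusterAllocationExplainResponse response = client().admin().cluster()
    .prepareAllocationExplain()
    .setIndex("my-index")       // index whose shard should be explained
    .setShard(0)                // shard id within that index
    .setPrimary(false)          // explain the replica copy
    .setIncludeDiskInfo(true)   // attach ClusterInfo disk usage to the explanation
    .get();
ClusterAllocationExplanation explanation = response.getExplanation();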
Use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
From the class GatewayAllocator, the method applyStartedShards:
@Override
public void applyStartedShards(final List<ShardRouting> startedShards, final RoutingAllocation allocation) {
    for (ShardRouting startedShard : startedShards) {
        Releasables.close(asyncFetchStarted.remove(startedShard.shardId()));
        Releasables.close(asyncFetchStore.remove(startedShard.shardId()));
    }
}
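The pattern here relies on Map#remove returning null for absent keys and Releasables.close tolerating null, so every started shard can be processed unconditionally. A minimal standalone sketch of the same eviction idiom, with hypothetical names:

// Minimal sketch (hypothetical names): evict per-shard async-fetch caches once
// the shard has started. Map#remove yields null when nothing is cached, and
// Releasables.close(null) is a no-op, so no presence check is needed.
void evictFetchCaches(
    List<ShardRouting> startedShards,
    Map<ShardId, Releasable> fetchStartedCache,
    Map<ShardId, Releasable> fetchStoreCache
) {
    for (ShardRouting shard : startedShards) {
        Releasables.close(fetchStartedCache.remove(shard.shardId()));
        Releasables.close(fetchStoreCache.remove(shard.shardId()));
    }
}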
Use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
From the class ReplicaShardAllocator, the method makeAllocationDecision:
@Override
public AllocateUnassignedDecision makeAllocationDecision(
    final ShardRouting unassignedShard,
    final RoutingAllocation allocation,
    final Logger logger
) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    final RoutingNodes routingNodes = allocation.routingNodes();
    final boolean explain = allocation.debugDecision();
    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    Decision allocateDecision = result.v1();
    if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
        // only return early if we are not in explain mode, or we are in explain mode but we have not
        // yet attempted to fetch any shard data
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        return AllocateUnassignedDecision.no(
            UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()),
            result.v2() != null ? new ArrayList<>(result.v2().values()) : null
        );
    }
    AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        allocation.setHasPendingAsyncFetch();
        List<NodeAllocationResult> nodeDecisions = null;
        if (explain) {
            nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
        }
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
    }
    ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't "
            + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        return AllocateUnassignedDecision.no(
            UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()),
            new ArrayList<>(result.v2().values())
        );
    }
    assert primaryShard.currentNodeId() != null;
    final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
    final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed);
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try to recover from.
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, false, primaryNode, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
    if (allocateDecision.type() != Decision.Type.YES) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    } else if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before on NO
        Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug(
                "[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store",
                unassignedShard.index(),
                unassignedShard.id(),
                unassignedShard,
                nodeWithHighestMatch.node()
            );
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        } else {
            logger.debug(
                "[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store",
                unassignedShard.index(),
                unassignedShard.id(),
                unassignedShard,
                nodeWithHighestMatch.node()
            );
            // we found a match
            return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
        }
    } else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
        // we didn't manage to find *any* data (regardless of matching sizes), and the replica is
        // unassigned because a node left; we delay allocation of this replica to see if the
        // node with the shard copy will rejoin, so we can reuse the copy it has
        logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
        long remainingDelayMillis = 0L;
        long totalDelayMillis = 0L;
        if (explain) {
            UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
            Metadata metadata = allocation.metadata();
            IndexMetadata indexMetadata = metadata.index(unassignedShard.index());
            totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).getMillis();
            long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetadata.getSettings());
            remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
        }
        return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
    }
    return AllocateUnassignedDecision.NOT_TAKEN;
}
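A caller consumes the returned AllocateUnassignedDecision roughly as sketched below. This is a hypothetical consumer, assuming the decision's standard accessors (isDecisionTaken, getTargetNode); it is not code from ReplicaShardAllocator:

// Hypothetical consumer of the decision above; values are illustrative.
AllocateUnassignedDecision decision = allocator.makeAllocationDecision(shard, allocation, logger);
if (decision.isDecisionTaken() == false) {
    // NOT_TAKEN: another allocator (e.g. the balancer) must handle this shard
} else if (decision.getTargetNode() != null) {
    // YES: initialize the replica on the chosen node, reusing its on-disk copy
} else {
    // NO, THROTTLE, or delayed: leave the shard unassigned for this round
}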