Search in sources :

Example 6 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class RareClusterStateIT method testDelayedMappingPropagationOnReplica.

public void testDelayedMappingPropagationOnReplica() throws Exception {
    // This is essentially the same thing as testDelayedMappingPropagationOnPrimary
    // but for replicas
    // Here we want to test that everything goes well if the mappings that
    // are needed for a document are not available on the replica at the
    // time of indexing it
    final List<String> nodeNames = internalCluster().startNodes(2);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
    final String master = internalCluster().getMasterName();
    assertThat(nodeNames, hasItem(master));
    String otherNode = null;
    for (String node : nodeNames) {
        if (node.equals(master) == false) {
            otherNode = node;
            break;
        }
    }
    assertNotNull(otherNode);
    // Force allocation of the primary on the master node by first only allocating on the master
    // and then allowing all nodes so that the replica gets allocated on the other node
    prepareCreate("index").setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put("index.routing.allocation.include._name", master)).get();
    client().admin().indices().prepareUpdateSettings("index").setSettings(Settings.builder().put("index.routing.allocation.include._name", "")).get();
    ensureGreen();
    // Check routing tables
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(2));
    for (ShardRouting shard : shards) {
        if (shard.primary()) {
            // primary must be on the master
            assertEquals(state.nodes().getMasterNodeId(), shard.currentNodeId());
        } else {
            assertTrue(shard.active());
        }
    }
    // Block cluster state processing on the replica
    BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();
    final ActionFuture<AcknowledgedResponse> putMappingResponse = executeAndCancelCommittedPublication(client().admin().indices().preparePutMapping("index").setSource("field", "type=long"));
    final Index index = resolveIndex("index");
    // Wait for mappings to be available on master
    assertBusy(() -> {
        final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
        final IndexService indexService = indicesService.indexServiceSafe(index);
        assertNotNull(indexService);
        final MapperService mapperService = indexService.mapperService();
        DocumentMapper mapper = mapperService.documentMapper(MapperService.SINGLE_MAPPING_NAME);
        assertNotNull(mapper);
        assertNotNull(mapper.mappers().getMapper("field"));
    });
    final ActionFuture<IndexResponse> docIndexResponse = client().prepareIndex("index").setId("1").setSource("field", 42).execute();
    assertBusy(() -> assertTrue(client().prepareGet("index", "1").get().isExists()));
    // index another document, this time using dynamic mappings.
    // The ack timeout of 0 on dynamic mapping updates makes it possible for the document to be indexed on the primary, even
    // if the dynamic mapping update is not applied on the replica yet.
    // this request does not change the cluster state, because the mapping is dynamic,
    // we need to await and cancel committed publication
    ActionFuture<IndexResponse> dynamicMappingsFut = executeAndCancelCommittedPublication(client().prepareIndex("index").setId("2").setSource("field2", 42));
    // ...and wait for second mapping to be available on master
    assertBusy(() -> {
        final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
        final IndexService indexService = indicesService.indexServiceSafe(index);
        assertNotNull(indexService);
        final MapperService mapperService = indexService.mapperService();
        DocumentMapper mapper = mapperService.documentMapper(MapperService.SINGLE_MAPPING_NAME);
        assertNotNull(mapper);
        assertNotNull(mapper.mappers().getMapper("field2"));
    });
    assertBusy(() -> assertTrue(client().prepareGet("index", "2").get().isExists()));
    // The mappings have not been propagated to the replica yet as a consequence the document count not be indexed
    // We wait on purpose to make sure that the document is not indexed because the shard operation is stalled
    // and not just because it takes time to replicate the indexing request to the replica
    Thread.sleep(100);
    assertFalse(putMappingResponse.isDone());
    assertFalse(docIndexResponse.isDone());
    // Now make sure the indexing request finishes successfully
    disruption.stopDisrupting();
    assertTrue(putMappingResponse.get(10, TimeUnit.SECONDS).isAcknowledged());
    assertThat(docIndexResponse.get(10, TimeUnit.SECONDS), instanceOf(IndexResponse.class));
    // both shards should have succeeded
    assertEquals(2, docIndexResponse.get(10, TimeUnit.SECONDS).getShardInfo().getTotal());
    assertThat(dynamicMappingsFut.get(10, TimeUnit.SECONDS).getResult(), equalTo(CREATED));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) BlockClusterStateProcessing(org.opensearch.test.disruption.BlockClusterStateProcessing) IndexService(org.opensearch.index.IndexService) DocumentMapper(org.opensearch.index.mapper.DocumentMapper) AcknowledgedResponse(org.opensearch.action.support.master.AcknowledgedResponse) IndicesService(org.opensearch.indices.IndicesService) Index(org.opensearch.index.Index) IndexResponse(org.opensearch.action.index.IndexResponse) ShardRouting(org.opensearch.cluster.routing.ShardRouting) MapperService(org.opensearch.index.mapper.MapperService)

Example 7 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class SearchRedStateIndexIT method buildRedIndex.

private void buildRedIndex(int numShards) throws Exception {
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put("index.number_of_shards", numShards).put("index.number_of_replicas", 0)));
    ensureGreen();
    for (int i = 0; i < 10; i++) {
        client().prepareIndex("test").setId("" + i).setSource("field1", "value1").get();
    }
    refresh();
    internalCluster().stopRandomDataNode();
    client().admin().cluster().prepareHealth().setWaitForStatus(ClusterHealthStatus.RED).get();
    assertBusy(() -> {
        ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
        List<ShardRouting> unassigneds = clusterState.getRoutingTable().shardsWithState(ShardRoutingState.UNASSIGNED);
        assertThat(unassigneds.size(), greaterThan(0));
    });
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 8 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class TransportClusterAllocationExplainAction method masterOperation.

@Override
protected void masterOperation(final ClusterAllocationExplainRequest request, final ClusterState state, final ActionListener<ClusterAllocationExplainResponse> listener) {
    final RoutingNodes routingNodes = state.getRoutingNodes();
    final ClusterInfo clusterInfo = clusterInfoService.getClusterInfo();
    final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, state, clusterInfo, snapshotsInfoService.snapshotShardSizes(), System.nanoTime());
    ShardRouting shardRouting = findShardToExplain(request, allocation);
    logger.debug("explaining the allocation for [{}], found shard [{}]", request, shardRouting);
    ClusterAllocationExplanation cae = explainShard(shardRouting, allocation, request.includeDiskInfo() ? clusterInfo : null, request.includeYesDecisions(), allocationService);
    listener.onResponse(new ClusterAllocationExplainResponse(cae));
}
Also used : ClusterInfo(org.opensearch.cluster.ClusterInfo) RoutingNodes(org.opensearch.cluster.routing.RoutingNodes) RoutingAllocation(org.opensearch.cluster.routing.allocation.RoutingAllocation) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 9 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class GatewayAllocator method applyStartedShards.

@Override
public void applyStartedShards(final List<ShardRouting> startedShards, final RoutingAllocation allocation) {
    for (ShardRouting startedShard : startedShards) {
        Releasables.close(asyncFetchStarted.remove(startedShard.shardId()));
        Releasables.close(asyncFetchStore.remove(startedShard.shardId()));
    }
}
Also used : ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 10 with ShardRouting

use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.

the class ReplicaShardAllocator method makeAllocationDecision.

@Override
public AllocateUnassignedDecision makeAllocationDecision(final ShardRouting unassignedShard, final RoutingAllocation allocation, final Logger logger) {
    if (isResponsibleFor(unassignedShard) == false) {
        // this allocator is not responsible for deciding on this shard
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    final RoutingNodes routingNodes = allocation.routingNodes();
    final boolean explain = allocation.debugDecision();
    // pre-check if it can be allocated to any node that currently exists, so we won't list the store for it for nothing
    Tuple<Decision, Map<String, NodeAllocationResult>> result = canBeAllocatedToAtLeastOneNode(unassignedShard, allocation);
    Decision allocateDecision = result.v1();
    if (allocateDecision.type() != Decision.Type.YES && (explain == false || hasInitiatedFetching(unassignedShard) == false)) {
        // only return early if we are not in explain mode, or we are in explain mode but we have not
        // yet attempted to fetch any shard data
        logger.trace("{}: ignoring allocation, can't be allocated on any node", unassignedShard);
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), result.v2() != null ? new ArrayList<>(result.v2().values()) : null);
    }
    AsyncShardFetch.FetchResult<NodeStoreFilesMetadata> shardStores = fetchData(unassignedShard, allocation);
    if (shardStores.hasData() == false) {
        logger.trace("{}: ignoring allocation, still fetching shard stores", unassignedShard);
        allocation.setHasPendingAsyncFetch();
        List<NodeAllocationResult> nodeDecisions = null;
        if (explain) {
            nodeDecisions = buildDecisionsForAllNodes(unassignedShard, allocation);
        }
        return AllocateUnassignedDecision.no(AllocationStatus.FETCHING_SHARD_DATA, nodeDecisions);
    }
    ShardRouting primaryShard = routingNodes.activePrimary(unassignedShard.shardId());
    if (primaryShard == null) {
        assert explain : "primary should only be null here if we are in explain mode, so we didn't " + "exit early when canBeAllocatedToAtLeastOneNode didn't return a YES decision";
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), new ArrayList<>(result.v2().values()));
    }
    assert primaryShard.currentNodeId() != null;
    final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
    final TransportNodesListShardStoreMetadata.StoreFilesMetadata primaryStore = findStore(primaryNode, shardStores);
    if (primaryStore == null) {
        // if we can't find the primary data, it is probably because the primary shard is corrupted (and listing failed)
        // we want to let the replica be allocated in order to expose the actual problem with the primary that the replica
        // will try and recover from
        // Note, this is the existing behavior, as exposed in running CorruptFileTest#testNoPrimaryData
        logger.trace("{}: no primary shard store found or allocated, letting actual allocation figure it out", unassignedShard);
        return AllocateUnassignedDecision.NOT_TAKEN;
    }
    MatchingNodes matchingNodes = findMatchingNodes(unassignedShard, allocation, false, primaryNode, primaryStore, shardStores, explain);
    assert explain == false || matchingNodes.nodeDecisions != null : "in explain mode, we must have individual node decisions";
    List<NodeAllocationResult> nodeDecisions = augmentExplanationsWithStoreInfo(result.v2(), matchingNodes.nodeDecisions);
    if (allocateDecision.type() != Decision.Type.YES) {
        return AllocateUnassignedDecision.no(UnassignedInfo.AllocationStatus.fromDecision(allocateDecision.type()), nodeDecisions);
    } else if (matchingNodes.getNodeWithHighestMatch() != null) {
        RoutingNode nodeWithHighestMatch = allocation.routingNodes().node(matchingNodes.getNodeWithHighestMatch().getId());
        // we only check on THROTTLE since we checked before on NO
        Decision decision = allocation.deciders().canAllocate(unassignedShard, nodeWithHighestMatch, allocation);
        if (decision.type() == Decision.Type.THROTTLE) {
            logger.debug("[{}][{}]: throttling allocation [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we are throttling this, as we have enough other shards to allocate to this node, so ignore it for now
            return AllocateUnassignedDecision.throttle(nodeDecisions);
        } else {
            logger.debug("[{}][{}]: allocating [{}] to [{}] in order to reuse its unallocated persistent store", unassignedShard.index(), unassignedShard.id(), unassignedShard, nodeWithHighestMatch.node());
            // we found a match
            return AllocateUnassignedDecision.yes(nodeWithHighestMatch.node(), null, nodeDecisions, true);
        }
    } else if (matchingNodes.hasAnyData() == false && unassignedShard.unassignedInfo().isDelayed()) {
        // if we didn't manage to find *any* data (regardless of matching sizes), and the replica is
        // unassigned due to a node leaving, so we delay allocation of this replica to see if the
        // node with the shard copy will rejoin so we can re-use the copy it has
        logger.debug("{}: allocation of [{}] is delayed", unassignedShard.shardId(), unassignedShard);
        long remainingDelayMillis = 0L;
        long totalDelayMillis = 0L;
        if (explain) {
            UnassignedInfo unassignedInfo = unassignedShard.unassignedInfo();
            Metadata metadata = allocation.metadata();
            IndexMetadata indexMetadata = metadata.index(unassignedShard.index());
            totalDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetadata.getSettings()).getMillis();
            long remainingDelayNanos = unassignedInfo.getRemainingDelay(System.nanoTime(), indexMetadata.getSettings());
            remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
        }
        return AllocateUnassignedDecision.delayed(remainingDelayMillis, totalDelayMillis, nodeDecisions);
    }
    return AllocateUnassignedDecision.NOT_TAKEN;
}
Also used : NodeStoreFilesMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) RoutingNodes(org.opensearch.cluster.routing.RoutingNodes) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) ArrayList(java.util.ArrayList) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) StoreFileMetadata(org.opensearch.index.store.StoreFileMetadata) TransportNodesListShardStoreMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata) NodeStoreFilesMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata.NodeStoreFilesMetadata) Decision(org.opensearch.cluster.routing.allocation.decider.Decision) AllocateUnassignedDecision(org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision) RoutingNode(org.opensearch.cluster.routing.RoutingNode) TransportNodesListShardStoreMetadata(org.opensearch.indices.store.TransportNodesListShardStoreMetadata) ShardRouting(org.opensearch.cluster.routing.ShardRouting) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashMap(java.util.HashMap) Map(java.util.Map) NodeAllocationResult(org.opensearch.cluster.routing.allocation.NodeAllocationResult)

Aggregations

ShardRouting (org.opensearch.cluster.routing.ShardRouting)361 ClusterState (org.opensearch.cluster.ClusterState)172 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)135 ShardId (org.opensearch.index.shard.ShardId)110 TestShardRouting (org.opensearch.cluster.routing.TestShardRouting)100 IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable)93 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)85 RoutingTable (org.opensearch.cluster.routing.RoutingTable)84 Settings (org.opensearch.common.settings.Settings)83 Metadata (org.opensearch.cluster.metadata.Metadata)71 HashSet (java.util.HashSet)59 RoutingNode (org.opensearch.cluster.routing.RoutingNode)59 ArrayList (java.util.ArrayList)57 IOException (java.io.IOException)56 List (java.util.List)50 PlainActionFuture (org.opensearch.action.support.PlainActionFuture)50 Index (org.opensearch.index.Index)50 UnassignedInfo (org.opensearch.cluster.routing.UnassignedInfo)49 IndexShard (org.opensearch.index.shard.IndexShard)49 ActionListener (org.opensearch.action.ActionListener)45