
Example 1 with FailedShard

Use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.

From class ClusterRerouteTests, method testClusterStateUpdateTask.

public void testClusterStateUpdateTask() {
    AllocationService allocationService = new AllocationService(
        Settings.builder().build(),
        new AllocationDeciders(Settings.EMPTY, Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))),
        new TestGatewayAllocator(),
        new BalancedShardsAllocator(Settings.EMPTY),
        EmptyClusterInfoService.INSTANCE);
    ClusterState clusterState = createInitialClusterState(allocationService);
    ClusterRerouteRequest req = new ClusterRerouteRequest();
    req.dryRun(true);
    AtomicReference<ClusterRerouteResponse> responseRef = new AtomicReference<>();
    ActionListener<ClusterRerouteResponse> responseActionListener = new ActionListener<ClusterRerouteResponse>() {

        @Override
        public void onResponse(ClusterRerouteResponse clusterRerouteResponse) {
            responseRef.set(clusterRerouteResponse);
        }

        @Override
        public void onFailure(Exception e) {
        }
    };
    TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task =
        new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(logger, allocationService, req, responseActionListener);
    ClusterState execute = task.execute(clusterState);
    // dry-run
    assertSame(execute, clusterState);
    task.onAllNodesAcked(null);
    assertNotSame(responseRef.get().getState(), execute);
    // now we allocate
    req.dryRun(false);
    final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
    // now fail it the full retry-limit number of times; the final failure leaves the shard UNASSIGNED
    for (int i = 0; i < retries; i++) {
        ClusterState newState = task.execute(clusterState);
        // dry-run=false
        assertNotSame(newState, clusterState);
        clusterState = newState;
        RoutingTable routingTable = clusterState.routingTable();
        assertEquals(routingTable.index("idx").shards().size(), 1);
        assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING);
        assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i);
        List<FailedShard> failedShards = Collections.singletonList(
            new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException()));
        newState = allocationService.applyFailedShards(clusterState, failedShards);
        assertThat(newState, not(equalTo(clusterState)));
        clusterState = newState;
        routingTable = clusterState.routingTable();
        assertEquals(routingTable.index("idx").shards().size(), 1);
        if (i == retries - 1) {
            assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED);
        } else {
            assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING);
        }
        assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i + 1);
    }
    // without retry_failed we won't allocate that shard
    ClusterState newState = task.execute(clusterState);
    // dry-run=false
    assertNotSame(newState, clusterState);
    task.onAllNodesAcked(null);
    assertSame(responseRef.get().getState(), newState);
    RoutingTable routingTable = clusterState.routingTable();
    assertEquals(routingTable.index("idx").shards().size(), 1);
    assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED);
    assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries);
    // now we manually retry and get the shard back into initializing
    req.setRetryFailed(true);
    newState = task.execute(clusterState);
    // dry-run=false
    assertNotSame(newState, clusterState);
    clusterState = newState;
    routingTable = clusterState.routingTable();
    assertEquals(routingTable.index("idx").shards().size(), 1);
    assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING);
    assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries);
}
Also used : TestGatewayAllocator(org.elasticsearch.test.gateway.TestGatewayAllocator) ClusterState(org.elasticsearch.cluster.ClusterState) BalancedShardsAllocator(org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator) FailedShard(org.elasticsearch.cluster.routing.allocation.FailedShard) AtomicReference(java.util.concurrent.atomic.AtomicReference) AllocationDeciders(org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders) MaxRetryAllocationDecider(org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider) IOException(java.io.IOException) ActionListener(org.elasticsearch.action.ActionListener) RoutingTable(org.elasticsearch.cluster.routing.RoutingTable) AllocationService(org.elasticsearch.cluster.routing.allocation.AllocationService)
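
The core of the loop above can be distilled into one helper. This is only a minimal sketch built from the calls visible in the example (the three-argument FailedShard constructor and AllocationService.applyFailedShards); the class and method names are illustrative, not part of Elasticsearch.

import java.util.Collections;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.FailedShard;

class FailShardOnceSketch {
    // Fail one shard copy and let the allocation service compute the next cluster state.
    // Each applied failure increments the copy's UnassignedInfo.getNumFailedAllocations(),
    // which is what MaxRetryAllocationDecider compares against its retry limit in the test above.
    static ClusterState failOnce(AllocationService allocationService, ClusterState state,
                                 ShardRouting shard, String reason) {
        FailedShard failure = new FailedShard(shard, reason, new UnsupportedOperationException());
        return allocationService.applyFailedShards(state, Collections.singletonList(failure));
    }
}

Once the failure count reaches the retry limit, the shard stays UNASSIGNED until a reroute request with setRetryFailed(true) is executed, as the tail of the test shows.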

Example 2 with FailedShard

Use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.

From class UnassignedInfoTests, method testFailedShard.

/**
 * Verifies that when a shard fails, the reason is properly set and the details are preserved.
 */
public void testFailedShard() {
    AllocationService allocation = createAllocationService();
    MetaData metaData = MetaData.builder()
        .put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1))
        .build();
    ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
        .metaData(metaData)
        .routingTable(RoutingTable.builder().addAsNew(metaData.index("test")).build())
        .build();
    clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
    clusterState = allocation.reroute(clusterState, "reroute");
    // starting primaries
    clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
    // starting replicas
    clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
    assertThat(clusterState.getRoutingNodes().unassigned().size() > 0, equalTo(false));
    // fail shard
    ShardRouting shardToFail = clusterState.getRoutingNodes().shardsWithState(STARTED).get(0);
    clusterState = allocation.applyFailedShards(clusterState, Collections.singletonList(new FailedShard(shardToFail, "test fail", null)));
    // verify the reason and details
    assertThat(clusterState.getRoutingNodes().unassigned().size() > 0, equalTo(true));
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo(), notNullValue());
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED));
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getMessage(), equalTo("test fail"));
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getDetails(), equalTo("test fail"));
    assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getUnassignedTimeInMillis(), greaterThan(0L));
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) MetaData(org.elasticsearch.cluster.metadata.MetaData) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) FailedShard(org.elasticsearch.cluster.routing.allocation.FailedShard) AllocationService(org.elasticsearch.cluster.routing.allocation.AllocationService)
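
The assertions above all read the same UnassignedInfo object. As a minimal sketch, using only accessors shown in the example (class and method names here are illustrative), the failure metadata can be read back like this:

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.UnassignedInfo;

class UnassignedInfoSketch {
    // Summarize why the first unassigned copy lost its assignment; after applyFailedShards the
    // reason is ALLOCATION_FAILED and the failure message is preserved in getMessage()/getDetails().
    static String describeFirstUnassigned(ClusterState state) {
        ShardRouting unassigned = state.getRoutingNodes().shardsWithState(ShardRoutingState.UNASSIGNED).get(0);
        UnassignedInfo info = unassigned.unassignedInfo();
        return info.getReason() + ": " + info.getMessage();
    }
}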

Example 3 with FailedShard

Use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.

From class IndicesClusterStateServiceRandomUpdatesTests, method randomlyUpdateClusterState.

public ClusterState randomlyUpdateClusterState(ClusterState state, Map<DiscoveryNode, IndicesClusterStateService> clusterStateServiceMap, Supplier<MockIndicesService> indicesServiceSupplier) {
    // randomly create new indices (until we have 200 max)
    for (int i = 0; i < randomInt(5); i++) {
        if (state.metaData().indices().size() > 200) {
            break;
        }
        String name = "index_" + randomAsciiOfLength(15).toLowerCase(Locale.ROOT);
        Settings.Builder settingsBuilder = Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 3)).put(SETTING_NUMBER_OF_REPLICAS, randomInt(2));
        if (randomBoolean()) {
            settingsBuilder.put(IndexMetaData.SETTING_SHADOW_REPLICAS, true).put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true);
        }
        CreateIndexRequest request = new CreateIndexRequest(name, settingsBuilder.build()).waitForActiveShards(ActiveShardCount.NONE);
        state = cluster.createIndex(state, request);
        assertTrue(state.metaData().hasIndex(name));
    }
    // randomly delete indices
    Set<String> indicesToDelete = new HashSet<>();
    int numberOfIndicesToDelete = randomInt(Math.min(2, state.metaData().indices().size()));
    for (String index : randomSubsetOf(numberOfIndicesToDelete, state.metaData().indices().keys().toArray(String.class))) {
        indicesToDelete.add(state.metaData().index(index).getIndex().getName());
    }
    if (indicesToDelete.isEmpty() == false) {
        DeleteIndexRequest deleteRequest = new DeleteIndexRequest(indicesToDelete.toArray(new String[indicesToDelete.size()]));
        state = cluster.deleteIndices(state, deleteRequest);
        for (String index : indicesToDelete) {
            assertFalse(state.metaData().hasIndex(index));
        }
    }
    // randomly close indices
    int numberOfIndicesToClose = randomInt(Math.min(1, state.metaData().indices().size()));
    for (String index : randomSubsetOf(numberOfIndicesToClose, state.metaData().indices().keys().toArray(String.class))) {
        CloseIndexRequest closeIndexRequest = new CloseIndexRequest(state.metaData().index(index).getIndex().getName());
        state = cluster.closeIndices(state, closeIndexRequest);
    }
    // randomly open indices
    int numberOfIndicesToOpen = randomInt(Math.min(1, state.metaData().indices().size()));
    for (String index : randomSubsetOf(numberOfIndicesToOpen, state.metaData().indices().keys().toArray(String.class))) {
        OpenIndexRequest openIndexRequest = new OpenIndexRequest(state.metaData().index(index).getIndex().getName());
        state = cluster.openIndices(state, openIndexRequest);
    }
    // randomly update settings
    Set<String> indicesToUpdate = new HashSet<>();
    boolean containsClosedIndex = false;
    int numberOfIndicesToUpdate = randomInt(Math.min(2, state.metaData().indices().size()));
    for (String index : randomSubsetOf(numberOfIndicesToUpdate, state.metaData().indices().keys().toArray(String.class))) {
        indicesToUpdate.add(state.metaData().index(index).getIndex().getName());
        if (state.metaData().index(index).getState() == IndexMetaData.State.CLOSE) {
            containsClosedIndex = true;
        }
    }
    if (indicesToUpdate.isEmpty() == false) {
        UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(indicesToUpdate.toArray(new String[indicesToUpdate.size()]));
        Settings.Builder settings = Settings.builder();
        if (containsClosedIndex == false) {
            settings.put(SETTING_NUMBER_OF_REPLICAS, randomInt(2));
        }
        settings.put("index.refresh_interval", randomIntBetween(1, 5) + "s");
        updateSettingsRequest.settings(settings.build());
        state = cluster.updateSettings(state, updateSettingsRequest);
    }
    // randomly reroute
    if (rarely()) {
        state = cluster.reroute(state, new ClusterRerouteRequest());
    }
    // randomly start and fail allocated shards
    List<ShardRouting> startedShards = new ArrayList<>();
    List<FailedShard> failedShards = new ArrayList<>();
    for (DiscoveryNode node : state.nodes()) {
        IndicesClusterStateService indicesClusterStateService = clusterStateServiceMap.get(node);
        MockIndicesService indicesService = (MockIndicesService) indicesClusterStateService.indicesService;
        for (MockIndexService indexService : indicesService) {
            for (MockIndexShard indexShard : indexService) {
                ShardRouting persistedShardRouting = indexShard.routingEntry();
                if (persistedShardRouting.initializing() && randomBoolean()) {
                    startedShards.add(persistedShardRouting);
                } else if (rarely()) {
                    failedShards.add(new FailedShard(persistedShardRouting, "fake shard failure", new Exception()));
                }
            }
        }
    }
    state = cluster.applyFailedShards(state, failedShards);
    state = cluster.applyStartedShards(state, startedShards);
    // randomly add and remove nodes (except current master)
    if (rarely()) {
        if (randomBoolean()) {
            // add node
            if (state.nodes().getSize() < 10) {
                DiscoveryNodes newNodes = DiscoveryNodes.builder(state.nodes()).add(createNode()).build();
                state = ClusterState.builder(state).nodes(newNodes).build();
                // always reroute after adding a node
                state = cluster.reroute(state, new ClusterRerouteRequest());
                updateNodes(state, clusterStateServiceMap, indicesServiceSupplier);
            }
        } else {
            // remove node
            if (state.nodes().getDataNodes().size() > 3) {
                DiscoveryNode discoveryNode = randomFrom(state.nodes().getNodes().values().toArray(DiscoveryNode.class));
                if (discoveryNode.equals(state.nodes().getMasterNode()) == false) {
                    DiscoveryNodes newNodes = DiscoveryNodes.builder(state.nodes()).remove(discoveryNode.getId()).build();
                    state = ClusterState.builder(state).nodes(newNodes).build();
                    state = cluster.deassociateDeadNodes(state, true, "removed and added a node");
                    updateNodes(state, clusterStateServiceMap, indicesServiceSupplier);
                }
                if (randomBoolean()) {
                    // and add it back
                    DiscoveryNodes newNodes = DiscoveryNodes.builder(state.nodes()).add(discoveryNode).build();
                    state = ClusterState.builder(state).nodes(newNodes).build();
                    state = cluster.reroute(state, new ClusterRerouteRequest());
                    updateNodes(state, clusterStateServiceMap, indicesServiceSupplier);
                }
            }
        }
    }
    return state;
}
Also used : DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) UpdateSettingsRequest(org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest) ArrayList(java.util.ArrayList) DeleteIndexRequest(org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest) ClusterRerouteRequest(org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteRequest) Settings(org.elasticsearch.common.settings.Settings) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes) HashSet(java.util.HashSet) OpenIndexRequest(org.elasticsearch.action.admin.indices.open.OpenIndexRequest) FailedShard(org.elasticsearch.cluster.routing.allocation.FailedShard) CloseIndexRequest(org.elasticsearch.action.admin.indices.close.CloseIndexRequest) CreateIndexRequest(org.elasticsearch.action.admin.indices.create.CreateIndexRequest) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting)
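
The shard-failure portion of this test boils down to collecting FailedShard entries and applying them in one batch. A minimal sketch of that pattern, using only the constructor and the AllocationService.applyFailedShards call shown in the earlier examples (the class and method names are illustrative):

import java.util.ArrayList;
import java.util.List;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.FailedShard;

class BatchedFailureSketch {
    // Build one FailedShard per routing entry and hand the whole batch to the allocation service,
    // mirroring how the test collects its failures before applying them in a single pass.
    static ClusterState failAll(AllocationService allocationService, ClusterState state, List<ShardRouting> toFail) {
        List<FailedShard> failures = new ArrayList<>();
        for (ShardRouting shard : toFail) {
            failures.add(new FailedShard(shard, "fake shard failure", new Exception()));
        }
        return allocationService.applyFailedShards(state, failures);
    }
}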

Example 4 with FailedShard

Use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.

From class ShardFailedClusterStateTaskExecutorTests, method testTriviallySuccessfulTasksBatchedWithFailingTasks.

public void testTriviallySuccessfulTasksBatchedWithFailingTasks() throws Exception {
    String reason = "test trivially successful tasks batched with failing tasks";
    ClusterState currentState = createClusterStateWithStartedShards(reason);
    List<ShardStateAction.ShardEntry> failingTasks = createExistingShards(currentState, reason);
    List<ShardStateAction.ShardEntry> nonExistentTasks = createNonExistentShards(currentState, reason);
    ShardStateAction.ShardFailedClusterStateTaskExecutor failingExecutor = new ShardStateAction.ShardFailedClusterStateTaskExecutor(allocationService, null, logger) {

        @Override
        ClusterState applyFailedShards(ClusterState currentState, List<FailedShard> failedShards, List<StaleShard> staleShards) {
            throw new RuntimeException("simulated applyFailedShards failure");
        }
    };
    List<ShardStateAction.ShardEntry> tasks = new ArrayList<>();
    tasks.addAll(failingTasks);
    tasks.addAll(nonExistentTasks);
    ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result = failingExecutor.execute(currentState, tasks);
    Map<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = failingTasks.stream()
        .collect(Collectors.toMap(Function.identity(),
            task -> ClusterStateTaskExecutor.TaskResult.failure(new RuntimeException("simulated applyFailedShards failure"))));
    taskResultMap.putAll(nonExistentTasks.stream()
        .collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.success())));
    assertTaskResults(taskResultMap, result, currentState, false);
}
Also used : MetaData(org.elasticsearch.cluster.metadata.MetaData) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) ShardId(org.elasticsearch.index.shard.ShardId) IntStream(java.util.stream.IntStream) ESAllocationTestCase(org.elasticsearch.cluster.ESAllocationTestCase) ShardIterator(org.elasticsearch.cluster.routing.ShardIterator) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AllocationService(org.elasticsearch.cluster.routing.allocation.AllocationService) Index(org.elasticsearch.index.Index) ShardRoutingState(org.elasticsearch.cluster.routing.ShardRoutingState) Function(java.util.function.Function) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) GroupShardsIterator(org.elasticsearch.cluster.routing.GroupShardsIterator) ArrayList(java.util.ArrayList) CoreMatchers.instanceOf(org.hamcrest.CoreMatchers.instanceOf) ClusterState(org.elasticsearch.cluster.ClusterState) Settings(org.elasticsearch.common.settings.Settings) Map(java.util.Map) ClusterName(org.elasticsearch.cluster.ClusterName) RoutingNodes(org.elasticsearch.cluster.routing.RoutingNodes) ClusterRebalanceAllocationDecider(org.elasticsearch.cluster.routing.allocation.decider.ClusterRebalanceAllocationDecider) Before(org.junit.Before) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes) UUIDs(org.elasticsearch.common.UUIDs) ClusterStateTaskExecutor(org.elasticsearch.cluster.ClusterStateTaskExecutor) StaleShard(org.elasticsearch.cluster.routing.allocation.StaleShard) ObjectCursor(com.carrotsearch.hppc.cursors.ObjectCursor) Collectors(java.util.stream.Collectors) List(java.util.List) Version(org.elasticsearch.Version) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) RoutingTable(org.elasticsearch.cluster.routing.RoutingTable) FailedShard(org.elasticsearch.cluster.routing.allocation.FailedShard) Collections(java.util.Collections) TestShardRouting(org.elasticsearch.cluster.routing.TestShardRouting)

Example 5 with FailedShard

Use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.

From class GatewayAllocator, method applyFailedShards.

public void applyFailedShards(final RoutingAllocation allocation, final List<FailedShard> failedShards) {
    for (FailedShard failedShard : failedShards) {
        Releasables.close(asyncFetchStarted.remove(failedShard.getRoutingEntry().shardId()));
        Releasables.close(asyncFetchStore.remove(failedShard.getRoutingEntry().shardId()));
    }
}
Also used : FailedShard(org.elasticsearch.cluster.routing.allocation.FailedShard)
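
The method above shows the cleanup side of FailedShard: per-shard state is dropped using the failed copy's ShardId. A minimal, hypothetical sketch of that pattern follows; the map and its AutoCloseable values stand in for GatewayAllocator's actual async-fetch fields and are not the real implementation.

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.elasticsearch.cluster.routing.allocation.FailedShard;
import org.elasticsearch.index.shard.ShardId;

class PerShardCacheCleanupSketch {
    private final Map<ShardId, AutoCloseable> perShardFetches = new ConcurrentHashMap<>();

    // When shards fail, remove and close any cached per-shard resource keyed by the failed copy's ShardId.
    void onFailedShards(List<FailedShard> failedShards) {
        for (FailedShard failedShard : failedShards) {
            AutoCloseable fetch = perShardFetches.remove(failedShard.getRoutingEntry().shardId());
            if (fetch != null) {
                try {
                    fetch.close();
                } catch (Exception e) {
                    // best-effort cleanup in this sketch
                }
            }
        }
    }
}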

Aggregations

FailedShard (org.elasticsearch.cluster.routing.allocation.FailedShard): 10 usages
ArrayList (java.util.ArrayList): 4 usages
ShardRouting (org.elasticsearch.cluster.routing.ShardRouting): 4 usages
HashSet (java.util.HashSet): 3 usages
ClusterState (org.elasticsearch.cluster.ClusterState): 3 usages
AllocationService (org.elasticsearch.cluster.routing.allocation.AllocationService): 3 usages
ShardId (org.elasticsearch.index.shard.ShardId): 3 usages
IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData): 2 usages
MetaData (org.elasticsearch.cluster.metadata.MetaData): 2 usages
DiscoveryNodes (org.elasticsearch.cluster.node.DiscoveryNodes): 2 usages
RoutingTable (org.elasticsearch.cluster.routing.RoutingTable): 2 usages
Settings (org.elasticsearch.common.settings.Settings): 2 usages
ObjectCursor (com.carrotsearch.hppc.cursors.ObjectCursor): 1 usage
IOException (java.io.IOException): 1 usage
Collections (java.util.Collections): 1 usage
List (java.util.List): 1 usage
Map (java.util.Map): 1 usage
AtomicReference (java.util.concurrent.atomic.AtomicReference): 1 usage
Function (java.util.function.Function): 1 usage
Collectors (java.util.stream.Collectors): 1 usage