use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.
the class ClusterRerouteTests method testClusterStateUpdateTask.
public void testClusterStateUpdateTask() {
AllocationService allocationService = new AllocationService(Settings.builder().build(), new AllocationDeciders(Settings.EMPTY, Collections.singleton(new MaxRetryAllocationDecider(Settings.EMPTY))), new TestGatewayAllocator(), new BalancedShardsAllocator(Settings.EMPTY), EmptyClusterInfoService.INSTANCE);
ClusterState clusterState = createInitialClusterState(allocationService);
ClusterRerouteRequest req = new ClusterRerouteRequest();
req.dryRun(true);
AtomicReference<ClusterRerouteResponse> responseRef = new AtomicReference<>();
ActionListener<ClusterRerouteResponse> responseActionListener = new ActionListener<ClusterRerouteResponse>() {
@Override
public void onResponse(ClusterRerouteResponse clusterRerouteResponse) {
responseRef.set(clusterRerouteResponse);
}
@Override
public void onFailure(Exception e) {
}
};
TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask task = new TransportClusterRerouteAction.ClusterRerouteResponseAckedClusterStateUpdateTask(logger, allocationService, req, responseActionListener);
ClusterState execute = task.execute(clusterState);
// dry-run
assertSame(execute, clusterState);
task.onAllNodesAcked(null);
assertNotSame(responseRef.get().getState(), execute);
// now we allocate
req.dryRun(false);
final int retries = MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY.get(Settings.EMPTY);
// now fail it N-1 times
for (int i = 0; i < retries; i++) {
ClusterState newState = task.execute(clusterState);
// dry-run=false
assertNotSame(newState, clusterState);
clusterState = newState;
RoutingTable routingTable = clusterState.routingTable();
assertEquals(routingTable.index("idx").shards().size(), 1);
assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING);
assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i);
List<FailedShard> failedShards = Collections.singletonList(new FailedShard(routingTable.index("idx").shard(0).shards().get(0), "boom" + i, new UnsupportedOperationException()));
newState = allocationService.applyFailedShards(clusterState, failedShards);
assertThat(newState, not(equalTo(clusterState)));
clusterState = newState;
routingTable = clusterState.routingTable();
assertEquals(routingTable.index("idx").shards().size(), 1);
if (i == retries - 1) {
assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED);
} else {
assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING);
}
assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), i + 1);
}
// without retry_failed we won't allocate that shard
ClusterState newState = task.execute(clusterState);
// dry-run=false
assertNotSame(newState, clusterState);
task.onAllNodesAcked(null);
assertSame(responseRef.get().getState(), newState);
RoutingTable routingTable = clusterState.routingTable();
assertEquals(routingTable.index("idx").shards().size(), 1);
assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), UNASSIGNED);
assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries);
// now we manually retry and get the shard back into initializing
req.setRetryFailed(true);
newState = task.execute(clusterState);
// dry-run=false
assertNotSame(newState, clusterState);
clusterState = newState;
routingTable = clusterState.routingTable();
assertEquals(routingTable.index("idx").shards().size(), 1);
assertEquals(routingTable.index("idx").shard(0).shards().get(0).state(), INITIALIZING);
assertEquals(routingTable.index("idx").shard(0).shards().get(0).unassignedInfo().getNumFailedAllocations(), retries);
}
use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.
the class UnassignedInfoTests method testFailedShard.
/**
* Verifies that when a shard fails, reason is properly set and details are preserved.
*/
public void testFailedShard() {
AllocationService allocation = createAllocationService();
MetaData metaData = MetaData.builder().put(IndexMetaData.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)).build();
ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metaData(metaData).routingTable(RoutingTable.builder().addAsNew(metaData.index("test")).build()).build();
clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder().add(newNode("node1")).add(newNode("node2"))).build();
clusterState = allocation.reroute(clusterState, "reroute");
// starting primaries
clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
// starting replicas
clusterState = allocation.applyStartedShards(clusterState, clusterState.getRoutingNodes().shardsWithState(INITIALIZING));
assertThat(clusterState.getRoutingNodes().unassigned().size() > 0, equalTo(false));
// fail shard
ShardRouting shardToFail = clusterState.getRoutingNodes().shardsWithState(STARTED).get(0);
clusterState = allocation.applyFailedShards(clusterState, Collections.singletonList(new FailedShard(shardToFail, "test fail", null)));
// verify the reason and details
assertThat(clusterState.getRoutingNodes().unassigned().size() > 0, equalTo(true));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).size(), equalTo(1));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo(), notNullValue());
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getMessage(), equalTo("test fail"));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getDetails(), equalTo("test fail"));
assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED).get(0).unassignedInfo().getUnassignedTimeInMillis(), greaterThan(0L));
}
use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.
the class IndicesClusterStateServiceRandomUpdatesTests method randomlyUpdateClusterState.
public ClusterState randomlyUpdateClusterState(ClusterState state, Map<DiscoveryNode, IndicesClusterStateService> clusterStateServiceMap, Supplier<MockIndicesService> indicesServiceSupplier) {
// randomly create new indices (until we have 200 max)
for (int i = 0; i < randomInt(5); i++) {
if (state.metaData().indices().size() > 200) {
break;
}
String name = "index_" + randomAsciiOfLength(15).toLowerCase(Locale.ROOT);
Settings.Builder settingsBuilder = Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 3)).put(SETTING_NUMBER_OF_REPLICAS, randomInt(2));
if (randomBoolean()) {
settingsBuilder.put(IndexMetaData.SETTING_SHADOW_REPLICAS, true).put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true);
}
CreateIndexRequest request = new CreateIndexRequest(name, settingsBuilder.build()).waitForActiveShards(ActiveShardCount.NONE);
state = cluster.createIndex(state, request);
assertTrue(state.metaData().hasIndex(name));
}
// randomly delete indices
Set<String> indicesToDelete = new HashSet<>();
int numberOfIndicesToDelete = randomInt(Math.min(2, state.metaData().indices().size()));
for (String index : randomSubsetOf(numberOfIndicesToDelete, state.metaData().indices().keys().toArray(String.class))) {
indicesToDelete.add(state.metaData().index(index).getIndex().getName());
}
if (indicesToDelete.isEmpty() == false) {
DeleteIndexRequest deleteRequest = new DeleteIndexRequest(indicesToDelete.toArray(new String[indicesToDelete.size()]));
state = cluster.deleteIndices(state, deleteRequest);
for (String index : indicesToDelete) {
assertFalse(state.metaData().hasIndex(index));
}
}
// randomly close indices
int numberOfIndicesToClose = randomInt(Math.min(1, state.metaData().indices().size()));
for (String index : randomSubsetOf(numberOfIndicesToClose, state.metaData().indices().keys().toArray(String.class))) {
CloseIndexRequest closeIndexRequest = new CloseIndexRequest(state.metaData().index(index).getIndex().getName());
state = cluster.closeIndices(state, closeIndexRequest);
}
// randomly open indices
int numberOfIndicesToOpen = randomInt(Math.min(1, state.metaData().indices().size()));
for (String index : randomSubsetOf(numberOfIndicesToOpen, state.metaData().indices().keys().toArray(String.class))) {
OpenIndexRequest openIndexRequest = new OpenIndexRequest(state.metaData().index(index).getIndex().getName());
state = cluster.openIndices(state, openIndexRequest);
}
// randomly update settings
Set<String> indicesToUpdate = new HashSet<>();
boolean containsClosedIndex = false;
int numberOfIndicesToUpdate = randomInt(Math.min(2, state.metaData().indices().size()));
for (String index : randomSubsetOf(numberOfIndicesToUpdate, state.metaData().indices().keys().toArray(String.class))) {
indicesToUpdate.add(state.metaData().index(index).getIndex().getName());
if (state.metaData().index(index).getState() == IndexMetaData.State.CLOSE) {
containsClosedIndex = true;
}
}
if (indicesToUpdate.isEmpty() == false) {
UpdateSettingsRequest updateSettingsRequest = new UpdateSettingsRequest(indicesToUpdate.toArray(new String[indicesToUpdate.size()]));
Settings.Builder settings = Settings.builder();
if (containsClosedIndex == false) {
settings.put(SETTING_NUMBER_OF_REPLICAS, randomInt(2));
}
settings.put("index.refresh_interval", randomIntBetween(1, 5) + "s");
updateSettingsRequest.settings(settings.build());
state = cluster.updateSettings(state, updateSettingsRequest);
}
// randomly reroute
if (rarely()) {
state = cluster.reroute(state, new ClusterRerouteRequest());
}
// randomly start and fail allocated shards
List<ShardRouting> startedShards = new ArrayList<>();
List<FailedShard> failedShards = new ArrayList<>();
for (DiscoveryNode node : state.nodes()) {
IndicesClusterStateService indicesClusterStateService = clusterStateServiceMap.get(node);
MockIndicesService indicesService = (MockIndicesService) indicesClusterStateService.indicesService;
for (MockIndexService indexService : indicesService) {
for (MockIndexShard indexShard : indexService) {
ShardRouting persistedShardRouting = indexShard.routingEntry();
if (persistedShardRouting.initializing() && randomBoolean()) {
startedShards.add(persistedShardRouting);
} else if (rarely()) {
failedShards.add(new FailedShard(persistedShardRouting, "fake shard failure", new Exception()));
}
}
}
}
state = cluster.applyFailedShards(state, failedShards);
state = cluster.applyStartedShards(state, startedShards);
// randomly add and remove nodes (except current master)
if (rarely()) {
if (randomBoolean()) {
// add node
if (state.nodes().getSize() < 10) {
DiscoveryNodes newNodes = DiscoveryNodes.builder(state.nodes()).add(createNode()).build();
state = ClusterState.builder(state).nodes(newNodes).build();
// always reroute after node leave
state = cluster.reroute(state, new ClusterRerouteRequest());
updateNodes(state, clusterStateServiceMap, indicesServiceSupplier);
}
} else {
// remove node
if (state.nodes().getDataNodes().size() > 3) {
DiscoveryNode discoveryNode = randomFrom(state.nodes().getNodes().values().toArray(DiscoveryNode.class));
if (discoveryNode.equals(state.nodes().getMasterNode()) == false) {
DiscoveryNodes newNodes = DiscoveryNodes.builder(state.nodes()).remove(discoveryNode.getId()).build();
state = ClusterState.builder(state).nodes(newNodes).build();
state = cluster.deassociateDeadNodes(state, true, "removed and added a node");
updateNodes(state, clusterStateServiceMap, indicesServiceSupplier);
}
if (randomBoolean()) {
// and add it back
DiscoveryNodes newNodes = DiscoveryNodes.builder(state.nodes()).add(discoveryNode).build();
state = ClusterState.builder(state).nodes(newNodes).build();
state = cluster.reroute(state, new ClusterRerouteRequest());
updateNodes(state, clusterStateServiceMap, indicesServiceSupplier);
}
}
}
}
return state;
}
use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.
the class ShardFailedClusterStateTaskExecutorTests method testTriviallySuccessfulTasksBatchedWithFailingTasks.
public void testTriviallySuccessfulTasksBatchedWithFailingTasks() throws Exception {
String reason = "test trivially successful tasks batched with failing tasks";
ClusterState currentState = createClusterStateWithStartedShards(reason);
List<ShardStateAction.ShardEntry> failingTasks = createExistingShards(currentState, reason);
List<ShardStateAction.ShardEntry> nonExistentTasks = createNonExistentShards(currentState, reason);
ShardStateAction.ShardFailedClusterStateTaskExecutor failingExecutor = new ShardStateAction.ShardFailedClusterStateTaskExecutor(allocationService, null, logger) {
@Override
ClusterState applyFailedShards(ClusterState currentState, List<FailedShard> failedShards, List<StaleShard> staleShards) {
throw new RuntimeException("simulated applyFailedShards failure");
}
};
List<ShardStateAction.ShardEntry> tasks = new ArrayList<>();
tasks.addAll(failingTasks);
tasks.addAll(nonExistentTasks);
ClusterStateTaskExecutor.ClusterTasksResult<ShardStateAction.ShardEntry> result = failingExecutor.execute(currentState, tasks);
Map<ShardStateAction.ShardEntry, ClusterStateTaskExecutor.TaskResult> taskResultMap = failingTasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.failure(new RuntimeException("simulated applyFailedShards failure"))));
taskResultMap.putAll(nonExistentTasks.stream().collect(Collectors.toMap(Function.identity(), task -> ClusterStateTaskExecutor.TaskResult.success())));
assertTaskResults(taskResultMap, result, currentState, false);
}
use of org.elasticsearch.cluster.routing.allocation.FailedShard in project elasticsearch by elastic.
the class GatewayAllocator method applyFailedShards.
public void applyFailedShards(final RoutingAllocation allocation, final List<FailedShard> failedShards) {
for (FailedShard failedShard : failedShards) {
Releasables.close(asyncFetchStarted.remove(failedShard.getRoutingEntry().shardId()));
Releasables.close(asyncFetchStore.remove(failedShard.getRoutingEntry().shardId()));
}
}
Aggregations