use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.
the class SnapshotsService method processWaitingShards.
private ImmutableOpenMap<ShardId, ShardSnapshotStatus> processWaitingShards(ImmutableOpenMap<ShardId, ShardSnapshotStatus> snapshotShards, RoutingTable routingTable) {
boolean snapshotChanged = false;
ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> shards = ImmutableOpenMap.builder();
for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> shardEntry : snapshotShards) {
ShardSnapshotStatus shardStatus = shardEntry.value;
ShardId shardId = shardEntry.key;
if (shardStatus.state() == State.WAITING) {
IndexRoutingTable indexShardRoutingTable = routingTable.index(shardId.getIndex());
if (indexShardRoutingTable != null) {
IndexShardRoutingTable shardRouting = indexShardRoutingTable.shard(shardId.id());
if (shardRouting != null && shardRouting.primaryShard() != null) {
if (shardRouting.primaryShard().started()) {
// Shard that we were waiting for has started on a node, let's process it
snapshotChanged = true;
logger.trace("starting shard that we were waiting for [{}] on node [{}]", shardId, shardStatus.nodeId());
shards.put(shardId, new ShardSnapshotStatus(shardRouting.primaryShard().currentNodeId()));
continue;
} else if (shardRouting.primaryShard().initializing() || shardRouting.primaryShard().relocating()) {
// Shard that we were waiting for hasn't started yet or still relocating - will continue to wait
shards.put(shardId, shardStatus);
continue;
}
}
}
// Shard that we were waiting for went into unassigned state or disappeared - giving up
snapshotChanged = true;
logger.warn("failing snapshot of shard [{}] on unassigned shard [{}]", shardId, shardStatus.nodeId());
shards.put(shardId, new ShardSnapshotStatus(shardStatus.nodeId(), State.FAILED, "shard is unassigned"));
} else {
shards.put(shardId, shardStatus);
}
}
if (snapshotChanged) {
return shards.build();
} else {
return null;
}
}
use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.
the class SnapshotsService method shards.
/**
* Calculates the list of shards that should be included into the current snapshot
*
* @param clusterState cluster state
* @param indices list of indices to be snapshotted
* @return list of shard to be included into current snapshot
*/
private ImmutableOpenMap<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shards(ClusterState clusterState, List<IndexId> indices) {
ImmutableOpenMap.Builder<ShardId, SnapshotsInProgress.ShardSnapshotStatus> builder = ImmutableOpenMap.builder();
MetaData metaData = clusterState.metaData();
for (IndexId index : indices) {
final String indexName = index.getName();
IndexMetaData indexMetaData = metaData.index(indexName);
if (indexMetaData == null) {
// The index was deleted before we managed to start the snapshot - mark it as missing.
builder.put(new ShardId(indexName, IndexMetaData.INDEX_UUID_NA_VALUE, 0), new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "missing index"));
} else if (indexMetaData.getState() == IndexMetaData.State.CLOSE) {
for (int i = 0; i < indexMetaData.getNumberOfShards(); i++) {
ShardId shardId = new ShardId(indexMetaData.getIndex(), i);
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "index is closed"));
}
} else {
IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(indexName);
for (int i = 0; i < indexMetaData.getNumberOfShards(); i++) {
ShardId shardId = new ShardId(indexMetaData.getIndex(), i);
if (indexRoutingTable != null) {
ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
if (primary == null || !primary.assignedToNode()) {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "primary shard is not allocated"));
} else if (primary.relocating() || primary.initializing()) {
// The WAITING state was introduced in V1.2.0 - don't use it if there are nodes with older version in the cluster
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), State.WAITING));
} else if (!primary.started()) {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), State.MISSING, "primary shard hasn't been started yet"));
} else {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId()));
}
} else {
builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "missing routing table"));
}
}
}
}
return builder.build();
}
use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.
the class AwarenessAllocationIT method testSimpleAwareness.
public void testSimpleAwareness() throws Exception {
Settings commonSettings = Settings.builder().put("cluster.routing.allocation.awareness.attributes", "rack_id").build();
logger.info("--> starting 2 nodes on the same rack");
internalCluster().startNodes(2, Settings.builder().put(commonSettings).put("node.attr.rack_id", "rack_1").build());
createIndex("test1");
createIndex("test2");
NumShards test1 = getNumShards("test1");
NumShards test2 = getNumShards("test2");
//no replicas will be allocated as both indices end up on a single node
final int totalPrimaries = test1.numPrimaries + test2.numPrimaries;
ensureGreen();
logger.info("--> starting 1 node on a different rack");
final String node3 = internalCluster().startNode(Settings.builder().put(commonSettings).put("node.attr.rack_id", "rack_2").build());
// On slow machines the initial relocation might be delayed
assertThat(awaitBusy(() -> {
logger.info("--> waiting for no relocation");
ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("3").setWaitForNoRelocatingShards(true).get();
if (clusterHealth.isTimedOut()) {
return false;
}
logger.info("--> checking current state");
ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
// verify that we have all the primaries on node3
ObjectIntHashMap<String> counts = new ObjectIntHashMap<>();
for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
for (ShardRouting shardRouting : indexShardRoutingTable) {
counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1);
}
}
}
return counts.get(node3) == totalPrimaries;
}, 10, TimeUnit.SECONDS), equalTo(true));
}
use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.
the class AwarenessAllocationIT method testAwarenessZonesIncrementalNodes.
public void testAwarenessZonesIncrementalNodes() throws Exception {
Settings commonSettings = Settings.builder().put("cluster.routing.allocation.awareness.force.zone.values", "a,b").put("cluster.routing.allocation.awareness.attributes", "zone").build();
logger.info("--> starting 2 nodes on zones 'a' & 'b'");
List<String> nodes = internalCluster().startNodes(Settings.builder().put(commonSettings).put("node.attr.zone", "a").build(), Settings.builder().put(commonSettings).put("node.attr.zone", "b").build());
String A_0 = nodes.get(0);
String B_0 = nodes.get(1);
client().admin().indices().prepareCreate("test").setSettings(Settings.builder().put("index.number_of_shards", 5).put("index.number_of_replicas", 1)).execute().actionGet();
ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("2").setWaitForNoRelocatingShards(true).execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
ObjectIntHashMap<String> counts = new ObjectIntHashMap<>();
for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
for (ShardRouting shardRouting : indexShardRoutingTable) {
counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1);
}
}
}
assertThat(counts.get(A_0), equalTo(5));
assertThat(counts.get(B_0), equalTo(5));
logger.info("--> starting another node in zone 'b'");
String B_1 = internalCluster().startNode(Settings.builder().put(commonSettings).put("node.attr.zone", "b").build());
health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("3").execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
client().admin().cluster().prepareReroute().get();
health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("3").setWaitForActiveShards(10).setWaitForNoRelocatingShards(true).execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
counts = new ObjectIntHashMap<>();
for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
for (ShardRouting shardRouting : indexShardRoutingTable) {
counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1);
}
}
}
assertThat(counts.get(A_0), equalTo(5));
assertThat(counts.get(B_0), equalTo(3));
assertThat(counts.get(B_1), equalTo(2));
String noZoneNode = internalCluster().startNode();
health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("4").execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
client().admin().cluster().prepareReroute().get();
health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("4").setWaitForActiveShards(10).setWaitForNoRelocatingShards(true).execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
counts = new ObjectIntHashMap<>();
for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
for (ShardRouting shardRouting : indexShardRoutingTable) {
counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1);
}
}
}
assertThat(counts.get(A_0), equalTo(5));
assertThat(counts.get(B_0), equalTo(3));
assertThat(counts.get(B_1), equalTo(2));
assertThat(counts.containsKey(noZoneNode), equalTo(false));
client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put("cluster.routing.allocation.awareness.attributes", "").build()).get();
health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("4").setWaitForActiveShards(10).setWaitForNoRelocatingShards(true).execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
counts = new ObjectIntHashMap<>();
for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
for (ShardRouting shardRouting : indexShardRoutingTable) {
counts.addTo(clusterState.nodes().get(shardRouting.currentNodeId()).getName(), 1);
}
}
}
assertThat(counts.get(A_0), equalTo(3));
assertThat(counts.get(B_0), equalTo(3));
assertThat(counts.get(B_1), equalTo(2));
assertThat(counts.get(noZoneNode), equalTo(2));
}
use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.
the class SharedClusterSnapshotRestoreIT method testSnapshotCanceledOnRemovedShard.
/**
* This test ensures that when a shard is removed from a node (perhaps due to the node
* leaving the cluster, then returning), all snapshotting of that shard is aborted, so
* all Store references held onto by the snapshot are released.
*
* See https://github.com/elastic/elasticsearch/issues/20876
*/
public void testSnapshotCanceledOnRemovedShard() throws Exception {
final int numPrimaries = 1;
final int numReplicas = 1;
final int numDocs = 100;
final String repo = "test-repo";
final String index = "test-idx";
final String snapshot = "test-snap";
assertAcked(prepareCreate(index, 1, Settings.builder().put("number_of_shards", numPrimaries).put("number_of_replicas", numReplicas)));
logger.info("--> indexing some data");
for (int i = 0; i < numDocs; i++) {
index(index, "doc", Integer.toString(i), "foo", "bar" + i);
}
refresh();
logger.info("--> creating repository");
PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository(repo).setType("mock").setSettings(Settings.builder().put("location", randomRepoPath()).put("random", randomAsciiOfLength(10)).put("wait_after_unblock", 200)).get();
assertTrue(putRepositoryResponse.isAcknowledged());
String blockedNode = blockNodeWithIndex(repo, index);
logger.info("--> snapshot");
client().admin().cluster().prepareCreateSnapshot(repo, snapshot).setWaitForCompletion(false).execute();
logger.info("--> waiting for block to kick in on node [{}]", blockedNode);
waitForBlock(blockedNode, repo, TimeValue.timeValueSeconds(10));
logger.info("--> removing primary shard that is being snapshotted");
ClusterState clusterState = internalCluster().clusterService(internalCluster().getMasterName()).state();
IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(index);
String nodeWithPrimary = clusterState.nodes().get(indexRoutingTable.shard(0).primaryShard().currentNodeId()).getName();
assertNotNull("should be at least one node with a primary shard", nodeWithPrimary);
IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeWithPrimary);
IndexService indexService = indicesService.indexService(resolveIndex(index));
indexService.removeShard(0, "simulate node removal");
logger.info("--> unblocking blocked node [{}]", blockedNode);
unblockNode(repo, blockedNode);
logger.info("--> ensuring snapshot is aborted and the aborted shard was marked as failed");
SnapshotInfo snapshotInfo = waitForCompletion(repo, snapshot, TimeValue.timeValueSeconds(10));
assertEquals(1, snapshotInfo.shardFailures().size());
assertEquals(0, snapshotInfo.shardFailures().get(0).shardId());
assertEquals("IndexShardSnapshotFailedException[Aborted]", snapshotInfo.shardFailures().get(0).reason());
}
Aggregations