Search in sources :

Example 26 with IndexRoutingTable

use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.

the class SnapshotsService method processWaitingShards.

/**
 * Re-evaluates every shard of a snapshot that is in the {@code WAITING} state against the
 * given routing table.
 * <ul>
 *   <li>primary started: the shard is replaced by a fresh status on the primary's current node;</li>
 *   <li>primary initializing or relocating: the existing status is kept;</li>
 *   <li>anything else (no index routing, no shard routing, no primary, other state): the shard
 *       is marked {@code FAILED} with the reason "shard is unassigned".</li>
 * </ul>
 * Shards that are not {@code WAITING} are copied over untouched.
 *
 * @param snapshotShards current per-shard snapshot statuses
 * @param routingTable   routing table used to look up the primary of each waiting shard
 * @return the rebuilt shard map if at least one shard changed, otherwise {@code null}
 */
private ImmutableOpenMap<ShardId, ShardSnapshotStatus> processWaitingShards(ImmutableOpenMap<ShardId, ShardSnapshotStatus> snapshotShards, RoutingTable routingTable) {
    final ImmutableOpenMap.Builder<ShardId, ShardSnapshotStatus> updated = ImmutableOpenMap.builder();
    boolean changed = false;
    for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> cursor : snapshotShards) {
        final ShardId shardId = cursor.key;
        final ShardSnapshotStatus current = cursor.value;
        if (current.state() != State.WAITING) {
            // Only waiting shards are re-examined; everything else is carried over as is
            updated.put(shardId, current);
            continue;
        }
        final IndexRoutingTable indexRouting = routingTable.index(shardId.getIndex());
        final IndexShardRoutingTable shardTable = indexRouting == null ? null : indexRouting.shard(shardId.id());
        final boolean hasPrimary = shardTable != null && shardTable.primaryShard() != null;
        if (hasPrimary && shardTable.primaryShard().started()) {
            // The primary we were waiting for is now started on a node - snapshot it there
            changed = true;
            logger.trace("starting shard that we were waiting for [{}] on node [{}]", shardId, current.nodeId());
            updated.put(shardId, new ShardSnapshotStatus(shardTable.primaryShard().currentNodeId()));
        } else if (hasPrimary && (shardTable.primaryShard().initializing() || shardTable.primaryShard().relocating())) {
            // Still initializing or relocating - keep waiting
            updated.put(shardId, current);
        } else {
            // The shard became unassigned or vanished from the routing table - give up on it
            changed = true;
            logger.warn("failing snapshot of shard [{}] on unassigned shard [{}]", shardId, current.nodeId());
            updated.put(shardId, new ShardSnapshotStatus(current.nodeId(), State.FAILED, "shard is unassigned"));
        }
    }
    return changed ? updated.build() : null;
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) IndexRoutingTable(org.elasticsearch.cluster.routing.IndexRoutingTable) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap)

Example 27 with IndexRoutingTable

use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.

the class SnapshotsService method shards.

/**
 * Calculates the shards that should be included in the current snapshot, together with the
 * initial snapshot status of each shard.
 * <p>
 * Per index the outcome is:
 * <ul>
 *   <li>index metadata missing: a single placeholder shard 0 marked {@code MISSING};</li>
 *   <li>index closed: every shard marked {@code MISSING};</li>
 *   <li>no routing table for the index: every shard marked {@code MISSING};</li>
 *   <li>otherwise the status is derived from the state of each shard's primary.</li>
 * </ul>
 *
 * @param clusterState cluster state
 * @param indices      list of indices to be snapshotted
 * @return map of the shards to be included in the current snapshot to their initial status
 */
private ImmutableOpenMap<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shards(ClusterState clusterState, List<IndexId> indices) {
    ImmutableOpenMap.Builder<ShardId, SnapshotsInProgress.ShardSnapshotStatus> builder = ImmutableOpenMap.builder();
    MetaData metaData = clusterState.metaData();
    for (IndexId index : indices) {
        final String indexName = index.getName();
        IndexMetaData indexMetaData = metaData.index(indexName);
        if (indexMetaData == null) {
            // The index was deleted before we managed to start the snapshot - mark it as missing.
            builder.put(new ShardId(indexName, IndexMetaData.INDEX_UUID_NA_VALUE, 0), new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "missing index"));
        } else if (indexMetaData.getState() == IndexMetaData.State.CLOSE) {
            for (int i = 0; i < indexMetaData.getNumberOfShards(); i++) {
                ShardId shardId = new ShardId(indexMetaData.getIndex(), i);
                builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "index is closed"));
            }
        } else {
            IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(indexName);
            for (int i = 0; i < indexMetaData.getNumberOfShards(); i++) {
                ShardId shardId = new ShardId(indexMetaData.getIndex(), i);
                if (indexRoutingTable != null) {
                    // The routing table may have no entry for this shard id; treat that like a
                    // missing primary instead of failing with a NullPointerException.
                    ShardRouting primary = indexRoutingTable.shard(i) == null ? null : indexRoutingTable.shard(i).primaryShard();
                    if (primary == null || !primary.assignedToNode()) {
                        builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "primary shard is not allocated"));
                    } else if (primary.relocating() || primary.initializing()) {
                        // The primary is not ready yet - record the shard as WAITING on its current node
                        builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), State.WAITING));
                    } else if (!primary.started()) {
                        builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId(), State.MISSING, "primary shard hasn't been started yet"));
                    } else {
                        builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(primary.currentNodeId()));
                    }
                } else {
                    builder.put(shardId, new SnapshotsInProgress.ShardSnapshotStatus(null, State.MISSING, "missing routing table"));
                }
            }
        }
    }
    return builder.build();
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) IndexId(org.elasticsearch.repositories.IndexId) IndexRoutingTable(org.elasticsearch.cluster.routing.IndexRoutingTable) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) MetaData(org.elasticsearch.cluster.metadata.MetaData) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) RepositoriesMetaData(org.elasticsearch.cluster.metadata.RepositoriesMetaData) SnapshotsInProgress(org.elasticsearch.cluster.SnapshotsInProgress) ShardSnapshotStatus(org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData)

Example 28 with IndexRoutingTable

use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.

the class AwarenessAllocationIT method testSimpleAwareness.

/**
 * Starts two nodes on rack_1, creates two indices, then adds a third node on rack_2 and waits
 * (up to 10 seconds) until the number of shard copies on the new node equals the total
 * number of primaries of both indices.
 */
public void testSimpleAwareness() throws Exception {
    final Settings awarenessSettings = Settings.builder()
        .put("cluster.routing.allocation.awareness.attributes", "rack_id")
        .build();

    logger.info("--> starting 2 nodes on the same rack");
    final Settings rackOneSettings = Settings.builder()
        .put(awarenessSettings)
        .put("node.attr.rack_id", "rack_1")
        .build();
    internalCluster().startNodes(2, rackOneSettings);

    createIndex("test1");
    createIndex("test2");
    final NumShards firstIndex = getNumShards("test1");
    final NumShards secondIndex = getNumShards("test2");
    // No replicas will be allocated at this point, so only the primaries are counted
    final int totalPrimaries = firstIndex.numPrimaries + secondIndex.numPrimaries;
    ensureGreen();

    logger.info("--> starting 1 node on a different rack");
    final Settings rackTwoSettings = Settings.builder()
        .put(awarenessSettings)
        .put("node.attr.rack_id", "rack_2")
        .build();
    final String node3 = internalCluster().startNode(rackTwoSettings);

    // On slow machines the initial relocation might be delayed, hence the retry loop
    final boolean balanced = awaitBusy(() -> {
        logger.info("--> waiting for no relocation");
        ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth()
            .setWaitForEvents(Priority.LANGUID)
            .setWaitForGreenStatus()
            .setWaitForNodes("3")
            .setWaitForNoRelocatingShards(true)
            .get();
        if (healthResponse.isTimedOut()) {
            return false;
        }

        logger.info("--> checking current state");
        ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState();
        // Count shard copies per node name, then check that node3 holds as many as there are primaries
        ObjectIntHashMap<String> shardsPerNode = new ObjectIntHashMap<>();
        for (IndexRoutingTable indexTable : state.routingTable()) {
            for (IndexShardRoutingTable shardTable : indexTable) {
                for (ShardRouting copy : shardTable) {
                    String nodeName = state.nodes().get(copy.currentNodeId()).getName();
                    shardsPerNode.addTo(nodeName, 1);
                }
            }
        }
        return shardsPerNode.get(node3) == totalPrimaries;
    }, 10, TimeUnit.SECONDS);
    assertThat(balanced, equalTo(true));
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) IndexRoutingTable(org.elasticsearch.cluster.routing.IndexRoutingTable) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) ObjectIntHashMap(com.carrotsearch.hppc.ObjectIntHashMap) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings)

Example 29 with IndexRoutingTable

use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.

the class AwarenessAllocationIT method testAwarenessZonesIncrementalNodes.

/**
 * Grows a cluster with forced zone awareness (zones "a" and "b") one node at a time and
 * asserts the per-node shard copy counts of a 5-shard / 1-replica index after each step:
 * two zoned nodes, a second node in zone "b", a node without a zone, and finally after the
 * awareness attributes setting has been cleared.
 */
public void testAwarenessZonesIncrementalNodes() throws Exception {
    final Settings zoneAwareSettings = Settings.builder()
        .put("cluster.routing.allocation.awareness.force.zone.values", "a,b")
        .put("cluster.routing.allocation.awareness.attributes", "zone")
        .build();

    logger.info("--> starting 2 nodes on zones 'a' & 'b'");
    final Settings zoneASettings = Settings.builder().put(zoneAwareSettings).put("node.attr.zone", "a").build();
    final Settings zoneBSettings = Settings.builder().put(zoneAwareSettings).put("node.attr.zone", "b").build();
    final List<String> startedNodes = internalCluster().startNodes(zoneASettings, zoneBSettings);
    final String zoneANode = startedNodes.get(0);
    final String zoneBNode = startedNodes.get(1);

    client().admin().indices().prepareCreate("test")
        .setSettings(Settings.builder().put("index.number_of_shards", 5).put("index.number_of_replicas", 1))
        .execute().actionGet();
    ClusterHealthResponse healthResponse = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("2")
        .setWaitForNoRelocatingShards(true)
        .execute().actionGet();
    assertThat(healthResponse.isTimedOut(), equalTo(false));

    // Phase 1: two nodes, one per zone - each is expected to hold 5 shard copies
    ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState();
    ObjectIntHashMap<String> shardsPerNode = new ObjectIntHashMap<>();
    for (IndexRoutingTable indexTable : state.routingTable()) {
        for (IndexShardRoutingTable shardTable : indexTable) {
            for (ShardRouting copy : shardTable) {
                String nodeName = state.nodes().get(copy.currentNodeId()).getName();
                shardsPerNode.addTo(nodeName, 1);
            }
        }
    }
    assertThat(shardsPerNode.get(zoneANode), equalTo(5));
    assertThat(shardsPerNode.get(zoneBNode), equalTo(5));

    // Phase 2: add a second node in zone 'b', reroute and wait for the cluster to settle
    logger.info("--> starting another node in zone 'b'");
    final String secondZoneBNode = internalCluster().startNode(zoneBSettingsFor(zoneAwareSettings));
    healthResponse = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("3")
        .execute().actionGet();
    assertThat(healthResponse.isTimedOut(), equalTo(false));
    client().admin().cluster().prepareReroute().get();
    healthResponse = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("3")
        .setWaitForActiveShards(10)
        .setWaitForNoRelocatingShards(true)
        .execute().actionGet();
    assertThat(healthResponse.isTimedOut(), equalTo(false));

    state = client().admin().cluster().prepareState().execute().actionGet().getState();
    shardsPerNode = new ObjectIntHashMap<>();
    for (IndexRoutingTable indexTable : state.routingTable()) {
        for (IndexShardRoutingTable shardTable : indexTable) {
            for (ShardRouting copy : shardTable) {
                String nodeName = state.nodes().get(copy.currentNodeId()).getName();
                shardsPerNode.addTo(nodeName, 1);
            }
        }
    }
    assertThat(shardsPerNode.get(zoneANode), equalTo(5));
    assertThat(shardsPerNode.get(zoneBNode), equalTo(3));
    assertThat(shardsPerNode.get(secondZoneBNode), equalTo(2));

    // Phase 3: add a node without any zone attribute - it is expected to receive no shards
    final String unzonedNode = internalCluster().startNode();
    healthResponse = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("4")
        .execute().actionGet();
    assertThat(healthResponse.isTimedOut(), equalTo(false));
    client().admin().cluster().prepareReroute().get();
    healthResponse = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("4")
        .setWaitForActiveShards(10)
        .setWaitForNoRelocatingShards(true)
        .execute().actionGet();
    assertThat(healthResponse.isTimedOut(), equalTo(false));

    state = client().admin().cluster().prepareState().execute().actionGet().getState();
    shardsPerNode = new ObjectIntHashMap<>();
    for (IndexRoutingTable indexTable : state.routingTable()) {
        for (IndexShardRoutingTable shardTable : indexTable) {
            for (ShardRouting copy : shardTable) {
                String nodeName = state.nodes().get(copy.currentNodeId()).getName();
                shardsPerNode.addTo(nodeName, 1);
            }
        }
    }
    assertThat(shardsPerNode.get(zoneANode), equalTo(5));
    assertThat(shardsPerNode.get(zoneBNode), equalTo(3));
    assertThat(shardsPerNode.get(secondZoneBNode), equalTo(2));
    assertThat(shardsPerNode.containsKey(unzonedNode), equalTo(false));

    // Phase 4: clear the awareness attributes - shards are expected to spread over all 4 nodes
    client().admin().cluster().prepareUpdateSettings()
        .setTransientSettings(Settings.builder().put("cluster.routing.allocation.awareness.attributes", "").build())
        .get();
    healthResponse = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("4")
        .setWaitForActiveShards(10)
        .setWaitForNoRelocatingShards(true)
        .execute().actionGet();
    assertThat(healthResponse.isTimedOut(), equalTo(false));

    state = client().admin().cluster().prepareState().execute().actionGet().getState();
    shardsPerNode = new ObjectIntHashMap<>();
    for (IndexRoutingTable indexTable : state.routingTable()) {
        for (IndexShardRoutingTable shardTable : indexTable) {
            for (ShardRouting copy : shardTable) {
                String nodeName = state.nodes().get(copy.currentNodeId()).getName();
                shardsPerNode.addTo(nodeName, 1);
            }
        }
    }
    assertThat(shardsPerNode.get(zoneANode), equalTo(3));
    assertThat(shardsPerNode.get(zoneBNode), equalTo(3));
    assertThat(shardsPerNode.get(secondZoneBNode), equalTo(2));
    assertThat(shardsPerNode.get(unzonedNode), equalTo(2));
}

/** Builds node settings for a node in zone 'b' on top of the given common settings. */
private static Settings zoneBSettingsFor(Settings commonSettings) {
    return Settings.builder().put(commonSettings).put("node.attr.zone", "b").build();
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) IndexRoutingTable(org.elasticsearch.cluster.routing.IndexRoutingTable) IndexShardRoutingTable(org.elasticsearch.cluster.routing.IndexShardRoutingTable) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) ObjectIntHashMap(com.carrotsearch.hppc.ObjectIntHashMap) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings)

Example 30 with IndexRoutingTable

use of org.elasticsearch.cluster.routing.IndexRoutingTable in project elasticsearch by elastic.

the class SharedClusterSnapshotRestoreIT method testSnapshotCanceledOnRemovedShard.

/**
 * Verifies that removing a shard from a node while it is being snapshotted (as may happen
 * when a node leaves the cluster and then returns) aborts the snapshot of that shard, so
 * that all Store references held by the snapshot are released.
 *
 * See https://github.com/elastic/elasticsearch/issues/20876
 */
public void testSnapshotCanceledOnRemovedShard() throws Exception {
    final int numPrimaries = 1;
    final int numReplicas = 1;
    final int numDocs = 100;
    final String repoName = "test-repo";
    final String indexName = "test-idx";
    final String snapshotName = "test-snap";

    assertAcked(prepareCreate(indexName, 1, Settings.builder()
        .put("number_of_shards", numPrimaries)
        .put("number_of_replicas", numReplicas)));

    logger.info("--> indexing some data");
    for (int docId = 0; docId < numDocs; docId++) {
        index(indexName, "doc", Integer.toString(docId), "foo", "bar" + docId);
    }
    refresh();

    logger.info("--> creating repository");
    // Kept as a single chain so that the client and random helpers are invoked in the same order
    PutRepositoryResponse repositoryResponse = client().admin().cluster().preparePutRepository(repoName)
        .setType("mock")
        .setSettings(Settings.builder()
            .put("location", randomRepoPath())
            .put("random", randomAsciiOfLength(10))
            .put("wait_after_unblock", 200))
        .get();
    assertTrue(repositoryResponse.isAcknowledged());
    final String blockedNode = blockNodeWithIndex(repoName, indexName);

    logger.info("--> snapshot");
    // Fire and forget: completion is awaited further down via waitForCompletion
    client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(false).execute();

    logger.info("--> waiting for block to kick in on node [{}]", blockedNode);
    waitForBlock(blockedNode, repoName, TimeValue.timeValueSeconds(10));

    logger.info("--> removing primary shard that is being snapshotted");
    final ClusterState masterState = internalCluster().clusterService(internalCluster().getMasterName()).state();
    final IndexRoutingTable indexRouting = masterState.getRoutingTable().index(indexName);
    final String primaryNodeId = indexRouting.shard(0).primaryShard().currentNodeId();
    final String nodeWithPrimary = masterState.nodes().get(primaryNodeId).getName();
    assertNotNull("should be at least one node with a primary shard", nodeWithPrimary);
    final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeWithPrimary);
    final IndexService indexService = indicesService.indexService(resolveIndex(indexName));
    indexService.removeShard(0, "simulate node removal");

    logger.info("--> unblocking blocked node [{}]", blockedNode);
    unblockNode(repoName, blockedNode);

    logger.info("--> ensuring snapshot is aborted and the aborted shard was marked as failed");
    final SnapshotInfo completed = waitForCompletion(repoName, snapshotName, TimeValue.timeValueSeconds(10));
    assertEquals(1, completed.shardFailures().size());
    assertEquals(0, completed.shardFailures().get(0).shardId());
    assertEquals("IndexShardSnapshotFailedException[Aborted]", completed.shardFailures().get(0).reason());
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) IndexRoutingTable(org.elasticsearch.cluster.routing.IndexRoutingTable) IndexService(org.elasticsearch.index.IndexService) IndicesService(org.elasticsearch.indices.IndicesService) Matchers.containsString(org.hamcrest.Matchers.containsString) PutRepositoryResponse(org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryResponse)

Aggregations

IndexRoutingTable (org.elasticsearch.cluster.routing.IndexRoutingTable)35 IndexShardRoutingTable (org.elasticsearch.cluster.routing.IndexShardRoutingTable)29 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)21 ClusterState (org.elasticsearch.cluster.ClusterState)18 RoutingTable (org.elasticsearch.cluster.routing.RoutingTable)15 IndexMetaData (org.elasticsearch.cluster.metadata.IndexMetaData)14 ShardId (org.elasticsearch.index.shard.ShardId)11 HashSet (java.util.HashSet)7 DiscoveryNode (org.elasticsearch.cluster.node.DiscoveryNode)6 DiscoveryNodes (org.elasticsearch.cluster.node.DiscoveryNodes)6 MetaData (org.elasticsearch.cluster.metadata.MetaData)5 Settings (org.elasticsearch.common.settings.Settings)5 ObjectIntHashMap (com.carrotsearch.hppc.ObjectIntHashMap)3 ArrayList (java.util.ArrayList)3 Set (java.util.Set)3 ClusterName (org.elasticsearch.cluster.ClusterName)3 TestShardRouting (org.elasticsearch.cluster.routing.TestShardRouting)3 UnassignedInfo (org.elasticsearch.cluster.routing.UnassignedInfo)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2