Search in sources :

Example 36 with ClusterState

use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.

the class FeaturesSelectionStream method getShardUrls.

/**
 * Builds the list of core URLs to query, one per slice of {@code this.collection}.
 *
 * For every slice, one replica is chosen at random among those that are in
 * {@code Replica.State.ACTIVE} and whose node is listed in the cluster's live nodes.
 *
 * @return one core URL per slice, in the iteration order of the slices
 * @throws IOException if a slice has no active replica on a live node, or if reading
 *                     the cluster state fails (the underlying exception is the cause)
 */
private List<String> getShardUrls() throws IOException {
    try {
        ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
        Collection<Slice> slices = CloudSolrStream.getSlices(this.collection, zkStateReader, false);
        ClusterState clusterState = zkStateReader.getClusterState();
        Set<String> liveNodes = clusterState.getLiveNodes();
        // A single generator is enough for the whole call; no need for one per slice.
        Random random = new Random();
        List<String> baseUrls = new ArrayList<>(slices.size());
        for (Slice slice : slices) {
            List<Replica> candidates = new ArrayList<>();
            for (Replica replica : slice.getReplicas()) {
                if (replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName())) {
                    candidates.add(replica);
                }
            }
            if (candidates.isEmpty()) {
                // Fail with a readable message instead of an IndexOutOfBoundsException from get(0).
                throw new IOException("No active replica on a live node for slice " + slice.getName() + " of collection " + this.collection);
            }
            // Picking a random index is equivalent to shuffling and taking the first element.
            Replica rep = candidates.get(random.nextInt(candidates.size()));
            ZkCoreNodeProps zkProps = new ZkCoreNodeProps(rep);
            baseUrls.add(zkProps.getCoreUrl());
        }
        return baseUrls;
    } catch (IOException e) {
        // Already the declared exception type: do not wrap it a second time.
        throw e;
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Replica(org.apache.solr.common.cloud.Replica) IOException(java.io.IOException) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Random(java.util.Random) Slice(org.apache.solr.common.cloud.Slice)

Example 37 with ClusterState

use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.

the class ShardSplitTest method testSplitStaticIndexReplication.

/*
  Creates a collection with replicationFactor=1, splits a shard. Restarts the sub-shard leader node.
  Add a replica. Ensure count matches in leader and replica.

  Steps, in order:
    1. create a 1-shard/1-replica collection pinned to a known node and index into it briefly,
    2. split SHARD1 asynchronously and wait until the parent is INACTIVE and both sub-shards are ACTIVE,
    3. kill and restart the jetty that hosts the pinned node,
    4. add a replica to SHARD1_0 and wait until no replica of it is RECOVERING,
    5. assert that exactly 2 replicas of SHARD1_0 were checked for consistency.
   */
public void testSplitStaticIndexReplication() throws Exception {
    waitForThingsToLevelOut(15);
    // pick the node of the first replica of the default collection; the new collection is pinned to it below
    DocCollection defCol = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
    Replica replica = defCol.getReplicas().get(0);
    String nodeName = replica.getNodeName();
    String collectionName = "testSplitStaticIndexReplication";
    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 1);
    // some high number so we can create replicas without hindrance
    create.setMaxShardsPerNode(5);
    // we want to create the leader on a fixed node so that we know which one to restart later
    create.setCreateNodeSet(nodeName);
    create.process(cloudClient);
    try (CloudSolrClient client = getCloudSolrClient(zkServer.getZkAddress(), true, cloudClient.getLbClient().getHttpClient())) {
        client.setDefaultCollection(collectionName);
        StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, client, "i1", true);
        try {
            thread.start();
            // give the indexer sometime to do its work
            Thread.sleep(1000);
            // indexing is stopped before the split so that the index is static while splitting
            thread.safeStop();
            thread.join();
            client.commit();
            controlClient.commit();
            CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName);
            splitShard.setShardName(SHARD1);
            // run the split asynchronously and poll its request status
            String asyncId = splitShard.processAsync(client);
            RequestStatusState state = CollectionAdminRequest.requestStatus(asyncId).waitFor(client, 120);
            if (state == RequestStatusState.COMPLETED) {
                waitForRecoveriesToFinish(collectionName, true);
                // let's wait to see parent shard become inactive
                CountDownLatch latch = new CountDownLatch(1);
                client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {

                    @Override
                    public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
                        Slice parent = collectionState.getSlice(SHARD1);
                        Slice slice10 = collectionState.getSlice(SHARD1_0);
                        Slice slice11 = collectionState.getSlice(SHARD1_1);
                        // done once the parent is INACTIVE and both sub-shards exist and are ACTIVE
                        if (slice10 != null && slice11 != null && parent.getState() == Slice.State.INACTIVE && slice10.getState() == Slice.State.ACTIVE && slice11.getState() == Slice.State.ACTIVE) {
                            latch.countDown();
                            // removes the watch
                            return true;
                        }
                        return false;
                    }
                });
                latch.await(1, TimeUnit.MINUTES);
                if (latch.getCount() != 0) {
                    // sanity check
                    fail("Sub-shards did not become active even after waiting for 1 minute");
                }
                // remember the live node count before the restart; the watcher further down waits for it to be reached again
                int liveNodeCount = client.getZkStateReader().getClusterState().getLiveNodes().size();
                // restart the sub-shard leader node
                boolean restarted = false;
                for (JettySolrRunner jetty : jettys) {
                    int port = jetty.getBaseUrl().getPort();
                    // the jetty is matched to the pinned replica by the port contained in the replica's base URL
                    if (replica.getStr(BASE_URL_PROP).contains(":" + port)) {
                        ChaosMonkey.kill(jetty);
                        ChaosMonkey.start(jetty);
                        restarted = true;
                        break;
                    }
                }
                if (!restarted) {
                    // sanity check
                    fail("We could not find a jetty to kill for replica: " + replica.getCoreUrl());
                }
                // add a new replica for the sub-shard
                CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collectionName, SHARD1_0);
                // use control client because less chances of it being the node being restarted
                // this is to avoid flakiness of test because of NoHttpResponseExceptions
                String control_collection = client.getZkStateReader().getClusterState().getCollection("control_collection").getReplicas().get(0).getStr(BASE_URL_PROP);
                try (HttpSolrClient control = new HttpSolrClient.Builder(control_collection).withHttpClient(client.getLbClient().getHttpClient()).build()) {
                    state = addReplica.processAndWait(control, 30);
                }
                if (state == RequestStatusState.COMPLETED) {
                    CountDownLatch newReplicaLatch = new CountDownLatch(1);
                    client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {

                        @Override
                        public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
                            // keep watching until the number of live nodes is back to its pre-restart value
                            if (liveNodes.size() != liveNodeCount) {
                                return false;
                            }
                            Slice slice = collectionState.getSlice(SHARD1_0);
                            if (slice.getReplicas().size() == 2) {
                                if (!slice.getReplicas().stream().anyMatch(r -> r.getState() == Replica.State.RECOVERING)) {
                                    // we see replicas and none of them are recovering
                                    newReplicaLatch.countDown();
                                    return true;
                                }
                            }
                            return false;
                        }
                    });
                    // NOTE(review): the result of await is not checked; on timeout the test proceeds to the consistency assertions below
                    newReplicaLatch.await(30, TimeUnit.SECONDS);
                    // check consistency of sub-shard replica explicitly because checkShardConsistency methods doesn't
                    // handle new shards/replica so well.
                    ClusterState clusterState = client.getZkStateReader().getClusterState();
                    DocCollection collection = clusterState.getCollection(collectionName);
                    int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
                    assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
                } else {
                    fail("Adding a replica to sub-shard did not complete even after waiting for 30 seconds!. Saw state = " + state.getKey());
                }
            } else {
                fail("We expected shard split to succeed on a static index but it didn't. Found state = " + state.getKey());
            }
        } finally {
            // make sure the indexing thread is stopped even when an assertion above failed
            thread.safeStop();
            thread.join();
        }
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) CollectionAdminRequest(org.apache.solr.client.solrj.request.CollectionAdminRequest) CollectionStateWatcher(org.apache.solr.common.cloud.CollectionStateWatcher) CountDownLatch(java.util.concurrent.CountDownLatch) Replica(org.apache.solr.common.cloud.Replica) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) RequestStatusState(org.apache.solr.client.solrj.response.RequestStatusState) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection)

Example 38 with ClusterState

use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.

the class ShardSplitTest method splitByRouteFieldTest.

/**
 * Splits SHARD1 of a collection that routes documents on the {@code shard_s} field and
 * checks that each sub-shard ends up with the expected number of documents.
 *
 * The expected counts are computed while indexing: the range of SHARD1 is partitioned in
 * two, and every document whose route key hashes into one of the two partitions is counted
 * for that partition. After the split, the counts are compared with the number of documents
 * found on {@code shard1_0} and {@code shard1_1}.
 *
 * The split itself is attempted up to three times; only remote errors with code 500 are
 * retried, any other remote error is rethrown immediately.
 *
 * @throws Exception if collection setup, indexing, splitting or querying fails
 */
public void splitByRouteFieldTest() throws Exception {
    log.info("Starting testSplitWithRouteField");
    String collectionName = "routeFieldColl";
    int numShards = 4;
    int replicationFactor = 2;
    int liveNodeCount = getCommonCloudSolrClient().getZkStateReader().getClusterState().getLiveNodes().size();
    // one more than the even spread so that collection creation is not blocked by the per-node limit
    int maxShardsPerNode = ((numShards * replicationFactor) / liveNodeCount) + 1;
    HashMap<String, List<Integer>> collectionInfos = new HashMap<>();
    String shard_fld = "shard_s";
    try (CloudSolrClient client = createCloudClient(null)) {
        Map<String, Object> props = Utils.makeMap(REPLICATION_FACTOR, replicationFactor, MAX_SHARDS_PER_NODE, maxShardsPerNode, NUM_SLICES, numShards, "router.field", shard_fld);
        createCollection(collectionInfos, collectionName, props, client);
    }
    List<Integer> list = collectionInfos.get(collectionName);
    checkForCollection(collectionName, list, null);
    waitForRecoveriesToFinish(false);
    String url = getUrlFromZk(getCommonCloudSolrClient().getZkStateReader().getClusterState(), collectionName);
    try (HttpSolrClient collectionClient = getHttpSolrClient(url)) {
        ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
        final DocRouter router = clusterState.getCollection(collectionName).getRouter();
        Slice shard1 = clusterState.getSlice(collectionName, SHARD1);
        // fall back to the router's full range when the shard carries no explicit range
        DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
        final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
        final int[] docCounts = new int[ranges.size()];
        for (int i = 100; i <= 200; i++) {
            // See comment in ShardRoutingTest for hash distribution
            String shardKey = "" + (char) ('a' + (i % 26));
            collectionClient.add(getDoc(id, i, "n_ti", i, shard_fld, shardKey));
            // -1 means the key does not hash into either partition of SHARD1, so it is not counted
            int idx = getHashRangeIdx(router, ranges, shardKey);
            if (idx != -1) {
                docCounts[idx]++;
            }
        }
        for (int i = 0; i < docCounts.length; i++) {
            int docCount = docCounts[i];
            log.info("Shard {} docCount = {}", "shard1_" + i, docCount);
        }
        collectionClient.commit();
        final int maxSplitAttempts = 3;
        for (int attempt = 1; attempt <= maxSplitAttempts; attempt++) {
            try {
                splitShard(collectionName, SHARD1, null, null);
                break;
            } catch (HttpSolrClient.RemoteSolrException e) {
                // only server-side errors (code 500) are worth retrying
                if (e.code() != 500) {
                    throw e;
                }
                boolean willRetry = attempt < maxSplitAttempts;
                log.error("SPLITSHARD failed." + (willRetry ? " Retrying split" : ""), e);
                if (!willRetry) {
                    fail("SPLITSHARD was not successful even after three tries");
                }
            }
        }
        waitForRecoveriesToFinish(collectionName, false);
        assertEquals(docCounts[0], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_0")).getResults().getNumFound());
        assertEquals(docCounts[1], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_1")).getResults().getNumFound());
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) HashMap(java.util.HashMap) SolrQuery(org.apache.solr.client.solrj.SolrQuery) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) Slice(org.apache.solr.common.cloud.Slice) DocRouter(org.apache.solr.common.cloud.DocRouter) List(java.util.List) ArrayList(java.util.ArrayList)

Example 39 with ClusterState

use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.

the class ShardSplitTest method testSplitAfterFailedSplit.

/**
   * Verifies that a shard can still be split after an earlier split attempt
   * failed and left its sub-shards in construction or recovery state.
   *
   * See SOLR-9439
   */
@Test
public void testSplitAfterFailedSplit() throws Exception {
    waitForThingsToLevelOut(15);
    final String collectionName = AbstractDistribZkTestBase.DEFAULT_COLLECTION;
    // inject a failure so that the first split attempt cannot succeed
    TestInjection.splitFailureBeforeReplicaCreation = "true:100";
    try {
        try {
            CollectionAdminRequest.SplitShard doomedSplit = CollectionAdminRequest.splitShard(collectionName);
            doomedSplit.setShardName(SHARD1);
            doomedSplit.process(cloudClient);
            fail("Shard split was not supposed to succeed after failure injection!");
        } catch (Exception expected) {
            // the injected failure is supposed to end up here
        }

        // the failed attempt must have left one replica per sub-shard, both sub-shards under construction
        ZkStateReader reader = cloudClient.getZkStateReader();
        reader.forceUpdateCollection(collectionName);
        DocCollection docCollection = reader.getClusterState().getCollection(collectionName);

        Slice firstSubShard = docCollection.getSlice(SHARD1_0);
        assertEquals(Slice.State.CONSTRUCTION, firstSubShard.getState());
        assertEquals(1, firstSubShard.getReplicas().size());

        Slice secondSubShard = docCollection.getSlice(SHARD1_1);
        assertEquals(Slice.State.CONSTRUCTION, secondSubShard.getState());
        assertEquals(1, secondSubShard.getReplicas().size());

        // clear the injected failure and split again; this time it has to work
        TestInjection.reset();
        try {
            CollectionAdminRequest.SplitShard retrySplit = CollectionAdminRequest.splitShard(collectionName);
            retrySplit.setShardName(SHARD1);
            retrySplit.process(cloudClient);
        } catch (Exception e) {
            log.error("Shard split failed", e);
            fail("Shard split did not succeed after a previous failed split attempt left sub-shards in construction state");
        }
    } finally {
        // never leak the failure injection into other tests
        TestInjection.reset();
    }
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) CollectionAdminRequest(org.apache.solr.client.solrj.request.CollectionAdminRequest) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException) Test(org.junit.Test)

Example 40 with ClusterState

use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.

the class ShardSplitTest method checkDocCountsAndShardStates.

/**
 * Waits for both sub-shards of the default collection to become active, then verifies
 * their replica counts and the number of documents held by each sub-shard leader.
 *
 * The cluster state is polled up to 10 times with a 500 ms pause between polls. A sub-shard
 * that is not yet present in the cluster state is treated like one that is not yet active,
 * so the poll is retried instead of failing with a NullPointerException.
 *
 * @param docCounts   expected document counts; index 0 is for shard1_0, index 1 for shard1_1
 * @param numReplicas expected number of replicas of each sub-shard
 * @throws Exception if polling is interrupted or a query against a sub-shard leader fails
 */
protected void checkDocCountsAndShardStates(int[] docCounts, int numReplicas) throws Exception {
    Slice slice1_0 = null, slice1_1 = null;
    int i = 0;
    for (i = 0; i < 10; i++) {
        ZkStateReader zkStateReader = cloudClient.getZkStateReader();
        ClusterState clusterState = zkStateReader.getClusterState();
        slice1_0 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, "shard1_0");
        slice1_1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, "shard1_1");
        // a missing slice simply means "not ready yet"; the assertions below report it if it never shows up
        if (slice1_0 != null && slice1_1 != null && slice1_0.getState() == Slice.State.ACTIVE && slice1_1.getState() == Slice.State.ACTIVE) {
            break;
        }
        Thread.sleep(500);
    }
    log.info("ShardSplitTest waited for {} ms for shard state to be set to active", i * 500);
    assertNotNull("Cluster state does not contain shard1_0", slice1_0);
    assertNotNull("Cluster state does not contain shard1_1", slice1_1);
    assertSame("shard1_0 is not active", Slice.State.ACTIVE, slice1_0.getState());
    assertSame("shard1_1 is not active", Slice.State.ACTIVE, slice1_1.getState());
    assertEquals("Wrong number of replicas created for shard1_0", numReplicas, slice1_0.getReplicas().size());
    assertEquals("Wrong number of replicas created for shard1_1", numReplicas, slice1_1.getReplicas().size());
    commit();
    // can't use checkShardConsistency because it insists on jettys and clients for each shard
    checkSubShardConsistency(SHARD1_0);
    checkSubShardConsistency(SHARD1_1);
    // distrib=false keeps each query on the core it is sent to, so the counts are per sub-shard leader
    SolrQuery query = new SolrQuery("*:*").setRows(1000).setFields("id", "_version_");
    query.set("distrib", false);
    ZkCoreNodeProps shard1_0 = getLeaderUrlFromZk(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1_0);
    QueryResponse response;
    try (HttpSolrClient shard1_0Client = getHttpSolrClient(shard1_0.getCoreUrl())) {
        response = shard1_0Client.query(query);
    }
    long shard10Count = response.getResults().getNumFound();
    ZkCoreNodeProps shard1_1 = getLeaderUrlFromZk(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1_1);
    QueryResponse response2;
    try (HttpSolrClient shard1_1Client = getHttpSolrClient(shard1_1.getCoreUrl())) {
        response2 = shard1_1Client.query(query);
    }
    long shard11Count = response2.getResults().getNumFound();
    logDebugHelp(docCounts, response, shard10Count, response2, shard11Count);
    assertEquals("Wrong doc count on shard1_0. See SOLR-5309", docCounts[0], shard10Count);
    assertEquals("Wrong doc count on shard1_1. See SOLR-5309", docCounts[1], shard11Count);
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) Slice(org.apache.solr.common.cloud.Slice) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) SolrQuery(org.apache.solr.client.solrj.SolrQuery)

Aggregations

ClusterState (org.apache.solr.common.cloud.ClusterState)122 Slice (org.apache.solr.common.cloud.Slice)78 Replica (org.apache.solr.common.cloud.Replica)65 ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)56 DocCollection (org.apache.solr.common.cloud.DocCollection)49 HashMap (java.util.HashMap)42 ArrayList (java.util.ArrayList)36 Map (java.util.Map)25 IOException (java.io.IOException)20 Test (org.junit.Test)18 HashSet (java.util.HashSet)17 SolrException (org.apache.solr.common.SolrException)16 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)15 SolrQuery (org.apache.solr.client.solrj.SolrQuery)13 JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner)13 CloudSolrClient (org.apache.solr.client.solrj.impl.CloudSolrClient)13 ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps)13 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)13 List (java.util.List)12 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)12