Search in sources :

Example 86 with Replica

use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

the class ShardSplitTest method testSplitStaticIndexReplication.

/*
  Verifies that a replica added to a sub-shard after splitting a static index (indexing has
  already stopped) ends up consistent with the existing sub-shard replica.

  Outline:
    1. Create a dedicated collection pinned to a single, known node (replicationFactor=1).
    2. Index for about one second, stop the indexer and commit.
    3. Split SHARD1 asynchronously and wait for the request to complete.
    4. Wait until the parent shard is INACTIVE and both sub-shards are ACTIVE.
    5. Restart the jetty hosting the collection (the sub-shard leader node).
    6. Add a replica to SHARD1_0 and wait until no replica of that shard is RECOVERING.
    7. Assert that exactly 2 replicas of SHARD1_0 were checked for consistency.
   */
public void testSplitStaticIndexReplication() throws Exception {
    waitForThingsToLevelOut(15);
    // Pick the node of the first replica of the default collection; the new collection is pinned to it.
    DocCollection defCol = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
    Replica replica = defCol.getReplicas().get(0);
    String nodeName = replica.getNodeName();
    String collectionName = "testSplitStaticIndexReplication";
    // arguments 1, 1: presumably numShards and replicationFactor -- confirm against CollectionAdminRequest
    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 1);
    // some high number so we can create replicas without hindrance
    create.setMaxShardsPerNode(5);
    // we want to create the leader on a fixed node so that we know which one to restart later
    create.setCreateNodeSet(nodeName);
    create.process(cloudClient);
    try (CloudSolrClient client = getCloudSolrClient(zkServer.getZkAddress(), true, cloudClient.getLbClient().getHttpClient())) {
        client.setDefaultCollection(collectionName);
        // presumably indexes into both the control client and the new collection -- confirm in StoppableIndexingThread
        StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, client, "i1", true);
        try {
            thread.start();
            // give the indexer sometime to do its work
            Thread.sleep(1000);
            // stop indexing before the split so that the index is static from here on
            thread.safeStop();
            thread.join();
            client.commit();
            controlClient.commit();
            // split SHARD1 asynchronously and poll the request status (timeout 120, presumably seconds)
            CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName);
            splitShard.setShardName(SHARD1);
            String asyncId = splitShard.processAsync(client);
            RequestStatusState state = CollectionAdminRequest.requestStatus(asyncId).waitFor(client, 120);
            if (state == RequestStatusState.COMPLETED) {
                waitForRecoveriesToFinish(collectionName, true);
                // let's wait to see parent shard become inactive
                CountDownLatch latch = new CountDownLatch(1);
                client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {

                    @Override
                    public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
                        // NOTE(review): parent is dereferenced without a null check, unlike the sub-shards
                        Slice parent = collectionState.getSlice(SHARD1);
                        Slice slice10 = collectionState.getSlice(SHARD1_0);
                        Slice slice11 = collectionState.getSlice(SHARD1_1);
                        if (slice10 != null && slice11 != null && parent.getState() == Slice.State.INACTIVE && slice10.getState() == Slice.State.ACTIVE && slice11.getState() == Slice.State.ACTIVE) {
                            latch.countDown();
                            // removes the watch
                            return true;
                        }
                        return false;
                    }
                });
                // the await result is ignored; the latch count is inspected right below instead
                latch.await(1, TimeUnit.MINUTES);
                if (latch.getCount() != 0) {
                    // sanity check
                    fail("Sub-shards did not become active even after waiting for 1 minute");
                }
                // remember how many nodes were live before the restart; the second watcher waits for this count again
                int liveNodeCount = client.getZkStateReader().getClusterState().getLiveNodes().size();
                // restart the sub-shard leader node
                boolean restarted = false;
                for (JettySolrRunner jetty : jettys) {
                    // the jetty is matched to the replica by looking for ":<port>" in the replica's base URL
                    int port = jetty.getBaseUrl().getPort();
                    if (replica.getStr(BASE_URL_PROP).contains(":" + port)) {
                        ChaosMonkey.kill(jetty);
                        ChaosMonkey.start(jetty);
                        restarted = true;
                        break;
                    }
                }
                if (!restarted) {
                    // sanity check
                    fail("We could not find a jetty to kill for replica: " + replica.getCoreUrl());
                }
                // add a new replica for the sub-shard
                CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collectionName, SHARD1_0);
                // use control client because less chances of it being the node being restarted
                // this is to avoid flakiness of test because of NoHttpResponseExceptions
                // (despite its name, control_collection holds the base URL of the control collection's first replica)
                String control_collection = client.getZkStateReader().getClusterState().getCollection("control_collection").getReplicas().get(0).getStr(BASE_URL_PROP);
                try (HttpSolrClient control = new HttpSolrClient.Builder(control_collection).withHttpClient(client.getLbClient().getHttpClient()).build()) {
                    state = addReplica.processAndWait(control, 30);
                }
                if (state == RequestStatusState.COMPLETED) {
                    CountDownLatch newReplicaLatch = new CountDownLatch(1);
                    client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {

                        @Override
                        public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
                            // ignore updates until the number of live nodes is back to the pre-restart count
                            if (liveNodes.size() != liveNodeCount) {
                                return false;
                            }
                            // NOTE(review): slice is dereferenced without a null check
                            Slice slice = collectionState.getSlice(SHARD1_0);
                            if (slice.getReplicas().size() == 2) {
                                if (!slice.getReplicas().stream().anyMatch(r -> r.getState() == Replica.State.RECOVERING)) {
                                    // we see replicas and none of them are recovering
                                    newReplicaLatch.countDown();
                                    return true;
                                }
                            }
                            return false;
                        }
                    });
                    // NOTE(review): the await result is not checked; on timeout the assertions below still run
                    newReplicaLatch.await(30, TimeUnit.SECONDS);
                    // check consistency of sub-shard replica explicitly because checkShardConsistency methods doesn't
                    // handle new shards/replica so well.
                    ClusterState clusterState = client.getZkStateReader().getClusterState();
                    DocCollection collection = clusterState.getCollection(collectionName);
                    int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
                    assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
                } else {
                    fail("Adding a replica to sub-shard did not complete even after waiting for 30 seconds!. Saw state = " + state.getKey());
                }
            } else {
                fail("We expected shard split to succeed on a static index but it didn't. Found state = " + state.getKey());
            }
        } finally {
            // make sure the indexer is stopped even when the test fails before the explicit stop above
            thread.safeStop();
            thread.join();
        }
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) CollectionAdminRequest(org.apache.solr.client.solrj.request.CollectionAdminRequest) CollectionStateWatcher(org.apache.solr.common.cloud.CollectionStateWatcher) CountDownLatch(java.util.concurrent.CountDownLatch) Replica(org.apache.solr.common.cloud.Replica) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) RequestStatusState(org.apache.solr.client.solrj.response.RequestStatusState) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection)

Example 87 with Replica

use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

the class ShardSplitTest method checkSubShardConsistency.

/**
 * Queries every replica of the given shard of the default collection directly and asserts
 * that each replica holds at least one document and that all replicas report the same
 * document count.
 *
 * @param shard name of the shard (slice) whose replicas are compared
 * @throws SolrServerException if a query against a replica fails on the server side
 * @throws IOException if communicating with a replica fails
 */
protected void checkSubShardConsistency(String shard) throws SolrServerException, IOException {
    SolrQuery query = new SolrQuery("*:*").setRows(1000).setFields("id", "_version_");
    // distrib=false: each core is queried on its own so the counts are per replica
    query.set("distrib", false);
    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    Slice slice = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, shard);
    // fail with a readable message instead of a NullPointerException when the shard is unknown
    assertTrue("Shard: " + shard + " not found in cluster state", slice != null);
    long[] numFound = new long[slice.getReplicasMap().size()];
    int c = 0;
    for (Replica replica : slice.getReplicas()) {
        String coreUrl = new ZkCoreNodeProps(replica).getCoreUrl();
        QueryResponse response;
        try (HttpSolrClient client = getHttpSolrClient(coreUrl)) {
            response = client.query(query);
        }
        long docCount = response.getResults().getNumFound();
        numFound[c++] = docCount;
        log.info("Shard: {} Replica: {} has {} docs", shard, coreUrl, docCount);
        assertTrue("Shard: " + shard + " Replica: " + coreUrl + " has 0 docs", docCount > 0);
    }
    // compare every other replica against the first one; index 0 would only compare with itself
    for (int i = 1; i < numFound.length; i++) {
        assertEquals(shard + " is not consistent", numFound[0], numFound[i]);
    }
}
Also used : HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) Slice(org.apache.solr.common.cloud.Slice) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) Replica(org.apache.solr.common.cloud.Replica) SolrQuery(org.apache.solr.client.solrj.SolrQuery)

Example 88 with Replica

use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

the class DistribDocExpirationUpdateProcessorTest method getIndexVersionOfAllReplicas.

/**
 * Asks the replication handler of every replica of {@code COLLECTION} for its index version.
 * The document count of each core is also queried and logged for debugging.
 *
 * @return a map whose key is the replica's core name (as returned by
 *         {@code Replica.getCoreName()}) and whose value is what the replication handler
 *         returns for {@code indexversion}
 * @throws IOException if communicating with a replica fails
 * @throws SolrServerException if a request fails on the server side
 */
private Map<String, Long> getIndexVersionOfAllReplicas() throws IOException, SolrServerException {
    Map<String, Long> results = new HashMap<>();
    DocCollection collectionState = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION);
    for (Replica replica : collectionState.getReplicas()) {
        String coreName = replica.getCoreName();
        try (HttpSolrClient client = getHttpSolrClient(replica.getCoreUrl())) {
            // request the index version from the replication handler of this core
            ModifiableSolrParams params = new ModifiableSolrParams();
            params.set("command", "indexversion");
            params.set("_trace", "getIndexVersion");
            params.set("qt", ReplicationHandler.PATH);
            QueryRequest req = new QueryRequest(params);
            NamedList<Object> res = client.request(req);
            assertNotNull("null response from server: " + coreName, res);
            Object version = res.get("indexversion");
            assertNotNull("null version from server: " + coreName, version);
            assertTrue("version isn't a long: " + coreName, version instanceof Long);
            results.put(coreName, (Long) version);
            // non-distributed count, logged only; it does not influence the returned map
            long numDocs = client.query(params("q", "*:*", "distrib", "false", "rows", "0", "_trace", "counting_docs")).getResults().getNumFound();
            log.info("core={}; ver={}; numDocs={}", coreName, version, numDocs);
        }
    }
    return results;
}
Also used : HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) QueryRequest(org.apache.solr.client.solrj.request.QueryRequest) HashMap(java.util.HashMap) DocCollection(org.apache.solr.common.cloud.DocCollection) Replica(org.apache.solr.common.cloud.Replica) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams)

Example 89 with Replica

use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

the class DistribDocExpirationUpdateProcessorTest method test.

@Test
public void test() throws Exception {
    // seed the collection with 100 docs that never expire
    final UpdateRequest seedDocs = new UpdateRequest();
    int docId = 1;
    while (docId <= 100) {
        seedDocs.add(sdoc("id", docId));
        docId++;
    }
    seedDocs.commit(cluster.getSolrClient(), COLLECTION);

    // the doc we are about to add must not be there yet
    waitForNoResults(0, params("q", "id:999", "rows", "0", "_trace", "sanity_check"));

    // snapshot the index version of every core so that we can tell later which ones changed
    final Map<String, Long> versionsBefore = getIndexVersionOfAllReplicas();
    assertTrue("WTF? no versions?", versionsBefore.size() > 0);

    // add a single doc with a short TTL ...
    new UpdateRequest()
        .add(sdoc("id", "999", "tTl_s", "+30SECONDS"))
        .commit(cluster.getSolrClient(), COLLECTION);

    // ... and wait until it has been deleted again
    waitForNoResults(180, params("q", "id:999", "rows", "0", "_trace", "did_it_expire_yet"));

    // take a second snapshot and compare it core by core with the first one
    final Map<String, Long> versionsAfter = getIndexVersionOfAllReplicas();
    assertEquals("WTF? not same num versions?", versionsBefore.size(), versionsAfter.size());

    final Set<String> nodesThatChange = new HashSet<>();
    final Set<String> shardsThatChange = new HashSet<>();
    int coresCompared = 0;
    final DocCollection collectionState =
        cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION);
    for (Replica replica : collectionState.getReplicas()) {
        coresCompared++;
        final String name = replica.getName();
        final String core = replica.getCoreName();
        final Long before = versionsBefore.get(core);
        final Long after = versionsAfter.get(core);
        assertNotNull(name + ": no init version for core: " + core, before);
        assertNotNull(name + ": no final version for core: " + core, after);
        if (before.equals(after)) {
            // untouched core, nothing to record
            continue;
        }
        nodesThatChange.add(core + "(" + name + ")");
        // NOTE(review): this records replica.getName(), not a shard name; the "exactly one shard"
        // assertion below therefore counts distinct replica names -- confirm this is intended
        shardsThatChange.add(name);
    }

    assertEquals("Exactly one shard should have changed, instead: " + shardsThatChange + " nodes=(" + nodesThatChange + ")", 1, shardsThatChange.size());
    assertEquals("somehow we missed some cores?", versionsBefore.size(), coresCompared);
    // TODO: the checks above verify that deleteByQuery happens on all nodes and does not
    // cause a searcher re-open on shards without expired docs ... can we also verify
    // that *only* one node is sending the deletes?
    // (ie: no flood of redundant deletes?)
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) DocCollection(org.apache.solr.common.cloud.DocCollection) Replica(org.apache.solr.common.cloud.Replica) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 90 with Replica

use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

the class DistribJoinFromCollectionTest method setupCluster.

@BeforeClass
public static void setupCluster() throws Exception {
    // bring up a 5 node cluster with a single shared config set
    final String configName = "solrCloudCollectionConfig";
    final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf");
    final int nodeCount = 5;
    configureCluster(nodeCount).addConfig(configName, configDir).configure();

    final Map<String, String> collectionProperties = new HashMap<>();
    collectionProperties.put("config", "solrconfig-tlog.xml");
    collectionProperties.put("schema", "schema.xml");

    // the "to" side of the JOIN: 2 shards x 2 replicas
    final int shards = 2;
    final int replicas = 2;
    CollectionAdminRequest.createCollection(toColl, configName, shards, replicas)
        .setProperties(collectionProperties)
        .process(cluster.getSolrClient());

    // collect the names of all nodes that host a replica of the "to" collection
    final Set<String> nodeSet = new HashSet<>();
    final ClusterState cs = cluster.getSolrClient().getZkStateReader().getClusterState();
    for (Slice slice : cs.getCollection(toColl).getActiveSlices()) {
        for (Replica replica : slice.getReplicas()) {
            nodeSet.add(replica.getNodeName());
        }
    }
    assertTrue(!nodeSet.isEmpty());

    // the "from" collection is created on exactly those nodes
    final String createNodeSet = StringUtils.join(nodeSet, ",");
    CollectionAdminRequest.createCollection(fromColl, configName, 1, 4)
        .setCreateNodeSet(createNodeSet)
        .setProperties(collectionProperties)
        .process(cluster.getSolrClient());

    toDocId = indexDoc(toColl, 1001, "a", null, "b");
    indexDoc(fromColl, 2001, "a", "c", null);

    // so the commits fire
    Thread.sleep(1000);
}
Also used : Path(java.nio.file.Path) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) ClusterState(org.apache.solr.common.cloud.ClusterState) HashMap(java.util.HashMap) Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica) HashSet(java.util.HashSet) BeforeClass(org.junit.BeforeClass)

Aggregations

Replica (org.apache.solr.common.cloud.Replica)232 Slice (org.apache.solr.common.cloud.Slice)140 DocCollection (org.apache.solr.common.cloud.DocCollection)86 ArrayList (java.util.ArrayList)81 ClusterState (org.apache.solr.common.cloud.ClusterState)67 HashMap (java.util.HashMap)60 SolrException (org.apache.solr.common.SolrException)53 ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)50 Test (org.junit.Test)50 Map (java.util.Map)45 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)37 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)35 JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner)29 NamedList (org.apache.solr.common.util.NamedList)28 SolrQuery (org.apache.solr.client.solrj.SolrQuery)26 IOException (java.io.IOException)25 SolrInputDocument (org.apache.solr.common.SolrInputDocument)25 ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps)25 HashSet (java.util.HashSet)24 List (java.util.List)20