Search in sources:

Example 41 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class AbstractFullDistribZkTestBase method showCounts.

/**
 * Debug helper: for every jetty registered under every shard in {@code shardToJetty},
 * prints to {@code System.err} the jetty's node props, the number of documents found by a
 * non-distributed {@code *:*} query against it, and whether its node name is contained in
 * the live nodes of the current cluster state.
 *
 * <p>If the query fails, an error line is printed and the live check for that jetty is skipped.
 */
public void showCounts() {
    for (String shardName : shardToJetty.keySet()) {
        for (CloudJettyRunner runner : shardToJetty.get(shardName)) {
            ZkNodeProps nodeProps = runner.info;
            System.err.println("PROPS:" + nodeProps);
            try {
                // "tests" is just a tag that won't do anything except be echoed in logs
                SolrParams countQuery = params("q", "*:*", "rows", "0", "distrib", "false", "tests", "checkShardConsistency");
                long numFound = runner.client.solrClient.query(countQuery).getResults().getNumFound();
                System.err.println("DOCS:" + numFound);
            } catch (SolrServerException | SolrException | IOException e) {
                System.err.println("error contacting client: " + e.getMessage() + "\n");
                // nothing more to report for a jetty we could not query
                continue;
            }
            String nodeName = nodeProps.getStr(ZkStateReader.NODE_NAME_PROP);
            ZkStateReader stateReader = cloudClient.getZkStateReader();
            boolean isLive = stateReader.getClusterState().liveNodesContain(nodeName);
            System.err.println(" live:" + isLive);
        }
    }
}
Also used : ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) SolrParams(org.apache.solr.common.params.SolrParams) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrException(org.apache.solr.common.SolrException)

Example 42 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class AbstractFullDistribZkTestBase method waitForRecoveriesToFinish.

// serial commit...
/**
 * Convenience overload that delegates to the superclass implementation, passing
 * {@code DEFAULT_COLLECTION} and the {@link ZkStateReader} obtained from {@code cloudClient}.
 *
 * @param verbose forwarded unchanged to the superclass method
 * @throws Exception whatever the superclass method throws
 */
protected void waitForRecoveriesToFinish(boolean verbose) throws Exception {
    super.waitForRecoveriesToFinish(DEFAULT_COLLECTION, cloudClient.getZkStateReader(), verbose);
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader)

Example 43 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class AbstractFullDistribZkTestBase method getTotalReplicas.

/**
 * Returns the total number of replicas (cores serving an index to the collection) shown by
 * the cluster state, summed over all slices of the collection.
 *
 * @param collection name of the collection to look up in the cluster state
 * @return the summed replica count, or {@code 0} when the cluster state has no such collection
 */
protected int getTotalReplicas(String collection) {
    DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection);
    if (docCollection == null) {
        // support for when the collection hasn't been created yet
        return 0;
    }
    return docCollection.getSlices().stream().mapToInt(slice -> slice.getReplicas().size()).sum();
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection)

Example 44 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class DistributedVersionInfoTest method testReplicaVersionHandling.

/**
 * Checks that the leader and a non-leader replica agree on the max version in the index.
 *
 * <p>Phases, in order:
 * <ol>
 *   <li>Create the collection, wait for it to be fully active, reload it while empty, index
 *       one doc and assert leader and replica report the same max version.</li>
 *   <li>Send the same doc directly to the replica with a {@code _version_} one lower than the
 *       replica's max, flagged as coming from the leader, and assert the version stored for
 *       that doc is still the previous max.</li>
 *   <li>Run four threads concurrently (send docs, reload the collection, delete docs, commit),
 *       join them all, then assert the max versions agree before and after one more reload
 *       and that docs 1..1000 minus the deleted ones exist in all replicas.</li>
 * </ol>
 *
 * @throws Exception on any failure from the cluster, the clients or the helper methods
 */
@Test
public void testReplicaVersionHandling() throws Exception {
    final String shardId = "shard1";
    // NOTE(review): the (1, 3) arguments are presumably 1 shard x 3 replicas; confirm against createCollection
    CollectionAdminRequest.createCollection(COLLECTION, "conf", 1, 3).processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
    final ZkStateReader stateReader = cluster.getSolrClient().getZkStateReader();
    // block until the collection state satisfies isFullyActive(…, 1, 3), or the timeout expires
    stateReader.waitForState(COLLECTION, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> DocCollection.isFullyActive(n, c, 1, 3));
    final Replica leader = stateReader.getLeaderRetry(COLLECTION, shardId);
    // start by reloading the empty collection so we try to calculate the max from an empty index
    reloadCollection(leader, COLLECTION);
    sendDoc(1);
    cluster.getSolrClient().commit(COLLECTION);
    // verify doc is on the leader and replica
    // notLeaders = every replica of the collection whose core name differs from the leader's
    final List<Replica> notLeaders = stateReader.getClusterState().getCollection(COLLECTION).getReplicas().stream().filter(r -> r.getCoreName().equals(leader.getCoreName()) == false).collect(Collectors.toList());
    assertDocsExistInAllReplicas(leader, notLeaders, COLLECTION, 1, 1, null);
    // get max version from the leader and replica
    Replica replica = notLeaders.get(0);
    Long maxOnLeader = getMaxVersionFromIndex(leader);
    Long maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version: " + maxOnLeader, maxOnLeader, maxOnReplica);
    // send the same doc but with a lower version than the max in the index
    try (SolrClient client = getHttpSolrClient(replica.getCoreUrl())) {
        String docId = String.valueOf(1);
        SolrInputDocument doc = new SolrInputDocument();
        doc.setField("id", docId);
        // bad version!!!
        doc.setField("_version_", maxOnReplica - 1);
        // simulate what the leader does when sending a doc to a replica
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(DISTRIB_UPDATE_PARAM, DistributedUpdateProcessor.DistribPhase.FROMLEADER.toString());
        params.set(DISTRIB_FROM, leader.getCoreUrl());
        UpdateRequest req = new UpdateRequest();
        req.setParams(params);
        req.add(doc);
        log.info("Sending doc with out-of-date version (" + (maxOnReplica - 1) + ") document directly to replica");
        client.request(req);
        client.commit();
        // the version stored for the doc must still be the previous max, not the lower one just sent
        Long docVersion = getVersionFromIndex(replica, docId);
        assertEquals("older version should have been thrown away", maxOnReplica, docVersion);
    }
    reloadCollection(leader, COLLECTION);
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version after reload", maxOnLeader, maxOnReplica);
    // now start sending docs while collection is reloading
    delQ("*:*");
    commit();
    // deletedDocs is a plain HashSet: it is only mutated inside deleteThread and only read
    // by this thread after deleteThread.join() below
    final Set<Integer> deletedDocs = new HashSet<>();
    // number of sendDoc calls that completed without throwing; read by deleteThread
    final AtomicInteger docsSent = new AtomicInteger(0);
    // single fixed-seed Random shared by all four worker threads
    final Random rand = new Random(5150);
    Thread docSenderThread = new Thread() {

        public void run() {
            // brief delay before sending docs
            try {
                Thread.sleep(rand.nextInt(30) + 1);
            } catch (InterruptedException e) {
                // interruption is ignored; the thread simply carries on
            }
            for (int i = 0; i < 1000; i++) {
                // pause at random iterations (always on i == 0, since 0 % k == 0)
                if (i % (rand.nextInt(20) + 1) == 0) {
                    try {
                        Thread.sleep(rand.nextInt(50) + 1);
                    } catch (InterruptedException e) {
                        // interruption is ignored; the thread simply carries on
                    }
                }
                int docId = i + 1;
                try {
                    sendDoc(docId);
                    docsSent.incrementAndGet();
                } catch (Exception e) {
                    // send failures are swallowed; docsSent is only incremented on success
                }
            }
        }
    };
    Thread reloaderThread = new Thread() {

        public void run() {
            // random initial delay of 1..300 ms
            try {
                Thread.sleep(rand.nextInt(300) + 1);
            } catch (InterruptedException e) {
                // interruption is ignored; the thread simply carries on
            }
            // three reload attempts, each followed by a 300..599 ms pause
            for (int i = 0; i < 3; i++) {
                try {
                    reloadCollection(leader, COLLECTION);
                } catch (Exception e) {
                    // reload failures are swallowed
                }
                try {
                    Thread.sleep(rand.nextInt(300) + 300);
                } catch (InterruptedException e) {
                    // interruption is ignored; the thread simply carries on
                }
            }
        }
    };
    Thread deleteThread = new Thread() {

        public void run() {
            // fixed 500 ms delay before the first delete attempt
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
                // interruption is ignored; the thread simply carries on
            }
            for (int i = 0; i < 200; i++) {
                try {
                    Thread.sleep(rand.nextInt(50) + 1);
                } catch (InterruptedException e) {
                    // interruption is ignored; the thread simply carries on
                }
                int ds = docsSent.get();
                if (ds > 0) {
                    // pick a random id in 1..ds and delete it unless it was already deleted
                    int docToDelete = rand.nextInt(ds) + 1;
                    if (!deletedDocs.contains(docToDelete)) {
                        delI(String.valueOf(docToDelete));
                        deletedDocs.add(docToDelete);
                    }
                }
            }
        }
    };
    Thread committerThread = new Thread() {

        public void run() {
            // random initial delay of 1..200 ms
            try {
                Thread.sleep(rand.nextInt(200) + 1);
            } catch (InterruptedException e) {
                // interruption is ignored; the thread simply carries on
            }
            // twenty commit attempts, each followed by a 100..199 ms pause
            for (int i = 0; i < 20; i++) {
                try {
                    cluster.getSolrClient().commit(COLLECTION);
                } catch (Exception e) {
                    // commit failures are swallowed
                }
                try {
                    Thread.sleep(rand.nextInt(100) + 100);
                } catch (InterruptedException e) {
                    // interruption is ignored; the thread simply carries on
                }
            }
        }
    };
    docSenderThread.start();
    reloaderThread.start();
    committerThread.start();
    deleteThread.start();
    // wait for all four workers to finish before inspecting the index
    docSenderThread.join();
    reloaderThread.join();
    committerThread.join();
    deleteThread.join();
    // final commit before reading versions back
    cluster.getSolrClient().commit(COLLECTION);
    log.info("Total of " + deletedDocs.size() + " docs deleted");
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version before reload", maxOnLeader, maxOnReplica);
    reloadCollection(leader, COLLECTION);
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version after reload", maxOnLeader, maxOnReplica);
    // NOTE(review): presumably checks ids 1..1000, treating deletedDocs as expected-missing; confirm in the helper
    assertDocsExistInAllReplicas(leader, notLeaders, COLLECTION, 1, 1000, deletedDocs);
}
Also used : BeforeClass(org.junit.BeforeClass) Slow(org.apache.lucene.util.LuceneTestCase.Slow) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrDocumentList(org.apache.solr.common.SolrDocumentList) CoreAdminResponse(org.apache.solr.client.solrj.response.CoreAdminResponse) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SolrServerException(org.apache.solr.client.solrj.SolrServerException) QueryRequest(org.apache.solr.client.solrj.request.QueryRequest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) DISTRIB_UPDATE_PARAM(org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM) SuppressSSL(org.apache.solr.SolrTestCaseJ4.SuppressSSL) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Logger(org.slf4j.Logger) JSONTestUtil(org.apache.solr.JSONTestUtil) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) MethodHandles(java.lang.invoke.MethodHandles) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) Set(java.util.Set) IOException(java.io.IOException) DistributedUpdateProcessor(org.apache.solr.update.processor.DistributedUpdateProcessor) Test(org.junit.Test) Collectors(java.util.stream.Collectors) Replica(org.apache.solr.common.cloud.Replica) NamedList(org.apache.solr.common.util.NamedList) SolrClient(org.apache.solr.client.solrj.SolrClient) TimeUnit(java.util.concurrent.TimeUnit) SolrDocument(org.apache.solr.common.SolrDocument) List(java.util.List) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrQuery(org.apache.solr.client.solrj.SolrQuery) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) DISTRIB_FROM(org.apache.solr.update.processor.DistributedUpdateProcessor.DISTRIB_FROM) Collections(java.util.Collections) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) 
CollectionAdminRequest(org.apache.solr.client.solrj.request.CollectionAdminRequest) SolrInputDocument(org.apache.solr.common.SolrInputDocument) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) Replica(org.apache.solr.common.cloud.Replica) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SolrInputDocument(org.apache.solr.common.SolrInputDocument) Random(java.util.Random) SolrClient(org.apache.solr.client.solrj.SolrClient) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 45 with ZkStateReader

use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.

the class HttpPartitionTest method testMinRf.

/**
 * Exercises the achieved replication factor reported by {@code sendDoc} on a collection
 * named {@code collMinRf_1x3} while the proxies in front of non-leader replicas are closed
 * and reopened.
 *
 * <p>Scenario 1 closes the proxy of one non-leader: the achieved rf is expected to be 2, the
 * partitioned replica is expected to be marked DOWN, and after the proxy is reopened both
 * docs are expected on the non-leaders. Scenario 2 closes the proxies of both non-leaders:
 * the achieved rf is expected to be 1, doc 3 is expected on the leader only, and a retry
 * after the proxies are reopened is expected to reach rf 3.
 *
 * @throws Exception on any failure from the cluster, the clients or the helper methods
 */
protected void testMinRf() throws Exception {
    // create a collection that has 1 shard and 3 replicas
    final String testCollectionName = "collMinRf_1x3";
    createCollection(testCollectionName, 1, 3, 1);
    cloudClient.setDefaultCollection(testCollectionName);

    sendDoc(1, 2);

    List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    assertTrue("Expected 2 non-leader replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), notLeaders.size() == 2);
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);

    // Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
    SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();

    // indexing during a partition
    int achievedRf = sendDoc(2, 2);
    assertEquals("Unexpected achieved replication factor", 2, achievedRf);

    Thread.sleep(sleepMsBeforeHealPartition);

    // Verify that the partitioned replica is DOWN
    ZkStateReader stateReader = cloudClient.getZkStateReader();
    // force the state to be fresh
    stateReader.forceUpdateCollection(testCollectionName);
    ClusterState clusterState = stateReader.getClusterState();
    Slice firstSlice = clusterState.getActiveSlices(testCollectionName).iterator().next();
    String partitionedName = notLeaders.get(0).getName();
    Replica partitionedReplica = firstSlice.getReplica(partitionedName);
    assertEquals("The partitioned replica did not get marked down", Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));

    proxy0.reopen();

    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    // Since minRf is achieved, we expect recovery, so we expect seeing 2 documents
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);

    // Now introduce a network partition between the leader and both of its replicas, so a minRf of 2 is NOT achieved
    proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();
    SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
    proxy1.close();

    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 1, achievedRf);

    Thread.sleep(sleepMsBeforeHealPartition);

    // Verify that the partitioned replicas are NOT DOWN since minRf wasn't achieved
    ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 1);

    proxy0.reopen();
    proxy1.reopen();

    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);

    // Check that doc 3 is on the leader but not on the notLeaders
    Replica shardLeader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
    try (HttpSolrClient leaderClient = getHttpSolrClient(shardLeader, testCollectionName)) {
        assertDocExists(leaderClient, testCollectionName, "3");
    }
    for (Replica follower : notLeaders) {
        try (HttpSolrClient followerClient = getHttpSolrClient(follower, testCollectionName)) {
            assertDocNotExists(followerClient, testCollectionName, "3");
        }
    }

    // Retry sending doc 3
    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 3, achievedRf);

    // Now doc 3 should be on all replicas
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica)

Aggregations

ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)133 ClusterState (org.apache.solr.common.cloud.ClusterState)58 Replica (org.apache.solr.common.cloud.Replica)48 Slice (org.apache.solr.common.cloud.Slice)48 HashMap (java.util.HashMap)34 SolrZkClient (org.apache.solr.common.cloud.SolrZkClient)33 ArrayList (java.util.ArrayList)32 DocCollection (org.apache.solr.common.cloud.DocCollection)31 Test (org.junit.Test)26 SolrException (org.apache.solr.common.SolrException)25 CloudSolrClient (org.apache.solr.client.solrj.impl.CloudSolrClient)22 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)20 IOException (java.io.IOException)19 Map (java.util.Map)19 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)18 KeeperException (org.apache.zookeeper.KeeperException)16 SolrQuery (org.apache.solr.client.solrj.SolrQuery)15 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)15 SolrServerException (org.apache.solr.client.solrj.SolrServerException)13 JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner)12