
Example 66 with Replica

Use of org.apache.solr.common.cloud.Replica in the lucene-solr project by Apache.

In the class AbstractDistribZkTestBase, the method assertAllActive:

protected static void assertAllActive(String collection, ZkStateReader zkStateReader) throws KeeperException, InterruptedException {
    zkStateReader.forceUpdateCollection(collection);
    ClusterState clusterState = zkStateReader.getClusterState();
    Map<String, Slice> slices = clusterState.getSlicesMap(collection);
    if (slices == null) {
        throw new IllegalArgumentException("Cannot find collection:" + collection);
    }
    for (Map.Entry<String, Slice> entry : slices.entrySet()) {
        Slice slice = entry.getValue();
        if (slice.getState() != Slice.State.ACTIVE) {
            fail("Not all shards are ACTIVE - found a shard " + slice.getName() + " that is: " + slice.getState());
        }
        Map<String, Replica> shards = slice.getReplicasMap();
        for (Map.Entry<String, Replica> shard : shards.entrySet()) {
            Replica replica = shard.getValue();
            if (replica.getState() != Replica.State.ACTIVE) {
                fail("Not all replicas are ACTIVE - found a replica " + replica.getName() + " that is: " + replica.getState());
            }
        }
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) Map(java.util.Map) Replica(org.apache.solr.common.cloud.Replica)
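This helper asserts a one-time snapshot of the cluster state. Example 67 below instead blocks on a CollectionStatePredicate until the collection is fully active. As a rough sketch of that style, using the same ZkStateReader.waitForState and DocCollection.isFullyActive APIs shown in Example 67 (the collection name, 1x3 layout, and 30-second timeout here are illustrative assumptions, and the call is assumed to run inside a test method that declares throws Exception):

// Sketch: block until the collection reports 1 active shard with 3 active replicas,
// rather than asserting a single snapshot of the cluster state.
ZkStateReader stateReader = cluster.getSolrClient().getZkStateReader();
stateReader.waitForState("myCollection", 30, TimeUnit.SECONDS,
        (liveNodes, collectionState) -> DocCollection.isFullyActive(liveNodes, collectionState, 1, 3));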

Example 67 with Replica

Use of org.apache.solr.common.cloud.Replica in the lucene-solr project by Apache.

In the class DistributedVersionInfoTest, the method testReplicaVersionHandling:

@Test
public void testReplicaVersionHandling() throws Exception {
    final String shardId = "shard1";
    CollectionAdminRequest.createCollection(COLLECTION, "conf", 1, 3).processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
    final ZkStateReader stateReader = cluster.getSolrClient().getZkStateReader();
    stateReader.waitForState(COLLECTION, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> DocCollection.isFullyActive(n, c, 1, 3));
    final Replica leader = stateReader.getLeaderRetry(COLLECTION, shardId);
    // start by reloading the empty collection so we try to calculate the max from an empty index
    reloadCollection(leader, COLLECTION);
    sendDoc(1);
    cluster.getSolrClient().commit(COLLECTION);
    // verify doc is on the leader and replica
    final List<Replica> notLeaders = stateReader.getClusterState().getCollection(COLLECTION).getReplicas().stream().filter(r -> r.getCoreName().equals(leader.getCoreName()) == false).collect(Collectors.toList());
    assertDocsExistInAllReplicas(leader, notLeaders, COLLECTION, 1, 1, null);
    // get max version from the leader and replica
    Replica replica = notLeaders.get(0);
    Long maxOnLeader = getMaxVersionFromIndex(leader);
    Long maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version: " + maxOnLeader, maxOnLeader, maxOnReplica);
    // send the same doc but with a lower version than the max in the index
    try (SolrClient client = getHttpSolrClient(replica.getCoreUrl())) {
        String docId = String.valueOf(1);
        SolrInputDocument doc = new SolrInputDocument();
        doc.setField("id", docId);
        // bad version!!!
        doc.setField("_version_", maxOnReplica - 1);
        // simulate what the leader does when sending a doc to a replica
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(DISTRIB_UPDATE_PARAM, DistributedUpdateProcessor.DistribPhase.FROMLEADER.toString());
        params.set(DISTRIB_FROM, leader.getCoreUrl());
        UpdateRequest req = new UpdateRequest();
        req.setParams(params);
        req.add(doc);
        log.info("Sending doc with out-of-date version (" + (maxOnReplica - 1) + ") document directly to replica");
        client.request(req);
        client.commit();
        Long docVersion = getVersionFromIndex(replica, docId);
        assertEquals("older version should have been thrown away", maxOnReplica, docVersion);
    }
    reloadCollection(leader, COLLECTION);
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version after reload", maxOnLeader, maxOnReplica);
    // now start sending docs while collection is reloading
    delQ("*:*");
    commit();
    final Set<Integer> deletedDocs = new HashSet<>();
    final AtomicInteger docsSent = new AtomicInteger(0);
    final Random rand = new Random(5150);
    Thread docSenderThread = new Thread() {

        public void run() {
            // brief delay before sending docs
            try {
                Thread.sleep(rand.nextInt(30) + 1);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 1000; i++) {
                if (i % (rand.nextInt(20) + 1) == 0) {
                    try {
                        Thread.sleep(rand.nextInt(50) + 1);
                    } catch (InterruptedException e) {
                    }
                }
                int docId = i + 1;
                try {
                    sendDoc(docId);
                    docsSent.incrementAndGet();
                } catch (Exception e) {
                }
            }
        }
    };
    Thread reloaderThread = new Thread() {

        public void run() {
            try {
                Thread.sleep(rand.nextInt(300) + 1);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 3; i++) {
                try {
                    reloadCollection(leader, COLLECTION);
                } catch (Exception e) {
                }
                try {
                    Thread.sleep(rand.nextInt(300) + 300);
                } catch (InterruptedException e) {
                }
            }
        }
    };
    Thread deleteThread = new Thread() {

        public void run() {
            // brief delay before deleting docs
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 200; i++) {
                try {
                    Thread.sleep(rand.nextInt(50) + 1);
                } catch (InterruptedException e) {
                }
                int ds = docsSent.get();
                if (ds > 0) {
                    int docToDelete = rand.nextInt(ds) + 1;
                    if (!deletedDocs.contains(docToDelete)) {
                        delI(String.valueOf(docToDelete));
                        deletedDocs.add(docToDelete);
                    }
                }
            }
        }
    };
    Thread committerThread = new Thread() {

        public void run() {
            try {
                Thread.sleep(rand.nextInt(200) + 1);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 20; i++) {
                try {
                    cluster.getSolrClient().commit(COLLECTION);
                } catch (Exception e) {
                }
                try {
                    Thread.sleep(rand.nextInt(100) + 100);
                } catch (InterruptedException e) {
                }
            }
        }
    };
    docSenderThread.start();
    reloaderThread.start();
    committerThread.start();
    deleteThread.start();
    docSenderThread.join();
    reloaderThread.join();
    committerThread.join();
    deleteThread.join();
    cluster.getSolrClient().commit(COLLECTION);
    log.info("Total of " + deletedDocs.size() + " docs deleted");
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version before reload", maxOnLeader, maxOnReplica);
    reloadCollection(leader, COLLECTION);
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version after reload", maxOnLeader, maxOnReplica);
    assertDocsExistInAllReplicas(leader, notLeaders, COLLECTION, 1, 1000, deletedDocs);
}
Also used : BeforeClass(org.junit.BeforeClass) Slow(org.apache.lucene.util.LuceneTestCase.Slow) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrDocumentList(org.apache.solr.common.SolrDocumentList) CoreAdminResponse(org.apache.solr.client.solrj.response.CoreAdminResponse) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SolrServerException(org.apache.solr.client.solrj.SolrServerException) QueryRequest(org.apache.solr.client.solrj.request.QueryRequest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) DISTRIB_UPDATE_PARAM(org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM) SuppressSSL(org.apache.solr.SolrTestCaseJ4.SuppressSSL) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Logger(org.slf4j.Logger) JSONTestUtil(org.apache.solr.JSONTestUtil) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) MethodHandles(java.lang.invoke.MethodHandles) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) Set(java.util.Set) IOException(java.io.IOException) DistributedUpdateProcessor(org.apache.solr.update.processor.DistributedUpdateProcessor) Test(org.junit.Test) Collectors(java.util.stream.Collectors) Replica(org.apache.solr.common.cloud.Replica) NamedList(org.apache.solr.common.util.NamedList) SolrClient(org.apache.solr.client.solrj.SolrClient) TimeUnit(java.util.concurrent.TimeUnit) SolrDocument(org.apache.solr.common.SolrDocument) List(java.util.List) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrQuery(org.apache.solr.client.solrj.SolrQuery) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) DISTRIB_FROM(org.apache.solr.update.processor.DistributedUpdateProcessor.DISTRIB_FROM) Collections(java.util.Collections) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) CollectionAdminRequest(org.apache.solr.client.solrj.request.CollectionAdminRequest) SolrInputDocument(org.apache.solr.common.SolrInputDocument)
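The getMaxVersionFromIndex and getVersionFromIndex helpers are not included in this snippet. As a hedged sketch only, not necessarily the project's actual implementation, a max-version lookup could query the replica's core directly with distrib=false, sorted by _version_ descending:

// Hypothetical reconstruction of a max-version helper; the real test's helper may differ.
protected Long getMaxVersionFromIndex(Replica replica) throws IOException, SolrServerException {
    try (SolrClient client = getHttpSolrClient(replica.getCoreUrl())) {
        SolrQuery query = new SolrQuery("*:*");
        query.setRows(1);
        query.setFields("id", "_version_");
        query.addSort("_version_", SolrQuery.ORDER.desc);
        // query only this core, not the whole collection
        query.set("distrib", false);
        SolrDocumentList results = client.query(query).getResults();
        return results.isEmpty() ? null : (Long) results.get(0).getFirstValue("_version_");
    }
}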

Example 68 with Replica

Use of org.apache.solr.common.cloud.Replica in the lucene-solr project by Apache.

In the class HttpPartitionTest, the method testMinRf:

protected void testMinRf() throws Exception {
    // create a collection that has 1 shard and 3 replicas
    String testCollectionName = "collMinRf_1x3";
    createCollection(testCollectionName, 1, 3, 1);
    cloudClient.setDefaultCollection(testCollectionName);
    sendDoc(1, 2);
    List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    assertTrue("Expected 2 non-leader replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), notLeaders.size() == 2);
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
    // Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
    SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();
    // indexing during a partition
    int achievedRf = sendDoc(2, 2);
    assertEquals("Unexpected achieved replication factor", 2, achievedRf);
    Thread.sleep(sleepMsBeforeHealPartition);
    // Verify that the partitioned replica is DOWN
    ZkStateReader zkr = cloudClient.getZkStateReader();
    // force the state to be fresh
    zkr.forceUpdateCollection(testCollectionName);
    ClusterState cs = zkr.getClusterState();
    Collection<Slice> slices = cs.getActiveSlices(testCollectionName);
    Slice slice = slices.iterator().next();
    Replica partitionedReplica = slice.getReplica(notLeaders.get(0).getName());
    assertEquals("The partitioned replica did not get marked down", Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));
    proxy0.reopen();
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    // Since minRf is achieved, we expect recovery, so we expect seeing 2 documents
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
    // Now introduce a network partition between the leader and both of its replicas, so a minRf of 2 is NOT achieved
    proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();
    SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
    proxy1.close();
    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 1, achievedRf);
    Thread.sleep(sleepMsBeforeHealPartition);
    // Verify that the partitioned replicas are NOT DOWN since minRf wasn't achieved
    ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 1);
    proxy0.reopen();
    proxy1.reopen();
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    // Check that doc 3 is on the leader but not on the notLeaders
    Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
    try (HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
        assertDocExists(leaderSolr, testCollectionName, "3");
    }
    for (Replica notLeader : notLeaders) {
        try (HttpSolrClient notLeaderSolr = getHttpSolrClient(notLeader, testCollectionName)) {
            assertDocNotExists(notLeaderSolr, testCollectionName, "3");
        }
    }
    // Retry sending doc 3
    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 3, achievedRf);
    // Now doc 3 should be on all replicas
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica)
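The sendDoc(docId, minRf) helper used above is not shown. A hedged sketch of how the min_rf parameter could be attached to an update and the achieved replication factor read back follows; the helper's exact shape and the a_t field name are assumptions, not the test's confirmed code:

// Hypothetical sendDoc(docId, minRf): attach min_rf to the update and report the achieved rf.
protected int sendDoc(int docId, int minRf) throws Exception {
    UpdateRequest up = new UpdateRequest();
    up.setParam(UpdateRequest.MIN_REPFACT, String.valueOf(minRf));
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", String.valueOf(docId));
    doc.addField("a_t", "hello" + docId);
    up.add(doc);
    NamedList<Object> resp = cloudClient.request(up);
    // CloudSolrClient reports the replication factor the cluster acknowledged for this request
    return cloudClient.getMinAchievedReplicationFactor(cloudClient.getDefaultCollection(), resp);
}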

Example 69 with Replica

Use of org.apache.solr.common.cloud.Replica in the lucene-solr project by Apache.

In the class HttpPartitionTest, the method testRf2:

protected void testRf2() throws Exception {
    // create a collection that has 1 shard but 2 replicas
    String testCollectionName = "c8n_1x2";
    createCollectionRetry(testCollectionName, 1, 2, 1);
    cloudClient.setDefaultCollection(testCollectionName);
    sendDoc(1);
    Replica notLeader = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive).get(0);
    // ok, now introduce a network partition between the leader and the replica
    SocketProxy proxy = getProxyForReplica(notLeader);
    proxy.close();
    // indexing during a partition
    sendDoc(2);
    // Have the partition last at least 1 sec.
    // While this gives the impression that recovery is timing related, this is really only
    // to give time for the state to be written to ZK before the test completes. In other words,
    // without a brief pause, the test finishes so quickly that it doesn't give the recovery
    // process time to kick in.
    Thread.sleep(sleepMsBeforeHealPartition);
    proxy.reopen();
    List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
    sendDoc(3);
    // sent 3 docs in so far, verify they are on the leader and replica
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
    // Get the max version from the replica core to make sure it gets updated after recovery (see SOLR-7625)
    JettySolrRunner replicaJetty = getJettyOnPort(getReplicaPort(notLeader));
    CoreContainer coreContainer = replicaJetty.getCoreContainer();
    ZkCoreNodeProps replicaCoreNodeProps = new ZkCoreNodeProps(notLeader);
    String coreName = replicaCoreNodeProps.getCoreName();
    Long maxVersionBefore = null;
    try (SolrCore core = coreContainer.getCore(coreName)) {
        assertNotNull("Core '" + coreName + "' not found for replica: " + notLeader.getName(), core);
        UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
        maxVersionBefore = ulog.getCurrentMaxVersion();
    }
    assertNotNull("max version bucket seed not set for core " + coreName, maxVersionBefore);
    log.info("Looked up max version bucket seed " + maxVersionBefore + " for core " + coreName);
    // now up the stakes and do more docs
    int numDocs = TEST_NIGHTLY ? 1000 : 100;
    boolean hasPartition = false;
    for (int d = 0; d < numDocs; d++) {
        // create / restore partition every 10 docs
        if (d % 10 == 0) {
            if (hasPartition) {
                proxy.reopen();
                hasPartition = false;
            } else {
                if (d >= 10) {
                    proxy.close();
                    hasPartition = true;
                    Thread.sleep(sleepMsBeforeHealPartition);
                }
            }
        }
        // 4 is offset as we've already indexed 1-3
        sendDoc(d + 4);
    }
    // restore connectivity if lost
    if (hasPartition) {
        proxy.reopen();
    }
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
    try (SolrCore core = coreContainer.getCore(coreName)) {
        assertNotNull("Core '" + coreName + "' not found for replica: " + notLeader.getName(), core);
        Long currentMaxVersion = core.getUpdateHandler().getUpdateLog().getCurrentMaxVersion();
        log.info("After recovery, looked up NEW max version bucket seed " + currentMaxVersion + " for core " + coreName + ", was: " + maxVersionBefore);
        assertTrue("max version bucket seed not updated after recovery!", currentMaxVersion > maxVersionBefore);
    }
    // verify all docs received
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, numDocs + 3);
    log.info("testRf2 succeeded ... deleting the " + testCollectionName + " collection");
    // try to clean up
    attemptCollectionDelete(cloudClient, testCollectionName);
}
Also used : CoreContainer(org.apache.solr.core.CoreContainer) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) SolrCore(org.apache.solr.core.SolrCore) UpdateLog(org.apache.solr.update.UpdateLog) Replica(org.apache.solr.common.cloud.Replica)
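Examples 68 and 69 both rely on assertDocsExistInAllReplicas / assertDocExists helpers that are not shown. One plausible per-replica check, sketched here with hedged assumptions about the helper's signature, is a real-time get (/get) issued with distrib=false so that only the targeted core answers:

// Hedged sketch of a single-replica existence check; the actual helper may differ.
protected void assertDocExists(HttpSolrClient solr, String collection, String docId) throws Exception {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("qt", "/get");
    params.set("id", docId);
    params.set("distrib", false);
    NamedList<Object> resp = solr.request(new QueryRequest(params), collection);
    assertNotNull("Doc with id=" + docId + " not found on " + solr.getBaseURL(), resp.get("doc"));
}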

Example 70 with Replica

Use of org.apache.solr.common.cloud.Replica in the lucene-solr project by Apache.

In the class DeleteInactiveReplicaTest, the method deleteInactiveReplicaTest:

@Test
public void deleteInactiveReplicaTest() throws Exception {
    String collectionName = "delDeadColl";
    int replicationFactor = 2;
    int numShards = 2;
    int maxShardsPerNode = (((numShards + 1) * replicationFactor) / cluster.getJettySolrRunners().size()) + 1;
    CollectionAdminRequest.createCollection(collectionName, "conf", numShards, replicationFactor).setMaxShardsPerNode(maxShardsPerNode).process(cluster.getSolrClient());
    waitForState("Expected a cluster of 2 shards and 2 replicas", collectionName, (n, c) -> {
        return DocCollection.isFullyActive(n, c, numShards, replicationFactor);
    });
    DocCollection collectionState = getCollectionState(collectionName);
    Slice shard = getRandomShard(collectionState);
    Replica replica = getRandomReplica(shard);
    JettySolrRunner jetty = cluster.getReplicaJetty(replica);
    cluster.stopJettySolrRunner(jetty);
    waitForState("Expected replica " + replica.getName() + " on down node to be removed from cluster state", collectionName, (n, c) -> {
        Replica r = c.getReplica(replica.getCoreName());
        return r == null || r.getState() != Replica.State.ACTIVE;
    });
    log.info("Removing replica {}/{} ", shard.getName(), replica.getName());
    CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName()).process(cluster.getSolrClient());
    waitForState("Expected deleted replica " + replica.getName() + " to be removed from cluster state", collectionName, (n, c) -> {
        return c.getReplica(replica.getCoreName()) == null;
    });
    cluster.startJettySolrRunner(jetty);
    log.info("restarted jetty");
    CoreContainer cc = jetty.getCoreContainer();
    CoreContainer.CoreLoadFailure loadFailure = cc.getCoreInitFailures().get(replica.getCoreName());
    assertNotNull("Deleted core was still loaded!", loadFailure);
    assertTrue("Unexpected load failure message: " + loadFailure.exception.getMessage(), loadFailure.exception.getMessage().contains("does not exist in shard"));
    // Check that we can't create a core with no coreNodeName
    try (SolrClient queryClient = getHttpSolrClient(jetty.getBaseUrl().toString())) {
        Exception e = expectThrows(Exception.class, () -> {
            CoreAdminRequest.Create createRequest = new CoreAdminRequest.Create();
            createRequest.setCoreName("testcore");
            createRequest.setCollection(collectionName);
            createRequest.setShardId("shard2");
            queryClient.request(createRequest);
        });
        assertTrue("Unexpected error message: " + e.getMessage(), e.getMessage().contains("coreNodeName missing"));
    }
}
Also used : JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) Replica(org.apache.solr.common.cloud.Replica) CoreContainer(org.apache.solr.core.CoreContainer) SolrClient(org.apache.solr.client.solrj.SolrClient) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) Test(org.junit.Test)
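The getRandomShard and getRandomReplica helpers come from the test's SolrCloud base class and are not shown here. A minimal sketch of what they might look like, assuming the test framework's random() source is available, is:

// Plausible sketches of the random-selection helpers; the base-class versions may differ.
protected static Slice getRandomShard(DocCollection collection) {
    List<Slice> slices = new ArrayList<>(collection.getActiveSlices());
    return slices.get(random().nextInt(slices.size()));
}

protected static Replica getRandomReplica(Slice slice) {
    List<Replica> replicas = new ArrayList<>(slice.getReplicas());
    return replicas.get(random().nextInt(replicas.size()));
}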

Aggregations

Replica (org.apache.solr.common.cloud.Replica) 232
Slice (org.apache.solr.common.cloud.Slice) 140
DocCollection (org.apache.solr.common.cloud.DocCollection) 86
ArrayList (java.util.ArrayList) 81
ClusterState (org.apache.solr.common.cloud.ClusterState) 67
HashMap (java.util.HashMap) 60
SolrException (org.apache.solr.common.SolrException) 53
ZkStateReader (org.apache.solr.common.cloud.ZkStateReader) 50
Test (org.junit.Test) 50
Map (java.util.Map) 45
HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient) 37
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams) 35
JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner) 29
NamedList (org.apache.solr.common.util.NamedList) 28
SolrQuery (org.apache.solr.client.solrj.SolrQuery) 26
IOException (java.io.IOException) 25
SolrInputDocument (org.apache.solr.common.SolrInputDocument) 25
ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps) 25
HashSet (java.util.HashSet) 24
List (java.util.List) 20