Example 1 with Replica

Use of org.apache.solr.common.cloud.Replica in project titan by thinkaurelius.

From class SolrIndex, method waitForRecoveriesToFinish:

/**
 * Wait for all the collection shards to be ready.
 */
private static void waitForRecoveriesToFinish(CloudSolrClient server, String collection) throws KeeperException, InterruptedException {
    ZkStateReader zkStateReader = server.getZkStateReader();
    try {
        boolean cont = true;
        while (cont) {
            boolean sawLiveRecovering = false;
            zkStateReader.updateClusterState(true);
            ClusterState clusterState = zkStateReader.getClusterState();
            Map<String, Slice> slices = clusterState.getSlicesMap(collection);
            Preconditions.checkNotNull(slices, "Could not find collection: " + collection);
            // remove SYNC state per: http://tinyurl.com/pag6rwt
            for (Map.Entry<String, Slice> entry : slices.entrySet()) {
                Map<String, Replica> shards = entry.getValue().getReplicasMap();
                for (Map.Entry<String, Replica> shard : shards.entrySet()) {
                    String state = shard.getValue().getStr(ZkStateReader.STATE_PROP);
                    // compare against the enum's string form; String.equals(enum) is always false
                    if ((state.equals(Replica.State.RECOVERING.toString()) || state.equals(Replica.State.DOWN.toString())) && clusterState.liveNodesContain(shard.getValue().getStr(ZkStateReader.NODE_NAME_PROP))) {
                        sawLiveRecovering = true;
                    }
                }
            }
            if (!sawLiveRecovering) {
                cont = false;
            } else {
                Thread.sleep(1000);
            }
        }
    } finally {
        logger.info("Exiting solr wait");
    }
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica)
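
For context, a hedged sketch of how a caller might drive this helper. The zkHost, collection name, and enclosing class are placeholders, and the single-argument CloudSolrClient constructor is the SolrJ 5.x-era API these Titan/Atlas examples compile against:

import org.apache.solr.client.solrj.impl.CloudSolrClient;

public class SolrRecoveryWaitDemo {

    public static void main(String[] args) throws Exception {
        // connect to the SolrCloud cluster via its ZooKeeper ensemble
        try (CloudSolrClient client = new CloudSolrClient("localhost:9983")) {
            client.setDefaultCollection("myCollection");
            // eagerly establish the ZooKeeper session so getZkStateReader() works
            client.connect();
            // assumes the private helper above is reachable (e.g. same class)
            waitForRecoveriesToFinish(client, "myCollection");
        }
    }
}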

Example 2 with Replica

Use of org.apache.solr.common.cloud.Replica in project incubator-atlas by apache.

From class Solr5Index, method waitForRecoveriesToFinish:

/**
 * Wait for all the collection shards to be ready.
 */
private static void waitForRecoveriesToFinish(CloudSolrClient server, String collection) throws KeeperException, InterruptedException {
    ZkStateReader zkStateReader = server.getZkStateReader();
    try {
        boolean cont = true;
        while (cont) {
            boolean sawLiveRecovering = false;
            zkStateReader.updateClusterState();
            ClusterState clusterState = zkStateReader.getClusterState();
            Map<String, Slice> slices = clusterState.getSlicesMap(collection);
            Preconditions.checkNotNull(slices, "Could not find collection: " + collection);
            for (Map.Entry<String, Slice> entry : slices.entrySet()) {
                Map<String, Replica> shards = entry.getValue().getReplicasMap();
                for (Map.Entry<String, Replica> shard : shards.entrySet()) {
                    String state = shard.getValue().getStr(ZkStateReader.STATE_PROP);
                    if ((state.equals(Replica.State.RECOVERING.toString()) || state.equals(Replica.State.DOWN.toString())) && clusterState.liveNodesContain(shard.getValue().getStr(ZkStateReader.NODE_NAME_PROP))) {
                        sawLiveRecovering = true;
                    }
                }
            }
            if (!sawLiveRecovering) {
                cont = false;
            } else {
                Thread.sleep(1000);
            }
        }
    } finally {
        logger.info("Exiting solr wait");
    }
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) Map(java.util.Map) HashMap(java.util.HashMap) Replica(org.apache.solr.common.cloud.Replica)
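
Newer SolrJ versions replace this hand-rolled polling loop with a predicate-based wait on ZkStateReader; Example 3 below uses exactly that API. A minimal sketch of the equivalent check, where the class name, the 60-second timeout, and the 1-shard/3-replica expectation are all placeholders:

import java.util.concurrent.TimeUnit;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.ZkStateReader;

public final class ActiveCollectionWait {

    // Block until every expected replica of the collection is ACTIVE on a live node.
    public static void waitUntilFullyActive(ZkStateReader zkStateReader, String collection)
            throws Exception {
        zkStateReader.waitForState(collection, 60, TimeUnit.SECONDS,
                (liveNodes, docCollection) ->
                        DocCollection.isFullyActive(liveNodes, docCollection, 1, 3));
    }
}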

Example 3 with Replica

Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

From class DistributedVersionInfoTest, method testReplicaVersionHandling:

@Test
public void testReplicaVersionHandling() throws Exception {
    final String shardId = "shard1";
    CollectionAdminRequest.createCollection(COLLECTION, "conf", 1, 3).processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT);
    final ZkStateReader stateReader = cluster.getSolrClient().getZkStateReader();
    stateReader.waitForState(COLLECTION, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> DocCollection.isFullyActive(n, c, 1, 3));
    final Replica leader = stateReader.getLeaderRetry(COLLECTION, shardId);
    // start by reloading the empty collection so we try to calculate the max from an empty index
    reloadCollection(leader, COLLECTION);
    sendDoc(1);
    cluster.getSolrClient().commit(COLLECTION);
    // verify doc is on the leader and replica
    final List<Replica> notLeaders = stateReader.getClusterState().getCollection(COLLECTION)
            .getReplicas().stream()
            .filter(r -> !r.getCoreName().equals(leader.getCoreName()))
            .collect(Collectors.toList());
    assertDocsExistInAllReplicas(leader, notLeaders, COLLECTION, 1, 1, null);
    // get max version from the leader and replica
    Replica replica = notLeaders.get(0);
    Long maxOnLeader = getMaxVersionFromIndex(leader);
    Long maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version: " + maxOnLeader, maxOnLeader, maxOnReplica);
    // send the same doc but with a lower version than the max in the index
    try (SolrClient client = getHttpSolrClient(replica.getCoreUrl())) {
        String docId = String.valueOf(1);
        SolrInputDocument doc = new SolrInputDocument();
        doc.setField("id", docId);
        // bad version!!!
        doc.setField("_version_", maxOnReplica - 1);
        // simulate what the leader does when sending a doc to a replica
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(DISTRIB_UPDATE_PARAM, DistributedUpdateProcessor.DistribPhase.FROMLEADER.toString());
        params.set(DISTRIB_FROM, leader.getCoreUrl());
        UpdateRequest req = new UpdateRequest();
        req.setParams(params);
        req.add(doc);
        log.info("Sending doc with out-of-date version (" + (maxOnReplica - 1) + ") document directly to replica");
        client.request(req);
        client.commit();
        Long docVersion = getVersionFromIndex(replica, docId);
        assertEquals("older version should have been thrown away", maxOnReplica, docVersion);
    }
    reloadCollection(leader, COLLECTION);
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version after reload", maxOnLeader, maxOnReplica);
    // now start sending docs while collection is reloading
    delQ("*:*");
    commit();
    final Set<Integer> deletedDocs = new HashSet<>();
    final AtomicInteger docsSent = new AtomicInteger(0);
    final Random rand = new Random(5150);
    Thread docSenderThread = new Thread() {

        public void run() {
            // brief delay before sending docs
            try {
                Thread.sleep(rand.nextInt(30) + 1);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 1000; i++) {
                if (i % (rand.nextInt(20) + 1) == 0) {
                    try {
                        Thread.sleep(rand.nextInt(50) + 1);
                    } catch (InterruptedException e) {
                    }
                }
                int docId = i + 1;
                try {
                    sendDoc(docId);
                    docsSent.incrementAndGet();
                } catch (Exception e) {
                }
            }
        }
    };
    Thread reloaderThread = new Thread() {

        public void run() {
            try {
                Thread.sleep(rand.nextInt(300) + 1);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 3; i++) {
                try {
                    reloadCollection(leader, COLLECTION);
                } catch (Exception e) {
                }
                try {
                    Thread.sleep(rand.nextInt(300) + 300);
                } catch (InterruptedException e) {
                }
            }
        }
    };
    Thread deleteThread = new Thread() {

        public void run() {
            // brief delay before deleting docs
            try {
                Thread.sleep(500);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 200; i++) {
                try {
                    Thread.sleep(rand.nextInt(50) + 1);
                } catch (InterruptedException e) {
                }
                int ds = docsSent.get();
                if (ds > 0) {
                    int docToDelete = rand.nextInt(ds) + 1;
                    if (!deletedDocs.contains(docToDelete)) {
                        delI(String.valueOf(docToDelete));
                        deletedDocs.add(docToDelete);
                    }
                }
            }
        }
    };
    Thread committerThread = new Thread() {

        public void run() {
            try {
                Thread.sleep(rand.nextInt(200) + 1);
            } catch (InterruptedException e) {
            }
            for (int i = 0; i < 20; i++) {
                try {
                    cluster.getSolrClient().commit(COLLECTION);
                } catch (Exception e) {
                }
                try {
                    Thread.sleep(rand.nextInt(100) + 100);
                } catch (InterruptedException e) {
                }
            }
        }
    };
    docSenderThread.start();
    reloaderThread.start();
    committerThread.start();
    deleteThread.start();
    docSenderThread.join();
    reloaderThread.join();
    committerThread.join();
    deleteThread.join();
    cluster.getSolrClient().commit(COLLECTION);
    log.info("Total of " + deletedDocs.size() + " docs deleted");
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version before reload", maxOnLeader, maxOnReplica);
    reloadCollection(leader, COLLECTION);
    maxOnLeader = getMaxVersionFromIndex(leader);
    maxOnReplica = getMaxVersionFromIndex(replica);
    assertEquals("leader and replica should have same max version after reload", maxOnLeader, maxOnReplica);
    assertDocsExistInAllReplicas(leader, notLeaders, COLLECTION, 1, 1000, deletedDocs);
}
Also used : BeforeClass(org.junit.BeforeClass) Slow(org.apache.lucene.util.LuceneTestCase.Slow) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrDocumentList(org.apache.solr.common.SolrDocumentList) CoreAdminResponse(org.apache.solr.client.solrj.response.CoreAdminResponse) LoggerFactory(org.slf4j.LoggerFactory) Random(java.util.Random) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) SolrServerException(org.apache.solr.client.solrj.SolrServerException) QueryRequest(org.apache.solr.client.solrj.request.QueryRequest) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) DISTRIB_UPDATE_PARAM(org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM) SuppressSSL(org.apache.solr.SolrTestCaseJ4.SuppressSSL) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Logger(org.slf4j.Logger) JSONTestUtil(org.apache.solr.JSONTestUtil) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) MethodHandles(java.lang.invoke.MethodHandles) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) Set(java.util.Set) IOException(java.io.IOException) DistributedUpdateProcessor(org.apache.solr.update.processor.DistributedUpdateProcessor) Test(org.junit.Test) Collectors(java.util.stream.Collectors) Replica(org.apache.solr.common.cloud.Replica) NamedList(org.apache.solr.common.util.NamedList) SolrClient(org.apache.solr.client.solrj.SolrClient) TimeUnit(java.util.concurrent.TimeUnit) SolrDocument(org.apache.solr.common.SolrDocument) List(java.util.List) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrQuery(org.apache.solr.client.solrj.SolrQuery) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) DISTRIB_FROM(org.apache.solr.update.processor.DistributedUpdateProcessor.DISTRIB_FROM) Collections(java.util.Collections) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) CollectionAdminRequest(org.apache.solr.client.solrj.request.CollectionAdminRequest) SolrInputDocument(org.apache.solr.common.SolrInputDocument)
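
The heart of this test is the way it impersonates leader-to-replica traffic: by marking the request FROMLEADER, the replica applies the update locally and enforces _version_ ordering instead of forwarding it. Pulled out on its own, the pattern looks like this (a sketch; the class name, leaderCoreUrl, and version value are placeholders):

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.update.processor.DistributedUpdateProcessor;

import static org.apache.solr.update.processor.DistributedUpdateProcessor.DISTRIB_FROM;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;

public final class FromLeaderUpdateExample {

    // Send a doc directly to a replica while pretending to be its leader, so the
    // replica applies (rather than forwards) the update and checks _version_ ordering.
    public static void sendAsLeader(SolrClient replicaClient, String leaderCoreUrl,
                                    String docId, long version) throws Exception {
        SolrInputDocument doc = new SolrInputDocument();
        doc.setField("id", docId);
        doc.setField("_version_", version); // stale versions are silently dropped

        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(DISTRIB_UPDATE_PARAM, DistributedUpdateProcessor.DistribPhase.FROMLEADER.toString());
        params.set(DISTRIB_FROM, leaderCoreUrl);

        UpdateRequest req = new UpdateRequest();
        req.setParams(params);
        req.add(doc);
        replicaClient.request(req);
    }
}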

Example 4 with Replica

Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

From class HttpPartitionTest, method testMinRf:

protected void testMinRf() throws Exception {
    // create a collection that has 1 shard and 3 replicas
    String testCollectionName = "collMinRf_1x3";
    createCollection(testCollectionName, 1, 3, 1);
    cloudClient.setDefaultCollection(testCollectionName);
    sendDoc(1, 2);
    List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    assertTrue("Expected 2 non-leader replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), notLeaders.size() == 2);
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
    // Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
    SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();
    // indexing during a partition
    int achievedRf = sendDoc(2, 2);
    assertEquals("Unexpected achieved replication factor", 2, achievedRf);
    Thread.sleep(sleepMsBeforeHealPartition);
    // Verify that the partitioned replica is DOWN
    ZkStateReader zkr = cloudClient.getZkStateReader();
    // force the state to be fresh
    zkr.forceUpdateCollection(testCollectionName);
    ClusterState cs = zkr.getClusterState();
    Collection<Slice> slices = cs.getActiveSlices(testCollectionName);
    Slice slice = slices.iterator().next();
    Replica partitionedReplica = slice.getReplica(notLeaders.get(0).getName());
    assertEquals("The partitioned replica did not get marked down", Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));
    proxy0.reopen();
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    // Since minRf is achieved, we expect recovery, so we expect seeing 2 documents
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
    // Now introduce a network partition between the leader and both of its replicas, so a minRf of 2 is NOT achieved
    proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();
    SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
    proxy1.close();
    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 1, achievedRf);
    Thread.sleep(sleepMsBeforeHealPartition);
    // Verify that the partitioned replicas are NOT DOWN since minRf wasn't achieved
    ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 1);
    proxy0.reopen();
    proxy1.reopen();
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    // Check that doc 3 is on the leader but not on the notLeaders
    Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
    try (HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
        assertDocExists(leaderSolr, testCollectionName, "3");
    }
    for (Replica notLeader : notLeaders) {
        try (HttpSolrClient notLeaderSolr = getHttpSolrClient(notLeader, testCollectionName)) {
            assertDocNotExists(notLeaderSolr, testCollectionName, "3");
        }
    }
    // Retry sending doc 3
    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 3, achievedRf);
    // Now doc 3 should be on all replicas
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica)
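
Example 4 compares the raw STATE_PROP string against Replica.State.DOWN.toString(). The Replica class in this codebase also exposes the parsed enum, which avoids string comparison altogether; a minimal sketch (the helper and class name are hypothetical):

import org.apache.solr.common.cloud.Replica;

public final class ReplicaStateCheck {

    // Enum-based equivalent of the string comparison in the assert above.
    public static boolean isMarkedDown(Replica replica) {
        return replica.getState() == Replica.State.DOWN;
    }
}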

Example 5 with Replica

Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.

From class HttpPartitionTest, method testRf2:

protected void testRf2() throws Exception {
    // create a collection that has 1 shard but 2 replicas
    String testCollectionName = "c8n_1x2";
    createCollectionRetry(testCollectionName, 1, 2, 1);
    cloudClient.setDefaultCollection(testCollectionName);
    sendDoc(1);
    Replica notLeader = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive).get(0);
    // ok, now introduce a network partition between the leader and the replica
    SocketProxy proxy = getProxyForReplica(notLeader);
    proxy.close();
    // indexing during a partition
    sendDoc(2);
    // Have the partition last at least 1 sec. While this gives the impression
    // that recovery is timing related, it is really only to give the state time
    // to be written to ZK before the test completes; without this brief pause,
    // the test finishes so quickly that the recovery process never kicks in.
    Thread.sleep(sleepMsBeforeHealPartition);
    proxy.reopen();
    List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
    sendDoc(3);
    // sent 3 docs in so far, verify they are on the leader and replica
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
    // Get the max version from the replica core to make sure it gets updated after recovery (see SOLR-7625)
    JettySolrRunner replicaJetty = getJettyOnPort(getReplicaPort(notLeader));
    CoreContainer coreContainer = replicaJetty.getCoreContainer();
    ZkCoreNodeProps replicaCoreNodeProps = new ZkCoreNodeProps(notLeader);
    String coreName = replicaCoreNodeProps.getCoreName();
    Long maxVersionBefore = null;
    try (SolrCore core = coreContainer.getCore(coreName)) {
        assertNotNull("Core '" + coreName + "' not found for replica: " + notLeader.getName(), core);
        UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
        maxVersionBefore = ulog.getCurrentMaxVersion();
    }
    assertNotNull("max version bucket seed not set for core " + coreName, maxVersionBefore);
    log.info("Looked up max version bucket seed " + maxVersionBefore + " for core " + coreName);
    // now up the stakes and do more docs
    int numDocs = TEST_NIGHTLY ? 1000 : 100;
    boolean hasPartition = false;
    for (int d = 0; d < numDocs; d++) {
        // create / restore the partition every 10 docs
        if (d % 10 == 0) {
            if (hasPartition) {
                proxy.reopen();
                hasPartition = false;
            } else {
                if (d >= 10) {
                    proxy.close();
                    hasPartition = true;
                    Thread.sleep(sleepMsBeforeHealPartition);
                }
            }
        }
        // 4 is offset as we've already indexed 1-3
        sendDoc(d + 4);
    }
    // restore connectivity if lost
    if (hasPartition) {
        proxy.reopen();
    }
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
    try (SolrCore core = coreContainer.getCore(coreName)) {
        assertNotNull("Core '" + coreName + "' not found for replica: " + notLeader.getName(), core);
        Long currentMaxVersion = core.getUpdateHandler().getUpdateLog().getCurrentMaxVersion();
        log.info("After recovery, looked up NEW max version bucket seed " + currentMaxVersion + " for core " + coreName + ", was: " + maxVersionBefore);
        assertTrue("max version bucket seed not updated after recovery!", currentMaxVersion > maxVersionBefore);
    }
    // verify all docs received
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, numDocs + 3);
    log.info("testRf2 succeeded ... deleting the " + testCollectionName + " collection");
    // try to clean up
    attemptCollectionDelete(cloudClient, testCollectionName);
}
Also used : CoreContainer(org.apache.solr.core.CoreContainer) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) SolrCore(org.apache.solr.core.SolrCore) UpdateLog(org.apache.solr.update.UpdateLog) Replica(org.apache.solr.common.cloud.Replica)
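
The max-version lookup appears twice in this test; it factors naturally into a helper built from the same calls the example uses (a sketch; the class name is hypothetical):

import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.update.UpdateLog;

public final class MaxVersionLookup {

    // Open the replica's core, read the update log's current max version bucket
    // seed, and release the core; SolrCore is reference-counted, and the
    // try-with-resources close() drops the reference taken by getCore().
    public static Long currentMaxVersion(CoreContainer coreContainer, String coreName) {
        try (SolrCore core = coreContainer.getCore(coreName)) {
            if (core == null) {
                return null; // core is not hosted by this container
            }
            UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
            return ulog.getCurrentMaxVersion();
        }
    }
}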

Aggregations

Replica (org.apache.solr.common.cloud.Replica): 231
Slice (org.apache.solr.common.cloud.Slice): 139
DocCollection (org.apache.solr.common.cloud.DocCollection): 86
ArrayList (java.util.ArrayList): 81
ClusterState (org.apache.solr.common.cloud.ClusterState): 66
HashMap (java.util.HashMap): 59
SolrException (org.apache.solr.common.SolrException): 53
Test (org.junit.Test): 50
ZkStateReader (org.apache.solr.common.cloud.ZkStateReader): 49
Map (java.util.Map): 44
HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 37
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 35
JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner): 29
NamedList (org.apache.solr.common.util.NamedList): 28
SolrQuery (org.apache.solr.client.solrj.SolrQuery): 26
IOException (java.io.IOException): 25
SolrInputDocument (org.apache.solr.common.SolrInputDocument): 25
ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps): 25
HashSet (java.util.HashSet): 24
List (java.util.List): 20