
Example 66 with Slice

Use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

From the class HttpPartitionTest, method testMinRf.

protected void testMinRf() throws Exception {
    // create a collection that has 1 shard and 3 replicas
    String testCollectionName = "collMinRf_1x3";
    createCollection(testCollectionName, 1, 3, 1);
    cloudClient.setDefaultCollection(testCollectionName);
    sendDoc(1, 2);
    List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    assertTrue("Expected 2 non-leader replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), notLeaders.size() == 2);
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 1);
    // Now introduce a network partition between the leader and 1 replica, so a minRf of 2 is still achieved
    SocketProxy proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();
    // indexing during a partition
    int achievedRf = sendDoc(2, 2);
    assertEquals("Unexpected achieved replication factor", 2, achievedRf);
    Thread.sleep(sleepMsBeforeHealPartition);
    // Verify that the partitioned replica is DOWN
    ZkStateReader zkr = cloudClient.getZkStateReader();
    // force the state to be fresh
    zkr.forceUpdateCollection(testCollectionName);
    ClusterState cs = zkr.getClusterState();
    Collection<Slice> slices = cs.getActiveSlices(testCollectionName);
    Slice slice = slices.iterator().next();
    Replica partitionedReplica = slice.getReplica(notLeaders.get(0).getName());
    assertEquals("The partitioned replica did not get marked down", Replica.State.DOWN.toString(), partitionedReplica.getStr(ZkStateReader.STATE_PROP));
    proxy0.reopen();
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    // Since minRf is achieved, we expect recovery, so we expect seeing 2 documents
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 2);
    // Now introduce a network partition between the leader and both of its replicas, so a minRf of 2 is NOT achieved
    proxy0 = getProxyForReplica(notLeaders.get(0));
    proxy0.close();
    SocketProxy proxy1 = getProxyForReplica(notLeaders.get(1));
    proxy1.close();
    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 1, achievedRf);
    Thread.sleep(sleepMsBeforeHealPartition);
    // Verify that the partitioned replicas are NOT DOWN since minRf wasn't achieved
    ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, 1);
    proxy0.reopen();
    proxy1.reopen();
    notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 3, maxWaitSecsToSeeAllActive);
    // Check that doc 3 is on the leader but not on the notLeaders
    Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1", 10000);
    try (HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
        assertDocExists(leaderSolr, testCollectionName, "3");
    }
    for (Replica notLeader : notLeaders) {
        try (HttpSolrClient notLeaderSolr = getHttpSolrClient(notLeader, testCollectionName)) {
            assertDocNotExists(notLeaderSolr, testCollectionName, "3");
        }
    }
    // Retry sending doc 3
    achievedRf = sendDoc(3, 2);
    assertEquals("Unexpected achieved replication factor", 3, achievedRf);
    // Now doc 3 should be on all replicas
    assertDocsExistInAllReplicas(notLeaders, testCollectionName, 1, 3);
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica)
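
The Slice-specific part of this test is the verification step in the middle: force a fresh view of the collection, walk ClusterState down to the shard's Slice, and read the partitioned replica's state. A condensed, standalone version of that lookup might look like the sketch below; the helper name readReplicaState and its parameters are illustrative additions, not part of the test.

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;

/** Returns the current state of one replica, reading a fresh view of the collection from ZooKeeper. */
static Replica.State readReplicaState(CloudSolrClient cloudClient, String collection,
        String shardName, String replicaName) throws Exception {
    ZkStateReader zkr = cloudClient.getZkStateReader();
    // refresh the cached state for just this collection before reading it
    zkr.forceUpdateCollection(collection);
    ClusterState cs = zkr.getClusterState();
    Slice slice = cs.getCollection(collection).getSlice(shardName);
    // replicaName is the replica's coreNodeName, e.g. "core_node2"
    Replica replica = (slice == null) ? null : slice.getReplica(replicaName);
    return (replica == null) ? null : replica.getState();
}

With such a helper, the assertion in the test reduces to comparing the returned value against Replica.State.DOWN.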

Example 67 with Slice

Use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

From the class DeleteInactiveReplicaTest, method deleteInactiveReplicaTest.

@Test
public void deleteInactiveReplicaTest() throws Exception {
    String collectionName = "delDeadColl";
    int replicationFactor = 2;
    int numShards = 2;
    int maxShardsPerNode = ((((numShards + 1) * replicationFactor) / cluster.getJettySolrRunners().size())) + 1;
    CollectionAdminRequest.createCollection(collectionName, "conf", numShards, replicationFactor).setMaxShardsPerNode(maxShardsPerNode).process(cluster.getSolrClient());
    waitForState("Expected a cluster of 2 shards and 2 replicas", collectionName, (n, c) -> {
        return DocCollection.isFullyActive(n, c, numShards, replicationFactor);
    });
    DocCollection collectionState = getCollectionState(collectionName);
    Slice shard = getRandomShard(collectionState);
    Replica replica = getRandomReplica(shard);
    JettySolrRunner jetty = cluster.getReplicaJetty(replica);
    cluster.stopJettySolrRunner(jetty);
    waitForState("Expected replica " + replica.getName() + " on down node to be removed from cluster state", collectionName, (n, c) -> {
        Replica r = c.getReplica(replica.getCoreName());
        return r == null || r.getState() != Replica.State.ACTIVE;
    });
    log.info("Removing replica {}/{} ", shard.getName(), replica.getName());
    CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName()).process(cluster.getSolrClient());
    waitForState("Expected deleted replica " + replica.getName() + " to be removed from cluster state", collectionName, (n, c) -> {
        return c.getReplica(replica.getCoreName()) == null;
    });
    cluster.startJettySolrRunner(jetty);
    log.info("restarted jetty");
    CoreContainer cc = jetty.getCoreContainer();
    CoreContainer.CoreLoadFailure loadFailure = cc.getCoreInitFailures().get(replica.getCoreName());
    assertNotNull("Deleted core was still loaded!", loadFailure);
    assertTrue("Unexpected load failure message: " + loadFailure.exception.getMessage(), loadFailure.exception.getMessage().contains("does not exist in shard"));
    // Check that we can't create a core with no coreNodeName
    try (SolrClient queryClient = getHttpSolrClient(jetty.getBaseUrl().toString())) {
        Exception e = expectThrows(Exception.class, () -> {
            CoreAdminRequest.Create createRequest = new CoreAdminRequest.Create();
            createRequest.setCoreName("testcore");
            createRequest.setCollection(collectionName);
            createRequest.setShardId("shard2");
            queryClient.request(createRequest);
        });
        assertTrue("Unexpected error message: " + e.getMessage(), e.getMessage().contains("coreNodeName missing"));
    }
}
Also used : JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) Replica(org.apache.solr.common.cloud.Replica) CoreContainer(org.apache.solr.core.CoreContainer) SolrClient(org.apache.solr.client.solrj.SolrClient) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) Test(org.junit.Test)
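
This test stops a replica's Jetty and waits for that replica to drop out of the ACTIVE state before deleting it. A generic way to spot replicas whose hosting node is no longer live, using the same DocCollection -> Slice -> Replica traversal, could look like the following sketch; the helper name replicasOnDownNodes is an illustrative assumption.

import java.util.ArrayList;
import java.util.List;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;

/** Collects "shard/replica" names whose hosting node is not in the live nodes set. */
static List<String> replicasOnDownNodes(ClusterState clusterState, String collectionName) {
    List<String> downReplicas = new ArrayList<>();
    DocCollection collection = clusterState.getCollection(collectionName);
    for (Slice slice : collection.getSlices()) {
        for (Replica replica : slice.getReplicas()) {
            // a replica hosted on a node that is no longer live cannot be serving
            if (!clusterState.getLiveNodes().contains(replica.getNodeName())) {
                downReplicas.add(slice.getName() + "/" + replica.getName());
            }
        }
    }
    return downReplicas;
}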

Example 68 with Slice

Use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

From the class DeleteReplicaTest, method deleteLiveReplicaTest.

@Test
public void deleteLiveReplicaTest() throws Exception {
    final String collectionName = "delLiveColl";
    CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2).process(cluster.getSolrClient());
    DocCollection state = getCollectionState(collectionName);
    Slice shard = getRandomShard(state);
    Replica replica = getRandomReplica(shard, (r) -> r.getState() == Replica.State.ACTIVE);
    CoreStatus coreStatus = getCoreStatus(replica);
    Path dataDir = Paths.get(coreStatus.getDataDirectory());
    Exception e = expectThrows(Exception.class, () -> {
        CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName()).setOnlyIfDown(true).process(cluster.getSolrClient());
    });
    assertTrue("Unexpected error message: " + e.getMessage(), e.getMessage().contains("state is 'active'"));
    assertTrue("Data directory for " + replica.getName() + " should not have been deleted", Files.exists(dataDir));
    CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName()).process(cluster.getSolrClient());
    waitForState("Expected replica " + replica.getName() + " to have been removed", collectionName, (n, c) -> {
        Slice testShard = c.getSlice(shard.getName());
        return testShard.getReplica(replica.getName()) == null;
    });
    assertFalse("Data directory for " + replica.getName() + " should have been removed", Files.exists(dataDir));
}
Also used : Path(java.nio.file.Path) CoreStatus(org.apache.solr.client.solrj.request.CoreStatus) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException) Test(org.junit.Test)
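
The waitForState lambda above boils down to checking that the replica's name has disappeared from its Slice. Pulled out as a standalone predicate (the helper name replicaRemoved is illustrative), that check might read:

import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;

/** True once the named replica no longer appears in the named shard of the collection state. */
static boolean replicaRemoved(DocCollection collectionState, String shardName, String replicaName) {
    Slice slice = collectionState.getSlice(shardName);
    return slice == null || slice.getReplica(replicaName) == null;
}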

Example 69 with Slice

Use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

From the class CollectionsAPISolrJTest, method testBalanceShardUnique.

@Test
public void testBalanceShardUnique() throws IOException, SolrServerException, KeeperException, InterruptedException {
    final String collection = "balancedProperties";
    CollectionAdminRequest.createCollection(collection, "conf", 2, 2).process(cluster.getSolrClient());
    CollectionAdminResponse response = CollectionAdminRequest.balanceReplicaProperty(collection, "preferredLeader").process(cluster.getSolrClient());
    assertEquals(0, response.getStatus());
    waitForState("Expecting 'preferredleader' property to be balanced across all shards", collection, (n, c) -> {
        for (Slice slice : c) {
            int count = 0;
            for (Replica replica : slice) {
                if ("true".equals(replica.getStr("property.preferredleader")))
                    count += 1;
            }
            if (count != 1)
                return false;
        }
        return true;
    });
}
Also used : CollectionAdminResponse(org.apache.solr.client.solrj.response.CollectionAdminResponse) Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica) Test(org.junit.Test)
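
The lambda passed to waitForState is the Slice-centric piece here: it walks every Slice of the DocCollection and counts replicas carrying property.preferredleader. Extracted into a plain helper (the name preferredLeaderBalanced is illustrative), the same check could be written as:

import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;

/** True if every slice carries exactly one replica flagged with property.preferredleader=true. */
static boolean preferredLeaderBalanced(DocCollection collection) {
    // DocCollection is Iterable<Slice> and Slice is Iterable<Replica>, as the lambda above relies on
    for (Slice slice : collection) {
        int count = 0;
        for (Replica replica : slice) {
            if ("true".equals(replica.getStr("property.preferredleader"))) {
                count++;
            }
        }
        if (count != 1) {
            return false;
        }
    }
    return true;
}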

Example 70 with Slice

Use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

From the class TestMiniSolrCloudCluster, method testStopAllStartAll.

@Test
public void testStopAllStartAll() throws Exception {
    final String collectionName = "testStopAllStartAllCollection";
    final MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
    try {
        assertNotNull(miniCluster.getZkServer());
        List<JettySolrRunner> jettys = miniCluster.getJettySolrRunners();
        assertEquals(NUM_SERVERS, jettys.size());
        for (JettySolrRunner jetty : jettys) {
            assertTrue(jetty.isRunning());
        }
        createCollection(miniCluster, collectionName, null, null, Boolean.TRUE, null);
        final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();
        cloudSolrClient.setDefaultCollection(collectionName);
        final SolrQuery query = new SolrQuery("*:*");
        final SolrInputDocument doc = new SolrInputDocument();
        try (SolrZkClient zkClient = new SolrZkClient(miniCluster.getZkServer().getZkAddress(), AbstractZkTestCase.TIMEOUT, AbstractZkTestCase.TIMEOUT, null);
            ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
            zkStateReader.createClusterStateWatchersAndUpdate();
            AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
            // modify collection
            final int numDocs = 1 + random().nextInt(10);
            for (int ii = 1; ii <= numDocs; ++ii) {
                doc.setField("id", "" + ii);
                cloudSolrClient.add(doc);
                if (ii * 2 == numDocs)
                    cloudSolrClient.commit();
            }
            cloudSolrClient.commit();
            // query collection
            {
                final QueryResponse rsp = cloudSolrClient.query(query);
                assertEquals(numDocs, rsp.getResults().getNumFound());
            }
            // the test itself
            zkStateReader.forceUpdateCollection(collectionName);
            final ClusterState clusterState = zkStateReader.getClusterState();
            final HashSet<Integer> leaderIndices = new HashSet<Integer>();
            final HashSet<Integer> followerIndices = new HashSet<Integer>();
            {
                final HashMap<String, Boolean> shardLeaderMap = new HashMap<String, Boolean>();
                for (final Slice slice : clusterState.getSlices(collectionName)) {
                    for (final Replica replica : slice.getReplicas()) {
                        shardLeaderMap.put(replica.getNodeName().replace("_solr", "/solr"), Boolean.FALSE);
                    }
                    shardLeaderMap.put(slice.getLeader().getNodeName().replace("_solr", "/solr"), Boolean.TRUE);
                }
                for (int ii = 0; ii < jettys.size(); ++ii) {
                    final URL jettyBaseUrl = jettys.get(ii).getBaseUrl();
                    final String jettyBaseUrlString = jettyBaseUrl.toString().substring((jettyBaseUrl.getProtocol() + "://").length());
                    final Boolean isLeader = shardLeaderMap.get(jettyBaseUrlString);
                    if (Boolean.TRUE.equals(isLeader)) {
                        leaderIndices.add(new Integer(ii));
                    } else if (Boolean.FALSE.equals(isLeader)) {
                        followerIndices.add(new Integer(ii));
                    }
                // else neither leader nor follower i.e. node without a replica (for our collection)
                }
            }
            final List<Integer> leaderIndicesList = new ArrayList<Integer>(leaderIndices);
            final List<Integer> followerIndicesList = new ArrayList<Integer>(followerIndices);
            // first stop the followers (in no particular order)
            Collections.shuffle(followerIndicesList, random());
            for (Integer ii : followerIndicesList) {
                if (!leaderIndices.contains(ii)) {
                    miniCluster.stopJettySolrRunner(jettys.get(ii.intValue()));
                }
            }
            // then stop the leaders (again in no particular order)
            Collections.shuffle(leaderIndicesList, random());
            for (Integer ii : leaderIndicesList) {
                miniCluster.stopJettySolrRunner(jettys.get(ii.intValue()));
            }
            // calculate restart order
            final List<Integer> restartIndicesList = new ArrayList<Integer>();
            Collections.shuffle(leaderIndicesList, random());
            restartIndicesList.addAll(leaderIndicesList);
            Collections.shuffle(followerIndicesList, random());
            restartIndicesList.addAll(followerIndicesList);
            if (random().nextBoolean())
                Collections.shuffle(restartIndicesList, random());
            // and then restart jettys in that order
            for (Integer ii : restartIndicesList) {
                final JettySolrRunner jetty = jettys.get(ii.intValue());
                if (!jetty.isRunning()) {
                    miniCluster.startJettySolrRunner(jetty);
                    assertTrue(jetty.isRunning());
                }
            }
            AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
            zkStateReader.forceUpdateCollection(collectionName);
            // re-query collection
            {
                final QueryResponse rsp = cloudSolrClient.query(query);
                assertEquals(numDocs, rsp.getResults().getNumFound());
            }
        }
    } finally {
        miniCluster.shutdown();
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) HashMap(java.util.HashMap) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) ArrayList(java.util.ArrayList) SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) Replica(org.apache.solr.common.cloud.Replica) SolrQuery(org.apache.solr.client.solrj.SolrQuery) URL(java.net.URL) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) SolrInputDocument(org.apache.solr.common.SolrInputDocument) Slice(org.apache.solr.common.cloud.Slice) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) HashSet(java.util.HashSet) Test(org.junit.Test)
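
The heart of this test's Slice usage is building a node-to-leadership map from ClusterState so that followers can be stopped before leaders. A simplified variant of that mapping, leaving out the test's Jetty-URL matching and "_solr" suffix handling (the helper name nodeLeadership is illustrative), might look like:

import java.util.HashMap;
import java.util.Map;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;

/** Maps each node hosting a replica of the collection to whether it currently leads at least one shard. */
static Map<String, Boolean> nodeLeadership(ClusterState clusterState, String collectionName) {
    Map<String, Boolean> nodeIsLeader = new HashMap<>();
    for (Slice slice : clusterState.getSlices(collectionName)) {
        for (Replica replica : slice.getReplicas()) {
            // mark every hosting node as a follower unless it is already known to lead some shard
            nodeIsLeader.putIfAbsent(replica.getNodeName(), Boolean.FALSE);
        }
        Replica leader = slice.getLeader();
        // a shard can momentarily have no elected leader
        if (leader != null) {
            nodeIsLeader.put(leader.getNodeName(), Boolean.TRUE);
        }
    }
    return nodeIsLeader;
}

Unlike the test, which keys its map by Jetty base URL, this sketch keeps the raw node names; nodes absent from the map host no replica of the collection at all.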

Aggregations

Slice (org.apache.solr.common.cloud.Slice): 220
Replica (org.apache.solr.common.cloud.Replica): 142
DocCollection (org.apache.solr.common.cloud.DocCollection): 121
ClusterState (org.apache.solr.common.cloud.ClusterState): 81
ArrayList (java.util.ArrayList): 79
HashMap (java.util.HashMap): 67
ZkStateReader (org.apache.solr.common.cloud.ZkStateReader): 50
SolrException (org.apache.solr.common.SolrException): 49
Map (java.util.Map): 46
Test (org.junit.Test): 37
HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 28
CloudSolrClient (org.apache.solr.client.solrj.impl.CloudSolrClient): 25
HashSet (java.util.HashSet): 24
SolrQuery (org.apache.solr.client.solrj.SolrQuery): 24
IOException (java.io.IOException): 23
NamedList (org.apache.solr.common.util.NamedList): 23
List (java.util.List): 22
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 22
DocRouter (org.apache.solr.common.cloud.DocRouter): 20
ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps): 20