Use of org.apache.solr.common.cloud.Replica in project lucene-solr by Apache.
Class HttpPartitionTest, method testLeaderZkSessionLoss.
/**
 * Checks document handling after the shard leader loses its ZooKeeper session
 * (test inspired by SOLR-6511).
 *
 * <p>Flow: create the collection, index doc 1, expire the current leader's ZooKeeper
 * session, wait up to 60 seconds for the other replica to be reported as leader, then
 * send doc 2 to the <em>old</em> leader. If that add succeeds the doc must be visible on
 * the new leader; if it fails with a {@code SolrException} the doc is re-sent to the new
 * leader. Finally both docs must exist on every active-or-recovering replica.
 *
 * @throws Exception if any cluster operation or assertion fails
 */
protected void testLeaderZkSessionLoss() throws Exception {
  String testCollectionName = "c8n_1x2_leader_session_loss";
  createCollectionRetry(testCollectionName, 1, 2, 1);
  cloudClient.setDefaultCollection(testCollectionName);

  sendDoc(1);

  List<Replica> notLeaders =
      ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
  assertTrue("Expected 1 replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), notLeaders.size() == 1);

  Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
  // Check for null before the leader is dereferenced anywhere below.
  assertNotNull("Could not find leader for shard1 of " + testCollectionName + "; clusterState: " + printClusterStateInfo(testCollectionName), leader);
  JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));

  SolrInputDocument doc = new SolrInputDocument();
  doc.addField(id, String.valueOf(2));
  doc.addField("a_t", "hello" + 2);

  // cause leader migration by expiring the current leader's zk session
  chaosMonkey.expireSession(leaderJetty);

  String expectedNewLeaderCoreNodeName = notLeaders.get(0).getName();
  long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS);
  while (System.nanoTime() < timeout) {
    String currentLeaderName = null;
    try {
      Replica polledLeader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
      currentLeaderName = polledLeader.getName();
    } catch (Exception ignored) {
      // Deliberately best-effort: a failed lookup while the election is in progress
      // just means "no leader known yet"; the loop retries until the deadline.
    }

    if (expectedNewLeaderCoreNodeName.equals(currentLeaderName)) {
      // new leader was elected after zk session expiration
      break;
    }

    Thread.sleep(500);
  }

  // Authoritative check after the polling loop (also covers the timed-out case).
  Replica currentLeader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
  assertEquals(expectedNewLeaderCoreNodeName, currentLeader.getName());

  // TODO: This test logic seems to be timing dependent and fails on Jenkins
  // need to come up with a better approach
  log.info("Sending doc 2 to old leader " + leader.getName());
  // try-with-resources closes leaderSolr; no explicit close() call is needed.
  try (HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
    leaderSolr.add(doc);

    // if the add worked, then the doc must exist on the new leader
    try (HttpSolrClient newLeaderSolr = getHttpSolrClient(currentLeader, testCollectionName)) {
      assertDocExists(newLeaderSolr, testCollectionName, "2");
    }
  } catch (SolrException exc) {
    // this is ok provided the doc doesn't exist on the current leader
    try (HttpSolrClient client = getHttpSolrClient(currentLeader, testCollectionName)) {
      // this should work
      client.add(doc);
    }
  }

  List<Replica> participatingReplicas = getActiveOrRecoveringReplicas(testCollectionName, "shard1");
  Set<String> replicasToCheck = new HashSet<>();
  for (Replica stillUp : participatingReplicas) {
    replicasToCheck.add(stillUp.getName());
  }
  waitToSeeReplicasActive(testCollectionName, "shard1", replicasToCheck, 20);
  assertDocsExistInAllReplicas(participatingReplicas, testCollectionName, 1, 2);

  log.info("testLeaderZkSessionLoss succeeded ... deleting the " + testCollectionName + " collection");

  // try to clean up
  attemptCollectionDelete(cloudClient, testCollectionName);
}
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by Apache.
Class HttpPartitionTest, method testLeaderInitiatedRecoveryCRUD.
/**
 * Exercises writing, reading and deleting the leader-initiated-recovery (LIR) state znode.
 *
 * <p>The state is first written through
 * {@code ZkController.updateLeaderInitiatedRecoveryState} and read back, then the znode is
 * overwritten with the plain string {@code "down"} (the old non-JSON format) and read back
 * again. Both reads must yield {@code Replica.State.DOWN}.
 *
 * @throws Exception if any cluster operation or assertion fails
 */
protected void testLeaderInitiatedRecoveryCRUD() throws Exception {
  final String collection = "c8n_crud_1x2";
  final String shard = "shard1";

  createCollectionRetry(collection, 1, 2, 1);
  cloudClient.setDefaultCollection(collection);

  // Locate the ZkController of the node currently hosting the shard leader.
  final Replica leader = cloudClient.getZkStateReader().getLeaderRetry(collection, shard);
  final JettySolrRunner jettyOfLeader = getJettyOnPort(getReplicaPort(leader));
  final CoreContainer container = jettyOfLeader.getCoreContainer();
  final ZkController controller = container.getZkController();
  assertNotNull("ZkController is null", controller);

  final Replica follower =
      ensureAllReplicasAreActive(collection, shard, 1, 2, maxWaitSecsToSeeAllActive).get(0);
  final ZkCoreNodeProps followerProps = new ZkCoreNodeProps(follower);
  final String followerCoreUrl = followerProps.getCoreUrl();

  // Core descriptor stub whose cloud descriptor reports the leader's core node name
  // and always answers true to isLeader().
  final MockCoreDescriptor leaderDescriptor = new MockCoreDescriptor() {
    public CloudDescriptor getCloudDescriptor() {
      final String leaderCoreName = leader.getStr(ZkStateReader.CORE_NAME_PROP);
      return new CloudDescriptor(leaderCoreName, new Properties(), this) {
        @Override
        public String getCoreNodeName() {
          return leader.getName();
        }

        @Override
        public boolean isLeader() {
          return true;
        }
      };
    }
  };

  // Write the LIR state via the controller and read it back.
  controller.updateLeaderInitiatedRecoveryState(
      collection, shard, follower.getName(), Replica.State.DOWN, leaderDescriptor, true);
  Map<String, Object> lirState =
      controller.getLeaderInitiatedRecoveryStateObject(collection, shard, follower.getName());
  assertNotNull(lirState);
  String storedState = (String) lirState.get(ZkStateReader.STATE_PROP);
  assertSame(Replica.State.DOWN, Replica.State.getState(storedState));

  // test old non-json format handling
  final SolrZkClient zk = controller.getZkClient();
  final String lirZnode =
      controller.getLeaderInitiatedRecoveryZnodePath(collection, shard, follower.getName());
  zk.setData(lirZnode, "down".getBytes(StandardCharsets.UTF_8), true);
  lirState = controller.getLeaderInitiatedRecoveryStateObject(collection, shard, follower.getName());
  assertNotNull(lirState);
  storedState = (String) lirState.get(ZkStateReader.STATE_PROP);
  assertSame(Replica.State.DOWN, Replica.State.getState(storedState));

  zk.delete(lirZnode, -1, false);

  // try to clean up
  attemptCollectionDelete(cloudClient, collection);
}
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by Apache.
Class TestMiniSolrCloudCluster, method testStopAllStartAll.
/**
 * Stops every Jetty that hosts a replica of the test collection (followers first, then
 * leaders), restarts them in a randomized order and checks that all previously indexed
 * documents are still found.
 *
 * @throws Exception if any cluster operation or assertion fails
 */
@Test
public void testStopAllStartAll() throws Exception {
  final String collectionName = "testStopAllStartAllCollection";
  final MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
  try {
    assertNotNull(miniCluster.getZkServer());
    List<JettySolrRunner> jettys = miniCluster.getJettySolrRunners();
    assertEquals(NUM_SERVERS, jettys.size());
    for (JettySolrRunner jetty : jettys) {
      assertTrue(jetty.isRunning());
    }

    createCollection(miniCluster, collectionName, null, null, Boolean.TRUE, null);
    final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();
    cloudSolrClient.setDefaultCollection(collectionName);
    final SolrQuery query = new SolrQuery("*:*");
    final SolrInputDocument doc = new SolrInputDocument();

    try (SolrZkClient zkClient = new SolrZkClient(miniCluster.getZkServer().getZkAddress(), AbstractZkTestCase.TIMEOUT, AbstractZkTestCase.TIMEOUT, null);
        ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
      zkStateReader.createClusterStateWatchersAndUpdate();
      AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);

      // modify collection
      final int numDocs = 1 + random().nextInt(10);
      for (int ii = 1; ii <= numDocs; ++ii) {
        doc.setField("id", "" + ii);
        cloudSolrClient.add(doc);
        // extra commit half-way through (only hit when numDocs is even)
        if (ii * 2 == numDocs) {
          cloudSolrClient.commit();
        }
      }
      cloudSolrClient.commit();

      // query collection
      {
        final QueryResponse rsp = cloudSolrClient.query(query);
        assertEquals(numDocs, rsp.getResults().getNumFound());
      }

      // the test itself
      zkStateReader.forceUpdateCollection(collectionName);
      final ClusterState clusterState = zkStateReader.getClusterState();

      final HashSet<Integer> leaderIndices = new HashSet<>();
      final HashSet<Integer> followerIndices = new HashSet<>();
      {
        // node ("host:port/solr") -> TRUE if it hosts at least one shard leader,
        // FALSE if it hosts only non-leader replicas
        final HashMap<String, Boolean> shardLeaderMap = new HashMap<>();
        for (final Slice slice : clusterState.getSlices(collectionName)) {
          for (final Replica replica : slice.getReplicas()) {
            final String node = replica.getNodeName().replace("_solr", "/solr");
            // Never downgrade a node already recorded as leader for an earlier slice.
            if (!shardLeaderMap.containsKey(node)) {
              shardLeaderMap.put(node, Boolean.FALSE);
            }
          }
          shardLeaderMap.put(slice.getLeader().getNodeName().replace("_solr", "/solr"), Boolean.TRUE);
        }

        for (int ii = 0; ii < jettys.size(); ++ii) {
          final URL jettyBaseUrl = jettys.get(ii).getBaseUrl();
          // strip the "<protocol>://" prefix so the key matches the node-name form above
          final String jettyBaseUrlString = jettyBaseUrl.toString().substring((jettyBaseUrl.getProtocol() + "://").length());
          final Boolean isLeader = shardLeaderMap.get(jettyBaseUrlString);
          if (Boolean.TRUE.equals(isLeader)) {
            leaderIndices.add(ii);
          } else if (Boolean.FALSE.equals(isLeader)) {
            followerIndices.add(ii);
          }
          // else neither leader nor follower i.e. node without a replica (for our collection)
        }
      }
      final List<Integer> leaderIndicesList = new ArrayList<>(leaderIndices);
      final List<Integer> followerIndicesList = new ArrayList<>(followerIndices);

      // first stop the followers (in no particular order);
      // an index is in exactly one of the two sets, so no overlap check is needed
      Collections.shuffle(followerIndicesList, random());
      for (Integer ii : followerIndicesList) {
        miniCluster.stopJettySolrRunner(jettys.get(ii.intValue()));
      }

      // then stop the leaders (again in no particular order)
      Collections.shuffle(leaderIndicesList, random());
      for (Integer ii : leaderIndicesList) {
        miniCluster.stopJettySolrRunner(jettys.get(ii.intValue()));
      }

      // calculate restart order
      final List<Integer> restartIndicesList = new ArrayList<>();
      Collections.shuffle(leaderIndicesList, random());
      restartIndicesList.addAll(leaderIndicesList);
      Collections.shuffle(followerIndicesList, random());
      restartIndicesList.addAll(followerIndicesList);
      // half of the time, drop the leaders-before-followers ordering entirely
      if (random().nextBoolean()) {
        Collections.shuffle(restartIndicesList, random());
      }

      // and then restart jettys in that order
      for (Integer ii : restartIndicesList) {
        final JettySolrRunner jetty = jettys.get(ii.intValue());
        if (!jetty.isRunning()) {
          miniCluster.startJettySolrRunner(jetty);
          assertTrue(jetty.isRunning());
        }
      }
      AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);

      zkStateReader.forceUpdateCollection(collectionName);

      // re-query collection
      {
        final QueryResponse rsp = cloudSolrClient.query(query);
        assertEquals(numDocs, rsp.getResults().getNumFound());
      }
    }
  } finally {
    miniCluster.shutdown();
  }
}
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by Apache.
Class TestMiniSolrCloudCluster, method testCollectionCreateSearchDelete.
/**
 * End-to-end check of a mini cluster: stop/start a server, create a collection, index and
 * query one document, stop and restart a server that hosts no replica of the collection,
 * delete and re-create the collection, and verify the re-created collection starts empty.
 *
 * @throws Exception if any cluster operation or assertion fails
 */
@Test
public void testCollectionCreateSearchDelete() throws Exception {
  final String collectionName = "testcollection";
  MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
  try {
    // Obtained inside the try so the cluster is still shut down if this call fails.
    final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();

    assertNotNull(miniCluster.getZkServer());
    List<JettySolrRunner> jettys = miniCluster.getJettySolrRunners();
    assertEquals(NUM_SERVERS, jettys.size());
    for (JettySolrRunner jetty : jettys) {
      assertTrue(jetty.isRunning());
    }

    // shut down a server
    log.info("#### Stopping a server");
    JettySolrRunner stoppedServer = miniCluster.stopJettySolrRunner(0);
    assertTrue(stoppedServer.isStopped());
    assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());

    // create a server
    log.info("#### Starting a server");
    JettySolrRunner startedServer = miniCluster.startJettySolrRunner();
    assertTrue(startedServer.isRunning());
    assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());

    // create collection (randomly with or without an async request id)
    log.info("#### Creating a collection");
    final String asyncId = (random().nextBoolean() ? null : "asyncId(" + collectionName + ".create)=" + random().nextInt());
    createCollection(miniCluster, collectionName, null, asyncId, null, null);

    ZkStateReader zkStateReader = miniCluster.getSolrClient().getZkStateReader();
    AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);

    // modify/query collection
    log.info("#### updating a querying collection");
    cloudSolrClient.setDefaultCollection(collectionName);
    SolrInputDocument doc = new SolrInputDocument();
    doc.setField("id", "1");
    cloudSolrClient.add(doc);
    cloudSolrClient.commit();
    SolrQuery query = new SolrQuery();
    query.setQuery("*:*");
    QueryResponse rsp = cloudSolrClient.query(query);
    assertEquals(1, rsp.getResults().getNumFound());

    // remove a server not hosting any replicas
    zkStateReader.forceUpdateCollection(collectionName);
    ClusterState clusterState = zkStateReader.getClusterState();
    // key: base URL without the "<protocol>://" prefix, matching the node-name form used below
    HashMap<String, JettySolrRunner> jettyMap = new HashMap<>();
    for (JettySolrRunner jetty : miniCluster.getJettySolrRunners()) {
      String key = jetty.getBaseUrl().toString().substring((jetty.getBaseUrl().getProtocol() + "://").length());
      jettyMap.put(key, jetty);
    }
    Collection<Slice> slices = clusterState.getSlices(collectionName);
    // drop every server that hosts a replica; whatever remains hosts none
    for (Slice slice : slices) {
      jettyMap.remove(slice.getLeader().getNodeName().replace("_solr", "/solr"));
      for (Replica replica : slice.getReplicas()) {
        jettyMap.remove(replica.getNodeName().replace("_solr", "/solr"));
      }
    }
    assertTrue("Expected to find a node without a replica", jettyMap.size() > 0);

    log.info("#### Stopping a server");
    JettySolrRunner jettyToStop = jettyMap.values().iterator().next();
    jettys = miniCluster.getJettySolrRunners();
    for (int i = 0; i < jettys.size(); ++i) {
      if (jettys.get(i).equals(jettyToStop)) {
        miniCluster.stopJettySolrRunner(i);
        assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());
        // Stop scanning: the runner list was just modified and the target is handled.
        break;
      }
    }

    // re-create a server (to restore original NUM_SERVERS count)
    log.info("#### Starting a server");
    startedServer = miniCluster.startJettySolrRunner(jettyToStop);
    assertTrue(startedServer.isRunning());
    assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());

    CollectionAdminRequest.deleteCollection(collectionName).process(miniCluster.getSolrClient());

    // create it again
    String asyncId2 = (random().nextBoolean() ? null : "asyncId(" + collectionName + ".create)=" + random().nextInt());
    createCollection(miniCluster, collectionName, null, asyncId2, null, null);
    AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);

    // check that there's no left-over state
    assertEquals(0, cloudSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
    cloudSolrClient.add(doc);
    cloudSolrClient.commit();
    assertEquals(1, cloudSolrClient.query(new SolrQuery("*:*")).getResults().getNumFound());
  } finally {
    miniCluster.shutdown();
  }
}
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by Apache.
Class TestPullReplica, method testRealTimeGet.
/**
 * Checks real-time get by id against every replica of the shard.
 *
 * <p>For each replica one document is added directly through that replica's core URL; the
 * document must then be retrievable by id through the cluster client and through every
 * replica of the shard. Afterwards a multi-id get is issued against each replica and all
 * responses are compared with the first one.
 */
public void testRealTimeGet() throws SolrServerException, IOException, KeeperException, InterruptedException {
  // should be redirected to Replica.Type.NRT
  int numReplicas = random().nextBoolean() ? 1 : 2;
  CollectionAdminRequest.createCollection(collectionName, "conf", 1, numReplicas, 0, numReplicas).setMaxShardsPerNode(100).process(cluster.getSolrClient());
  waitForState("Unexpected replica count", collectionName, activeReplicaCount(numReplicas, 0, numReplicas));
  DocCollection docCollection = assertNumberOfReplicas(numReplicas, 0, numReplicas, false, true);
  HttpClient httpClient = cluster.getSolrClient().getHttpClient();

  Slice slice = docCollection.getSlice("shard1");
  List<String> ids = new ArrayList<>(slice.getReplicas().size());
  int id = 0;
  for (Replica rAdd : slice.getReplicas()) {
    // one fresh document id per target replica
    final String idStr = String.valueOf(id);
    try (HttpSolrClient client = getHttpSolrClient(rAdd.getCoreUrl(), httpClient)) {
      client.add(new SolrInputDocument("id", idStr, "foo_s", "bar"));
    }
    SolrDocument docCloudClient = cluster.getSolrClient().getById(collectionName, idStr);
    assertEquals("bar", docCloudClient.getFieldValue("foo_s"));
    for (Replica rGet : slice.getReplicas()) {
      try (HttpSolrClient client = getHttpSolrClient(rGet.getCoreUrl(), httpClient)) {
        SolrDocument doc = client.getById(idStr);
        assertEquals("bar", doc.getFieldValue("foo_s"));
      }
    }
    ids.add(idStr);
    id++;
  }

  // every replica must return the same result for a multi-id get
  SolrDocumentList previousAllIdsResult = null;
  for (Replica replica : slice.getReplicas()) {
    try (HttpSolrClient client = getHttpSolrClient(replica.getCoreUrl(), httpClient)) {
      SolrDocumentList allIdsResult = client.getById(ids);
      if (previousAllIdsResult != null) {
        assertTrue(compareSolrDocumentList(previousAllIdsResult, allIdsResult));
      } else {
        // set the first response here
        previousAllIdsResult = allIdsResult;
        assertEquals("Unexpected number of documents", ids.size(), allIdsResult.getNumFound());
      }
    }
  }
}
Aggregations