use of org.apache.solr.client.solrj.embedded.JettySolrRunner in project lucene-solr by apache.
the class HttpPartitionTest method testLeaderZkSessionLoss.
// test inspired by SOLR-6511
protected void testLeaderZkSessionLoss() throws Exception {
String testCollectionName = "c8n_1x2_leader_session_loss";
createCollectionRetry(testCollectionName, 1, 2, 1);
cloudClient.setDefaultCollection(testCollectionName);
sendDoc(1);
List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
assertTrue("Expected 1 replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), notLeaders.size() == 1);
Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
String leaderNode = leader.getNodeName();
assertNotNull("Could not find leader for shard1 of " + testCollectionName + "; clusterState: " + printClusterStateInfo(testCollectionName), leader);
JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));
SolrInputDocument doc = new SolrInputDocument();
doc.addField(id, String.valueOf(2));
doc.addField("a_t", "hello" + 2);
// cause leader migration by expiring the current leader's zk session
chaosMonkey.expireSession(leaderJetty);
String expectedNewLeaderCoreNodeName = notLeaders.get(0).getName();
long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS);
while (System.nanoTime() < timeout) {
String currentLeaderName = null;
try {
Replica currentLeader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
currentLeaderName = currentLeader.getName();
} catch (Exception exc) {
}
if (expectedNewLeaderCoreNodeName.equals(currentLeaderName))
// new leader was elected after zk session expiration
break;
Thread.sleep(500);
}
Replica currentLeader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
assertEquals(expectedNewLeaderCoreNodeName, currentLeader.getName());
// TODO: This test logic seems to be timing dependent and fails on Jenkins
// need to come up with a better approach
log.info("Sending doc 2 to old leader " + leader.getName());
try (HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
leaderSolr.add(doc);
leaderSolr.close();
// if the add worked, then the doc must exist on the new leader
try (HttpSolrClient newLeaderSolr = getHttpSolrClient(currentLeader, testCollectionName)) {
assertDocExists(newLeaderSolr, testCollectionName, "2");
}
} catch (SolrException exc) {
// this is ok provided the doc doesn't exist on the current leader
try (HttpSolrClient client = getHttpSolrClient(currentLeader, testCollectionName)) {
// this should work
client.add(doc);
}
}
List<Replica> participatingReplicas = getActiveOrRecoveringReplicas(testCollectionName, "shard1");
Set<String> replicasToCheck = new HashSet<>();
for (Replica stillUp : participatingReplicas) replicasToCheck.add(stillUp.getName());
waitToSeeReplicasActive(testCollectionName, "shard1", replicasToCheck, 20);
assertDocsExistInAllReplicas(participatingReplicas, testCollectionName, 1, 2);
log.info("testLeaderZkSessionLoss succeeded ... deleting the " + testCollectionName + " collection");
// try to clean up
attemptCollectionDelete(cloudClient, testCollectionName);
}
use of org.apache.solr.client.solrj.embedded.JettySolrRunner in project lucene-solr by apache.
the class HttpPartitionTest method testLeaderInitiatedRecoveryCRUD.
/**
* Tests handling of lir state znodes.
*/
protected void testLeaderInitiatedRecoveryCRUD() throws Exception {
String testCollectionName = "c8n_crud_1x2";
String shardId = "shard1";
createCollectionRetry(testCollectionName, 1, 2, 1);
cloudClient.setDefaultCollection(testCollectionName);
Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, shardId);
JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));
CoreContainer cores = leaderJetty.getCoreContainer();
ZkController zkController = cores.getZkController();
assertNotNull("ZkController is null", zkController);
Replica notLeader = ensureAllReplicasAreActive(testCollectionName, shardId, 1, 2, maxWaitSecsToSeeAllActive).get(0);
ZkCoreNodeProps replicaCoreNodeProps = new ZkCoreNodeProps(notLeader);
String replicaUrl = replicaCoreNodeProps.getCoreUrl();
MockCoreDescriptor cd = new MockCoreDescriptor() {
public CloudDescriptor getCloudDescriptor() {
return new CloudDescriptor(leader.getStr(ZkStateReader.CORE_NAME_PROP), new Properties(), this) {
@Override
public String getCoreNodeName() {
return leader.getName();
}
@Override
public boolean isLeader() {
return true;
}
};
}
};
zkController.updateLeaderInitiatedRecoveryState(testCollectionName, shardId, notLeader.getName(), Replica.State.DOWN, cd, true);
Map<String, Object> lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
assertNotNull(lirStateMap);
assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
// test old non-json format handling
SolrZkClient zkClient = zkController.getZkClient();
String znodePath = zkController.getLeaderInitiatedRecoveryZnodePath(testCollectionName, shardId, notLeader.getName());
zkClient.setData(znodePath, "down".getBytes(StandardCharsets.UTF_8), true);
lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
assertNotNull(lirStateMap);
assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
zkClient.delete(znodePath, -1, false);
// try to clean up
attemptCollectionDelete(cloudClient, testCollectionName);
}
use of org.apache.solr.client.solrj.embedded.JettySolrRunner in project lucene-solr by apache.
the class TestCloudRecovery method leaderRecoverFromLogOnStartupTest.
@Test
public void leaderRecoverFromLogOnStartupTest() throws Exception {
AtomicInteger countReplayLog = new AtomicInteger(0);
DirectUpdateHandler2.commitOnClose = false;
UpdateLog.testing_logReplayFinishHook = countReplayLog::incrementAndGet;
CloudSolrClient cloudClient = cluster.getSolrClient();
cloudClient.add(COLLECTION, sdoc("id", "1"));
cloudClient.add(COLLECTION, sdoc("id", "2"));
cloudClient.add(COLLECTION, sdoc("id", "3"));
cloudClient.add(COLLECTION, sdoc("id", "4"));
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("q", "*:*");
QueryResponse resp = cloudClient.query(COLLECTION, params);
assertEquals(0, resp.getResults().getNumFound());
ChaosMonkey.stop(cluster.getJettySolrRunners());
assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000));
ChaosMonkey.start(cluster.getJettySolrRunners());
assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000));
resp = cloudClient.query(COLLECTION, params);
assertEquals(4, resp.getResults().getNumFound());
// Make sure all nodes is recover from tlog
if (onlyLeaderIndexes) {
// Leader election can be kicked off, so 2 tlog replicas will replay its tlog before becoming new leader
assertTrue(countReplayLog.get() >= 2);
} else {
assertEquals(4, countReplayLog.get());
}
// check metrics
int replicationCount = 0;
int errorsCount = 0;
int skippedCount = 0;
for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
SolrMetricManager manager = jetty.getCoreContainer().getMetricManager();
List<String> registryNames = manager.registryNames().stream().filter(s -> s.startsWith("solr.core.")).collect(Collectors.toList());
for (String registry : registryNames) {
Map<String, Metric> metrics = manager.registry(registry).getMetrics();
Timer timer = (Timer) metrics.get("REPLICATION.peerSync.time");
Counter counter = (Counter) metrics.get("REPLICATION.peerSync.errors");
Counter skipped = (Counter) metrics.get("REPLICATION.peerSync.skipped");
replicationCount += timer.getCount();
errorsCount += counter.getCount();
skippedCount += skipped.getCount();
}
}
if (onlyLeaderIndexes) {
assertTrue(replicationCount >= 2);
} else {
assertEquals(2, replicationCount);
}
}
use of org.apache.solr.client.solrj.embedded.JettySolrRunner in project lucene-solr by apache.
the class TestCloudRecovery method corruptedLogTest.
@Test
public void corruptedLogTest() throws Exception {
AtomicInteger countReplayLog = new AtomicInteger(0);
DirectUpdateHandler2.commitOnClose = false;
UpdateLog.testing_logReplayFinishHook = countReplayLog::incrementAndGet;
CloudSolrClient cloudClient = cluster.getSolrClient();
cloudClient.add(COLLECTION, sdoc("id", "1000"));
cloudClient.add(COLLECTION, sdoc("id", "1001"));
for (int i = 0; i < 10; i++) {
cloudClient.add(COLLECTION, sdoc("id", String.valueOf(i)));
}
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("q", "*:*");
QueryResponse resp = cloudClient.query(COLLECTION, params);
assertEquals(0, resp.getResults().getNumFound());
int logHeaderSize = Integer.MAX_VALUE;
Map<File, byte[]> contentFiles = new HashMap<>();
for (JettySolrRunner solrRunner : cluster.getJettySolrRunners()) {
for (SolrCore solrCore : solrRunner.getCoreContainer().getCores()) {
File tlogFolder = new File(solrCore.getUlogDir(), UpdateLog.TLOG_NAME);
String[] tLogFiles = tlogFolder.list();
Arrays.sort(tLogFiles);
File lastTLogFile = new File(tlogFolder.getAbsolutePath() + "/" + tLogFiles[tLogFiles.length - 1]);
byte[] tlogBytes = IOUtils.toByteArray(new FileInputStream(lastTLogFile));
contentFiles.put(lastTLogFile, tlogBytes);
logHeaderSize = Math.min(tlogBytes.length, logHeaderSize);
}
}
ChaosMonkey.stop(cluster.getJettySolrRunners());
assertTrue("Timeout waiting for all not live", ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000));
for (Map.Entry<File, byte[]> entry : contentFiles.entrySet()) {
byte[] tlogBytes = entry.getValue();
if (tlogBytes.length <= logHeaderSize)
continue;
FileOutputStream stream = new FileOutputStream(entry.getKey());
int skipLastBytes = Math.max(random().nextInt(tlogBytes.length - logHeaderSize), 2);
for (int i = 0; i < entry.getValue().length - skipLastBytes; i++) {
stream.write(tlogBytes[i]);
}
stream.close();
}
ChaosMonkey.start(cluster.getJettySolrRunners());
assertTrue("Timeout waiting for all live and active", ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000));
resp = cloudClient.query(COLLECTION, params);
// Make sure cluster still healthy
assertTrue(resp.getResults().getNumFound() >= 2);
}
use of org.apache.solr.client.solrj.embedded.JettySolrRunner in project lucene-solr by apache.
the class TestMiniSolrCloudCluster method testStopAllStartAll.
@Test
public void testStopAllStartAll() throws Exception {
final String collectionName = "testStopAllStartAllCollection";
final MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
try {
assertNotNull(miniCluster.getZkServer());
List<JettySolrRunner> jettys = miniCluster.getJettySolrRunners();
assertEquals(NUM_SERVERS, jettys.size());
for (JettySolrRunner jetty : jettys) {
assertTrue(jetty.isRunning());
}
createCollection(miniCluster, collectionName, null, null, Boolean.TRUE, null);
final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();
cloudSolrClient.setDefaultCollection(collectionName);
final SolrQuery query = new SolrQuery("*:*");
final SolrInputDocument doc = new SolrInputDocument();
try (SolrZkClient zkClient = new SolrZkClient(miniCluster.getZkServer().getZkAddress(), AbstractZkTestCase.TIMEOUT, AbstractZkTestCase.TIMEOUT, null);
ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
zkStateReader.createClusterStateWatchersAndUpdate();
AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
// modify collection
final int numDocs = 1 + random().nextInt(10);
for (int ii = 1; ii <= numDocs; ++ii) {
doc.setField("id", "" + ii);
cloudSolrClient.add(doc);
if (ii * 2 == numDocs)
cloudSolrClient.commit();
}
cloudSolrClient.commit();
// query collection
{
final QueryResponse rsp = cloudSolrClient.query(query);
assertEquals(numDocs, rsp.getResults().getNumFound());
}
// the test itself
zkStateReader.forceUpdateCollection(collectionName);
final ClusterState clusterState = zkStateReader.getClusterState();
final HashSet<Integer> leaderIndices = new HashSet<Integer>();
final HashSet<Integer> followerIndices = new HashSet<Integer>();
{
final HashMap<String, Boolean> shardLeaderMap = new HashMap<String, Boolean>();
for (final Slice slice : clusterState.getSlices(collectionName)) {
for (final Replica replica : slice.getReplicas()) {
shardLeaderMap.put(replica.getNodeName().replace("_solr", "/solr"), Boolean.FALSE);
}
shardLeaderMap.put(slice.getLeader().getNodeName().replace("_solr", "/solr"), Boolean.TRUE);
}
for (int ii = 0; ii < jettys.size(); ++ii) {
final URL jettyBaseUrl = jettys.get(ii).getBaseUrl();
final String jettyBaseUrlString = jettyBaseUrl.toString().substring((jettyBaseUrl.getProtocol() + "://").length());
final Boolean isLeader = shardLeaderMap.get(jettyBaseUrlString);
if (Boolean.TRUE.equals(isLeader)) {
leaderIndices.add(new Integer(ii));
} else if (Boolean.FALSE.equals(isLeader)) {
followerIndices.add(new Integer(ii));
}
// else neither leader nor follower i.e. node without a replica (for our collection)
}
}
final List<Integer> leaderIndicesList = new ArrayList<Integer>(leaderIndices);
final List<Integer> followerIndicesList = new ArrayList<Integer>(followerIndices);
// first stop the followers (in no particular order)
Collections.shuffle(followerIndicesList, random());
for (Integer ii : followerIndicesList) {
if (!leaderIndices.contains(ii)) {
miniCluster.stopJettySolrRunner(jettys.get(ii.intValue()));
}
}
// then stop the leaders (again in no particular order)
Collections.shuffle(leaderIndicesList, random());
for (Integer ii : leaderIndicesList) {
miniCluster.stopJettySolrRunner(jettys.get(ii.intValue()));
}
// calculate restart order
final List<Integer> restartIndicesList = new ArrayList<Integer>();
Collections.shuffle(leaderIndicesList, random());
restartIndicesList.addAll(leaderIndicesList);
Collections.shuffle(followerIndicesList, random());
restartIndicesList.addAll(followerIndicesList);
if (random().nextBoolean())
Collections.shuffle(restartIndicesList, random());
// and then restart jettys in that order
for (Integer ii : restartIndicesList) {
final JettySolrRunner jetty = jettys.get(ii.intValue());
if (!jetty.isRunning()) {
miniCluster.startJettySolrRunner(jetty);
assertTrue(jetty.isRunning());
}
}
AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
zkStateReader.forceUpdateCollection(collectionName);
// re-query collection
{
final QueryResponse rsp = cloudSolrClient.query(query);
assertEquals(numDocs, rsp.getResults().getNumFound());
}
}
} finally {
miniCluster.shutdown();
}
}
Aggregations