Example use of org.apache.solr.client.solrj.embedded.JettySolrRunner in the Apache lucene-solr project.
Class DeleteInactiveReplicaTest, method deleteInactiveReplicaTest.
/**
 * Verifies that a replica can be deleted while the node hosting it is stopped, and that the
 * deleted core is not loaded again once that node is restarted.
 *
 * <p>Steps: create a 2-shard, 2-replica collection and wait until it is fully active; stop the
 * Jetty hosting a randomly chosen replica; wait until that replica is gone or no longer ACTIVE;
 * delete it through the Collections API and wait until it has left the cluster state; restart
 * the Jetty and assert that a core-init failure containing "does not exist in shard" was recorded
 * for the deleted core. Finally, assert that a core-create request that carries no coreNodeName
 * is rejected with a "coreNodeName missing" error.
 *
 * @throws Exception if any cluster operation fails or a wait times out
 */
@Test
public void deleteInactiveReplicaTest() throws Exception {
  String collectionName = "delDeadColl";
  int replicationFactor = 2;
  int numShards = 2;
  int maxShardsPerNode = ((((numShards + 1) * replicationFactor) / cluster.getJettySolrRunners().size())) + 1;

  CollectionAdminRequest.createCollection(collectionName, "conf", numShards, replicationFactor)
      .setMaxShardsPerNode(maxShardsPerNode)
      .process(cluster.getSolrClient());
  waitForState("Expected a cluster of 2 shards and 2 replicas", collectionName,
      (n, c) -> DocCollection.isFullyActive(n, c, numShards, replicationFactor));

  DocCollection collectionState = getCollectionState(collectionName);
  Slice shard = getRandomShard(collectionState);
  Replica replica = getRandomReplica(shard);
  JettySolrRunner jetty = cluster.getReplicaJetty(replica);
  cluster.stopJettySolrRunner(jetty);

  // Replicas are looked up in the collection state by core node name (Replica#getName()), the
  // same identifier passed to deleteReplica below. Looking up by core name never matches, which
  // would make both waits succeed immediately without observing anything.
  waitForState("Expected replica " + replica.getName() + " on down node to be removed from cluster state", collectionName, (n, c) -> {
    Replica r = c.getReplica(replica.getName());
    return r == null || r.getState() != Replica.State.ACTIVE;
  });

  log.info("Removing replica {}/{} ", shard.getName(), replica.getName());
  CollectionAdminRequest.deleteReplica(collectionName, shard.getName(), replica.getName())
      .process(cluster.getSolrClient());
  waitForState("Expected deleted replica " + replica.getName() + " to be removed from cluster state", collectionName,
      (n, c) -> c.getReplica(replica.getName()) == null);

  cluster.startJettySolrRunner(jetty);
  log.info("restarted jetty");

  // Core init failures are reported per core name, so this lookup intentionally uses getCoreName().
  CoreContainer cc = jetty.getCoreContainer();
  CoreContainer.CoreLoadFailure loadFailure = cc.getCoreInitFailures().get(replica.getCoreName());
  assertNotNull("Deleted core was still loaded!", loadFailure);
  assertTrue("Unexpected load failure message: " + loadFailure.exception.getMessage(),
      loadFailure.exception.getMessage().contains("does not exist in shard"));

  // Check that we can't create a core with no coreNodeName
  try (SolrClient queryClient = getHttpSolrClient(jetty.getBaseUrl().toString())) {
    Exception e = expectThrows(Exception.class, () -> {
      CoreAdminRequest.Create createRequest = new CoreAdminRequest.Create();
      createRequest.setCoreName("testcore");
      createRequest.setCollection(collectionName);
      createRequest.setShardId("shard2");
      queryClient.request(createRequest);
    });
    assertTrue("Unexpected error message: " + e.getMessage(), e.getMessage().contains("coreNodeName missing"));
  }
}
Example use of org.apache.solr.client.solrj.embedded.JettySolrRunner in the Apache lucene-solr project.
Class CollectionsAPIDistributedZkTest, method testCreateNodeSet.
/**
 * Creates a 2-shard, single-replica collection whose create-node-set is built from the base URLs
 * of two randomly drawn Jetty nodes, then fails if any replica's core URL does not start with one
 * of those base URLs.
 *
 * @throws Exception if the collection request or the cluster-state lookup fails
 */
@Test
public void testCreateNodeSet() throws Exception {
  JettySolrRunner firstNode = cluster.getRandomJetty(random());
  JettySolrRunner secondNode = cluster.getRandomJetty(random());
  List<String> baseUrls = ImmutableList.of(firstNode.getBaseUrl().toString(), secondNode.getBaseUrl().toString());

  // The node set is passed as a comma-separated list of the two base URLs.
  String createNodeSet = baseUrls.get(0) + "," + baseUrls.get(1);
  CollectionAdminRequest.createCollection("nodeset_collection", "conf", 2, 1)
      .setCreateNodeSet(createNodeSet)
      .process(cluster.getSolrClient());

  DocCollection collectionState = getCollectionState("nodeset_collection");
  for (Replica replica : collectionState.getReplicas()) {
    String replicaUrl = replica.getCoreUrl();
    // A replica is correctly placed when its core URL lives under one of the requested nodes.
    boolean onRequestedNode = baseUrls.stream().anyMatch(replicaUrl::startsWith);
    if (!onRequestedNode) {
      fail("Expected replica to be on " + baseUrls + " but was on " + replicaUrl);
    }
  }
}
Example use of org.apache.solr.client.solrj.embedded.JettySolrRunner in the Apache lucene-solr project.
Class HttpPartitionTest, method testLeaderZkSessionLoss.
/**
 * Checks that documents still reach all participating replicas after the shard leader loses its
 * ZooKeeper session (test inspired by SOLR-6511).
 *
 * <p>Steps: create the collection and index doc 1; expire the current leader's ZK session; poll
 * for up to 60 seconds until the former non-leader replica is reported as leader; send doc 2 to
 * the old leader and, if that add throws a {@code SolrException}, send it to the new leader
 * instead; then wait for the active/recovering replicas to become active and run
 * {@code assertDocsExistInAllReplicas} over them with bounds 1 and 2.
 *
 * @throws Exception if a cluster operation fails or the thread is interrupted while polling
 */
protected void testLeaderZkSessionLoss() throws Exception {
  String testCollectionName = "c8n_1x2_leader_session_loss";
  createCollectionRetry(testCollectionName, 1, 2, 1);
  cloudClient.setDefaultCollection(testCollectionName);
  sendDoc(1);

  List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, "shard1", 1, 2, maxWaitSecsToSeeAllActive);
  assertTrue("Expected 1 replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), notLeaders.size() == 1);

  Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
  // Validate the leader before it is dereferenced anywhere below.
  assertNotNull("Could not find leader for shard1 of " + testCollectionName + "; clusterState: " + printClusterStateInfo(testCollectionName), leader);
  JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));

  SolrInputDocument doc = new SolrInputDocument();
  doc.addField(id, String.valueOf(2));
  doc.addField("a_t", "hello" + 2);

  // cause leader migration by expiring the current leader's zk session
  chaosMonkey.expireSession(leaderJetty);

  String expectedNewLeaderCoreNodeName = notLeaders.get(0).getName();
  // System.nanoTime() values must be compared by difference, not directly, to be overflow-safe.
  long deadline = System.nanoTime() + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS);
  while (System.nanoTime() - deadline < 0) {
    String currentLeaderName = null;
    try {
      Replica polledLeader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
      currentLeaderName = polledLeader.getName();
    } catch (Exception ignored) {
      // Best effort: a failed lookup during the election just means we poll again; the
      // authoritative lookup after the loop is not guarded and will surface a real failure.
    }
    if (expectedNewLeaderCoreNodeName.equals(currentLeaderName)) {
      // new leader was elected after zk session expiration
      break;
    }
    Thread.sleep(500);
  }

  Replica currentLeader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, "shard1");
  assertEquals(expectedNewLeaderCoreNodeName, currentLeader.getName());

  // TODO: This test logic seems to be timing dependent and fails on Jenkins
  // need to come up with a better approach
  log.info("Sending doc 2 to old leader " + leader.getName());
  try (HttpSolrClient leaderSolr = getHttpSolrClient(leader, testCollectionName)) {
    // No explicit close(): try-with-resources closes leaderSolr when this block exits.
    leaderSolr.add(doc);
    // if the add worked, then the doc must exist on the new leader
    try (HttpSolrClient newLeaderSolr = getHttpSolrClient(currentLeader, testCollectionName)) {
      assertDocExists(newLeaderSolr, testCollectionName, "2");
    }
  } catch (SolrException exc) {
    // this is ok provided the doc doesn't exist on the current leader
    try (HttpSolrClient client = getHttpSolrClient(currentLeader, testCollectionName)) {
      // this should work
      client.add(doc);
    }
  }

  List<Replica> participatingReplicas = getActiveOrRecoveringReplicas(testCollectionName, "shard1");
  Set<String> replicasToCheck = new HashSet<>();
  for (Replica stillUp : participatingReplicas) {
    replicasToCheck.add(stillUp.getName());
  }
  waitToSeeReplicasActive(testCollectionName, "shard1", replicasToCheck, 20);
  assertDocsExistInAllReplicas(participatingReplicas, testCollectionName, 1, 2);

  log.info("testLeaderZkSessionLoss succeeded ... deleting the " + testCollectionName + " collection");

  // try to clean up
  attemptCollectionDelete(cloudClient, testCollectionName);
}
Example use of org.apache.solr.client.solrj.embedded.JettySolrRunner in the Apache lucene-solr project.
Class HttpPartitionTest, method testLeaderInitiatedRecoveryCRUD.
/**
 * Tests handling of lir state znodes.
 *
 * <p>Using the ZkController of the leader's node, this writes a DOWN leader-initiated-recovery
 * state for the non-leader replica and reads it back; then overwrites the znode with the plain
 * string "down" (the old non-JSON format) and checks it is still read back as DOWN; finally
 * deletes the znode and attempts to delete the collection.
 *
 * @throws Exception if a cluster or ZooKeeper operation fails
 */
protected void testLeaderInitiatedRecoveryCRUD() throws Exception {
  String testCollectionName = "c8n_crud_1x2";
  String shardId = "shard1";
  createCollectionRetry(testCollectionName, 1, 2, 1);
  cloudClient.setDefaultCollection(testCollectionName);

  Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, shardId);
  JettySolrRunner leaderJetty = getJettyOnPort(getReplicaPort(leader));

  CoreContainer cores = leaderJetty.getCoreContainer();
  ZkController zkController = cores.getZkController();
  assertNotNull("ZkController is null", zkController);

  Replica notLeader = ensureAllReplicasAreActive(testCollectionName, shardId, 1, 2, maxWaitSecsToSeeAllActive).get(0);

  // Descriptor that presents itself as the current leader's core when updating the LIR state.
  MockCoreDescriptor cd = new MockCoreDescriptor() {
    @Override
    public CloudDescriptor getCloudDescriptor() {
      return new CloudDescriptor(leader.getStr(ZkStateReader.CORE_NAME_PROP), new Properties(), this) {
        @Override
        public String getCoreNodeName() {
          return leader.getName();
        }

        @Override
        public boolean isLeader() {
          return true;
        }
      };
    }
  };

  zkController.updateLeaderInitiatedRecoveryState(testCollectionName, shardId, notLeader.getName(), Replica.State.DOWN, cd, true);
  Map<String, Object> lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
  assertNotNull(lirStateMap);
  assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));

  // test old non-json format handling
  SolrZkClient zkClient = zkController.getZkClient();
  String znodePath = zkController.getLeaderInitiatedRecoveryZnodePath(testCollectionName, shardId, notLeader.getName());
  zkClient.setData(znodePath, "down".getBytes(StandardCharsets.UTF_8), true);
  lirStateMap = zkController.getLeaderInitiatedRecoveryStateObject(testCollectionName, shardId, notLeader.getName());
  assertNotNull(lirStateMap);
  assertSame(Replica.State.DOWN, Replica.State.getState((String) lirStateMap.get(ZkStateReader.STATE_PROP)));
  zkClient.delete(znodePath, -1, false);

  // try to clean up
  attemptCollectionDelete(cloudClient, testCollectionName);
}
Example use of org.apache.solr.client.solrj.embedded.JettySolrRunner in the Apache lucene-solr project.
Class TestCloudRecovery, method leaderRecoverFromLogOnStartupTest.
/**
 * Restarts the whole cluster after indexing four documents and checks that they are found again
 * once all replicas are live and active.
 *
 * <p>The test sets {@code DirectUpdateHandler2.commitOnClose} to false and installs a log-replay
 * finish hook that counts invocations. It adds four documents without issuing a commit (the
 * query before the restart asserts 0 hits), stops and starts every Jetty, then asserts 4 hits,
 * checks the replay-hook count, and checks the summed "REPLICATION.peerSync.time" timer count
 * across all "solr.core." metric registries. Error and skipped counters are read and summed but
 * not asserted on.
 *
 * @throws Exception if a cluster operation fails
 */
@Test
public void leaderRecoverFromLogOnStartupTest() throws Exception {
  AtomicInteger replayCounter = new AtomicInteger(0);
  DirectUpdateHandler2.commitOnClose = false;
  UpdateLog.testing_logReplayFinishHook = replayCounter::incrementAndGet;

  CloudSolrClient cloudClient = cluster.getSolrClient();
  for (String docId : new String[] {"1", "2", "3", "4"}) {
    cloudClient.add(COLLECTION, sdoc("id", docId));
  }

  ModifiableSolrParams matchAll = new ModifiableSolrParams();
  matchAll.set("q", "*:*");
  QueryResponse beforeRestart = cloudClient.query(COLLECTION, matchAll);
  assertEquals(0, beforeRestart.getResults().getNumFound());

  // Bounce every node and wait for the cluster to settle in each direction.
  ChaosMonkey.stop(cluster.getJettySolrRunners());
  boolean allDown = ClusterStateUtil.waitForAllReplicasNotLive(cloudClient.getZkStateReader(), 45000);
  assertTrue("Timeout waiting for all not live", allDown);
  ChaosMonkey.start(cluster.getJettySolrRunners());
  boolean allUp = ClusterStateUtil.waitForAllActiveAndLiveReplicas(cloudClient.getZkStateReader(), COLLECTION, 120000);
  assertTrue("Timeout waiting for all live and active", allUp);

  QueryResponse afterRestart = cloudClient.query(COLLECTION, matchAll);
  assertEquals(4, afterRestart.getResults().getNumFound());

  // Make sure all nodes recovered from the tlog
  if (!onlyLeaderIndexes) {
    assertEquals(4, replayCounter.get());
  } else {
    // Leader election can be kicked off, so 2 tlog replicas will replay their tlog before becoming new leader
    assertTrue(replayCounter.get() >= 2);
  }

  // check metrics
  int peerSyncTotal = 0;
  int peerSyncErrors = 0;
  int peerSyncSkipped = 0;
  for (JettySolrRunner node : cluster.getJettySolrRunners()) {
    SolrMetricManager metricManager = node.getCoreContainer().getMetricManager();
    List<String> coreRegistries = metricManager.registryNames().stream()
        .filter(name -> name.startsWith("solr.core."))
        .collect(Collectors.toList());
    for (String registryName : coreRegistries) {
      Map<String, Metric> coreMetrics = metricManager.registry(registryName).getMetrics();
      Timer syncTimer = (Timer) coreMetrics.get("REPLICATION.peerSync.time");
      Counter syncErrors = (Counter) coreMetrics.get("REPLICATION.peerSync.errors");
      Counter syncSkipped = (Counter) coreMetrics.get("REPLICATION.peerSync.skipped");
      peerSyncTotal += syncTimer.getCount();
      peerSyncErrors += syncErrors.getCount();
      peerSyncSkipped += syncSkipped.getCount();
    }
  }

  if (!onlyLeaderIndexes) {
    assertEquals(2, peerSyncTotal);
  } else {
    assertTrue(peerSyncTotal >= 2);
  }
}
Aggregations