Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class ForceLeaderTest, method unsetLeader:
protected void unsetLeader(String collection, String slice) throws Exception {
  DistributedQueue inQueue = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
  ZkStateReader zkStateReader = cloudClient.getZkStateReader();
  ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(),
      ZkStateReader.SHARD_ID_PROP, slice,
      ZkStateReader.COLLECTION_PROP, collection);
  inQueue.offer(Utils.toJSON(m));
  ClusterState clusterState = null;
  boolean transition = false;
  for (int counter = 10; counter > 0; counter--) {
    clusterState = zkStateReader.getClusterState();
    Replica newLeader = clusterState.getSlice(collection, slice).getLeader();
    if (newLeader == null) {
      transition = true;
      break;
    }
    Thread.sleep(1000);
  }
  if (!transition) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "Could not unset replica leader" + ". Cluster state: " + printClusterStateInfo(collection));
  }
}
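The message offered to the Overseer state-update queue above is simply a ZkNodeProps serialized with Utils.toJSON. The sketch below shows that round trip in isolation; it is a minimal illustration assuming the same Solr classes as the snippet, and the operation string, collection name, and shard name are placeholders rather than values taken from the test.

// Hedged sketch: build, serialize, and reload an Overseer-style ZkNodeProps message.
static void zkNodePropsRoundTripSketch() {
  // "operation"/"leader" mirror Overseer.QUEUE_OPERATION and OverseerAction.LEADER.toLower();
  // "myCollection" and "shard1" are placeholder names.
  ZkNodeProps message = new ZkNodeProps(
      "operation", "leader",
      ZkStateReader.COLLECTION_PROP, "myCollection",
      ZkStateReader.SHARD_ID_PROP, "shard1");
  byte[] bytes = Utils.toJSON(message);               // the bytes handed to DistributedQueue.offer(...)
  ZkNodeProps roundTripped = ZkNodeProps.load(bytes); // what the Overseer reads back off the queue
  assert "shard1".equals(roundTripped.getStr(ZkStateReader.SHARD_ID_PROP));
}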
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class LeaderElectionTest, method testCancelElection:
@Test
public void testCancelElection() throws Exception {
  LeaderElector first = new LeaderElector(zkClient);
  ZkNodeProps props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr/",
      ZkStateReader.CORE_NAME_PROP, "1");
  ElectionContext firstContext = new ShardLeaderElectionContextBase(first, "slice1", "collection2",
      "dummynode1", props, zkStateReader);
  first.setup(firstContext);
  first.joinElection(firstContext, false);
  Thread.sleep(1000);
  assertEquals("original leader was not registered", "http://127.0.0.1/solr/1/",
      getLeaderUrl("collection2", "slice1"));
  LeaderElector second = new LeaderElector(zkClient);
  props = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, "http://127.0.0.1/solr/",
      ZkStateReader.CORE_NAME_PROP, "2");
  ElectionContext context = new ShardLeaderElectionContextBase(second, "slice1", "collection2",
      "dummynode2", props, zkStateReader);
  second.setup(context);
  second.joinElection(context, false);
  Thread.sleep(1000);
  assertEquals("original leader should have stayed leader", "http://127.0.0.1/solr/1/",
      getLeaderUrl("collection2", "slice1"));
  firstContext.cancelElection();
  Thread.sleep(1000);
  assertEquals("new leader was not registered", "http://127.0.0.1/solr/2/",
      getLeaderUrl("collection2", "slice1"));
}
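The leader URLs asserted above are derived from the two properties stored in each ZkNodeProps: the base URL followed by the core name and a trailing slash. A minimal sketch of reading those properties back, assuming only ZkNodeProps and ZkStateReader; the helper name leaderUrlOf is made up for illustration.

// Hypothetical helper: rebuild the registered leader URL from the election props.
// Assumes the base URL already ends with "/", as it does in the test above.
static String leaderUrlOf(ZkNodeProps props) {
  String baseUrl = props.getStr(ZkStateReader.BASE_URL_PROP);   // e.g. "http://127.0.0.1/solr/"
  String coreName = props.getStr(ZkStateReader.CORE_NAME_PROP); // e.g. "1"
  return baseUrl + coreName + "/";                              // "http://127.0.0.1/solr/1/"
}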
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class TestConfigsApi, method testCommands:
public void testCommands() throws Exception {
  ConfigSetsHandler handler = new ConfigSetsHandler(null) {

    @Override
    protected void sendToZk(SolrQueryResponse rsp, ConfigSetOperation operation, Map<String, Object> result)
        throws KeeperException, InterruptedException {
      result.put(QUEUE_OPERATION, operation.action.toLower());
      rsp.add(ZkNodeProps.class.getName(), new ZkNodeProps(result));
    }
  };
  ApiBag apiBag = new ApiBag(false);
  for (Api api : handler.getApis()) apiBag.register(api, EMPTY_MAP);
  compareOutput(apiBag, "/cluster/configs/sample", DELETE, null, null,
      "{name :sample, operation:delete}");
  compareOutput(apiBag, "/cluster/configs", POST, "{create:{name : newconf, baseConfigSet: sample }}", null,
      "{operation:create, name :newconf, baseConfigSet: sample, immutable: false }");
}
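Besides the varargs constructor used in the other snippets, ZkNodeProps can wrap an existing Map, which is what the overridden sendToZk does above with the parsed command. A minimal sketch of that constructor follows, with a placeholder map shaped like the "create" operation in the test.

// Hedged sketch of the Map-backed constructor; the map contents are placeholders.
static void mapBackedPropsSketch() {
  Map<String, Object> result = new HashMap<>();
  result.put("operation", "create");     // mirrors QUEUE_OPERATION in the handler above
  result.put("name", "newconf");
  result.put("baseConfigSet", "sample");
  ZkNodeProps props = new ZkNodeProps(result);
  assert "create".equals(props.getStr("operation"));
  assert props.containsKey("baseConfigSet");
}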
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class TestRandomRequestDistribution, method testQueryAgainstDownReplica:
/**
 * Asserts that requests against a collection are only served by an 'active' local replica.
 */
private void testQueryAgainstDownReplica() throws Exception {
  log.info("Creating collection 'football' with 1 shard and 2 replicas");
  CollectionAdminRequest.createCollection("football", 1, 2)
      .setCreateNodeSet(nodeNames.get(0) + ',' + nodeNames.get(1))
      .process(cloudClient);
  waitForRecoveriesToFinish("football", true);
  cloudClient.getZkStateReader().forceUpdateCollection("football");
  Replica leader = null;
  Replica notLeader = null;
  Collection<Replica> replicas = cloudClient.getZkStateReader().getClusterState().getSlice("football", "shard1").getReplicas();
  for (Replica replica : replicas) {
    if (replica.getStr(ZkStateReader.LEADER_PROP) != null) {
      leader = replica;
    } else {
      notLeader = replica;
    }
  }
  // Simulate a replica being in down state.
  ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
      ZkStateReader.BASE_URL_PROP, notLeader.getStr(ZkStateReader.BASE_URL_PROP),
      ZkStateReader.NODE_NAME_PROP, notLeader.getStr(ZkStateReader.NODE_NAME_PROP),
      ZkStateReader.COLLECTION_PROP, "football",
      ZkStateReader.SHARD_ID_PROP, "shard1",
      ZkStateReader.CORE_NAME_PROP, notLeader.getStr(ZkStateReader.CORE_NAME_PROP),
      ZkStateReader.ROLES_PROP, "",
      ZkStateReader.STATE_PROP, Replica.State.DOWN.toString());
  log.info("Forcing {} to go into 'down' state", notLeader.getStr(ZkStateReader.CORE_NAME_PROP));
  DistributedQueue q = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
  q.offer(Utils.toJSON(m));
  verifyReplicaStatus(cloudClient.getZkStateReader(), "football", "shard1", notLeader.getName(), Replica.State.DOWN);
  // Query against the node which hosts the down replica
  String baseUrl = notLeader.getStr(ZkStateReader.BASE_URL_PROP);
  if (!baseUrl.endsWith("/")) baseUrl += "/";
  String path = baseUrl + "football";
  log.info("Firing queries against path=" + path);
  try (HttpSolrClient client = getHttpSolrClient(path)) {
    client.setSoTimeout(5000);
    client.setConnectionTimeout(2000);
    SolrCore leaderCore = null;
    for (JettySolrRunner jetty : jettys) {
      CoreContainer container = jetty.getCoreContainer();
      for (SolrCore core : container.getCores()) {
        if (core.getName().equals(leader.getStr(ZkStateReader.CORE_NAME_PROP))) {
          leaderCore = core;
          break;
        }
      }
    }
    assertNotNull(leaderCore);
    SolrMetricManager leaderMetricManager = leaderCore.getCoreContainer().getMetricManager();
    String leaderRegistry = leaderCore.getCoreMetricManager().getRegistryName();
    Counter cnt = leaderMetricManager.counter(null, leaderRegistry, "requests", "QUERY.standard");
    // All queries should be served by the active replica. To make sure that's true we keep
    // querying the down replica: if queries are still being processed by the down replica, the
    // cluster state hasn't been updated locally for that replica yet. So we keep trying until it
    // has updated, and then verify that ALL queries go to the active replica.
    long count = 0;
    while (true) {
      count++;
      client.query(new SolrQuery("*:*"));
      long c = cnt.getCount();
      if (c == 1) {
        // cluster state has been updated locally
        break;
      } else {
        Thread.sleep(100);
      }
      if (count > 10000) {
        fail("After 10k queries we still see all requests being processed by the down replica");
      }
    }
    // Now we fire a few additional queries and make sure ALL of them
    // are served by the active replica.
    int moreQueries = TestUtil.nextInt(random(), 4, 10);
    // Since 1 query has already hit the leader
    count = 1;
    for (int i = 0; i < moreQueries; i++) {
      client.query(new SolrQuery("*:*"));
      count++;
      long c = cnt.getCount();
      assertEquals("Query wasn't served by leader", count, c);
    }
  }
}
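verifyReplicaStatus above waits until the cluster state reflects the 'down' state that was pushed through the Overseer queue. The loop below is a simplified, hedged sketch of that kind of wait, using only the ClusterState, Slice, and Replica accessors already shown in this snippet; the method name and timeout are illustrative and not the actual test helper.

// Illustrative wait loop: poll the locally cached cluster state until the named
// replica reaches the expected state, or give up after roughly 30 seconds.
static void waitForReplicaState(ZkStateReader zkStateReader, String collection, String shard,
                                String replicaName, Replica.State expected) throws InterruptedException {
  for (int i = 0; i < 300; i++) {
    Replica replica = zkStateReader.getClusterState().getSlice(collection, shard).getReplica(replicaName);
    if (replica != null && replica.getState() == expected) {
      return;
    }
    Thread.sleep(100);
  }
  throw new AssertionError(replicaName + " never reached state " + expected);
}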
Use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
From the class ChaosMonkey, method getRandomJetty:
public CloudJettyRunner getRandomJetty(String slice, boolean aggressivelyKillLeaders) throws KeeperException, InterruptedException {
  int numActive = 0;
  numActive = checkIfKillIsLegal(slice, numActive);
  // TODO: stale state makes this a tough call
  if (numActive < 2) {
    // we cannot kill anyone
    monkeyLog("only one active node in shard - monkey cannot kill :(");
    return null;
  }
  // let's check the deadpool count
  int numRunning = 0;
  for (CloudJettyRunner cjetty : shardToJetty.get(slice)) {
    if (!deadPool.contains(cjetty)) {
      numRunning++;
    }
  }
  if (numRunning < 2) {
    // we cannot kill anyone
    monkeyLog("only one active node in shard - monkey cannot kill :(");
    return null;
  }
  boolean canKillIndexer = canKillIndexer(slice);
  if (!canKillIndexer) {
    monkeyLog("Number of indexer nodes (nrt or tlog replicas) is not enough to kill one of them, Will only choose a pull replica to kill");
  }
  int chance = chaosRandom.nextInt(10);
  CloudJettyRunner cjetty = null;
  if (chance <= 5 && aggressivelyKillLeaders && canKillIndexer) {
    // if killLeader, really aggressively go after leaders
    cjetty = shardToLeaderJetty.get(slice);
  } else {
    List<CloudJettyRunner> jetties = shardToJetty.get(slice);
    // get random node
    int attempt = 0;
    while (true) {
      attempt++;
      int index = chaosRandom.nextInt(jetties.size());
      cjetty = jetties.get(index);
      if (canKillIndexer || getTypeForJetty(slice, cjetty) == Replica.Type.PULL) {
        break;
      } else if (attempt > 20) {
        monkeyLog("Can't kill indexer nodes (nrt or tlog replicas) and couldn't find a random pull node after 20 attempts - monkey cannot kill :(");
        return null;
      }
    }
    ZkNodeProps leader = null;
    try {
      leader = zkStateReader.getLeaderRetry(collection, slice);
    } catch (Throwable t) {
      log.error("Could not get leader", t);
      return null;
    }
    // cluster state can be stale - also go by our 'near real-time' is leader prop
    boolean rtIsLeader;
    CoreContainer cc = cjetty.jetty.getCoreContainer();
    if (cc != null) {
      try (SolrCore core = cc.getCore(leader.getStr(ZkStateReader.CORE_NAME_PROP))) {
        if (core == null) {
          monkeyLog("selected jetty not running correctly - skip");
          return null;
        }
        rtIsLeader = core.getCoreDescriptor().getCloudDescriptor().isLeader();
      }
    } else {
      return null;
    }
    boolean isLeader = leader.getStr(ZkStateReader.NODE_NAME_PROP).equals(cjetty.nodeName) || rtIsLeader;
    if (!aggressivelyKillLeaders && isLeader) {
      // we don't kill leaders...
      monkeyLog("abort! I don't kill leaders");
      return null;
    }
  }
  if (cjetty.jetty.getLocalPort() == -1) {
    // we can't kill the dead
    monkeyLog("abort! This guy is already dead");
    return null;
  }
  //System.out.println("num active:" + numActive + " for " + slice + " sac:" + jetty.getLocalPort());
  monkeyLog("chose a victim! " + cjetty.jetty.getLocalPort());
  return cjetty;
}
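The leader returned by zkStateReader.getLeaderRetry(...) is itself a ZkNodeProps, so the node-name and core-name checks above are plain property lookups. A hedged sketch of that comparison in isolation; the method and parameter names here are made up for illustration.

// Illustrative: is the candidate jetty the ZooKeeper-registered leader for the slice?
static boolean isRegisteredLeader(ZkNodeProps leaderProps, String candidateNodeName) {
  // NODE_NAME_PROP identifies the Solr node; the test above also reads CORE_NAME_PROP
  // from the same props to open the local SolrCore.
  String leaderNodeName = leaderProps.getStr(ZkStateReader.NODE_NAME_PROP);
  return candidateNodeName.equals(leaderNodeName);
}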