Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
In the class ShardSplitTest, the method assertConsistentReplicas:
private int assertConsistentReplicas(Slice shard) throws SolrServerException, IOException {
  long numFound = Long.MIN_VALUE;
  int count = 0;
  for (Replica replica : shard.getReplicas()) {
    // query each replica directly (distrib=false) and compare its local numFound;
    // try-with-resources ensures the per-replica client is closed
    try (HttpSolrClient client = new HttpSolrClient.Builder(replica.getCoreUrl())
        .withHttpClient(cloudClient.getLbClient().getHttpClient()).build()) {
      QueryResponse response = client.query(new SolrQuery("q", "*:*", "distrib", "false"));
      log.info("Found numFound={} on replica: {}", response.getResults().getNumFound(), replica.getCoreUrl());
      if (numFound == Long.MIN_VALUE) {
        numFound = response.getResults().getNumFound();
      } else {
        assertEquals("Shard " + shard.getName() + " replicas do not have same number of documents", numFound, response.getResults().getNumFound());
      }
      count++;
    }
  }
  return count;
}
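For context, a caller can use the returned count to verify that the expected number of replicas was actually compared. A minimal sketch, assuming the collectionName and SHARD1_0/SHARD1_1 constants used elsewhere on this page (the shard1_0 call also appears in the test below; the shard1_1 call and the replica count of two per sub-shard are assumptions for illustration):

  // Sketch only: after a split, check both sub-shards and assert how many replicas were compared.
  DocCollection coll = cloudClient.getZkStateReader().getClusterState().getCollection(collectionName);
  int checked = assertConsistentReplicas(coll.getSlice(SHARD1_0))
      + assertConsistentReplicas(coll.getSlice(SHARD1_1));
  // assumes two replicas per sub-shard, as asserted for shard1_0 in the test below
  assertEquals("expected two replicas per sub-shard", 4, checked);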
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
In the class ShardSplitTest, the method testSplitStaticIndexReplication:
/*
 * Creates a collection with replicationFactor=1 and splits a shard. Restarts the sub-shard leader node,
 * adds a replica, and ensures the document count matches between the leader and the replica.
 */
public void testSplitStaticIndexReplication() throws Exception {
  waitForThingsToLevelOut(15);
  DocCollection defCol = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
  Replica replica = defCol.getReplicas().get(0);
  String nodeName = replica.getNodeName();
  String collectionName = "testSplitStaticIndexReplication";
  CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 1);
  // some high number so we can create replicas without hindrance
  create.setMaxShardsPerNode(5);
  // we want to create the leader on a fixed node so that we know which one to restart later
  create.setCreateNodeSet(nodeName);
  create.process(cloudClient);
  try (CloudSolrClient client = getCloudSolrClient(zkServer.getZkAddress(), true, cloudClient.getLbClient().getHttpClient())) {
    client.setDefaultCollection(collectionName);
    StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, client, "i1", true);
    try {
      thread.start();
      // give the indexer some time to do its work
      Thread.sleep(1000);
      thread.safeStop();
      thread.join();
      client.commit();
      controlClient.commit();
      CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName);
      splitShard.setShardName(SHARD1);
      String asyncId = splitShard.processAsync(client);
      RequestStatusState state = CollectionAdminRequest.requestStatus(asyncId).waitFor(client, 120);
      if (state == RequestStatusState.COMPLETED) {
        waitForRecoveriesToFinish(collectionName, true);
        // let's wait to see the parent shard become inactive
        CountDownLatch latch = new CountDownLatch(1);
        client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {

          @Override
          public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
            Slice parent = collectionState.getSlice(SHARD1);
            Slice slice10 = collectionState.getSlice(SHARD1_0);
            Slice slice11 = collectionState.getSlice(SHARD1_1);
            if (slice10 != null && slice11 != null
                && parent.getState() == Slice.State.INACTIVE
                && slice10.getState() == Slice.State.ACTIVE
                && slice11.getState() == Slice.State.ACTIVE) {
              latch.countDown();
              // removes the watch
              return true;
            }
            return false;
          }
        });
        latch.await(1, TimeUnit.MINUTES);
        if (latch.getCount() != 0) {
          // sanity check
          fail("Sub-shards did not become active even after waiting for 1 minute");
        }
        int liveNodeCount = client.getZkStateReader().getClusterState().getLiveNodes().size();
        // restart the sub-shard leader node
        boolean restarted = false;
        for (JettySolrRunner jetty : jettys) {
          int port = jetty.getBaseUrl().getPort();
          if (replica.getStr(BASE_URL_PROP).contains(":" + port)) {
            ChaosMonkey.kill(jetty);
            ChaosMonkey.start(jetty);
            restarted = true;
            break;
          }
        }
        if (!restarted) {
          // sanity check
          fail("We could not find a jetty to kill for replica: " + replica.getCoreUrl());
        }
        // add a new replica for the sub-shard
        CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collectionName, SHARD1_0);
        // use the control client because it is less likely to be the node that was restarted,
        // which avoids test flakiness caused by NoHttpResponseExceptions
        String control_collection = client.getZkStateReader().getClusterState().getCollection("control_collection").getReplicas().get(0).getStr(BASE_URL_PROP);
        try (HttpSolrClient control = new HttpSolrClient.Builder(control_collection).withHttpClient(client.getLbClient().getHttpClient()).build()) {
          state = addReplica.processAndWait(control, 30);
        }
        if (state == RequestStatusState.COMPLETED) {
          CountDownLatch newReplicaLatch = new CountDownLatch(1);
          client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {

            @Override
            public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
              if (liveNodes.size() != liveNodeCount) {
                return false;
              }
              Slice slice = collectionState.getSlice(SHARD1_0);
              if (slice.getReplicas().size() == 2) {
                if (slice.getReplicas().stream().noneMatch(r -> r.getState() == Replica.State.RECOVERING)) {
                  // we see both replicas and none of them are recovering
                  newReplicaLatch.countDown();
                  return true;
                }
              }
              return false;
            }
          });
          newReplicaLatch.await(30, TimeUnit.SECONDS);
          // check consistency of the sub-shard replicas explicitly because the checkShardConsistency
          // method doesn't handle new shards/replicas very well
          ClusterState clusterState = client.getZkStateReader().getClusterState();
          DocCollection collection = clusterState.getCollection(collectionName);
          int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
          assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
        } else {
          fail("Adding a replica to sub-shard did not complete even after waiting for 30 seconds! Saw state = " + state.getKey());
        }
      } else {
        fail("We expected shard split to succeed on a static index but it didn't. Found state = " + state.getKey());
      }
    } finally {
      thread.safeStop();
      thread.join();
    }
  }
}
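As a side note on the watcher registrations above: CollectionStateWatcher declares a single onStateChanged method, so the anonymous classes can also be written as lambdas. A minimal sketch of the first watcher in that style, assuming the same client and SHARD1/SHARD1_0/SHARD1_1 constants as above and that CollectionStateWatcher remains a single-method interface in the Solr version under test:

  // Sketch only: lambda form of the watcher that waits for the parent shard to go
  // INACTIVE and both sub-shards to go ACTIVE; returning true removes the watcher.
  CountDownLatch splitLatch = new CountDownLatch(1);
  client.getZkStateReader().registerCollectionStateWatcher(collectionName, (liveNodes, state) -> {
    Slice parent = state.getSlice(SHARD1);
    Slice sub0 = state.getSlice(SHARD1_0);
    Slice sub1 = state.getSlice(SHARD1_1);
    boolean done = sub0 != null && sub1 != null
        && parent.getState() == Slice.State.INACTIVE
        && sub0.getState() == Slice.State.ACTIVE
        && sub1.getState() == Slice.State.ACTIVE;
    if (done) {
      splitLatch.countDown();
    }
    return done;
  });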
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
In the class ShardSplitTest, the method checkSubShardConsistency:
protected void checkSubShardConsistency(String shard) throws SolrServerException, IOException {
  SolrQuery query = new SolrQuery("*:*").setRows(1000).setFields("id", "_version_");
  query.set("distrib", false);
  ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
  Slice slice = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, shard);
  long[] numFound = new long[slice.getReplicasMap().size()];
  int c = 0;
  for (Replica replica : slice.getReplicas()) {
    String coreUrl = new ZkCoreNodeProps(replica).getCoreUrl();
    QueryResponse response;
    try (HttpSolrClient client = getHttpSolrClient(coreUrl)) {
      response = client.query(query);
    }
    numFound[c++] = response.getResults().getNumFound();
    log.info("Shard: {} Replica: {} has {} docs", shard, coreUrl, response.getResults().getNumFound());
    assertTrue("Shard: " + shard + " Replica: " + coreUrl + " has 0 docs", response.getResults().getNumFound() > 0);
  }
  for (int i = 0; i < slice.getReplicasMap().size(); i++) {
    assertEquals(shard + " is not consistent", numFound[0], numFound[i]);
  }
}
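The query above already fetches id and _version_ for up to 1000 documents, but only the counts are compared. If a stricter check were wanted, the fetched documents themselves could be compared across replicas; a hedged sketch of that idea, reusing the query, slice, and getHttpSolrClient helper from the method above (the map-based comparison is an assumption for illustration, not code from the project):

  // Sketch only: collect id -> _version_ per replica and require every replica to
  // report the same mapping (limited to the documents returned by the query above).
  Map<String, Map<Object, Object>> versionsPerReplica = new HashMap<>();
  for (Replica replica : slice.getReplicas()) {
    String coreUrl = new ZkCoreNodeProps(replica).getCoreUrl();
    Map<Object, Object> versions = new HashMap<>();
    try (HttpSolrClient client = getHttpSolrClient(coreUrl)) {
      for (SolrDocument doc : client.query(query).getResults()) {
        versions.put(doc.getFieldValue("id"), doc.getFieldValue("_version_"));
      }
    }
    versionsPerReplica.put(coreUrl, versions);
  }
  // all per-replica maps should be identical
  assertEquals("sub-shard replicas disagree on document versions", 1, new HashSet<>(versionsPerReplica.values()).size());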
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
In the class DistribDocExpirationUpdateProcessorTest, the method getIndexVersionOfAllReplicas:
/**
 * Returns a map whose key is the core name and whose value is what the replication
 * handler reports for the indexversion command.
 */
private Map<String, Long> getIndexVersionOfAllReplicas() throws IOException, SolrServerException {
  Map<String, Long> results = new HashMap<String, Long>();
  DocCollection collectionState = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION);
  for (Replica replica : collectionState.getReplicas()) {
    String coreName = replica.getCoreName();
    try (HttpSolrClient client = getHttpSolrClient(replica.getCoreUrl())) {
      ModifiableSolrParams params = new ModifiableSolrParams();
      params.set("command", "indexversion");
      params.set("_trace", "getIndexVersion");
      params.set("qt", ReplicationHandler.PATH);
      QueryRequest req = new QueryRequest(params);
      NamedList<Object> res = client.request(req);
      assertNotNull("null response from server: " + coreName, res);
      Object version = res.get("indexversion");
      assertNotNull("null version from server: " + coreName, version);
      assertTrue("version isn't a long: " + coreName, version instanceof Long);
      results.put(coreName, (Long) version);
      long numDocs = client.query(params("q", "*:*", "distrib", "false", "rows", "0", "_trace", "counting_docs")).getResults().getNumFound();
      log.info("core={}; ver={}; numDocs={}", coreName, version, numDocs);
    }
  }
  return results;
}
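As a usage note, two snapshots returned by this helper can be diffed to see which cores changed between operations; the test below performs the same comparison explicitly with a loop. A minimal sketch of the stream-based variant (the before/after names and the surrounding update step are hypothetical):

  // Sketch only: given two snapshots of core -> indexversion, collect the cores whose
  // version changed. Assumes both maps cover the same set of cores.
  Map<String, Long> before = getIndexVersionOfAllReplicas();
  // ... perform the update under test, e.g. add or expire documents ...
  Map<String, Long> after = getIndexVersionOfAllReplicas();
  Set<String> changedCores = before.keySet().stream()
      .filter(core -> !Objects.equals(before.get(core), after.get(core)))
      .collect(Collectors.toSet());
  log.info("cores with a changed indexversion: {}", changedCores);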
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
In the class DistribDocExpirationUpdateProcessorTest, the method test:
@Test
public void test() throws Exception {
  // some docs with no expiration
  UpdateRequest req1 = new UpdateRequest();
  for (int i = 1; i <= 100; i++) {
    req1.add(sdoc("id", i));
  }
  req1.commit(cluster.getSolrClient(), COLLECTION);
  // this doc better not already exist
  waitForNoResults(0, params("q", "id:999", "rows", "0", "_trace", "sanity_check"));
  // record the indexversion for each server so we can check later
  // that it only changes for one shard
  final Map<String, Long> initIndexVersions = getIndexVersionOfAllReplicas();
  assertTrue("WTF? no versions?", 0 < initIndexVersions.size());
  // add a doc with a short TTL
  new UpdateRequest().add(sdoc("id", "999", "tTl_s", "+30SECONDS")).commit(cluster.getSolrClient(), COLLECTION);
  // wait for one doc to be deleted
  waitForNoResults(180, params("q", "id:999", "rows", "0", "_trace", "did_it_expire_yet"));
  // verify only one shard changed
  final Map<String, Long> finalIndexVersions = getIndexVersionOfAllReplicas();
  assertEquals("WTF? not same num versions?", initIndexVersions.size(), finalIndexVersions.size());
  final Set<String> nodesThatChange = new HashSet<String>();
  final Set<String> shardsThatChange = new HashSet<String>();
  int coresCompared = 0;
  DocCollection collectionState = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION);
  for (Replica replica : collectionState.getReplicas()) {
    coresCompared++;
    String name = replica.getName();
    String core = replica.getCoreName();
    Long initVersion = initIndexVersions.get(core);
    Long finalVersion = finalIndexVersions.get(core);
    assertNotNull(name + ": no init version for core: " + core, initVersion);
    assertNotNull(name + ": no final version for core: " + core, finalVersion);
    if (!initVersion.equals(finalVersion)) {
      nodesThatChange.add(core + "(" + name + ")");
      shardsThatChange.add(name);
    }
  }
  assertEquals("Exactly one shard should have changed, instead: " + shardsThatChange + " nodes=(" + nodesThatChange + ")", 1, shardsThatChange.size());
  assertEquals("somehow we missed some cores?", initIndexVersions.size(), coresCompared);
  // TODO: above logic verifies that deleteByQuery happens on all nodes, and ...
  // doesn't affect searcher re-open on shards w/o expired docs ... can we also verify
  // that *only* one node is sending the deletes ?
  // (ie: no flood of redundant deletes?)
}