Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
The class ReplicaPropertiesBase, method verifyPropertyNotPresent.
public static void verifyPropertyNotPresent(CloudSolrClient client, String collectionName, String replicaName, String property) throws KeeperException, InterruptedException {
  ClusterState clusterState = null;
  Replica replica = null;
  for (int idx = 0; idx < 300; ++idx) {
    clusterState = client.getZkStateReader().getClusterState();
    replica = clusterState.getReplica(collectionName, replicaName);
    if (replica == null) {
      fail("Could not find collection/replica pair! " + collectionName + "/" + replicaName);
    }
    if (StringUtils.isBlank(replica.getStr(property)))
      return;
    Thread.sleep(100);
  }
  fail("Property " + property + " not set correctly for collection/replica pair: " + collectionName + "/" + replicaName + ". Replica props: " + replica.getProperties().toString() + ". Cluster state is " + clusterState.toString());
}
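The same polling pattern can be inverted to assert that a property is present. Below is a minimal sketch of a hypothetical companion helper, reusing the parameter names and imports of the method above; the real base class offers similar verify* helpers, but this exact method is illustrative only.
public static void verifyPropertyPresent(CloudSolrClient client, String collectionName, String replicaName, String property) throws KeeperException, InterruptedException {
  // Hypothetical companion to verifyPropertyNotPresent: poll every 100 ms,
  // up to ~30 seconds, until the replica reports a non-blank property value.
  for (int idx = 0; idx < 300; ++idx) {
    ClusterState clusterState = client.getZkStateReader().getClusterState();
    Replica replica = clusterState.getReplica(collectionName, replicaName);
    if (replica != null && StringUtils.isNotBlank(replica.getStr(property))) {
      return;
    }
    Thread.sleep(100);
  }
  fail("Property " + property + " never appeared for collection/replica pair: " + collectionName + "/" + replicaName);
}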
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
The class ReplicationFactorTest, method testRf3.
protected void testRf3() throws Exception {
  int numShards = 1;
  int replicationFactor = 3;
  int maxShardsPerNode = 1;
  String testCollectionName = "repfacttest_c8n_1x3";
  String shardId = "shard1";
  int minRf = 2;
  createCollection(testCollectionName, numShards, replicationFactor, maxShardsPerNode);
  cloudClient.setDefaultCollection(testCollectionName);
  List<Replica> replicas = ensureAllReplicasAreActive(testCollectionName, shardId, numShards, replicationFactor, 30);
  assertTrue("Expected 2 active replicas for " + testCollectionName, replicas.size() == 2);
  int rf = sendDoc(1, minRf);
  assertRf(3, "all replicas should be active", rf);
  getProxyForReplica(replicas.get(0)).close();
  rf = sendDoc(2, minRf);
  assertRf(2, "one replica should be down", rf);
  getProxyForReplica(replicas.get(1)).close();
  rf = sendDoc(3, minRf);
  assertRf(1, "both replicas should be down", rf);
  // heal the partitions
  getProxyForReplica(replicas.get(0)).reopen();
  getProxyForReplica(replicas.get(1)).reopen();
  // give time for the healed partition to get propagated
  Thread.sleep(2000);
  ensureAllReplicasAreActive(testCollectionName, shardId, numShards, replicationFactor, 30);
  rf = sendDoc(4, minRf);
  assertRf(3, "partitions to replicas have been healed", rf);
  // now send a batch
  List<SolrInputDocument> batch = new ArrayList<SolrInputDocument>(10);
  for (int i = 5; i < 15; i++) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField(id, String.valueOf(i));
    doc.addField("a_t", "hello" + i);
    batch.add(doc);
  }
  int batchRf = sendDocsWithRetry(batch, minRf, 5, 1);
  assertRf(3, "batch should have succeeded on all replicas", batchRf);
  // add some chaos to the batch
  getProxyForReplica(replicas.get(0)).close();
  // now send a batch
  batch = new ArrayList<SolrInputDocument>(10);
  for (int i = 15; i < 30; i++) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField(id, String.valueOf(i));
    doc.addField("a_t", "hello" + i);
    batch.add(doc);
  }
  batchRf = sendDocsWithRetry(batch, minRf, 5, 1);
  assertRf(2, "batch should have succeeded on 2 replicas (only one replica should be down)", batchRf);
  // close the 2nd replica, and send a 3rd batch with expected achieved rf=1
  getProxyForReplica(replicas.get(1)).close();
  batch = new ArrayList<SolrInputDocument>(10);
  for (int i = 30; i < 45; i++) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField(id, String.valueOf(i));
    doc.addField("a_t", "hello" + i);
    batch.add(doc);
  }
  batchRf = sendDocsWithRetry(batch, minRf, 5, 1);
  assertRf(1, "batch should have succeeded on the leader only (both replicas should be down)", batchRf);
  getProxyForReplica(replicas.get(0)).reopen();
  getProxyForReplica(replicas.get(1)).reopen();
  Thread.sleep(2000);
  ensureAllReplicasAreActive(testCollectionName, shardId, numShards, replicationFactor, 30);
}
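sendDoc and sendDocsWithRetry are helpers defined elsewhere in ReplicationFactorTest; the achieved replication factor itself comes back on the update response. Below is a minimal sketch of that round trip, assuming the cloudClient and minRf variables above. The "min_rf" request parameter and CloudSolrClient.getMinAchievedReplicationFactor are the relevant public pieces; the actual helper code may differ.
UpdateRequest up = new UpdateRequest();
// min_rf asks the leader to report how many replicas acknowledged the update
up.setParam("min_rf", String.valueOf(minRf));
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "1");
up.add(doc);
NamedList<Object> resp = cloudClient.request(up);
// extract the achieved replication factor reported by the leader
int achievedRf = cloudClient.getMinAchievedReplicationFactor(cloudClient.getDefaultCollection(), resp);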
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
The class ForceLeaderTest, method setReplicaState.
protected void setReplicaState(String collection, String slice, Replica replica, Replica.State state) throws SolrServerException, IOException, KeeperException, InterruptedException {
  DistributedQueue inQueue = Overseer.getStateUpdateQueue(cloudClient.getZkStateReader().getZkClient());
  ZkStateReader zkStateReader = cloudClient.getZkStateReader();
  String baseUrl = zkStateReader.getBaseUrlForNodeName(replica.getNodeName());
  ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.STATE.toLower(),
      ZkStateReader.BASE_URL_PROP, baseUrl,
      ZkStateReader.NODE_NAME_PROP, replica.getNodeName(),
      ZkStateReader.SHARD_ID_PROP, slice,
      ZkStateReader.COLLECTION_PROP, collection,
      ZkStateReader.CORE_NAME_PROP, replica.getStr(CORE_NAME_PROP),
      ZkStateReader.CORE_NODE_NAME_PROP, replica.getName(),
      ZkStateReader.STATE_PROP, state.toString());
  inQueue.offer(Utils.toJSON(m));
  boolean transition = false;
  Replica.State replicaState = null;
  for (int counter = 10; counter > 0; counter--) {
    ClusterState clusterState = zkStateReader.getClusterState();
    replicaState = clusterState.getSlice(collection, slice).getReplica(replica.getName()).getState();
    if (replicaState == state) {
      transition = true;
      break;
    }
    Thread.sleep(1000);
  }
  if (!transition) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not set replica [" + replica.getName() + "] as " + state + ". Last known state of the replica: " + replicaState);
  }
}
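A typical call site pushes a replica to DOWN before exercising a failure scenario. The minimal usage sketch below assumes the cloudClient field from the test base class; "collection1" and "shard1" are placeholder names.
// Hedged usage sketch: mark the current shard leader as DOWN and wait for the
// change to be reflected in the cluster state (setReplicaState above does the waiting).
Replica target = cloudClient.getZkStateReader().getClusterState()
    .getSlice("collection1", "shard1").getLeader();
setReplicaState("collection1", "shard1", target, Replica.State.DOWN);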
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
The class ForceLeaderTest, method testLastPublishedStateIsActive.
/**
 * Test that FORCELEADER can set last published state of all down (live) replicas to active (so
 * that they become worthy candidates for leader election).
 */
@Slow
public void testLastPublishedStateIsActive() throws Exception {
  handle.put("maxScore", SKIPVAL);
  handle.put("timestamp", SKIPVAL);
  String testCollectionName = "forceleader_last_published";
  createCollection(testCollectionName, 1, 3, 1);
  cloudClient.setDefaultCollection(testCollectionName);
  log.info("Collection created: " + testCollectionName);
  try {
    List<Replica> notLeaders = ensureAllReplicasAreActive(testCollectionName, SHARD1, 1, 3, maxWaitSecsToSeeAllActive);
    assertEquals("Expected 2 replicas for collection " + testCollectionName + " but found " + notLeaders.size() + "; clusterState: " + printClusterStateInfo(testCollectionName), 2, notLeaders.size());
    Replica leader = cloudClient.getZkStateReader().getLeaderRetry(testCollectionName, SHARD1);
    JettySolrRunner notLeader0 = getJettyOnPort(getReplicaPort(notLeaders.get(0)));
    ZkController zkController = notLeader0.getCoreContainer().getZkController();
    // Mark all replicas down
    setReplicaState(testCollectionName, SHARD1, leader, State.DOWN);
    for (Replica rep : notLeaders) {
      setReplicaState(testCollectionName, SHARD1, rep, State.DOWN);
    }
    zkController.getZkStateReader().forceUpdateCollection(testCollectionName);
    // Assert all replicas are down and that there is no leader
    assertEquals(0, getActiveOrRecoveringReplicas(testCollectionName, SHARD1).size());
    // Now force leader
    doForceLeader(cloudClient, testCollectionName, SHARD1);
    // Assert that last published states of the two replicas are active now
    for (Replica rep : notLeaders) {
      assertEquals(Replica.State.ACTIVE, getLastPublishedState(testCollectionName, SHARD1, rep));
    }
  } finally {
    log.info("Cleaning up after the test.");
    attemptCollectionDelete(cloudClient, testCollectionName);
  }
}
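doForceLeader is a small helper in ForceLeaderTest; the FORCELEADER action can be issued through SolrJ roughly as in the sketch below, assuming the CollectionAdminRequest.forceLeaderElection factory is available in this version. The helper's actual body may differ.
// Hedged sketch: trigger the FORCELEADER collection action for one shard.
CollectionAdminRequest.ForceLeader forceLeader =
    CollectionAdminRequest.forceLeaderElection(testCollectionName, SHARD1);
cloudClient.request(forceLeader);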
Use of org.apache.solr.common.cloud.Replica in project lucene-solr by apache.
The class RecoveryAfterSoftCommitTest, method test.
@Test
public void test() throws Exception {
  waitForRecoveriesToFinish(DEFAULT_COLLECTION, true);
  // flush twice
  int i = 0;
  for (; i < MAX_BUFFERED_DOCS + 1; i++) {
    SolrInputDocument document = new SolrInputDocument();
    document.addField("id", String.valueOf(i));
    document.addField("a_t", "text_" + i);
    cloudClient.add(document);
  }
  // soft-commit so searchers are open on un-committed but flushed segment files
  AbstractUpdateRequest request = new UpdateRequest().setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true, true);
  cloudClient.request(request);
  Replica notLeader = ensureAllReplicasAreActive(DEFAULT_COLLECTION, "shard1", 1, 2, 30).get(0);
  // ok, now introduce a network partition between the leader and the replica
  SocketProxy proxy = getProxyForReplica(notLeader);
  proxy.close();
  // add more than ULOG_NUM_RECORDS_TO_KEEP docs so that peer sync cannot be used for recovery
  int MAX_DOCS = 2 + MAX_BUFFERED_DOCS + ULOG_NUM_RECORDS_TO_KEEP;
  for (; i < MAX_DOCS; i++) {
    SolrInputDocument document = new SolrInputDocument();
    document.addField("id", String.valueOf(i));
    document.addField("a_t", "text_" + i);
    cloudClient.add(document);
  }
  // Have the partition last at least 1 sec.
  // While this gives the impression that recovery is timing related, this is really only
  // to give time for the state to be written to ZK before the test completes. In other words,
  // without a brief pause, the test finishes so quickly that it doesn't give time for the
  // recovery process to kick in.
  Thread.sleep(2000L);
  proxy.reopen();
  List<Replica> notLeaders = ensureAllReplicasAreActive(DEFAULT_COLLECTION, "shard1", 1, 2, 30);
}
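ensureAllReplicasAreActive is a base-class helper that returns the active non-leader replicas; the same split can also be derived directly from the cluster state. A minimal sketch follows, assuming DocCollection and Slice from org.apache.solr.common.cloud alongside Replica; a full helper would additionally check the live nodes.
// Hedged sketch: collect the active replicas of shard1 that are not the leader.
DocCollection coll = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION);
Slice slice = coll.getSlice("shard1");
Replica leader = slice.getLeader();
List<Replica> nonLeaders = new ArrayList<>();
for (Replica r : slice.getReplicas()) {
  if (!r.getName().equals(leader.getName()) && r.getState() == Replica.State.ACTIVE) {
    nonLeaders.add(r);
  }
}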