use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class BasicDistributedZkTest method getLeaderUrlFromZk.
/**
 * Resolves the current leader of a slice from the cluster state held by the common
 * cloud client's ZooKeeper state reader.
 *
 * @param collection name of the collection to look in
 * @param slice name of the slice whose leader is wanted
 * @return the leader's properties wrapped in a {@link ZkCoreNodeProps}
 * @throws RuntimeException if the cluster state reports no leader for the slice
 */
protected ZkCoreNodeProps getLeaderUrlFromZk(String collection, String slice) {
  final ZkNodeProps leaderProps =
      getCommonCloudSolrClient().getZkStateReader().getClusterState().getLeader(collection, slice);
  if (leaderProps != null) {
    return new ZkCoreNodeProps(leaderProps);
  }
  throw new RuntimeException("Could not find leader:" + collection + " " + slice);
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class BasicDistributedZkTest method testANewCollectionInOneInstanceWithManualShardAssignement.
/**
 * Creates four cores of {@code oneInstanceCollection2} on a single node with explicitly
 * assigned slices (slice1, slice2, slice2, slice1), indexes three documents, and checks
 * that a distributed query over the collection finds all three. It then verifies that a
 * slice1 replica carries the role "none", unloads the current slice1 leader core, and
 * waits for the leader URL reported by ZooKeeper to change.
 *
 * @throws Exception if any cluster operation fails
 */
private void testANewCollectionInOneInstanceWithManualShardAssignement() throws Exception {
log.info("### STARTING testANewCollectionInOneInstanceWithManualShardAssignement");
// numShards is cleared before the cores are created with explicit slice names
System.clearProperty("numShards");
List<SolrClient> collectionClients = new ArrayList<>();
SolrClient client = clients.get(0);
// drop the last DEFAULT_COLLECTION.length() + 1 characters of the client URL
// (presumably the trailing "/" + DEFAULT_COLLECTION) to get the node-level base URL
final String baseUrl = ((HttpSolrClient) client).getBaseURL().substring(0, ((HttpSolrClient) client).getBaseURL().length() - DEFAULT_COLLECTION.length() - 1);
createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1");
createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2");
createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2");
createSolrCore(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1");
// block until every pending future has completed
// NOTE(review): presumably these are the asynchronous core creations above - confirm in createSolrCore
while (pending != null && pending.size() > 0) {
Future<Object> future = completionService.take();
pending.remove(future);
}
SolrClient client1 = collectionClients.get(0);
SolrClient client2 = collectionClients.get(1);
SolrClient client3 = collectionClients.get(2);
SolrClient client4 = collectionClients.get(3);
// no one should be recovering
waitForRecoveriesToFinish(oneInstanceCollection2, getCommonCloudSolrClient().getZkStateReader(), false, true);
assertAllActive(oneInstanceCollection2, getCommonCloudSolrClient().getZkStateReader());
//printLayout();
// TODO: enable when we don't falsely get slice1...
// solrj.getZkStateReader().getLeaderUrl(oneInstanceCollection2, "slice1", 30000);
// solrj.getZkStateReader().getLeaderUrl(oneInstanceCollection2, "slice2", 30000);
// one document is added through each of three different core clients, then a single commit
client2.add(getDoc(id, "1"));
client3.add(getDoc(id, "2"));
client4.add(getDoc(id, "3"));
client1.commit();
SolrQuery query = new SolrQuery("*:*");
// per-core (non-distributed) counts; these values are only referenced by the
// commented-out assertions below, but the queries themselves are still executed
query.set("distrib", false);
long oneDocs = client1.query(query).getResults().getNumFound();
long twoDocs = client2.query(query).getResults().getNumFound();
long threeDocs = client3.query(query).getResults().getNumFound();
long fourDocs = client4.query(query).getResults().getNumFound();
// switch the same query object to a distributed query over the whole collection
query.set("collection", oneInstanceCollection2);
query.set("distrib", true);
long allDocs = getCommonCloudSolrClient().query(query).getResults().getNumFound();
// System.out.println("1:" + oneDocs);
// System.out.println("2:" + twoDocs);
// System.out.println("3:" + threeDocs);
// System.out.println("4:" + fourDocs);
// System.out.println("All Docs:" + allDocs);
// assertEquals(oneDocs, threeDocs);
// assertEquals(twoDocs, fourDocs);
// assertNotSame(oneDocs, twoDocs);
assertEquals(3, allDocs);
// we added a role of none on these creates - check for it
ZkStateReader zkStateReader = getCommonCloudSolrClient().getZkStateReader();
zkStateReader.forceUpdateCollection(oneInstanceCollection2);
Map<String, Slice> slices = zkStateReader.getClusterState().getSlicesMap(oneInstanceCollection2);
assertNotNull(slices);
// only the first replica returned by the slice1 replica map's iterator is inspected
String roles = slices.get("slice1").getReplicasMap().values().iterator().next().getStr(ZkStateReader.ROLES_PROP);
assertEquals("none", roles);
// remember the current slice1 leader so it can be unloaded below
ZkCoreNodeProps props = new ZkCoreNodeProps(getCommonCloudSolrClient().getZkStateReader().getClusterState().getLeader(oneInstanceCollection2, "slice1"));
// now test that unloading a core gets us a new leader
try (HttpSolrClient unloadClient = getHttpSolrClient(baseUrl)) {
unloadClient.setConnectionTimeout(15000);
unloadClient.setSoTimeout(60000);
Unload unloadCmd = new Unload(true);
unloadCmd.setCoreName(props.getCoreName());
String leader = props.getCoreUrl();
unloadClient.request(unloadCmd);
// poll until the reported leader URL differs from the unloaded core's URL;
// the test fails on the 51st iteration (100 ms sleep each, not counting
// time spent inside getLeaderUrl itself)
int tries = 50;
while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "slice1", 10000))) {
Thread.sleep(100);
if (tries-- == 0) {
fail("Leader never changed");
}
}
}
// NOTE(review): not in a finally block, so the collection clients stay open if anything above throws
IOUtils.close(collectionClients);
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class OverseerElectionContext method startLeaderInitiatedRecoveryOnReplicas.
/**
 * Re-asserts leader-initiated recovery (LIR) for other replicas of this core's shard.
 * <p>
 * The children of the shard's LIR znode are treated as replica core node names. For each
 * child other than this core whose LIR state is {@code DOWN} or {@code RECOVERY_FAILED},
 * the matching replica properties are looked up and handed to
 * {@code zkController.ensureReplicaInLeaderInitiatedRecovery}. A replica that has a LIR
 * znode but is not present in the replica properties is skipped with a warning instead
 * of being passed on as {@code null}.
 *
 * @param coreName name of the local core to act on behalf of
 * @throws Exception if reading ZooKeeper state or starting recovery fails
 */
private void startLeaderInitiatedRecoveryOnReplicas(String coreName) throws Exception {
  try (SolrCore core = cc.getCore(coreName)) {
    CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
    String coll = cloudDesc.getCollectionName();
    String shardId = cloudDesc.getShardId();
    String coreNodeName = cloudDesc.getCoreNodeName();
    if (coll == null || shardId == null) {
      log.error("Cannot start leader-initiated recovery on new leader (core={},coreNodeName={}) because collection and/or shard is null!",
          coreName, coreNodeName);
      return;
    }

    String znodePath = zkController.getLeaderInitiatedRecoveryZnodePath(coll, shardId);
    List<String> replicas = null;
    try {
      replicas = zkClient.getChildren(znodePath, null, false);
    } catch (NoNodeException nne) {
      // this can be ignored
    }
    if (replicas == null || replicas.isEmpty()) {
      return;
    }

    for (String replicaCoreNodeName : replicas) {
      // added safe-guard so we don't mark this core as down; the receiver is the znode
      // child name so a missing coreNodeName on the descriptor cannot cause an NPE here
      if (replicaCoreNodeName.equals(coreNodeName)) {
        continue;
      }

      final Replica.State lirState = zkController.getLeaderInitiatedRecoveryState(coll, shardId, replicaCoreNodeName);
      if (lirState != Replica.State.DOWN && lirState != Replica.State.RECOVERY_FAILED) {
        continue;
      }
      log.info("After core={} coreNodeName={} was elected leader, a replica coreNodeName={} was found in state: {} and needing recovery.",
          coreName, coreNodeName, replicaCoreNodeName, lirState);

      // NOTE(review): the lookups below use the enclosing class's `collection` field while the
      // rest of this method uses the descriptor's `coll` - presumably identical; confirm.
      List<ZkCoreNodeProps> replicaProps = zkController.getZkStateReader().getReplicaProps(collection, shardId, coreNodeName);
      if (replicaProps == null || replicaProps.isEmpty()) {
        continue;
      }

      ZkCoreNodeProps coreNodeProps = null;
      for (ZkCoreNodeProps p : replicaProps) {
        if (((Replica) p.getNodeProps()).getName().equals(replicaCoreNodeName)) {
          coreNodeProps = p;
          break;
        }
      }
      if (coreNodeProps == null) {
        // A LIR znode exists for a replica that the state reader does not return;
        // there is nothing to put into recovery, so do not pass null downstream.
        log.warn("Replica coreNodeName={} has leader-initiated recovery state {} but was not found among the replicas of collection={} shard={}; skipping",
            replicaCoreNodeName, lirState, collection, shardId);
        continue;
      }

      zkController.ensureReplicaInLeaderInitiatedRecovery(cc, collection, shardId, coreNodeProps, core.getCoreDescriptor(), false);
    }
  }
  // core gets closed automagically
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class SyncStrategy method syncToMe.
/**
 * Asks every replica returned by the state reader for this shard to sync with the
 * leader, then drains the shard handler's responses. A replica whose response is not
 * handled successfully gets a {@code RecoveryRequest} queued in {@code recoveryRequests},
 * unless this strategy has already been closed.
 *
 * @param zkController controller used to reach the ZooKeeper state reader
 * @param collection collection the shard belongs to
 * @param shardId shard whose replicas are asked to sync
 * @param leaderProps properties of the leader the replicas should sync to
 * @param cd descriptor of the local core; its core node name is passed to the replica lookup
 * @param nUpdates update count forwarded to each sync request
 */
private void syncToMe(ZkController zkController, String collection, String shardId, ZkNodeProps leaderProps, CoreDescriptor cd, int nUpdates) {
  // sync everyone else
  // TODO: we should do this in parallel at least
  final List<ZkCoreNodeProps> replicas =
      zkController.getZkStateReader().getReplicaProps(collection, shardId, cd.getCloudDescriptor().getCoreNodeName());
  if (replicas == null) {
    log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + " has no replicas");
    return;
  }

  final ZkCoreNodeProps leader = new ZkCoreNodeProps(leaderProps);
  for (ZkCoreNodeProps replica : replicas) {
    try {
      log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + ": try and ask " + replica.getCoreUrl() + " to sync");
      requestSync(replica.getBaseUrl(), replica.getCoreUrl(), leader.getCoreUrl(), replica.getCoreName(), nUpdates);
    } catch (Exception e) {
      // a failure to issue one request must not stop the remaining replicas from being asked
      SolrException.log(log, "Error syncing replica to leader", e);
    }
  }

  // drain responses until the shard handler has nothing more to report
  ShardResponse response;
  while ((response = shardHandler.takeCompletedOrError()) != null) {
    final boolean synced = handleResponse(response);
    if (response.getException() != null) {
      SolrException.log(log, "Sync request error: " + response.getException());
    }

    if (synced) {
      log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + ": " + " sync completed with " + response.getShardAddress());
      continue;
    }

    log.info(ZkCoreNodeProps.getCoreUrl(leaderProps) + ": Sync failed - we will ask replica (" + response.getShardAddress() + ") to recover.");
    if (isClosed) {
      log.info("We have been closed, don't request that a replica recover");
      continue;
    }

    RecoveryRequest recovery = new RecoveryRequest();
    recovery.leaderProps = leaderProps;
    final ShardCoreRequest failedRequest = (ShardCoreRequest) response.getShardRequest();
    recovery.baseUrl = failedRequest.baseUrl;
    recovery.coreName = failedRequest.coreName;
    recoveryRequests.add(recovery);
  }
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class ZkController method getLeaderProps.
/**
 * Get leader props directly from zk nodes.
 * <p>
 * The shard leader znode is read up to {@code timeoutms / 1000} times (integer division,
 * so a timeout below 1000 results in no read attempt at all). After each failed attempt
 * the method sleeps for one second and then checks whether the core container has been
 * shut down.
 *
 * @param collection collection the slice belongs to
 * @param slice slice whose leader props are wanted
 * @param timeoutms overall budget in milliseconds, consumed in whole-second attempts
 * @param failImmediatelyOnExpiration if {@code true}, a {@code SessionExpiredException}
 *        aborts the retry loop at once instead of being retried
 * @return leader props
 * @throws InterruptedException if the thread is interrupted while reading or sleeping
 * @throws RuntimeException if the session expired and {@code failImmediatelyOnExpiration}
 *         is set; the expiry exception is attached as the cause
 * @throws SolrException with {@code SERVICE_UNAVAILABLE} if the core container is shut
 *         down or all attempts fail; the last failure, if any, is attached as the cause
 */
public ZkCoreNodeProps getLeaderProps(final String collection, final String slice, int timeoutms, boolean failImmediatelyOnExpiration) throws InterruptedException {
  int iterCount = timeoutms / 1000;
  Exception exp = null;
  while (iterCount-- > 0) {
    try {
      byte[] data = zkClient.getData(ZkStateReader.getShardLeadersPath(collection, slice), null, null, true);
      return new ZkCoreNodeProps(ZkNodeProps.load(data));
    } catch (InterruptedException e) {
      throw e;
    } catch (SessionExpiredException e) {
      if (failImmediatelyOnExpiration) {
        // chain the exception that was just caught; `exp` only holds an earlier
        // attempt's failure (or null on the first attempt)
        throw new RuntimeException("Session has expired - could not get leader props", e);
      }
      exp = e;
      Thread.sleep(1000);
    } catch (Exception e) {
      exp = e;
      Thread.sleep(1000);
    }
    if (cc.isShutDown()) {
      throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "CoreContainer is closed");
    }
  }
  throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Could not get leader props", exp);
}
Aggregations