use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
the class AbstractFullDistribZkTestBase method queryAndCompareReplicas.
/**
* Executes a query against each live and active replica of the specified shard
* and aserts that the results are identical.
*
* @see #queryAndCompare
*/
public QueryResponse queryAndCompareReplicas(SolrParams params, String shard) throws Exception {
ArrayList<SolrClient> shardClients = new ArrayList<>(7);
updateMappingsFromZk(jettys, clients);
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
List<CloudJettyRunner> solrJetties = shardToJetty.get(shard);
assertNotNull("no jetties found for shard: " + shard, solrJetties);
for (CloudJettyRunner cjetty : solrJetties) {
ZkNodeProps props = cjetty.info;
String nodeName = props.getStr(ZkStateReader.NODE_NAME_PROP);
boolean active = Replica.State.getState(props.getStr(ZkStateReader.STATE_PROP)) == Replica.State.ACTIVE;
boolean live = zkStateReader.getClusterState().liveNodesContain(nodeName);
if (active && live) {
shardClients.add(cjetty.client.solrClient);
}
}
return queryAndCompare(params, shardClients);
}
use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
the class AbstractFullDistribZkTestBase method getLeaderUrlFromZk.
protected ZkCoreNodeProps getLeaderUrlFromZk(String collection, String slice) {
ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState();
ZkNodeProps leader = clusterState.getLeader(collection, slice);
if (leader == null) {
throw new RuntimeException("Could not find leader:" + collection + " " + slice);
}
return new ZkCoreNodeProps(leader);
}
use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
the class OverseerElectionContext method runLeaderProcess.
/*
* weAreReplacement: has someone else been the leader already?
*/
@Override
void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart) throws KeeperException, InterruptedException, IOException {
String coreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
ActionThrottle lt;
try (SolrCore core = cc.getCore(coreName)) {
if (core == null) {
if (cc.isShutDown()) {
return;
} else {
throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
}
}
MDCLoggingContext.setCore(core);
lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle();
}
try {
lt.minimumWaitBetweenActions();
lt.markAttemptingAction();
int leaderVoteWait = cc.getZkController().getLeaderVoteWait();
log.debug("Running the leader process for shard={} and weAreReplacement={} and leaderVoteWait={}", shardId, weAreReplacement, leaderVoteWait);
// clear the leader in clusterstate
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(), ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP, collection);
Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m));
boolean allReplicasInLine = false;
if (!weAreReplacement) {
allReplicasInLine = waitForReplicasToComeUp(leaderVoteWait);
} else {
allReplicasInLine = areAllReplicasParticipating();
}
if (isClosed) {
// re-register the cores and handle a new leadership election.
return;
}
Replica.Type replicaType;
try (SolrCore core = cc.getCore(coreName)) {
if (core == null) {
if (!zkController.getCoreContainer().isShutDown()) {
cancelElection();
throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
} else {
return;
}
}
replicaType = core.getCoreDescriptor().getCloudDescriptor().getReplicaType();
// should I be leader?
if (weAreReplacement && !shouldIBeLeader(leaderProps, core, weAreReplacement)) {
rejoinLeaderElection(core);
return;
}
log.info("I may be the new leader - try and sync");
// we are going to attempt to be the leader
// first cancel any current recovery
core.getUpdateHandler().getSolrCoreState().cancelRecovery();
if (weAreReplacement) {
// wait a moment for any floating updates to finish
try {
Thread.sleep(2500);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, e);
}
}
PeerSync.PeerSyncResult result = null;
boolean success = false;
try {
result = syncStrategy.sync(zkController, core, leaderProps, weAreReplacement);
success = result.isSuccess();
} catch (Exception e) {
SolrException.log(log, "Exception while trying to sync", e);
result = PeerSync.PeerSyncResult.failure();
}
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (!success) {
boolean hasRecentUpdates = false;
if (ulog != null) {
// TODO: we could optimize this if necessary
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
hasRecentUpdates = !recentUpdates.getVersions(1).isEmpty();
}
}
if (!hasRecentUpdates) {
// before, so become leader anyway if no one else has any versions either
if (result.getOtherHasVersions().orElse(false)) {
log.info("We failed sync, but we have no versions - we can't sync in that case. But others have some versions, so we should not become leader");
success = false;
} else {
log.info("We failed sync, but we have no versions - we can't sync in that case - we were active before, so become leader anyway");
success = true;
}
}
}
// solrcloud_debug
if (log.isDebugEnabled()) {
try {
RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
SolrIndexSearcher searcher = searchHolder.get();
try {
log.debug(core.getCoreContainer().getZkController().getNodeName() + " synched " + searcher.search(new MatchAllDocsQuery(), 1).totalHits);
} finally {
searchHolder.decref();
}
} catch (Exception e) {
log.error("Error in solrcloud_debug block", e);
}
}
if (!success) {
rejoinLeaderElection(core);
return;
}
}
boolean isLeader = true;
if (!isClosed) {
try {
// we must check LIR before registering as leader
checkLIR(coreName, allReplicasInLine);
if (replicaType == Replica.Type.TLOG) {
// stop replicate from old leader
zkController.stopReplicationFromLeader(coreName);
if (weAreReplacement) {
try (SolrCore core = cc.getCore(coreName)) {
Future<UpdateLog.RecoveryInfo> future = core.getUpdateHandler().getUpdateLog().recoverFromCurrentLog();
if (future != null) {
log.info("Replaying tlog before become new leader");
future.get();
} else {
log.info("New leader does not have old tlog to replay");
}
}
}
}
super.runLeaderProcess(weAreReplacement, 0);
try (SolrCore core = cc.getCore(coreName)) {
if (core != null) {
core.getCoreDescriptor().getCloudDescriptor().setLeader(true);
publishActiveIfRegisteredAndNotActive(core);
} else {
return;
}
}
log.info("I am the new leader: " + ZkCoreNodeProps.getCoreUrl(leaderProps) + " " + shardId);
// we made it as leader - send any recovery requests we need to
syncStrategy.requestRecoveries();
} catch (Exception e) {
isLeader = false;
SolrException.log(log, "There was a problem trying to register as the leader", e);
try (SolrCore core = cc.getCore(coreName)) {
if (core == null) {
log.debug("SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
return;
}
core.getCoreDescriptor().getCloudDescriptor().setLeader(false);
// we could not publish ourselves as leader - try and rejoin election
rejoinLeaderElection(core);
}
}
if (isLeader) {
// check for any replicas in my shard that were set to down by the previous leader
try {
startLeaderInitiatedRecoveryOnReplicas(coreName);
} catch (Exception exc) {
// don't want leader election to fail because of
// an error trying to tell others to recover
}
}
} else {
cancelElection();
}
} finally {
MDCLoggingContext.clear();
}
}
use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
the class ConfigSetsHandler method sendToZk.
protected void sendToZk(SolrQueryResponse rsp, ConfigSetOperation operation, Map<String, Object> result) throws KeeperException, InterruptedException {
if (result != null) {
// We need to differentiate between collection and configsets actions since they currently
// use the same underlying queue.
result.put(QUEUE_OPERATION, CONFIGSETS_ACTION_PREFIX + operation.action.toLower());
ZkNodeProps props = new ZkNodeProps(result);
handleResponse(operation.action.toLower(), props, rsp, DEFAULT_ZK_TIMEOUT);
}
}
use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.
the class RebalanceLeaders method rejoinElection.
private void rejoinElection(String collectionName, Slice slice, String electionNode, String core, boolean rejoinAtHead) throws KeeperException, InterruptedException {
Replica replica = slice.getReplica(LeaderElector.getNodeName(electionNode));
Map<String, Object> propMap = new HashMap<>();
propMap.put(COLLECTION_PROP, collectionName);
propMap.put(SHARD_ID_PROP, slice.getName());
propMap.put(QUEUE_OPERATION, REBALANCELEADERS.toLower());
propMap.put(CORE_NAME_PROP, core);
propMap.put(CORE_NODE_NAME_PROP, replica.getName());
propMap.put(ZkStateReader.BASE_URL_PROP, replica.getProperties().get(ZkStateReader.BASE_URL_PROP));
// Get ourselves to be first in line.
propMap.put(REJOIN_AT_HEAD_PROP, Boolean.toString(rejoinAtHead));
propMap.put(ELECTION_NODE_PROP, electionNode);
String asyncId = REBALANCELEADERS.toLower() + "_" + core + "_" + Math.abs(System.nanoTime());
propMap.put(ASYNC, asyncId);
ZkNodeProps m = new ZkNodeProps(propMap);
// I'm constructing my own response
SolrQueryResponse rspIgnore = new SolrQueryResponse();
// Want to construct my own response here.
collectionsHandler.handleResponse(REBALANCELEADERS.toLower(), m, rspIgnore);
}
Aggregations