use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class ZkController method waitForLeaderToSeeDownState.
private ZkCoreNodeProps waitForLeaderToSeeDownState(CoreDescriptor descriptor, final String coreZkNodeName) {
// try not to wait too long here - if we are waiting too long, we should probably
// move along and join the election
CloudDescriptor cloudDesc = descriptor.getCloudDescriptor();
String collection = cloudDesc.getCollectionName();
String shard = cloudDesc.getShardId();
ZkCoreNodeProps leaderProps = null;
int retries = 2;
for (int i = 0; i < retries; i++) {
try {
if (isClosed) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "We have been closed");
}
// go straight to zk, not the cloud state - we want current info
leaderProps = getLeaderProps(collection, shard, 5000);
break;
} catch (Exception e) {
SolrException.log(log, "There was a problem finding the leader in zk", e);
try {
Thread.sleep(2000);
} catch (InterruptedException e1) {
Thread.currentThread().interrupt();
}
if (i == retries - 1) {
throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem finding the leader in zk");
}
}
}
String leaderBaseUrl = leaderProps.getBaseUrl();
String leaderCoreName = leaderProps.getCoreName();
String myCoreNodeName = cloudDesc.getCoreNodeName();
String myCoreName = descriptor.getName();
String ourUrl = ZkCoreNodeProps.getCoreUrl(getBaseUrl(), myCoreName);
boolean isLeader = leaderProps.getCoreUrl().equals(ourUrl);
if (!isLeader && !SKIP_AUTO_RECOVERY) {
// detect if this core is in leader-initiated recovery and if so,
// then we don't need the leader to wait on seeing the down state
Replica.State lirState = null;
try {
lirState = getLeaderInitiatedRecoveryState(collection, shard, myCoreNodeName);
} catch (Exception exc) {
log.error("Failed to determine if replica " + myCoreNodeName + " is in leader-initiated recovery due to: " + exc, exc);
}
if (lirState != null) {
log.debug("Replica " + myCoreNodeName + " is already in leader-initiated recovery, so not waiting for leader to see down state.");
} else {
log.info("Replica " + myCoreNodeName + " NOT in leader-initiated recovery, need to wait for leader to see down state.");
try (HttpSolrClient client = new Builder(leaderBaseUrl).build()) {
client.setConnectionTimeout(15000);
client.setSoTimeout(120000);
WaitForState prepCmd = new WaitForState();
prepCmd.setCoreName(leaderCoreName);
prepCmd.setNodeName(getNodeName());
prepCmd.setCoreNodeName(coreZkNodeName);
prepCmd.setState(Replica.State.DOWN);
// let's retry a couple times - perhaps the leader just went down,
// or perhaps he is just not quite ready for us yet
retries = 2;
for (int i = 0; i < retries; i++) {
if (isClosed) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "We have been closed");
}
try {
client.request(prepCmd);
break;
} catch (Exception e) {
// if the core container is shutdown, don't wait
if (cc.isShutDown()) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Core container is shutdown.");
}
Throwable rootCause = SolrException.getRootCause(e);
if (rootCause instanceof IOException) {
// if there was a communication error talking to the leader, see if the leader is even alive
if (!zkStateReader.getClusterState().liveNodesContain(leaderProps.getNodeName())) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Node " + leaderProps.getNodeName() + " hosting leader for " + shard + " in " + collection + " is not live!");
}
}
SolrException.log(log, "There was a problem making a request to the leader", e);
try {
Thread.sleep(2000);
} catch (InterruptedException e1) {
Thread.currentThread().interrupt();
}
if (i == retries - 1) {
throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem making a request to the leader");
}
}
}
} catch (IOException e) {
SolrException.log(log, "Error closing HttpSolrClient", e);
}
}
}
return leaderProps;
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class OverseerCollectionMessageHandler method commit.
void commit(NamedList results, String slice, Replica parentShardLeader) {
log.debug("Calling soft commit to make sub shard updates visible");
String coreUrl = new ZkCoreNodeProps(parentShardLeader).getCoreUrl();
// HttpShardHandler is hard coded to send a QueryRequest hence we go direct
// and we force open a searcher so that we have documents to show upon switching states
UpdateResponse updateResponse = null;
try {
updateResponse = softCommit(coreUrl);
processResponse(results, null, coreUrl, updateResponse, slice, Collections.emptySet());
} catch (Exception e) {
processResponse(results, e, coreUrl, updateResponse, slice, Collections.emptySet());
throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to call distrib softCommit on: " + coreUrl, e);
}
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class ManagedIndexSchema method getActiveReplicaCoreUrls.
protected static List<String> getActiveReplicaCoreUrls(ZkController zkController, String collection, String localCoreNodeName) {
List<String> activeReplicaCoreUrls = new ArrayList<>();
ZkStateReader zkStateReader = zkController.getZkStateReader();
ClusterState clusterState = zkStateReader.getClusterState();
Set<String> liveNodes = clusterState.getLiveNodes();
Collection<Slice> activeSlices = clusterState.getActiveSlices(collection);
if (activeSlices != null && activeSlices.size() > 0) {
for (Slice next : activeSlices) {
Map<String, Replica> replicasMap = next.getReplicasMap();
if (replicasMap != null) {
for (Map.Entry<String, Replica> entry : replicasMap.entrySet()) {
Replica replica = entry.getValue();
if (!localCoreNodeName.equals(replica.getName()) && replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName())) {
ZkCoreNodeProps replicaCoreProps = new ZkCoreNodeProps(replica);
activeReplicaCoreUrls.add(replicaCoreProps.getCoreUrl());
}
}
}
}
}
return activeReplicaCoreUrls;
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class SolrCmdDistributorTest method testMaxRetries.
private void testMaxRetries() throws IOException {
final MockStreamingSolrClients streamingClients = new MockStreamingSolrClients(updateShardHandler);
SolrCmdDistributor cmdDistrib = new SolrCmdDistributor(streamingClients, 5, 0);
streamingClients.setExp(Exp.CONNECT_EXCEPTION);
ArrayList<Node> nodes = new ArrayList<>();
final HttpSolrClient solrclient1 = (HttpSolrClient) clients.get(0);
final AtomicInteger retries = new AtomicInteger();
ZkNodeProps nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, solrclient1.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
RetryNode retryNode = new RetryNode(new ZkCoreNodeProps(nodeProps), null, "collection1", "shard1") {
@Override
public boolean checkRetry() {
retries.incrementAndGet();
return true;
}
};
nodes.add(retryNode);
AddUpdateCommand cmd = new AddUpdateCommand(null);
cmd.solrDoc = sdoc("id", id.incrementAndGet());
ModifiableSolrParams params = new ModifiableSolrParams();
cmdDistrib.distribAdd(cmd, nodes, params);
cmdDistrib.finish();
assertEquals(6, retries.get());
assertEquals(1, cmdDistrib.getErrors().size());
}
use of org.apache.solr.common.cloud.ZkCoreNodeProps in project lucene-solr by apache.
the class SolrCmdDistributorTest method testOneRetry.
private void testOneRetry() throws Exception {
final HttpSolrClient solrclient = (HttpSolrClient) clients.get(0);
long numFoundBefore = solrclient.query(new SolrQuery("*:*")).getResults().getNumFound();
final MockStreamingSolrClients streamingClients = new MockStreamingSolrClients(updateShardHandler);
SolrCmdDistributor cmdDistrib = new SolrCmdDistributor(streamingClients, 5, 0);
streamingClients.setExp(Exp.CONNECT_EXCEPTION);
ArrayList<Node> nodes = new ArrayList<>();
ZkNodeProps nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, solrclient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
final AtomicInteger retries = new AtomicInteger();
nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, solrclient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
RetryNode retryNode = new RetryNode(new ZkCoreNodeProps(nodeProps), null, "collection1", "shard1") {
@Override
public boolean checkRetry() {
streamingClients.setExp(null);
retries.incrementAndGet();
return true;
}
};
nodes.add(retryNode);
AddUpdateCommand cmd = new AddUpdateCommand(null);
cmd.solrDoc = sdoc("id", id.incrementAndGet());
ModifiableSolrParams params = new ModifiableSolrParams();
CommitUpdateCommand ccmd = new CommitUpdateCommand(null, false);
cmdDistrib.distribAdd(cmd, nodes, params);
cmdDistrib.distribCommit(ccmd, nodes, params);
cmdDistrib.finish();
assertEquals(1, retries.get());
long numFoundAfter = solrclient.query(new SolrQuery("*:*")).getResults().getNumFound();
// we will get java.net.ConnectException which we retry on
assertEquals(numFoundBefore + 1, numFoundAfter);
assertEquals(0, cmdDistrib.getErrors().size());
}
Aggregations