use of org.apache.http.NoHttpResponseException in project lucene-solr by apache.
the class LeaderInitiatedRecoveryThread method sendRecoveryCommandWithRetry.
/**
 * Sends a {@code REQUESTRECOVERY} core-admin command to the replica described by
 * {@code nodeProps}, retrying up to {@code maxTries} times with a fixed 5 second pause
 * between attempts.
 *
 * <p>An attempt is retried only when the root cause of the failure is a communication error
 * ({@code ConnectException}, {@code ConnectTimeoutException}, {@code NoHttpResponseException}
 * or {@code SocketException}); any other failure ends the loop. Between attempts the loop
 * also ends early when the core container is shut down, the replica's node is no longer
 * live, this core is no longer the shard leader, the leader-initiated recovery state read
 * through {@code zkController} is {@code null}, or that state is already {@code RECOVERING}.
 *
 * <p>After the loop, the replica URL is always removed from leader-initiated recovery
 * handling, and an error is logged if the attempts ran out without the command being
 * delivered.
 *
 * @throws Exception declared by the signature; failures inside the send and state-check
 *     sections are caught and logged, so only calls outside those try blocks can propagate
 */
protected void sendRecoveryCommandWithRetry() throws Exception {
int tries = 0;
long waitBetweenTriesMs = 5000L;
boolean continueTrying = true;
// replicaCoreName and coreNeedingRecovery below are both read from nodeProps.getCoreName()
String replicaCoreName = nodeProps.getCoreName();
String recoveryUrl = nodeProps.getBaseUrl();
String replicaNodeName = nodeProps.getNodeName();
String coreNeedingRecovery = nodeProps.getCoreName();
String replicaCoreNodeName = ((Replica) nodeProps.getNodeProps()).getName();
String replicaUrl = nodeProps.getCoreUrl();
log.info(getName() + " started running to send REQUESTRECOVERY command to " + replicaUrl + "; will try for a max of " + (maxTries * (waitBetweenTriesMs / 1000)) + " secs");
// the same request object is reused for every attempt
RequestRecovery recoverRequestCmd = new RequestRecovery();
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
recoverRequestCmd.setCoreName(coreNeedingRecovery);
// tries is pre-incremented, so it is 1 on the first pass through the body
while (continueTrying && ++tries <= maxTries) {
if (tries > 1) {
log.warn("Asking core={} coreNodeName={} on " + recoveryUrl + " to recover; unsuccessful after " + tries + " of " + maxTries + " attempts so far ...", coreNeedingRecovery, replicaCoreNodeName);
} else {
log.info("Asking core={} coreNodeName={} on " + recoveryUrl + " to recover", coreNeedingRecovery, replicaCoreNodeName);
}
// a fresh client is built for each attempt and closed by try-with-resources
try (HttpSolrClient client = new HttpSolrClient.Builder(recoveryUrl).build()) {
client.setSoTimeout(60000);
client.setConnectionTimeout(15000);
try {
client.request(recoverRequestCmd);
log.info("Successfully sent " + CoreAdminAction.REQUESTRECOVERY + " command to core={} coreNodeName={} on " + recoveryUrl, coreNeedingRecovery, replicaCoreNodeName);
// succeeded, so stop looping
continueTrying = false;
} catch (Exception t) {
// classify by root cause: only communication errors are worth another attempt
Throwable rootCause = SolrException.getRootCause(t);
boolean wasCommError = (rootCause instanceof ConnectException || rootCause instanceof ConnectTimeoutException || rootCause instanceof NoHttpResponseException || rootCause instanceof SocketException);
SolrException.log(log, recoveryUrl + ": Could not tell a replica to recover", t);
if (!wasCommError) {
continueTrying = false;
}
}
}
// wait a few seconds, then re-check whether another attempt still makes sense
if (continueTrying) {
try {
Thread.sleep(waitBetweenTriesMs);
} catch (InterruptedException ignoreMe) {
// NOTE(review): the interrupt flag is restored but the loop is not exited here;
// confirm that continuing after an interrupt is intended
Thread.currentThread().interrupt();
}
if (coreContainer.isShutDown()) {
log.warn("Stop trying to send recovery command to downed replica core={} coreNodeName={} on " + replicaNodeName + " because my core container is closed.", coreNeedingRecovery, replicaCoreNodeName);
continueTrying = false;
break;
}
// see if the replica's node is still live, if not, no need to keep doing this loop
ZkStateReader zkStateReader = zkController.getZkStateReader();
if (!zkStateReader.getClusterState().liveNodesContain(replicaNodeName)) {
log.warn("Node " + replicaNodeName + " hosting core " + coreNeedingRecovery + " is no longer live. No need to keep trying to tell it to recover!");
continueTrying = false;
break;
}
String leaderCoreNodeName = leaderCd.getCloudDescriptor().getCoreNodeName();
// stop trying if I'm no longer the leader
if (leaderCoreNodeName != null && collection != null) {
String leaderCoreNodeNameFromZk = null;
try {
leaderCoreNodeNameFromZk = zkController.getZkStateReader().getLeaderRetry(collection, shardId, 1000).getName();
} catch (Exception exc) {
log.error("Failed to determine if " + leaderCoreNodeName + " is still the leader for " + collection + " " + shardId + " before starting leader-initiated recovery thread for " + replicaUrl + " due to: " + exc);
}
// if the lookup above failed, leaderCoreNodeNameFromZk is still null and this check stops the loop
if (!leaderCoreNodeName.equals(leaderCoreNodeNameFromZk)) {
log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because " + leaderCoreNodeName + " is no longer the leader! New leader is " + leaderCoreNodeNameFromZk);
continueTrying = false;
break;
}
// second leadership check, this time against the local cloud descriptor
if (!leaderCd.getCloudDescriptor().isLeader()) {
log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because " + leaderCoreNodeName + " is no longer the leader!");
continueTrying = false;
break;
}
}
// consult the leader-initiated recovery (LIR) state before deciding whether another
// attempt is needed
if (collection != null && shardId != null) {
try {
// call out to ZooKeeper to get the leader-initiated recovery state
final Replica.State lirState = zkController.getLeaderInitiatedRecoveryState(collection, shardId, replicaCoreNodeName);
if (lirState == null) {
log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because the znode no longer exists.");
continueTrying = false;
break;
}
if (lirState == Replica.State.RECOVERING) {
// replica has ack'd leader initiated recovery and entered the recovering state
// so we don't need to keep looping to send the command
continueTrying = false;
log.info("Replica " + coreNeedingRecovery + " on node " + replicaNodeName + " ack'd the leader initiated recovery state, " + "no need to keep trying to send recovery command");
} else {
// look up the replica's published state among the shard's replica props
String lcnn = zkStateReader.getLeaderRetry(collection, shardId, 5000).getName();
List<ZkCoreNodeProps> replicaProps = zkStateReader.getReplicaProps(collection, shardId, lcnn);
if (replicaProps != null && replicaProps.size() > 0) {
for (ZkCoreNodeProps prop : replicaProps) {
final Replica replica = (Replica) prop.getNodeProps();
if (replicaCoreNodeName.equals(replica.getName())) {
if (replica.getState() == Replica.State.ACTIVE) {
// replica reports ACTIVE, which is bad if lirState is still "down"
if (lirState == Replica.State.DOWN) {
// OK, so the replica thinks it is active, but it never ack'd the leader initiated recovery
// so its state cannot be trusted and it needs to be told to recover again ... and we keep looping here
// NOTE(review): the message logs replicaProps.get(0), not necessarily the matched prop
log.warn("Replica core={} coreNodeName={} set to active but the leader thinks it should be in recovery;" + " forcing it back to down state to re-run the leader-initiated recovery process; props: " + replicaProps.get(0), coreNeedingRecovery, replicaCoreNodeName);
publishDownState(replicaCoreName, replicaCoreNodeName, replicaNodeName, replicaUrl, true);
}
}
// the matching replica was found; stop scanning the remaining props
break;
}
}
}
}
} catch (Exception ignoreMe) {
log.warn("Failed to determine state of core={} coreNodeName={} due to: " + ignoreMe, coreNeedingRecovery, replicaCoreNodeName);
// eventually this loop will exhaust max tries and stop so we can just log this for now
}
}
}
}
// replica is no longer in recovery on this node (may be handled on another node)
zkController.removeReplicaFromLeaderInitiatedRecoveryHandling(replicaUrl);
// continueTrying is still true only when the loop ended because tries exceeded maxTries
if (continueTrying) {
// ugh! this means the loop timed out before the recovery command could be delivered
// how exotic do we want to get here?
log.error("Timed out after waiting for " + (tries * (waitBetweenTriesMs / 1000)) + " secs to send the recovery request to: " + replicaUrl + "; not much more we can do here?");
// TODO: need to raise a JMX event to allow monitoring tools to take over from here
}
}
use of org.apache.http.NoHttpResponseException in project lucene-solr by apache.
the class PeerSync method handleResponse.
/**
 * Processes one shard response.
 *
 * <p>A response without an exception is dispatched to {@code handleVersions} when the
 * request purpose is {@code 1}, otherwise to {@code handleUpdates}. A response carrying an
 * exception is reported as a failure unless {@code cantReachIsSuccess} is set, the request
 * purpose is {@code 1}, and the failure is a connection-level problem, an HTTP 503 or an
 * HTTP 404; those cases are logged and counted as success.
 *
 * @param srsp the shard response to evaluate
 * @return {@code true} if the response is handled successfully or its failure is tolerated,
 *     {@code false} otherwise
 */
private boolean handleResponse(ShardResponse srsp) {
  ShardRequest shardRequest = srsp.getShardRequest();

  // Happy path first: no exception, so dispatch on the request purpose.
  if (srsp.getException() == null) {
    return shardRequest.purpose == 1 ? handleVersions(srsp) : handleUpdates(srsp);
  }

  // Certain failures are tolerated only for purpose-1 requests when the flag is set.
  boolean tolerateUnreachable = cantReachIsSuccess && shardRequest.purpose == 1;
  if (tolerateUnreachable) {
    if (srsp.getException() instanceof SolrServerException) {
      Throwable rootCause = ((SolrServerException) srsp.getException()).getRootCause();
      boolean timeoutInChain = connectTimeoutExceptionInChain(srsp.getException());
      boolean unreachable = timeoutInChain
          || rootCause instanceof ConnectException
          || rootCause instanceof ConnectTimeoutException
          || rootCause instanceof NoHttpResponseException
          || rootCause instanceof SocketException;
      if (unreachable) {
        log.warn(msg() + " couldn't connect to " + srsp.getShardAddress() + ", counting as success", srsp.getException());
        return true;
      }
    }
    if (srsp.getException() instanceof SolrException) {
      int statusCode = ((SolrException) srsp.getException()).code();
      if (statusCode == 503) {
        log.warn(msg() + " got a 503 from " + srsp.getShardAddress() + ", counting as success", srsp.getException());
        return true;
      }
      if (statusCode == 404) {
        log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success. " + "Perhaps /get is not registered?", srsp.getException());
        return true;
      }
    }
  }

  // TODO: we should return the above information so that when we can request a recovery
  // through zookeeper, we do that for these nodes
  log.warn(msg() + " exception talking to " + srsp.getShardAddress() + ", failed", srsp.getException());
  return false;
}
use of org.apache.http.NoHttpResponseException in project RxJavaInAction by fengzhizi715.
the class RetryHandler method retryRequest.
/**
 * Decides whether a failed HTTP request should be retried.
 *
 * <p>Gives up once {@code executionCount} reaches 3. Retries when the server dropped the
 * connection ({@code NoHttpResponseException}) or an I/O timeout occurred
 * ({@code InterruptedIOException}). Never retries on SSL failures, connect timeouts or
 * unknown hosts. For any other failure, retries only when the request carries no entity
 * and is therefore treated as idempotent.
 *
 * @param exception the I/O failure that aborted the previous attempt
 * @param executionCount the number of attempts made so far
 * @param context the execution context of the failed request
 * @return {@code true} to retry the request, {@code false} to give up
 */
@Override
public boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
  if (executionCount >= 3) {
    // Give up after three attempts.
    return false;
  }
  if (exception instanceof NoHttpResponseException) {
    // The server dropped the connection: retry.
    return true;
  }
  if (exception instanceof SSLException) {
    // Do not retry SSL failures; this also covers SSLHandshakeException, a subclass.
    return false;
  }
  if (exception instanceof ConnectTimeoutException) {
    // Do not retry connect timeouts. ConnectTimeoutException extends
    // InterruptedIOException, so this check must come BEFORE the generic timeout
    // check below; otherwise it is unreachable.
    return false;
  }
  if (exception instanceof InterruptedIOException) {
    // Any other timeout: retry.
    return true;
  }
  if (exception instanceof UnknownHostException) {
    // Target host cannot be resolved.
    return false;
  }
  HttpClientContext clientContext = HttpClientContext.adapt(context);
  HttpRequest request = clientContext.getRequest();
  // Requests without an enclosed entity are treated as idempotent and retried.
  return !(request instanceof HttpEntityEnclosingRequest);
}
use of org.apache.http.NoHttpResponseException in project JFramework by gugumall.
the class MyHttpRequestRetryHandler method retryRequest.
/*
 * (non-Javadoc)
 * @see org.apache.http.client.HttpRequestRetryHandler#retryRequest(java.io.IOException, int, org.apache.http.protocol.HttpContext)
 *
 * Decides whether a failed request is retried:
 *  - never once executionCount has reached the configured "retries" limit;
 *  - never on an SSL handshake failure;
 *  - otherwise (including NoHttpResponseException) only when the request carries no
 *    entity and is therefore treated as idempotent.
 *
 * NOTE(review): the previous version returned false on every path, so no request was ever
 * retried even though its comment said idempotent requests should be. Confirm that
 * retrying idempotent requests is the desired behaviour.
 */
public boolean retryRequest(IOException exception, int executionCount, HttpContext context) {
  if (executionCount >= retries) {
    // Do not retry if over max retry count
    return false;
  }
  if (exception instanceof SSLHandshakeException) {
    // Do not retry on SSL handshake exception
    return false;
  }
  // A dropped connection (NoHttpResponseException) deliberately falls through to the
  // idempotency check below rather than being rejected outright.
  HttpRequest request = (HttpRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
  // Retry only if the request is considered idempotent (it encloses no entity)
  return !(request instanceof HttpEntityEnclosingRequest);
}
use of org.apache.http.NoHttpResponseException in project neo-java by coranos.
the class TestRpcServerUtil method getResponse.
/**
 * returns the response from the RPC server.
 *
 * <p>Posts a JSON request built by {@code createInputJson} to
 * {@code http://localhost:<rpcPort><uri>} and returns the JSON response re-serialized with
 * an indent of 2. The HTTP client and response are closed before returning.
 *
 * @param controller
 *            the controller to use.
 * @param uri
 *            the uri to send.
 * @param rpcVersion
 *            the version to send.
 * @param params
 *            the parameters to send.
 * @param method
 *            the method to call.
 * @return the response from the RPC server.
 * @throws RuntimeException
 *             on any failure; connection-level failures and unparsable responses are
 *             first wrapped in a descriptive RuntimeException, which is then wrapped again
 *             by the outer handler.
 */
public static String getResponse(final LocalControllerNode controller, final String uri, final String rpcVersion, final JSONArray params, final String method) {
  final String actualStrRaw;
  try {
    final JSONObject inputJson = createInputJson(rpcVersion, method, params);
    final String coreRpcNode = "http://localhost:" + controller.getLocalNodeData().getRpcPort() + uri;
    final StringEntity input = new StringEntity(inputJson.toString(), ContentType.APPLICATION_JSON);
    final HttpPost post = new HttpPost(coreRpcNode);
    final RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(TIMEOUT_MILLIS).setConnectTimeout(TIMEOUT_MILLIS).setConnectionRequestTimeout(TIMEOUT_MILLIS).build();
    post.setConfig(requestConfig);
    post.setEntity(input);
    final String responseStr;
    // try-with-resources closes the response and then the client on every path, so
    // connections are not leaked.
    try (CloseableHttpClient client = HttpClients.createDefault();
        CloseableHttpResponse response = client.execute(post)) {
      logDebugStatus(response);
      final HttpEntity entity = response.getEntity();
      responseStr = EntityUtils.toString(entity);
    } catch (final ConnectTimeoutException | SocketTimeoutException | NoHttpResponseException | SocketException e) {
      throw new RuntimeException(CONNECTION_EXCEPTION, e);
    }
    try {
      final JSONObject responseJson = new JSONObject(responseStr);
      actualStrRaw = responseJson.toString(2);
    } catch (final JSONException e) {
      throw new RuntimeException("cannot parse text \"" + responseStr + "\"", e);
    }
  } catch (final Exception e) {
    // Kept as-is: every failure, including the RuntimeExceptions thrown above, reaches
    // the caller wrapped in one more RuntimeException.
    throw new RuntimeException(e);
  }
  return actualStrRaw;
}
Aggregations