use of org.apache.http.conn.ConnectTimeoutException in project lucene-solr by apache.
the class LeaderInitiatedRecoveryThread method sendRecoveryCommandWithRetry.
/**
 * Sends a REQUESTRECOVERY core-admin command to the replica described by {@code nodeProps},
 * retrying up to {@code maxTries} times with a 5000 ms pause between attempts.
 *
 * <p>The loop stops as soon as one of the following holds:
 * <ul>
 * <li>the command was sent without an exception;</li>
 * <li>sending failed with an exception whose root cause is not one of the communication
 * errors checked below (ConnectException, ConnectTimeoutException, NoHttpResponseException,
 * SocketException);</li>
 * <li>the core container reports it is shut down;</li>
 * <li>the replica's node is no longer in the cluster state's live nodes;</li>
 * <li>the leader core node name read from ZooKeeper differs from this core's, or this core's
 * cloud descriptor no longer reports it as leader;</li>
 * <li>the leader-initiated recovery (LIR) state for the replica is {@code null} or
 * {@code RECOVERING};</li>
 * <li>{@code maxTries} attempts have been made.</li>
 * </ul>
 *
 * <p>After the loop, the replica URL is always removed from the controller's
 * leader-initiated recovery handling; if the loop ended only because the attempts ran out,
 * an error is logged.
 *
 * @throws Exception declared by the signature; exceptions from sending the request and from the
 *     LIR state lookup are caught and logged inside the loop
 */
protected void sendRecoveryCommandWithRetry() throws Exception {
int tries = 0;
long waitBetweenTriesMs = 5000L;
boolean continueTrying = true;
// NOTE(review): replicaCoreName and coreNeedingRecovery are both nodeProps.getCoreName()
String replicaCoreName = nodeProps.getCoreName();
String recoveryUrl = nodeProps.getBaseUrl();
String replicaNodeName = nodeProps.getNodeName();
String coreNeedingRecovery = nodeProps.getCoreName();
String replicaCoreNodeName = ((Replica) nodeProps.getNodeProps()).getName();
String replicaUrl = nodeProps.getCoreUrl();
log.info(getName() + " started running to send REQUESTRECOVERY command to " + replicaUrl + "; will try for a max of " + (maxTries * (waitBetweenTriesMs / 1000)) + " secs");
// the same request object is reused for every attempt
RequestRecovery recoverRequestCmd = new RequestRecovery();
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
recoverRequestCmd.setCoreName(coreNeedingRecovery);
while (continueTrying && ++tries <= maxTries) {
if (tries > 1) {
log.warn("Asking core={} coreNodeName={} on " + recoveryUrl + " to recover; unsuccessful after " + tries + " of " + maxTries + " attempts so far ...", coreNeedingRecovery, replicaCoreNodeName);
} else {
log.info("Asking core={} coreNodeName={} on " + recoveryUrl + " to recover", coreNeedingRecovery, replicaCoreNodeName);
}
// a fresh client is built for each attempt and closed by the try-with-resources
try (HttpSolrClient client = new HttpSolrClient.Builder(recoveryUrl).build()) {
client.setSoTimeout(60000);
client.setConnectionTimeout(15000);
try {
client.request(recoverRequestCmd);
log.info("Successfully sent " + CoreAdminAction.REQUESTRECOVERY + " command to core={} coreNodeName={} on " + recoveryUrl, coreNeedingRecovery, replicaCoreNodeName);
// succeeded, so stop looping
continueTrying = false;
} catch (Exception t) {
// only communication-level failures are retried; any other root cause ends the loop
Throwable rootCause = SolrException.getRootCause(t);
boolean wasCommError = (rootCause instanceof ConnectException || rootCause instanceof ConnectTimeoutException || rootCause instanceof NoHttpResponseException || rootCause instanceof SocketException);
SolrException.log(log, recoveryUrl + ": Could not tell a replica to recover", t);
if (!wasCommError) {
continueTrying = false;
}
}
}
// the send failed with a retryable error: wait, then re-check whether retrying still makes sense
if (continueTrying) {
try {
Thread.sleep(waitBetweenTriesMs);
} catch (InterruptedException ignoreMe) {
// restore the interrupt flag; the loop is not exited here and the checks below still run
Thread.currentThread().interrupt();
}
if (coreContainer.isShutDown()) {
log.warn("Stop trying to send recovery command to downed replica core={} coreNodeName={} on " + replicaNodeName + " because my core container is closed.", coreNeedingRecovery, replicaCoreNodeName);
continueTrying = false;
break;
}
// see if the replica's node is still live, if not, no need to keep doing this loop
ZkStateReader zkStateReader = zkController.getZkStateReader();
if (!zkStateReader.getClusterState().liveNodesContain(replicaNodeName)) {
log.warn("Node " + replicaNodeName + " hosting core " + coreNeedingRecovery + " is no longer live. No need to keep trying to tell it to recover!");
continueTrying = false;
break;
}
String leaderCoreNodeName = leaderCd.getCloudDescriptor().getCoreNodeName();
// stop trying if I'm no longer the leader
if (leaderCoreNodeName != null && collection != null) {
String leaderCoreNodeNameFromZk = null;
try {
leaderCoreNodeNameFromZk = zkController.getZkStateReader().getLeaderRetry(collection, shardId, 1000).getName();
} catch (Exception exc) {
// the lookup failure is only logged; leaderCoreNodeNameFromZk stays null, so the
// equals() check below fails and the loop stops
log.error("Failed to determine if " + leaderCoreNodeName + " is still the leader for " + collection + " " + shardId + " before starting leader-initiated recovery thread for " + replicaUrl + " due to: " + exc);
}
if (!leaderCoreNodeName.equals(leaderCoreNodeNameFromZk)) {
log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because " + leaderCoreNodeName + " is no longer the leader! New leader is " + leaderCoreNodeNameFromZk);
continueTrying = false;
break;
}
// second leadership check, this time against the local cloud descriptor
if (!leaderCd.getCloudDescriptor().isLeader()) {
log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because " + leaderCoreNodeName + " is no longer the leader!");
continueTrying = false;
break;
}
}
// check whether the replica has already acknowledged the leader-initiated recovery command
if (collection != null && shardId != null) {
try {
// call out to ZooKeeper to get the leader-initiated recovery state
final Replica.State lirState = zkController.getLeaderInitiatedRecoveryState(collection, shardId, replicaCoreNodeName);
if (lirState == null) {
log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because the znode no longer exists.");
continueTrying = false;
break;
}
if (lirState == Replica.State.RECOVERING) {
// replica has ack'd leader initiated recovery and entered the recovering state
// so we don't need to keep looping to send the command
// (no break here: the loop ends at the next evaluation of the while condition)
continueTrying = false;
log.info("Replica " + coreNeedingRecovery + " on node " + replicaNodeName + " ack'd the leader initiated recovery state, " + "no need to keep trying to send recovery command");
} else {
// look up the replica's published state among the shard's replica props
String lcnn = zkStateReader.getLeaderRetry(collection, shardId, 5000).getName();
List<ZkCoreNodeProps> replicaProps = zkStateReader.getReplicaProps(collection, shardId, lcnn);
if (replicaProps != null && replicaProps.size() > 0) {
for (ZkCoreNodeProps prop : replicaProps) {
final Replica replica = (Replica) prop.getNodeProps();
if (replicaCoreNodeName.equals(replica.getName())) {
if (replica.getState() == Replica.State.ACTIVE) {
// the replica is published as active, which is bad if lirState is still "down"
if (lirState == Replica.State.DOWN) {
// OK, so the replica thinks it is active, but it never ack'd the leader initiated recovery
// so its state cannot be trusted and it needs to be told to recover again ... and we keep looping here
log.warn("Replica core={} coreNodeName={} set to active but the leader thinks it should be in recovery;" + " forcing it back to down state to re-run the leader-initiated recovery process; props: " + replicaProps.get(0), coreNeedingRecovery, replicaCoreNodeName);
publishDownState(replicaCoreName, replicaCoreNodeName, replicaNodeName, replicaUrl, true);
}
}
// the matching replica was found; no need to inspect the remaining props
break;
}
}
}
}
} catch (Exception ignoreMe) {
log.warn("Failed to determine state of core={} coreNodeName={} due to: " + ignoreMe, coreNeedingRecovery, replicaCoreNodeName);
// eventually this loop will exhaust max tries and stop so we can just log this for now
}
}
}
}
// replica is no longer in recovery on this node (may be handled on another node)
zkController.removeReplicaFromLeaderInitiatedRecoveryHandling(replicaUrl);
// continueTrying is still true only when the while loop ended because tries exceeded maxTries
if (continueTrying) {
// ugh! this means the loop timed out before the recovery command could be delivered
// how exotic do we want to get here?
log.error("Timed out after waiting for " + (tries * (waitBetweenTriesMs / 1000)) + " secs to send the recovery request to: " + replicaUrl + "; not much more we can do here?");
// TODO: need to raise a JMX event to allow monitoring tools to take over from here
}
}
use of org.apache.http.conn.ConnectTimeoutException in project lucene-solr by apache.
the class PeerSync method handleResponse.
/**
 * Processes one shard response.
 *
 * <p>A response without an exception is dispatched to {@code handleVersions} when the request
 * purpose is 1 and to {@code handleUpdates} otherwise. A response with an exception counts as a
 * success only when {@code cantReachIsSuccess} is set, the request purpose is 1, and the failure
 * is a connection-level error, an HTTP 503 or an HTTP 404; every other failure is logged and
 * reported as {@code false}.
 *
 * @param srsp the shard response to evaluate
 * @return {@code true} if the response is treated as successful, {@code false} otherwise
 */
private boolean handleResponse(ShardResponse srsp) {
  final ShardRequest request = srsp.getShardRequest();

  // Happy path first: no exception, so hand the payload to the matching handler.
  if (srsp.getException() == null) {
    return request.purpose == 1 ? handleVersions(srsp) : handleUpdates(srsp);
  }

  // Failures may only be forgiven for purpose-1 requests when the caller opted in.
  final boolean tolerateUnreachable = cantReachIsSuccess && request.purpose == 1;

  if (tolerateUnreachable && srsp.getException() instanceof SolrServerException) {
    final Throwable rootCause = ((SolrServerException) srsp.getException()).getRootCause();
    final boolean timeoutInChain = connectTimeoutExceptionInChain(srsp.getException());
    final boolean unreachable = timeoutInChain
        || rootCause instanceof ConnectException
        || rootCause instanceof ConnectTimeoutException
        || rootCause instanceof NoHttpResponseException
        || rootCause instanceof SocketException;
    if (unreachable) {
      log.warn(msg() + " couldn't connect to " + srsp.getShardAddress() + ", counting as success", srsp.getException());
      return true;
    }
  }

  if (tolerateUnreachable && srsp.getException() instanceof SolrException) {
    final int status = ((SolrException) srsp.getException()).code();
    if (status == 503) {
      log.warn(msg() + " got a 503 from " + srsp.getShardAddress() + ", counting as success", srsp.getException());
      return true;
    }
    if (status == 404) {
      log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success. " + "Perhaps /get is not registered?", srsp.getException());
      return true;
    }
  }

  // TODO: we should return the above information so that when we can request a recovery
  // through zookeeper, we do that for these nodes
  log.warn(msg() + " exception talking to " + srsp.getShardAddress() + ", failed", srsp.getException());
  return false;
}
use of org.apache.http.conn.ConnectTimeoutException in project neo-java by coranos.
the class TestRpcServerUtil method getResponse.
/**
 * returns the response from the RPC server.
 *
 * <p>The request is POSTed as JSON to {@code http://localhost:<rpcPort><uri>}, and the response
 * body is parsed as JSON and re-serialized with an indent of 2. The HTTP client and the response
 * are closed before this method returns.
 *
 * @param controller
 *            the controller to use.
 * @param uri
 *            the uri to send.
 * @param rpcVersion
 *            the version to send.
 * @param params
 *            the parameters to send.
 * @param method
 *            the method to call.
 * @return the response from the RPC server.
 * @throws RuntimeException
 *             if the connection fails, the response is not valid JSON, or any other error
 *             occurs; the original exception is kept as the cause.
 */
public static String getResponse(final LocalControllerNode controller, final String uri, final String rpcVersion, final JSONArray params, final String method) {
  final String actualStrRaw;
  try {
    final JSONObject inputJson = createInputJson(rpcVersion, method, params);
    final String coreRpcNode = "http://localhost:" + controller.getLocalNodeData().getRpcPort() + uri;
    final StringEntity input = new StringEntity(inputJson.toString(), ContentType.APPLICATION_JSON);
    final HttpPost post = new HttpPost(coreRpcNode);
    final RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(TIMEOUT_MILLIS).setConnectTimeout(TIMEOUT_MILLIS).setConnectionRequestTimeout(TIMEOUT_MILLIS).build();
    post.setConfig(requestConfig);
    post.setEntity(input);
    final String responseStr;
    // try-with-resources releases the connection and the client even when reading fails
    try (final CloseableHttpClient client = HttpClients.createDefault();
        final CloseableHttpResponse response = client.execute(post)) {
      logDebugStatus(response);
      final HttpEntity entity = response.getEntity();
      responseStr = EntityUtils.toString(entity);
    } catch (final ConnectTimeoutException | SocketTimeoutException | NoHttpResponseException | SocketException e) {
      // connection-level failures get a recognizable message; the outer catch wraps it again
      throw new RuntimeException(CONNECTION_EXCEPTION, e);
    }
    try {
      final JSONObject responseJson = new JSONObject(responseStr);
      actualStrRaw = responseJson.toString(2);
    } catch (final JSONException e) {
      throw new RuntimeException("cannot parse text \"" + responseStr + "\"", e);
    }
  } catch (final Exception e) {
    throw new RuntimeException(e);
  }
  return actualStrRaw;
}
use of org.apache.http.conn.ConnectTimeoutException in project neo-java by coranos.
the class RpcClientUtil method post.
/**
 * posts a request.
 *
 * <p>The HTTP client and the response are closed before this method returns.
 *
 * @param timeoutMillis
 *            the time to wait, in milliseconds. (used for SocketTimeout,
 *            ConnectTimeout, and ConnectionRequestTimeout). Values above
 *            {@code Integer.MAX_VALUE} are clamped to {@code Integer.MAX_VALUE}.
 * @param rpcNode
 *            the RPC node to use.
 * @param silentErrors
 *            if false, log the error to LOG.error().
 * @param inputJson
 *            the input JSON to use.
 * @return the response, or null if a connection error or timeout occurs, or if the response body
 *         does not start with "{".
 * @throws RuntimeException
 *             wrapping any other {@link IOException} raised while executing the request.
 */
public static JSONObject post(final long timeoutMillis, final String rpcNode, final boolean silentErrors, final JSONObject inputJson) {
  LOG.debug("inputJson:{}", inputJson);
  final StringEntity input = new StringEntity(inputJson.toString(), ContentType.APPLICATION_JSON);
  final HttpPost post = new HttpPost(rpcNode);
  // RequestConfig takes int timeouts; clamp so an oversized long cannot wrap around
  final int timeout = (int) Math.min(timeoutMillis, Integer.MAX_VALUE);
  final RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(timeout).setConnectTimeout(timeout).setConnectionRequestTimeout(timeout).build();
  post.setConfig(requestConfig);
  post.setEntity(input);
  final String str;
  // try-with-resources releases the connection and the client on every exit path
  try (final CloseableHttpClient client = HttpClients.createDefault();
      final CloseableHttpResponse response = client.execute(post)) {
    LOG.debug("status:{}", response.getStatusLine());
    final HttpEntity entity = response.getEntity();
    str = EntityUtils.toString(entity);
  } catch (final ConnectTimeoutException | SocketTimeoutException | NoHttpResponseException | SocketException e) {
    // connection problems are an expected outcome here: report null instead of failing
    if (!silentErrors) {
      LOG.error("post {} {} connection error:{}", rpcNode, inputJson, e.getMessage());
    }
    return null;
  } catch (final IOException e) {
    throw new RuntimeException(e);
  }
  // anything that is not a JSON object is treated as an error response
  if (!str.startsWith("{")) {
    if (!silentErrors) {
      LOG.error("post {} {} json error:\"{}\"", rpcNode, inputJson, str);
    }
    return null;
  }
  final JSONObject outputJson = new JSONObject(str);
  LOG.debug("outputJson:{}", outputJson.toString(2));
  return outputJson;
}
use of org.apache.http.conn.ConnectTimeoutException in project 91Pop by DanteAndroid.
the class ApiException method handleException.
/**
 * Converts a throwable into an {@link ApiException} carrying an error code and a message.
 *
 * <p>A {@code CompositeException} is unwrapped first: the last contained exception that is not a
 * {@code RxCacheException} replaces {@code e}. The result is then chosen by the type of
 * {@code e}; unrecognized types map to {@code Error.UNKNOWN}.
 *
 * @param e the throwable to convert
 * @return an {@code ApiException} wrapping {@code e} with the matching code and message
 */
public static ApiException handleException(Throwable e) {
  // With RxCache the error arrives wrapped in a CompositeException, which usually holds two
  // exceptions: the RxCache exception and the original one.
  Logger.t(TAG).d("开始解析错误------");
  if (e instanceof CompositeException) {
    CompositeException compositeException = (CompositeException) e;
    for (Throwable throwable : compositeException.getExceptions()) {
      if (!(throwable instanceof RxCacheException)) {
        e = throwable;
        Logger.t(TAG).d("其他异常:" + throwable.getMessage());
      } else {
        Logger.t(TAG).d("RxCache 异常");
      }
    }
  }
  ApiException ex;
  if (e instanceof HttpException) {
    HttpException httpException = (HttpException) e;
    ex = new ApiException(httpException, httpException.code());
    ex.message = httpException.getMessage();
    return ex;
  } else if (e instanceof JsonParseException || e instanceof JSONException || e instanceof NotSerializableException || e instanceof ParseException) {
    // The former "instanceof JsonSerializer" test was dropped: JsonSerializer is a serializer
    // interface, not an exception type, so it never identified a parse failure.
    ex = new ApiException(e, Error.PARSE_ERROR);
    ex.message = "数据解析错误";
    return ex;
  } else if (e instanceof ClassCastException) {
    ex = new ApiException(e, Error.CAST_ERROR);
    ex.message = "类型转换错误";
    return ex;
  } else if (e instanceof ConnectException) {
    ex = new ApiException(e, Error.NETWORD_ERROR);
    ex.message = "连接失败";
    return ex;
  } else if (e instanceof javax.net.ssl.SSLHandshakeException) {
    ex = new ApiException(e, Error.SSL_ERROR);
    ex.message = "证书验证失败";
    return ex;
  } else if (e instanceof ConnectTimeoutException || e instanceof java.net.SocketTimeoutException) {
    // connect and socket timeouts share the same code and message
    ex = new ApiException(e, Error.TIMEOUT_ERROR);
    ex.message = "网络连接超时";
    return ex;
  } else if (e instanceof UnknownHostException) {
    ex = new ApiException(e, Error.UNKNOWNHOST_ERROR);
    ex.message = "无法解析该域名";
    return ex;
  } else if (e instanceof NullPointerException) {
    ex = new ApiException(e, Error.NULLPOINTER_EXCEPTION);
    ex.message = "NullPointerException";
    return ex;
  } else if (e instanceof VideoException) {
    ex = new ApiException(e, Error.PARSE_VIDEO_URL_ERROR);
    ex.message = e.getMessage();
    return ex;
  } else if (e instanceof FavoriteException) {
    ex = new ApiException(e, Error.FAVORITE_VIDEO_ERROR);
    ex.message = e.getMessage();
    return ex;
  } else if (e instanceof DaoException) {
    ex = new ApiException(e, Error.GREEN_DAO_ERROR);
    ex.message = "数据库错误";
    return ex;
  } else if (e instanceof MessageException) {
    ex = new ApiException(e, Error.COMMON_MESSAGE_ERROR);
    ex.message = e.getMessage();
    return ex;
  } else {
    ex = new ApiException(e, Error.UNKNOWN);
    ex.message = "未知错误:" + e.getMessage();
    return ex;
  }
}
Aggregations