Search in sources :

Example 36 with ConnectTimeoutException

use of org.apache.http.conn.ConnectTimeoutException in project lucene-solr by apache.

the class CloudSolrClient method requestWithRetryOnStaleState.

/**
   * As this class doesn't watch external collections on the client side,
   * there's a chance that the request will fail due to cached stale state,
   * which means the state must be refreshed from ZK and retried.
   *
   * <p>The method attaches a {@code STATE_VERSION} param describing the cached
   * version of every requested collection whose state format is greater than 1,
   * sends the request, and on failure decides whether the cached state should be
   * invalidated and the request re-issued (at most {@code MAX_STALE_RETRIES} times).
   *
   * @param request    the request to send; its params are only modified when they
   *                   are a {@link ModifiableSolrParams}
   * @param retryCount number of retries already performed for this request
   * @param collection the target collection expression, or {@code null} when the
   *                   request is not bound to a collection (no retry is attempted then)
   * @return the response returned by {@code sendRequest}, possibly from a retry
   * @throws SolrServerException if the request fails and is not retried
   * @throws IOException         if the request fails with an I/O error and is not retried
   */
protected NamedList<Object> requestWithRetryOnStaleState(SolrRequest request, int retryCount, String collection) throws SolrServerException, IOException {
    // important to call this before you start working with the ZkStateReader
    connect();
    // build up a _stateVer_ param to pass to the server containing all of the
    // external collection state versions involved in this request, which allows
    // the server to notify us that our cached state for one or more of the external
    // collections is stale and needs to be refreshed ... this code has no impact on internal collections
    String stateVerParam = null;
    // stays null unless at least one requested collection has a state format > 1
    List<DocCollection> requestedCollections = null;
    boolean isCollectionRequestOfV2 = false;
    if (request instanceof V2Request) {
        isCollectionRequestOfV2 = ((V2Request) request).isPerCollectionRequest();
    }
    boolean isAdmin = ADMIN_PATHS.contains(request.getPath());
    if (collection != null && !isAdmin && !isCollectionRequestOfV2) {
        // don't do _stateVer_ checking for admin, v2 api requests
        Set<String> requestedCollectionNames = getCollectionNames(collection);
        StringBuilder stateVerParamBuilder = null;
        for (String requestedCollection : requestedCollectionNames) {
            // track the version of state we're using on the client side using the _stateVer_ param
            DocCollection coll = getDocCollection(requestedCollection, null);
            if (coll == null) {
                throw new SolrException(ErrorCode.BAD_REQUEST, "Collection not found: " + requestedCollection);
            }
            int collVer = coll.getZNodeVersion();
            if (coll.getStateFormat() > 1) {
                if (requestedCollections == null) {
                    requestedCollections = new ArrayList<>(requestedCollectionNames.size());
                }
                requestedCollections.add(coll);
                if (stateVerParamBuilder == null) {
                    stateVerParamBuilder = new StringBuilder();
                } else {
                    // hopefully pipe is not an allowed char in a collection name
                    stateVerParamBuilder.append("|");
                }
                stateVerParamBuilder.append(coll.getName()).append(":").append(collVer);
            }
        }
        if (stateVerParamBuilder != null) {
            stateVerParam = stateVerParamBuilder.toString();
        }
    }
    if (request.getParams() instanceof ModifiableSolrParams) {
        ModifiableSolrParams params = (ModifiableSolrParams) request.getParams();
        if (stateVerParam != null) {
            params.set(STATE_VERSION, stateVerParam);
        } else {
            // make sure a value left over from a previous attempt is not sent again
            params.remove(STATE_VERSION);
        }
    }
    // else: ??? how to set this ???
    NamedList<Object> resp = null;
    try {
        resp = sendRequest(request, collection);
        //to avoid an O(n) operation we always add STATE_VERSION to the last and try to read it from there
        Object o = resp == null || resp.size() == 0 ? null : resp.get(STATE_VERSION, resp.size() - 1);
        // instanceof is already false for null, so no separate null check is needed
        if (o instanceof Map) {
            //remove this because no one else needs this and tests would fail if they are comparing responses
            resp.remove(resp.size() - 1);
            Map<?, ?> invalidStates = (Map<?, ?>) o;
            for (Map.Entry<?, ?> e : invalidStates.entrySet()) {
                // key is the collection name, value the version passed on to getDocCollection
                getDocCollection((String) e.getKey(), (Integer) e.getValue());
            }
        }
    } catch (Exception exc) {
        Throwable rootCause = SolrException.getRootCause(exc);
        // no retry support when there is no collection, for admin requests,
        // or when the request is a v2 api request and its method is not GET
        if (collection == null || isAdmin || (request instanceof V2Request && request.getMethod() != SolrRequest.METHOD.GET)) {
            if (exc instanceof SolrServerException) {
                throw (SolrServerException) exc;
            } else if (exc instanceof IOException) {
                throw (IOException) exc;
            } else if (exc instanceof RuntimeException) {
                throw (RuntimeException) exc;
            } else {
                throw new SolrServerException(rootCause);
            }
        }
        int errorCode = (rootCause instanceof SolrException) ? ((SolrException) rootCause).code() : SolrException.ErrorCode.UNKNOWN.code;
        log.error("Request to collection {} failed due to ({}) {}, retry? {}", collection, errorCode, rootCause.toString(), retryCount);
        boolean wasCommError = (rootCause instanceof ConnectException || rootCause instanceof ConnectTimeoutException || rootCause instanceof NoHttpResponseException || rootCause instanceof SocketException);
        if (wasCommError) {
            // a communication error: flag the cached entries of the collections we used
            // as possibly stale. requestedCollections is null when no collection with a
            // state format > 1 was involved, so it must be guarded here; iterating it
            // unconditionally would replace the real failure with a NullPointerException
            if (requestedCollections != null) {
                for (DocCollection ext : requestedCollections) {
                    ExpiringCachedDocCollection cacheEntry = collectionStateCache.get(ext.getName());
                    if (cacheEntry == null) {
                        continue;
                    }
                    cacheEntry.maybeStale = true;
                }
            }
            if (retryCount < MAX_STALE_RETRIES) {
                // retry right away while the retry budget is not exhausted
                return requestWithRetryOnStaleState(request, retryCount + 1, collection);
            }
        }
        boolean stateWasStale = false;
        if (retryCount < MAX_STALE_RETRIES && requestedCollections != null && !requestedCollections.isEmpty() && SolrException.ErrorCode.getErrorCode(errorCode) == SolrException.ErrorCode.INVALID_STATE) {
            // cached state for one or more external collections was stale
            // re-issue request using updated state
            stateWasStale = true;
            // just re-read state for all of them, which is a little heavy handed but hopefully a rare occurrence
            for (DocCollection ext : requestedCollections) {
                collectionStateCache.remove(ext.getName());
            }
        }
        // after a communication error, compare our state with the latest one from ZK:
        // a changed znode version means the cached state we used was out of date
        if (retryCount < MAX_STALE_RETRIES && !stateWasStale && requestedCollections != null && !requestedCollections.isEmpty() && wasCommError) {
            for (DocCollection ext : requestedCollections) {
                DocCollection latestStateFromZk = getDocCollection(ext.getName(), null);
                if (latestStateFromZk.getZNodeVersion() != ext.getZNodeVersion()) {
                    // looks like we couldn't reach the server because the state was stale == retry
                    stateWasStale = true;
                    // we just pulled state from ZK, so update the cache so that the retry uses it
                    collectionStateCache.put(ext.getName(), new ExpiringCachedDocCollection(latestStateFromZk));
                }
            }
        }
        if (requestedCollections != null) {
            // done with this
            requestedCollections.clear();
        }
        // if the state was stale, then we retry the request once with new state pulled from Zk
        if (stateWasStale) {
            log.warn("Re-trying request to  collection(s) {} after stale state error from server.", collection);
            resp = requestWithRetryOnStaleState(request, retryCount + 1, collection);
        } else {
            // nothing more to try: propagate the failure, wrapping anything that is
            // not already one of the declared or unchecked exception types
            if (exc instanceof SolrException) {
                throw exc;
            }
            if (exc instanceof SolrServerException) {
                throw (SolrServerException) exc;
            } else if (exc instanceof IOException) {
                throw (IOException) exc;
            } else {
                throw new SolrServerException(rootCause);
            }
        }
    }
    return resp;
}
Also used : SocketException(java.net.SocketException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException) ConnectException(java.net.ConnectException) NoHttpResponseException(org.apache.http.NoHttpResponseException) IOException(java.io.IOException) V2Request(org.apache.solr.client.solrj.request.V2Request) TimeoutException(java.util.concurrent.TimeoutException) NoHttpResponseException(org.apache.http.NoHttpResponseException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SolrException(org.apache.solr.common.SolrException) SocketException(java.net.SocketException) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException)

Example 37 with ConnectTimeoutException

use of org.apache.http.conn.ConnectTimeoutException in project lucene-solr by apache.

the class PeerSync method handleResponse.

/**
 * Evaluates a single shard response.
 *
 * <p>A response without an exception is passed to {@code handleVersions} when the
 * originating request has purpose {@code 1}, and to {@code handleUpdates} otherwise.
 * A response carrying an exception is reported as a failure, unless
 * {@code cantReachIsSuccess} is set, the request has purpose {@code 1}, and the
 * failure is either a connection-level problem or an HTTP 503/404 status.
 *
 * @param srsp the shard response to evaluate
 * @return {@code true} when the response counts as a success, {@code false} otherwise
 */
private boolean handleResponse(ShardResponse srsp) {
    final ShardRequest shardRequest = srsp.getShardRequest();

    // Happy path first: no exception, so dispatch on the request purpose.
    if (srsp.getException() == null) {
        return shardRequest.purpose == 1 ? handleVersions(srsp) : handleUpdates(srsp);
    }

    // Shared precondition of every "counting as success" case below.
    final boolean failureMayCountAsSuccess = cantReachIsSuccess && shardRequest.purpose == 1;

    if (failureMayCountAsSuccess && srsp.getException() instanceof SolrServerException) {
        final Throwable root = ((SolrServerException) srsp.getException()).getRootCause();
        final boolean timeoutInChain = connectTimeoutExceptionInChain(srsp.getException());
        final boolean unreachable = timeoutInChain
            || root instanceof ConnectException
            || root instanceof ConnectTimeoutException
            || root instanceof NoHttpResponseException
            || root instanceof SocketException;
        if (unreachable) {
            log.warn(msg() + " couldn't connect to " + srsp.getShardAddress() + ", counting as success", srsp.getException());
            return true;
        }
    }

    if (failureMayCountAsSuccess && srsp.getException() instanceof SolrException) {
        final int status = ((SolrException) srsp.getException()).code();
        if (status == 503) {
            log.warn(msg() + " got a 503 from " + srsp.getShardAddress() + ", counting as success", srsp.getException());
            return true;
        }
        if (status == 404) {
            log.warn(msg() + " got a 404 from " + srsp.getShardAddress() + ", counting as success. " + "Perhaps /get is not registered?", srsp.getException());
            return true;
        }
    }

    // TODO: we should return the above information so that when we can request a recovery through zookeeper, we do
    // that for these nodes
    log.warn(msg() + " exception talking to " + srsp.getShardAddress() + ", failed", srsp.getException());
    return false;
}
Also used : NoHttpResponseException(org.apache.http.NoHttpResponseException) SocketException(java.net.SocketException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ShardRequest(org.apache.solr.handler.component.ShardRequest) SolrException(org.apache.solr.common.SolrException) ConnectException(java.net.ConnectException) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException)

Example 38 with ConnectTimeoutException

use of org.apache.http.conn.ConnectTimeoutException in project lucene-solr by apache.

the class LeaderInitiatedRecoveryThread method sendRecoveryCommandWithRetry.

/**
 * Repeatedly sends a {@code REQUESTRECOVERY} core admin command to the replica
 * described by {@code nodeProps}, making at most {@code maxTries} attempts with a
 * pause of {@code waitBetweenTriesMs} between them.
 *
 * <p>The loop stops as soon as one of the following holds:
 * <ul>
 *   <li>the command was sent successfully;</li>
 *   <li>sending failed with something other than a communication error;</li>
 *   <li>the core container is shut down;</li>
 *   <li>the replica's node is no longer among the live nodes;</li>
 *   <li>this core is no longer the leader;</li>
 *   <li>the leader-initiated recovery state is gone ({@code null}) or is
 *       {@code RECOVERING}.</li>
 * </ul>
 * When the loop ends, the replica is always removed from leader-initiated recovery
 * handling on this node.
 *
 * @throws Exception declared by the signature; failures of the send itself and of
 *                   the ZooKeeper state checks are caught and logged inside the loop
 */
protected void sendRecoveryCommandWithRetry() throws Exception {
    int tries = 0;
    long waitBetweenTriesMs = 5000L;
    boolean continueTrying = true;
    // NOTE(review): replicaCoreName and coreNeedingRecovery are both read from nodeProps.getCoreName()
    String replicaCoreName = nodeProps.getCoreName();
    String recoveryUrl = nodeProps.getBaseUrl();
    String replicaNodeName = nodeProps.getNodeName();
    String coreNeedingRecovery = nodeProps.getCoreName();
    String replicaCoreNodeName = ((Replica) nodeProps.getNodeProps()).getName();
    String replicaUrl = nodeProps.getCoreUrl();
    log.info(getName() + " started running to send REQUESTRECOVERY command to " + replicaUrl + "; will try for a max of " + (maxTries * (waitBetweenTriesMs / 1000)) + " secs");
    // the same command object is reused for every attempt
    RequestRecovery recoverRequestCmd = new RequestRecovery();
    recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
    recoverRequestCmd.setCoreName(coreNeedingRecovery);
    while (continueTrying && ++tries <= maxTries) {
        if (tries > 1) {
            log.warn("Asking core={} coreNodeName={} on " + recoveryUrl + " to recover; unsuccessful after " + tries + " of " + maxTries + " attempts so far ...", coreNeedingRecovery, replicaCoreNodeName);
        } else {
            log.info("Asking core={} coreNodeName={} on " + recoveryUrl + " to recover", coreNeedingRecovery, replicaCoreNodeName);
        }
        // a fresh client is built for each attempt and closed by try-with-resources
        try (HttpSolrClient client = new HttpSolrClient.Builder(recoveryUrl).build()) {
            // presumably milliseconds (60s socket / 15s connect) — confirm against HttpSolrClient
            client.setSoTimeout(60000);
            client.setConnectionTimeout(15000);
            try {
                client.request(recoverRequestCmd);
                log.info("Successfully sent " + CoreAdminAction.REQUESTRECOVERY + " command to core={} coreNodeName={} on " + recoveryUrl, coreNeedingRecovery, replicaCoreNodeName);
                // succeeded, so stop looping
                continueTrying = false;
            } catch (Exception t) {
                Throwable rootCause = SolrException.getRootCause(t);
                boolean wasCommError = (rootCause instanceof ConnectException || rootCause instanceof ConnectTimeoutException || rootCause instanceof NoHttpResponseException || rootCause instanceof SocketException);
                SolrException.log(log, recoveryUrl + ": Could not tell a replica to recover", t);
                // only communication errors are retried; any other failure ends the loop
                if (!wasCommError) {
                    continueTrying = false;
                }
            }
        }
        // wait a few seconds
        if (continueTrying) {
            try {
                Thread.sleep(waitBetweenTriesMs);
            } catch (InterruptedException ignoreMe) {
                // restore the interrupt flag; the loop itself keeps running
                Thread.currentThread().interrupt();
            }
            if (coreContainer.isShutDown()) {
                log.warn("Stop trying to send recovery command to downed replica core={} coreNodeName={} on " + replicaNodeName + " because my core container is closed.", coreNeedingRecovery, replicaCoreNodeName);
                continueTrying = false;
                break;
            }
            // see if the replica's node is still live, if not, no need to keep doing this loop
            ZkStateReader zkStateReader = zkController.getZkStateReader();
            if (!zkStateReader.getClusterState().liveNodesContain(replicaNodeName)) {
                log.warn("Node " + replicaNodeName + " hosting core " + coreNeedingRecovery + " is no longer live. No need to keep trying to tell it to recover!");
                continueTrying = false;
                break;
            }
            String leaderCoreNodeName = leaderCd.getCloudDescriptor().getCoreNodeName();
            // stop trying if I'm no longer the leader
            if (leaderCoreNodeName != null && collection != null) {
                String leaderCoreNodeNameFromZk = null;
                try {
                    leaderCoreNodeNameFromZk = zkController.getZkStateReader().getLeaderRetry(collection, shardId, 1000).getName();
                } catch (Exception exc) {
                    log.error("Failed to determine if " + leaderCoreNodeName + " is still the leader for " + collection + " " + shardId + " before starting leader-initiated recovery thread for " + replicaUrl + " due to: " + exc);
                }
                // a failed lookup leaves leaderCoreNodeNameFromZk null, which also ends the loop here
                if (!leaderCoreNodeName.equals(leaderCoreNodeNameFromZk)) {
                    log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because " + leaderCoreNodeName + " is no longer the leader! New leader is " + leaderCoreNodeNameFromZk);
                    continueTrying = false;
                    break;
                }
                if (!leaderCd.getCloudDescriptor().isLeader()) {
                    log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because " + leaderCoreNodeName + " is no longer the leader!");
                    continueTrying = false;
                    break;
                }
            }
            // check the leader-initiated recovery state recorded for the replica and
            // compare it with the replica's published state
            if (collection != null && shardId != null) {
                try {
                    // call out to ZooKeeper to get the leader-initiated recovery state
                    final Replica.State lirState = zkController.getLeaderInitiatedRecoveryState(collection, shardId, replicaCoreNodeName);
                    if (lirState == null) {
                        log.warn("Stop trying to send recovery command to downed replica core=" + coreNeedingRecovery + ",coreNodeName=" + replicaCoreNodeName + " on " + replicaNodeName + " because the znode no longer exists.");
                        continueTrying = false;
                        break;
                    }
                    if (lirState == Replica.State.RECOVERING) {
                        // replica has ack'd leader initiated recovery and entered the recovering state
                        // so we don't need to keep looping to send the command
                        continueTrying = false;
                        log.info("Replica " + coreNeedingRecovery + " on node " + replicaNodeName + " ack'd the leader initiated recovery state, " + "no need to keep trying to send recovery command");
                    } else {
                        String lcnn = zkStateReader.getLeaderRetry(collection, shardId, 5000).getName();
                        List<ZkCoreNodeProps> replicaProps = zkStateReader.getReplicaProps(collection, shardId, lcnn);
                        if (replicaProps != null && replicaProps.size() > 0) {
                            // find the entry for the replica we are trying to recover
                            for (ZkCoreNodeProps prop : replicaProps) {
                                final Replica replica = (Replica) prop.getNodeProps();
                                if (replicaCoreNodeName.equals(replica.getName())) {
                                    if (replica.getState() == Replica.State.ACTIVE) {
                                        // the replica reports itself as active,
                                        // which is bad if lirState is still "down"
                                        if (lirState == Replica.State.DOWN) {
                                            // OK, so the replica thinks it is active, but it never ack'd the leader initiated recovery
                                            // so its state cannot be trusted and it needs to be told to recover again ... and we keep looping here
                                            log.warn("Replica core={} coreNodeName={} set to active but the leader thinks it should be in recovery;" + " forcing it back to down state to re-run the leader-initiated recovery process; props: " + replicaProps.get(0), coreNeedingRecovery, replicaCoreNodeName);
                                            publishDownState(replicaCoreName, replicaCoreNodeName, replicaNodeName, replicaUrl, true);
                                        }
                                    }
                                    // matching replica found, no need to look at the remaining entries
                                    break;
                                }
                            }
                        }
                    }
                } catch (Exception ignoreMe) {
                    log.warn("Failed to determine state of core={} coreNodeName={} due to: " + ignoreMe, coreNeedingRecovery, replicaCoreNodeName);
                    // eventually this loop will exhaust max tries and stop so we can just log this for now
                }
            }
        }
    }
    // replica is no longer in recovery on this node (may be handled on another node)
    zkController.removeReplicaFromLeaderInitiatedRecoveryHandling(replicaUrl);
    if (continueTrying) {
        // ugh! this means the loop timed out before the recovery command could be delivered
        // how exotic do we want to get here?
        log.error("Timed out after waiting for " + (tries * (waitBetweenTriesMs / 1000)) + " secs to send the recovery request to: " + replicaUrl + "; not much more we can do here?");
        // TODO: need to raise a JMX event to allow monitoring tools to take over from here
    }
}
Also used : NoHttpResponseException(org.apache.http.NoHttpResponseException) SocketException(java.net.SocketException) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) RequestRecovery(org.apache.solr.client.solrj.request.CoreAdminRequest.RequestRecovery) Replica(org.apache.solr.common.cloud.Replica) KeeperException(org.apache.zookeeper.KeeperException) NoHttpResponseException(org.apache.http.NoHttpResponseException) SolrException(org.apache.solr.common.SolrException) SocketException(java.net.SocketException) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException) ConnectException(java.net.ConnectException) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) ConnectException(java.net.ConnectException) ConnectTimeoutException(org.apache.http.conn.ConnectTimeoutException)

Aggregations

ConnectTimeoutException (org.apache.http.conn.ConnectTimeoutException)38 IOException (java.io.IOException)17 SocketTimeoutException (java.net.SocketTimeoutException)16 HttpResponse (org.apache.http.HttpResponse)14 HashMap (java.util.HashMap)12 StatusLine (org.apache.http.StatusLine)11 MalformedURLException (java.net.MalformedURLException)10 TimeoutError (com.android.volley.TimeoutError)9 SocketException (java.net.SocketException)9 AuthFailureError (com.android.volley.AuthFailureError)8 NetworkError (com.android.volley.NetworkError)8 NetworkResponse (com.android.volley.NetworkResponse)8 NoConnectionError (com.android.volley.NoConnectionError)8 ServerError (com.android.volley.ServerError)8 DefaultHttpClient (org.apache.http.impl.client.DefaultHttpClient)8 ConnectException (java.net.ConnectException)6 HttpPost (org.apache.http.client.methods.HttpPost)6 Test (org.junit.Test)6 Entry (com.android.volley.Cache.Entry)4 InetSocketAddress (java.net.InetSocketAddress)4