Search in sources:

Example 6 with ZkCoreNodeProps

Use of org.apache.solr.common.cloud.ZkCoreNodeProps in the Apache lucene-solr project.

In class ZkController, method waitForLeaderToSeeDownState:

/**
 * Looks up the current leader of this core's shard and, when this core is not itself the
 * leader, asks the leader to wait until it sees this replica in the {@code DOWN} state.
 *
 * <p>The leader lookup is attempted up to two times with a 2 second pause after each failure.
 * The wait-for-state request to the leader is skipped when this core is the leader, when
 * {@code SKIP_AUTO_RECOVERY} is set, or when a leader-initiated recovery state is already
 * recorded for this replica. If reading that recovery state fails, the failure is logged and
 * the method proceeds as if no such state existed.
 *
 * @param descriptor    descriptor of the local core; supplies the collection, shard, core node
 *                      name and core name
 * @param coreZkNodeName core node name sent to the leader in the wait-for-state request
 * @return the leader properties obtained from {@code getLeaderProps}
 * @throws SolrException with {@code SERVICE_UNAVAILABLE} if this controller is closed, the core
 *                       container is shut down, or the leader's node is not live after a
 *                       communication error; with {@code SERVER_ERROR} (carrying the underlying
 *                       cause) when either retry loop exhausts its attempts
 */
private ZkCoreNodeProps waitForLeaderToSeeDownState(CoreDescriptor descriptor, final String coreZkNodeName) {
    // try not to wait too long here - if we are waiting too long, we should probably
    // move along and join the election
    CloudDescriptor cloudDesc = descriptor.getCloudDescriptor();
    String collection = cloudDesc.getCollectionName();
    String shard = cloudDesc.getShardId();
    ZkCoreNodeProps leaderProps = null;
    int retries = 2;
    for (int i = 0; i < retries; i++) {
        // Checked outside the try (as in the request loop below) so that the
        // SERVICE_UNAVAILABLE failure propagates immediately instead of being caught by the
        // generic handler, retried, and finally re-reported as SERVER_ERROR.
        if (isClosed) {
            throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "We have been closed");
        }
        try {
            // go straight to zk, not the cloud state - we want current info
            // NOTE(review): 5000 is presumably a timeout in milliseconds - confirm against getLeaderProps
            leaderProps = getLeaderProps(collection, shard, 5000);
            break;
        } catch (Exception e) {
            SolrException.log(log, "There was a problem finding the leader in zk", e);
            try {
                Thread.sleep(2000);
            } catch (InterruptedException e1) {
                // restore the interrupt flag for callers; the loop itself carries on
                Thread.currentThread().interrupt();
            }
            if (i == retries - 1) {
                // keep the underlying failure attached so callers can see why the lookup failed
                throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem finding the leader in zk", e);
            }
        }
    }
    String leaderBaseUrl = leaderProps.getBaseUrl();
    String leaderCoreName = leaderProps.getCoreName();
    String myCoreNodeName = cloudDesc.getCoreNodeName();
    String myCoreName = descriptor.getName();
    String ourUrl = ZkCoreNodeProps.getCoreUrl(getBaseUrl(), myCoreName);
    boolean isLeader = leaderProps.getCoreUrl().equals(ourUrl);
    if (!isLeader && !SKIP_AUTO_RECOVERY) {
        // detect if this core is in leader-initiated recovery and if so, 
        // then we don't need the leader to wait on seeing the down state
        Replica.State lirState = null;
        try {
            lirState = getLeaderInitiatedRecoveryState(collection, shard, myCoreNodeName);
        } catch (Exception exc) {
            // best effort: a failed lookup leaves lirState null, so we fall through to waiting on the leader
            log.error("Failed to determine if replica " + myCoreNodeName + " is in leader-initiated recovery due to: " + exc, exc);
        }
        if (lirState != null) {
            log.debug("Replica " + myCoreNodeName + " is already in leader-initiated recovery, so not waiting for leader to see down state.");
        } else {
            log.info("Replica " + myCoreNodeName + " NOT in leader-initiated recovery, need to wait for leader to see down state.");
            try (HttpSolrClient client = new Builder(leaderBaseUrl).build()) {
                client.setConnectionTimeout(15000);
                client.setSoTimeout(120000);
                WaitForState prepCmd = new WaitForState();
                prepCmd.setCoreName(leaderCoreName);
                prepCmd.setNodeName(getNodeName());
                prepCmd.setCoreNodeName(coreZkNodeName);
                prepCmd.setState(Replica.State.DOWN);
                // let's retry a couple times - perhaps the leader just went down,
                // or perhaps he is just not quite ready for us yet
                retries = 2;
                for (int i = 0; i < retries; i++) {
                    if (isClosed) {
                        throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "We have been closed");
                    }
                    try {
                        client.request(prepCmd);
                        break;
                    } catch (Exception e) {
                        // if the core container is shutdown, don't wait
                        if (cc.isShutDown()) {
                            throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Core container is shutdown.");
                        }
                        Throwable rootCause = SolrException.getRootCause(e);
                        if (rootCause instanceof IOException) {
                            // if there was a communication error talking to the leader, see if the leader is even alive
                            if (!zkStateReader.getClusterState().liveNodesContain(leaderProps.getNodeName())) {
                                throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Node " + leaderProps.getNodeName() + " hosting leader for " + shard + " in " + collection + " is not live!");
                            }
                        }
                        SolrException.log(log, "There was a problem making a request to the leader", e);
                        try {
                            Thread.sleep(2000);
                        } catch (InterruptedException e1) {
                            // restore the interrupt flag for callers; the loop itself carries on
                            Thread.currentThread().interrupt();
                        }
                        if (i == retries - 1) {
                            // keep the underlying failure attached so callers can see why the request failed
                            throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem making a request to the leader", e);
                        }
                    }
                }
            } catch (IOException e) {
                // only reachable from the implicit close() of the try-with-resources client
                SolrException.log(log, "Error closing HttpSolrClient", e);
            }
        }
    }
    return leaderProps;
}
Also used : WaitForState(org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) Builder(org.apache.solr.client.solrj.impl.HttpSolrClient.Builder) IOException(java.io.IOException) Replica(org.apache.solr.common.cloud.Replica) TimeoutException(java.util.concurrent.TimeoutException) SolrException(org.apache.solr.common.SolrException) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) SessionExpiredException(org.apache.zookeeper.KeeperException.SessionExpiredException) ConnectionLossException(org.apache.zookeeper.KeeperException.ConnectionLossException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) SolrCoreInitializationException(org.apache.solr.core.SolrCoreInitializationException) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) SolrException(org.apache.solr.common.SolrException)

Example 7 with ZkCoreNodeProps

Use of org.apache.solr.common.cloud.ZkCoreNodeProps in the Apache lucene-solr project.

In class OverseerCollectionMessageHandler, method commit:

/**
 * Sends a soft commit to the parent shard leader's core and records the outcome in
 * {@code results}.
 *
 * <p>The outcome is recorded through {@code processResponse} both on success and on failure;
 * on failure the exception is additionally rethrown wrapped in a {@link SolrException}.
 *
 * @param results           accumulator passed through to {@code processResponse}
 * @param slice             slice name passed through to {@code processResponse}
 * @param parentShardLeader replica whose core URL receives the soft commit
 * @throws SolrException with {@code SERVER_ERROR}, wrapping the original failure, when the soft
 *                       commit or the processing of its response throws
 */
void commit(NamedList results, String slice, Replica parentShardLeader) {
    log.debug("Calling soft commit to make sub shard updates visible");
    final ZkCoreNodeProps leaderProps = new ZkCoreNodeProps(parentShardLeader);
    final String leaderCoreUrl = leaderProps.getCoreUrl();
    // HttpShardHandler is hard coded to send a QueryRequest, so the commit is issued directly;
    // a searcher is forced open so that documents are visible once the states are switched
    UpdateResponse commitResponse = null;
    try {
        commitResponse = softCommit(leaderCoreUrl);
        processResponse(results, null, leaderCoreUrl, commitResponse, slice, Collections.emptySet());
    } catch (Exception commitFailure) {
        // commitResponse is still null here if softCommit itself was the call that failed
        processResponse(results, commitFailure, leaderCoreUrl, commitResponse, slice, Collections.emptySet());
        throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to call distrib softCommit on: " + leaderCoreUrl, commitFailure);
    }
}
Also used : UpdateResponse(org.apache.solr.client.solrj.response.UpdateResponse) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) SolrServerException(org.apache.solr.client.solrj.SolrServerException) RemoteSolrException(org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException) IOException(java.io.IOException) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) RemoteSolrException(org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException) SolrException(org.apache.solr.common.SolrException)

Example 8 with ZkCoreNodeProps

Use of org.apache.solr.common.cloud.ZkCoreNodeProps in the Apache lucene-solr project.

In class ManagedIndexSchema, method getActiveReplicaCoreUrls:

/**
 * Collects the core URLs of every other replica of {@code collection} that is both in the
 * {@code ACTIVE} state and hosted on a live node.
 *
 * @param zkController      source of the {@code ZkStateReader} whose cluster state is inspected
 * @param collection        name of the collection whose active slices are scanned
 * @param localCoreNodeName replica name to exclude from the result (compared against
 *                          {@code Replica.getName()})
 * @return a new mutable list of core URLs; empty when the collection has no active slices or no
 *         replica qualifies
 */
protected static List<String> getActiveReplicaCoreUrls(ZkController zkController, String collection, String localCoreNodeName) {
    final List<String> coreUrls = new ArrayList<>();
    final ClusterState state = zkController.getZkStateReader().getClusterState();
    final Set<String> liveNodes = state.getLiveNodes();
    final Collection<Slice> slices = state.getActiveSlices(collection);
    if (slices == null || slices.isEmpty()) {
        return coreUrls;
    }
    for (Slice slice : slices) {
        final Map<String, Replica> replicasByName = slice.getReplicasMap();
        if (replicasByName == null) {
            continue;
        }
        for (Replica candidate : replicasByName.values()) {
            // skip the local replica
            if (localCoreNodeName.equals(candidate.getName())) {
                continue;
            }
            // only replicas that report ACTIVE ...
            if (candidate.getState() != Replica.State.ACTIVE) {
                continue;
            }
            // ... and whose node is currently live
            if (!liveNodes.contains(candidate.getNodeName())) {
                continue;
            }
            coreUrls.add(new ZkCoreNodeProps(candidate).getCoreUrl());
        }
    }
    return coreUrls;
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ArrayList(java.util.ArrayList) Replica(org.apache.solr.common.cloud.Replica) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Slice(org.apache.solr.common.cloud.Slice) Map(java.util.Map) HashMap(java.util.HashMap)

Example 9 with ZkCoreNodeProps

Use of org.apache.solr.common.cloud.ZkCoreNodeProps in the Apache lucene-solr project.

In class SolrCmdDistributorTest, method testMaxRetries:

/**
 * Distributes a single add to a node whose {@code checkRetry()} always returns {@code true}
 * while the mock clients are set to {@code Exp.CONNECT_EXCEPTION}, then asserts that
 * {@code checkRetry()} was invoked 6 times and exactly one error was recorded.
 *
 * NOTE(review): the 5 passed to the SolrCmdDistributor constructor is presumably the retry
 * limit behind the expected count of 6 - confirm against SolrCmdDistributor.
 */
private void testMaxRetries() throws IOException {
    final AtomicInteger retryCount = new AtomicInteger();
    final MockStreamingSolrClients mockClients = new MockStreamingSolrClients(updateShardHandler);
    SolrCmdDistributor distributor = new SolrCmdDistributor(mockClients, 5, 0);
    // every request made through the mock clients is set up to fail with a connect exception
    mockClients.setExp(Exp.CONNECT_EXCEPTION);

    final HttpSolrClient firstClient = (HttpSolrClient) clients.get(0);
    ZkNodeProps targetProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, firstClient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
    RetryNode alwaysRetryNode = new RetryNode(new ZkCoreNodeProps(targetProps), null, "collection1", "shard1") {

        @Override
        public boolean checkRetry() {
            // count each consultation and always ask for another attempt
            retryCount.incrementAndGet();
            return true;
        }
    };
    ArrayList<Node> targets = new ArrayList<>();
    targets.add(alwaysRetryNode);

    AddUpdateCommand addCmd = new AddUpdateCommand(null);
    addCmd.solrDoc = sdoc("id", id.incrementAndGet());
    ModifiableSolrParams requestParams = new ModifiableSolrParams();
    distributor.distribAdd(addCmd, targets, requestParams);
    distributor.finish();

    assertEquals(6, retryCount.get());
    assertEquals(1, distributor.getErrors().size());
}
Also used : HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) RetryNode(org.apache.solr.update.SolrCmdDistributor.RetryNode) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RetryNode(org.apache.solr.update.SolrCmdDistributor.RetryNode) StdNode(org.apache.solr.update.SolrCmdDistributor.StdNode) Node(org.apache.solr.update.SolrCmdDistributor.Node) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams)

Example 10 with ZkCoreNodeProps

Use of org.apache.solr.common.cloud.ZkCoreNodeProps in the Apache lucene-solr project.

In class SolrCmdDistributorTest, method testOneRetry:

/**
 * Distributes one add followed by a commit to a node whose first {@code checkRetry()} call
 * clears the injected {@code Exp.CONNECT_EXCEPTION}, then asserts that exactly one retry
 * happened, the document count grew by one, and no errors were recorded.
 */
private void testOneRetry() throws Exception {
    final HttpSolrClient solrclient = (HttpSolrClient) clients.get(0);
    long numFoundBefore = solrclient.query(new SolrQuery("*:*")).getResults().getNumFound();
    final MockStreamingSolrClients streamingClients = new MockStreamingSolrClients(updateShardHandler);
    SolrCmdDistributor cmdDistrib = new SolrCmdDistributor(streamingClients, 5, 0);
    // we will get java.net.ConnectException which we retry on
    streamingClients.setExp(Exp.CONNECT_EXCEPTION);
    ArrayList<Node> nodes = new ArrayList<>();
    // built once; the original constructed an identical ZkNodeProps a second time and overwrote this one
    ZkNodeProps nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, solrclient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
    final AtomicInteger retries = new AtomicInteger();
    RetryNode retryNode = new RetryNode(new ZkCoreNodeProps(nodeProps), null, "collection1", "shard1") {

        @Override
        public boolean checkRetry() {
            // stop injecting the failure so that the retried request can go through
            streamingClients.setExp(null);
            retries.incrementAndGet();
            return true;
        }
    };
    nodes.add(retryNode);
    AddUpdateCommand cmd = new AddUpdateCommand(null);
    cmd.solrDoc = sdoc("id", id.incrementAndGet());
    ModifiableSolrParams params = new ModifiableSolrParams();
    CommitUpdateCommand ccmd = new CommitUpdateCommand(null, false);
    cmdDistrib.distribAdd(cmd, nodes, params);
    cmdDistrib.distribCommit(ccmd, nodes, params);
    cmdDistrib.finish();
    assertEquals(1, retries.get());
    long numFoundAfter = solrclient.query(new SolrQuery("*:*")).getResults().getNumFound();
    // the single retried add must have landed exactly once
    assertEquals(numFoundBefore + 1, numFoundAfter);
    assertEquals(0, cmdDistrib.getErrors().size());
}
Also used : RetryNode(org.apache.solr.update.SolrCmdDistributor.RetryNode) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) RetryNode(org.apache.solr.update.SolrCmdDistributor.RetryNode) StdNode(org.apache.solr.update.SolrCmdDistributor.StdNode) Node(org.apache.solr.update.SolrCmdDistributor.Node) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) SolrQuery(org.apache.solr.client.solrj.SolrQuery) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Aggregations

ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps): 47; Replica (org.apache.solr.common.cloud.Replica): 24; ArrayList (java.util.ArrayList): 22; Slice (org.apache.solr.common.cloud.Slice): 20; HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 16; SolrException (org.apache.solr.common.SolrException): 13; ClusterState (org.apache.solr.common.cloud.ClusterState): 13; IOException (java.io.IOException): 12; ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 12; RetryNode (org.apache.solr.update.SolrCmdDistributor.RetryNode): 12; StdNode (org.apache.solr.update.SolrCmdDistributor.StdNode): 12; Node (org.apache.solr.update.SolrCmdDistributor.Node): 11; ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps): 10; ZkStateReader (org.apache.solr.common.cloud.ZkStateReader): 10; SolrQuery (org.apache.solr.client.solrj.SolrQuery): 9; ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException): 8; KeeperException (org.apache.zookeeper.KeeperException): 8; SolrServerException (org.apache.solr.client.solrj.SolrServerException): 7; Random (java.util.Random): 6; NamedList (org.apache.solr.common.util.NamedList): 6