Search in sources :

Example 1 with Error

use of org.apache.solr.update.SolrCmdDistributor.Error in project lucene-solr by apache.

The method test of the class SolrCmdDistributorTest.

/**
 * End-to-end exercise of {@link SolrCmdDistributor} against the control client and the
 * shard clients.
 *
 * <p>The test runs in stages: (1) add and commit one doc on the control node only,
 * (2) add further docs to control and the first shard client, (3) delete one doc from
 * both, (4) send a large batch of adds to random subsets of the shard clients and verify
 * that one commit reaches every shard and that no deletes were recorded, and (5) run the
 * retry and open-searcher sub-tests. After each of the first three stages the distributor
 * must report zero errors and the doc counts must match.
 *
 * @throws Exception if any Solr request or sub-test fails
 */
@Test
@ShardsFixed(num = 4)
public void test() throws Exception {
    del("*:*");
    SolrCmdDistributor cmdDistrib = new SolrCmdDistributor(updateShardHandler);
    ModifiableSolrParams params = new ModifiableSolrParams();
    List<Node> nodes = new ArrayList<>();
    ZkNodeProps nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, ((HttpSolrClient) controlClient).getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
    nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
    // add one doc to controlClient
    AddUpdateCommand cmd = new AddUpdateCommand(null);
    cmd.solrDoc = sdoc("id", id.incrementAndGet());
    params = new ModifiableSolrParams();
    cmdDistrib.distribAdd(cmd, nodes, params);
    CommitUpdateCommand ccmd = new CommitUpdateCommand(null, false);
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribCommit(ccmd, nodes, params);
    cmdDistrib.finish();
    List<Error> errors = cmdDistrib.getErrors();
    assertEquals(errors.toString(), 0, errors.size());
    long numFound = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    assertEquals(1, numFound);
    // from here on 'nodes' holds control (index 0) and the first shard client (index 1)
    HttpSolrClient client = (HttpSolrClient) clients.get(0);
    nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, client.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
    nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
    // add another 2 docs to control and 3 to client
    cmdDistrib = new SolrCmdDistributor(updateShardHandler);
    cmd.solrDoc = sdoc("id", id.incrementAndGet());
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribAdd(cmd, nodes, params);
    // remember this id: it is the doc deleted from both nodes further down
    int id2 = id.incrementAndGet();
    AddUpdateCommand cmd2 = new AddUpdateCommand(null);
    cmd2.solrDoc = sdoc("id", id2);
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribAdd(cmd2, nodes, params);
    // the third doc goes to the shard client only
    AddUpdateCommand cmd3 = new AddUpdateCommand(null);
    cmd3.solrDoc = sdoc("id", id.incrementAndGet());
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribAdd(cmd3, Collections.singletonList(nodes.get(1)), params);
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribCommit(ccmd, nodes, params);
    cmdDistrib.finish();
    errors = cmdDistrib.getErrors();
    assertEquals(errors.toString(), 0, errors.size());
    SolrDocumentList results = controlClient.query(new SolrQuery("*:*")).getResults();
    numFound = results.getNumFound();
    assertEquals(results.toString(), 3, numFound);
    numFound = client.query(new SolrQuery("*:*")).getResults().getNumFound();
    assertEquals(3, numFound);
    // now delete doc 2 which is on both control and client1
    DeleteUpdateCommand dcmd = new DeleteUpdateCommand(null);
    dcmd.id = Integer.toString(id2);
    cmdDistrib = new SolrCmdDistributor(updateShardHandler);
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribDelete(dcmd, nodes, params);
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribCommit(ccmd, nodes, params);
    cmdDistrib.finish();
    errors = cmdDistrib.getErrors();
    assertEquals(errors.toString(), 0, errors.size());
    results = controlClient.query(new SolrQuery("*:*")).getResults();
    numFound = results.getNumFound();
    assertEquals(results.toString(), 2, numFound);
    // report the shard client's own documents on failure, not the control client's
    SolrDocumentList clientResults = client.query(new SolrQuery("*:*")).getResults();
    numFound = clientResults.getNumFound();
    assertEquals(clientResults.toString(), 2, numFound);
    for (SolrClient c : clients) {
        c.optimize();
    }
    // send many adds, each to a random (possibly empty) subset of the shard clients
    cmdDistrib = new SolrCmdDistributor(updateShardHandler);
    int cnt = atLeast(303);
    for (int i = 0; i < cnt; i++) {
        nodes.clear();
        for (SolrClient c : clients) {
            if (random().nextBoolean()) {
                continue;
            }
            HttpSolrClient httpClient = (HttpSolrClient) c;
            nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, httpClient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
            nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
        }
        AddUpdateCommand c = new AddUpdateCommand(null);
        c.solrDoc = sdoc("id", id.incrementAndGet());
        // an id is consumed even when no node was selected; the add is simply skipped
        if (nodes.size() > 0) {
            params = new ModifiableSolrParams();
            cmdDistrib.distribAdd(c, nodes, params);
        }
    }
    // target every shard client for the final commit
    nodes.clear();
    for (SolrClient c : clients) {
        HttpSolrClient httpClient = (HttpSolrClient) c;
        nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP, httpClient.getBaseURL(), ZkStateReader.CORE_NAME_PROP, "");
        nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
    }
    // count hard commits (postCommit callbacks) across all jetty instances
    final AtomicInteger commits = new AtomicInteger();
    for (JettySolrRunner jetty : jettys) {
        CoreContainer cores = jetty.getCoreContainer();
        try (SolrCore core = cores.getCore("collection1")) {
            core.getUpdateHandler().registerCommitCallback(new SolrEventListener() {

                @Override
                public void init(NamedList args) {
                }

                @Override
                public void postSoftCommit() {
                }

                @Override
                public void postCommit() {
                    commits.incrementAndGet();
                }

                @Override
                public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
                }
            });
        }
    }
    params = new ModifiableSolrParams();
    params.set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
    cmdDistrib.distribCommit(ccmd, nodes, params);
    cmdDistrib.finish();
    // one distributed commit must produce one commit callback per shard
    assertEquals(getShardCount(), commits.get());
    for (SolrClient c : clients) {
        NamedList<Object> resp = c.request(new LukeRequest());
        // numDocs == maxDoc means the index holds no deleted documents
        assertEquals("SOLR-3428: We only did adds - there should be no deletes", ((NamedList<Object>) resp.get("index")).get("numDocs"), ((NamedList<Object>) resp.get("index")).get("maxDoc"));
    }
    testMaxRetries();
    testOneRetry();
    testRetryNodeAgainstBadAddress();
    testRetryNodeWontRetrySocketError();
    testDistribOpenSearcher();
}
Also used : ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) SolrCore(org.apache.solr.core.SolrCore) RetryNode(org.apache.solr.update.SolrCmdDistributor.RetryNode) StdNode(org.apache.solr.update.SolrCmdDistributor.StdNode) Node(org.apache.solr.update.SolrCmdDistributor.Node) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrQuery(org.apache.solr.client.solrj.SolrQuery) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) CoreContainer(org.apache.solr.core.CoreContainer) SolrClient(org.apache.solr.client.solrj.SolrClient) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) StdNode(org.apache.solr.update.SolrCmdDistributor.StdNode) LukeRequest(org.apache.solr.client.solrj.request.LukeRequest) JettySolrRunner(org.apache.solr.client.solrj.embedded.JettySolrRunner) NamedList(org.apache.solr.common.util.NamedList) Error(org.apache.solr.update.SolrCmdDistributor.Error) SolrDocumentList(org.apache.solr.common.SolrDocumentList) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SolrEventListener(org.apache.solr.core.SolrEventListener) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Test(org.junit.Test)

Example 2 with Error

use of org.apache.solr.update.SolrCmdDistributor.Error in project lucene-solr by apache.

The method doFinish of the class DistributedUpdateProcessor.

// TODO: optionally fail if n replicas are not reached...
/**
 * Waits for all distributed update requests to finish and processes the errors they reported.
 *
 * <p>For each error returned by {@code cmdDistrib.getErrors()}:
 * <ul>
 *   <li>errors on a {@link RetryNode} (forwarded requests) are collected for the client;</li>
 *   <li>other errors are logged, and then skipped unless the request's distrib phase is
 *       {@code FROMLEADER} and it did not carry the {@code COMMIT_END_POINT} param;</li>
 *   <li>errors whose {@link SolrException} metadata has cause {@code "LeaderChanged"} are
 *       collected for the client;</li>
 *   <li>for the remaining errors on a {@link StdNode}, if this core is still the leader, the
 *       failed node is one of its replicas and is not the leader itself, the replica is put
 *       into leader-initiated recovery (LIR); otherwise a warning is logged.</li>
 * </ul>
 *
 * <p>Finally, if a replication tracker is present, the achieved and minimum replication
 * factors are added to the response header and the tracker is cleared.
 *
 * @throws DistributedUpdatesAsyncException if any errors were collected for the client
 */
private void doFinish() {
    // TODO: if not a forward and replication req is not specified, we could
    // send in a background thread    
    cmdDistrib.finish();
    List<Error> errors = cmdDistrib.getErrors();
    // TODO - we may need to tell about more than one error...
    List<Error> errorsForClient = new ArrayList<>(errors.size());
    for (final SolrCmdDistributor.Error error : errors) {
        if (error.req.node instanceof RetryNode) {
            // if it's a forward, any fail is a problem - 
            // otherwise we assume things are fine if we got it locally
            // until we start allowing min replication param
            errorsForClient.add(error);
            continue;
        }
        // not a forward: the update is assumed to have succeeded locally (see above),
        // so the failure is only logged here and handled via recovery below
        if (log.isWarnEnabled()) {
            log.warn("Error sending update to " + error.req.node.getBaseUrl(), error.e);
        }
        // Since it is not a forward request, for each fail, try to tell them to
        // recover - the doc was already added locally, so it should have been
        // legit
        DistribPhase phase = DistribPhase.parseParam(error.req.uReq.getParams().get(DISTRIB_UPDATE_PARAM));
        if (phase != DistribPhase.FROMLEADER)
            // don't have non-leaders try to recover other nodes
            continue;
        // we don't want to run recovery on a node which missed a commit command
        if (error.req.uReq.getParams().get(COMMIT_END_POINT) != null)
            continue;
        final String replicaUrl = error.req.node.getUrl();
        // if the remote replica failed the request because of leader change (SOLR-6511), then fail the request
        String cause = (error.e instanceof SolrException) ? ((SolrException) error.e).getMetadata("cause") : null;
        if ("LeaderChanged".equals(cause)) {
            // let's just fail this request and let the client retry? or just call processAdd again?
            log.error("On " + cloudDesc.getCoreNodeName() + ", replica " + replicaUrl + " now thinks it is the leader! Failing the request to let the client retry! " + error.e);
            errorsForClient.add(error);
            continue;
        }
        String collection = null;
        String shardId = null;
        // only StdNode errors are considered for leader-initiated recovery; other node types fall through
        if (error.req.node instanceof StdNode) {
            StdNode stdNode = (StdNode) error.req.node;
            collection = stdNode.getCollection();
            shardId = stdNode.getShardId();
            // before we go setting other replicas to down, make sure we're still the leader!
            String leaderCoreNodeName = null;
            Exception getLeaderExc = null;
            Replica leaderProps = null;
            try {
                leaderProps = zkController.getZkStateReader().getLeader(collection, shardId);
                if (leaderProps != null) {
                    leaderCoreNodeName = leaderProps.getName();
                }
            } catch (Exception exc) {
                // remembered only so it can be attached to the warning below
                getLeaderExc = exc;
            }
            // leaderCoreNodeName stays null when the lookup threw or returned no leader
            if (leaderCoreNodeName == null) {
                log.warn("Failed to determine if {} is still the leader for collection={} shardId={} " + "before putting {} into leader-initiated recovery", cloudDesc.getCoreNodeName(), collection, shardId, replicaUrl, getLeaderExc);
            }
            // NOTE(review): this uses cloudDesc.getShardId() rather than the failed node's shardId
            // read above - presumably the two are the same for a FROMLEADER request; confirm
            List<ZkCoreNodeProps> myReplicas = zkController.getZkStateReader().getReplicaProps(collection, cloudDesc.getShardId(), cloudDesc.getCoreNodeName());
            // check by replica name whether the failed node is one of this core's replicas
            boolean foundErrorNodeInReplicaList = false;
            if (myReplicas != null) {
                for (ZkCoreNodeProps replicaProp : myReplicas) {
                    if (((Replica) replicaProp.getNodeProps()).getName().equals(((Replica) stdNode.getNodeProps().getNodeProps()).getName())) {
                        foundErrorNodeInReplicaList = true;
                        break;
                    }
                }
            }
            // If the client specified minRf and we didn't achieve the minRf, don't send recovery and let client retry
            if (replicationTracker != null && replicationTracker.getAchievedRf() < replicationTracker.minRf) {
                continue;
            }
            // leaderProps cannot be null inside this condition: leaderCoreNodeName is only set from a non-null leaderProps
            if (// we are still same leader
            leaderCoreNodeName != null && cloudDesc.getCoreNodeName().equals(leaderCoreNodeName) && // we found an error for one of replicas
            foundErrorNodeInReplicaList && !stdNode.getNodeProps().getCoreUrl().equals(leaderProps.getCoreUrl())) {
                // we do not want to put ourself into LIR
                try {
                    // if false, then the node is probably not "live" anymore
                    // and we do not need to send a recovery message
                    // NOTE(review): the return value of the call below is not inspected here,
                    // so the "if false" remark above looks stale - confirm
                    Throwable rootCause = SolrException.getRootCause(error.e);
                    log.error("Setting up to try to start recovery on replica {}", replicaUrl, rootCause);
                    zkController.ensureReplicaInLeaderInitiatedRecovery(req.getCore().getCoreContainer(), collection, shardId, stdNode.getNodeProps(), req.getCore().getCoreDescriptor(), false);
                } catch (Exception exc) {
                    // failing to mark the replica is logged but does not fail the update request
                    Throwable setLirZnodeFailedCause = SolrException.getRootCause(exc);
                    log.error("Leader failed to set replica " + error.req.node.getUrl() + " state to DOWN due to: " + setLirZnodeFailedCause, setLirZnodeFailedCause);
                }
            } else {
                // not the leader anymore maybe or the error'd node is not my replica?
                if (!foundErrorNodeInReplicaList) {
                    log.warn("Core " + cloudDesc.getCoreNodeName() + " belonging to " + collection + " " + shardId + ", does not have error'd node " + stdNode.getNodeProps().getCoreUrl() + " as a replica. " + "No request recovery command will be sent!");
                } else {
                    log.warn("Core " + cloudDesc.getCoreNodeName() + " is no longer the leader for " + collection + " " + shardId + " or we tried to put ourself into LIR, no request recovery command will be sent!");
                }
            }
        }
    }
    // report achieved vs. requested replication factor to the client, then drop the tracker
    if (replicationTracker != null) {
        rsp.getResponseHeader().add(UpdateRequest.REPFACT, replicationTracker.getAchievedRf());
        rsp.getResponseHeader().add(UpdateRequest.MIN_REPFACT, replicationTracker.minRf);
        replicationTracker = null;
    }
    // surface forwarded-request failures and leader-change failures to the caller
    if (0 < errorsForClient.size()) {
        throw new DistributedUpdatesAsyncException(errorsForClient);
    }
}
Also used : RetryNode(org.apache.solr.update.SolrCmdDistributor.RetryNode) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) Error(org.apache.solr.update.SolrCmdDistributor.Error) ArrayList(java.util.ArrayList) Error(org.apache.solr.update.SolrCmdDistributor.Error) Replica(org.apache.solr.common.cloud.Replica) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SolrException(org.apache.solr.common.SolrException) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) SolrCmdDistributor(org.apache.solr.update.SolrCmdDistributor) StdNode(org.apache.solr.update.SolrCmdDistributor.StdNode) SolrException(org.apache.solr.common.SolrException)

Aggregations

ArrayList (java.util.ArrayList)2 ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps)2 Error (org.apache.solr.update.SolrCmdDistributor.Error)2 RetryNode (org.apache.solr.update.SolrCmdDistributor.RetryNode)2 StdNode (org.apache.solr.update.SolrCmdDistributor.StdNode)2 IOException (java.io.IOException)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 SolrClient (org.apache.solr.client.solrj.SolrClient)1 SolrQuery (org.apache.solr.client.solrj.SolrQuery)1 SolrServerException (org.apache.solr.client.solrj.SolrServerException)1 JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner)1 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)1 LukeRequest (org.apache.solr.client.solrj.request.LukeRequest)1 SolrDocumentList (org.apache.solr.common.SolrDocumentList)1 SolrException (org.apache.solr.common.SolrException)1 Replica (org.apache.solr.common.cloud.Replica)1 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)1 ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException)1 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)1 NamedList (org.apache.solr.common.util.NamedList)1