Search in sources :

Example 51 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class AbstractFullDistribZkTestBase method queryAndCompareReplicas.

/**
   * Runs the given query against every replica of the specified shard that is
   * both in the ACTIVE state and hosted on a live node, and asserts (via
   * {@code queryAndCompare}) that the results are identical.
   *
   * @param params the query to execute against each selected replica
   * @param shard  the shard whose replicas are compared; the test fails if no
   *               jetties are registered for it
   * @return whatever {@code queryAndCompare} returns for the selected clients
   * @see #queryAndCompare
   */
public QueryResponse queryAndCompareReplicas(SolrParams params, String shard) throws Exception {
    ArrayList<SolrClient> replicaClients = new ArrayList<>(7);
    // Refresh the jetty/client mappings before picking replicas.
    updateMappingsFromZk(jettys, clients);
    ZkStateReader reader = cloudClient.getZkStateReader();
    List<CloudJettyRunner> shardJetties = shardToJetty.get(shard);
    assertNotNull("no jetties found for shard: " + shard, shardJetties);
    for (CloudJettyRunner runner : shardJetties) {
        ZkNodeProps replicaProps = runner.info;
        String node = replicaProps.getStr(ZkStateReader.NODE_NAME_PROP);
        Replica.State state = Replica.State.getState(replicaProps.getStr(ZkStateReader.STATE_PROP));
        // The cluster state is re-read for every replica, as live nodes may change.
        boolean onLiveNode = reader.getClusterState().liveNodesContain(node);
        if (state != Replica.State.ACTIVE || !onLiveNode) {
            // Skip replicas that are not active or whose node is not live.
            continue;
        }
        replicaClients.add(runner.client.solrClient);
    }
    return queryAndCompare(params, replicaClients);
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) CloudSolrClient(org.apache.solr.client.solrj.impl.CloudSolrClient) SolrClient(org.apache.solr.client.solrj.SolrClient) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList)

Example 52 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class AbstractFullDistribZkTestBase method getLeaderUrlFromZk.

/**
 * Looks up the leader of the given slice in the cluster state obtained from
 * the common cloud client's ZkStateReader and wraps it in a {@code ZkCoreNodeProps}.
 *
 * @param collection the collection name
 * @param slice      the slice (shard) name
 * @return the leader's properties wrapped as {@code ZkCoreNodeProps}
 * @throws RuntimeException if the cluster state reports no leader for the slice
 */
protected ZkCoreNodeProps getLeaderUrlFromZk(String collection, String slice) {
    ZkNodeProps leaderProps = getCommonCloudSolrClient()
            .getZkStateReader()
            .getClusterState()
            .getLeader(collection, slice);
    if (leaderProps != null) {
        return new ZkCoreNodeProps(leaderProps);
    }
    throw new RuntimeException("Could not find leader:" + collection + " " + slice);
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps)

Example 53 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class OverseerElectionContext method runLeaderProcess.

/*
   * Runs the leader process for this shard.
   *
   * Steps, in order:
   *   1. Fetch the leader throttle from the core and wait out the minimum
   *      interval between attempts.
   *   2. Offer a LEADER message (shard + collection) to the Overseer state
   *      update queue to clear the leader in the cluster state.
   *   3. Wait for replicas to come up (first leader) or check that all replicas
   *      are participating (replacement leader).
   *   4. Cancel any running recovery and try to sync; on failure either become
   *      leader anyway (nobody has versions) or rejoin the election.
   *   5. Check LIR, register as leader via super.runLeaderProcess, mark the
   *      core as leader and request recoveries; on any failure, rejoin the
   *      election.
   *
   * weAreReplacement: has someone else been the leader already?
   * pauseBeforeStart: not read by this method; super.runLeaderProcess is
   *                   always invoked with 0.
   *
   * Throws SolrException if the core cannot be found while the container is
   * still running, or if the replacement-leader pause is interrupted.
   */
@Override
void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart) throws KeeperException, InterruptedException, IOException {
    String coreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
    ActionThrottle lt;
    try (SolrCore core = cc.getCore(coreName)) {
        if (core == null) {
            if (cc.isShutDown()) {
                // container is shutting down; nothing to elect
                return;
            } else {
                throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
            }
        }
        MDCLoggingContext.setCore(core);
        lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle();
    }
    try {
        // throttle repeated leadership attempts
        lt.minimumWaitBetweenActions();
        lt.markAttemptingAction();
        int leaderVoteWait = cc.getZkController().getLeaderVoteWait();
        log.debug("Running the leader process for shard={} and weAreReplacement={} and leaderVoteWait={}", shardId, weAreReplacement, leaderVoteWait);
        // clear the leader in clusterstate
        ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(), ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP, collection);
        Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m));
        boolean allReplicasInLine = false;
        if (!weAreReplacement) {
            allReplicasInLine = waitForReplicasToComeUp(leaderVoteWait);
        } else {
            allReplicasInLine = areAllReplicasParticipating();
        }
        if (isClosed) {
            // This election context was closed while we were waiting on / checking
            // the replicas, so bail out without trying to become leader.
            // (Original note: something else is expected to re-register the cores
            // and handle a new leadership election.)
            return;
        }
        Replica.Type replicaType;
        try (SolrCore core = cc.getCore(coreName)) {
            if (core == null) {
                if (!zkController.getCoreContainer().isShutDown()) {
                    cancelElection();
                    throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
                } else {
                    return;
                }
            }
            // remembered for the TLOG-specific handling after the core is released
            replicaType = core.getCoreDescriptor().getCloudDescriptor().getReplicaType();
            // should I be leader?
            if (weAreReplacement && !shouldIBeLeader(leaderProps, core, weAreReplacement)) {
                rejoinLeaderElection(core);
                return;
            }
            log.info("I may be the new leader - try and sync");
            // we are going to attempt to be the leader
            // first cancel any current recovery
            core.getUpdateHandler().getSolrCoreState().cancelRecovery();
            if (weAreReplacement) {
                // wait a moment for any floating updates to finish
                try {
                    Thread.sleep(2500);
                } catch (InterruptedException e) {
                    // restore the interrupt flag before surfacing the failure
                    Thread.currentThread().interrupt();
                    throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, e);
                }
            }
            PeerSync.PeerSyncResult result = null;
            boolean success = false;
            try {
                result = syncStrategy.sync(zkController, core, leaderProps, weAreReplacement);
                success = result.isSuccess();
            } catch (Exception e) {
                // a sync exception is treated as a failed sync, not as a fatal error
                SolrException.log(log, "Exception while trying to sync", e);
                result = PeerSync.PeerSyncResult.failure();
            }
            UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
            if (!success) {
                boolean hasRecentUpdates = false;
                if (ulog != null) {
                    // TODO: we could optimize this if necessary
                    try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
                        hasRecentUpdates = !recentUpdates.getVersions(1).isEmpty();
                    }
                }
                if (!hasRecentUpdates) {
                    // We failed sync, but we have no versions, so we can't sync in that
                    // case. We were active before, so become leader anyway if no one
                    // else has any versions either.
                    if (result.getOtherHasVersions().orElse(false)) {
                        log.info("We failed sync, but we have no versions - we can't sync in that case. But others have some versions, so we should not become leader");
                        success = false;
                    } else {
                        log.info("We failed sync, but we have no versions - we can't sync in that case - we were active before, so become leader anyway");
                        success = true;
                    }
                }
            }
            // solrcloud_debug
            if (log.isDebugEnabled()) {
                try {
                    RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
                    SolrIndexSearcher searcher = searchHolder.get();
                    try {
                        log.debug(core.getCoreContainer().getZkController().getNodeName() + " synched " + searcher.search(new MatchAllDocsQuery(), 1).totalHits);
                    } finally {
                        // always release the searcher reference taken above
                        searchHolder.decref();
                    }
                } catch (Exception e) {
                    log.error("Error in solrcloud_debug block", e);
                }
            }
            if (!success) {
                rejoinLeaderElection(core);
                return;
            }
        }
        boolean isLeader = true;
        if (!isClosed) {
            try {
                // we must check LIR before registering as leader
                checkLIR(coreName, allReplicasInLine);
                if (replicaType == Replica.Type.TLOG) {
                    // stop replicate from old leader
                    zkController.stopReplicationFromLeader(coreName);
                    if (weAreReplacement) {
                        try (SolrCore core = cc.getCore(coreName)) {
                            Future<UpdateLog.RecoveryInfo> future = core.getUpdateHandler().getUpdateLog().recoverFromCurrentLog();
                            if (future != null) {
                                log.info("Replaying tlog before become new leader");
                                // block until the tlog replay has finished
                                future.get();
                            } else {
                                log.info("New leader does not have old tlog to replay");
                            }
                        }
                    }
                }
                super.runLeaderProcess(weAreReplacement, 0);
                try (SolrCore core = cc.getCore(coreName)) {
                    if (core != null) {
                        core.getCoreDescriptor().getCloudDescriptor().setLeader(true);
                        publishActiveIfRegisteredAndNotActive(core);
                    } else {
                        return;
                    }
                }
                log.info("I am the new leader: " + ZkCoreNodeProps.getCoreUrl(leaderProps) + " " + shardId);
                // we made it as leader - send any recovery requests we need to
                syncStrategy.requestRecoveries();
            } catch (Exception e) {
                isLeader = false;
                SolrException.log(log, "There was a problem trying to register as the leader", e);
                try (SolrCore core = cc.getCore(coreName)) {
                    if (core == null) {
                        log.debug("SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
                        return;
                    }
                    core.getCoreDescriptor().getCloudDescriptor().setLeader(false);
                    // we could not publish ourselves as leader - try and rejoin election
                    rejoinLeaderElection(core);
                }
            }
            if (isLeader) {
                // check for any replicas in my shard that were set to down by the previous leader
                try {
                    startLeaderInitiatedRecoveryOnReplicas(coreName);
                } catch (Exception exc) {
                    // don't want leader election to fail because of
                    // an error trying to tell others to recover, but do not
                    // hide the failure either: record it and carry on
                    log.warn("Failed to start leader-initiated recovery on replicas for core {}", coreName, exc);
                }
            }
        } else {
            cancelElection();
        }
    } finally {
        // undo the MDCLoggingContext.setCore done at the top of the method
        MDCLoggingContext.clear();
    }
}
Also used : SolrCore(org.apache.solr.core.SolrCore) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) PeerSync(org.apache.solr.update.PeerSync) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) NodeExistsException(org.apache.zookeeper.KeeperException.NodeExistsException) UpdateLog(org.apache.solr.update.UpdateLog) SolrException(org.apache.solr.common.SolrException)

Example 54 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class ConfigSetsHandler method sendToZk.

/**
 * Tags the given property map with a configset-prefixed queue operation and
 * hands it to {@code handleResponse} as a {@code ZkNodeProps} message.
 * Does nothing when {@code result} is {@code null}.
 *
 * @param rsp       the response passed through to {@code handleResponse}
 * @param operation the configset operation whose action names the queue operation
 * @param result    the message properties; mutated to include the queue operation
 */
protected void sendToZk(SolrQueryResponse rsp, ConfigSetOperation operation, Map<String, Object> result) throws KeeperException, InterruptedException {
    if (result == null) {
        return;
    }
    // Collection and configset actions currently share one underlying queue,
    // so the operation name carries a prefix that tells them apart.
    result.put(QUEUE_OPERATION, CONFIGSETS_ACTION_PREFIX + operation.action.toLower());
    ZkNodeProps message = new ZkNodeProps(result);
    handleResponse(operation.action.toLower(), message, rsp, DEFAULT_ZK_TIMEOUT);
}
Also used : ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps)

Example 55 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class RebalanceLeaders method rejoinElection.

/**
 * Builds a REBALANCELEADERS message for the replica identified by the given
 * election node and submits it through the collections handler, using a
 * throwaway response object.
 *
 * @param collectionName the collection the slice belongs to
 * @param slice          the slice whose replica should rejoin the election
 * @param electionNode   the election node; also used to look up the replica
 * @param core           the core name placed in the message and the async id
 * @param rejoinAtHead   whether the replica should rejoin at the head of the queue
 */
private void rejoinElection(String collectionName, Slice slice, String electionNode, String core, boolean rejoinAtHead) throws KeeperException, InterruptedException {
    Replica target = slice.getReplica(LeaderElector.getNodeName(electionNode));
    Map<String, Object> message = new HashMap<>();
    message.put(COLLECTION_PROP, collectionName);
    message.put(SHARD_ID_PROP, slice.getName());
    message.put(QUEUE_OPERATION, REBALANCELEADERS.toLower());
    message.put(CORE_NAME_PROP, core);
    message.put(CORE_NODE_NAME_PROP, target.getName());
    Object baseUrl = target.getProperties().get(ZkStateReader.BASE_URL_PROP);
    message.put(ZkStateReader.BASE_URL_PROP, baseUrl);
    // Controls whether we get ourselves to be first in line.
    message.put(REJOIN_AT_HEAD_PROP, Boolean.toString(rejoinAtHead));
    message.put(ELECTION_NODE_PROP, electionNode);
    // Async id: operation name, core name and a nanoTime-derived suffix.
    String requestId = REBALANCELEADERS.toLower() + "_" + core + "_" + Math.abs(System.nanoTime());
    message.put(ASYNC, requestId);
    // The caller constructs its own response, so this one is discarded.
    SolrQueryResponse discardedResponse = new SolrQueryResponse();
    collectionsHandler.handleResponse(REBALANCELEADERS.toLower(), new ZkNodeProps(message), discardedResponse);
}
Also used : SolrQueryResponse(org.apache.solr.response.SolrQueryResponse) HashMap(java.util.HashMap) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) Replica(org.apache.solr.common.cloud.Replica)

Aggregations

ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)91 SolrException (org.apache.solr.common.SolrException)35 HashMap (java.util.HashMap)28 Replica (org.apache.solr.common.cloud.Replica)22 ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)20 ArrayList (java.util.ArrayList)19 Slice (org.apache.solr.common.cloud.Slice)19 KeeperException (org.apache.zookeeper.KeeperException)19 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)16 Test (org.junit.Test)16 DocCollection (org.apache.solr.common.cloud.DocCollection)15 SolrZkClient (org.apache.solr.common.cloud.SolrZkClient)14 Map (java.util.Map)13 ClusterState (org.apache.solr.common.cloud.ClusterState)13 IOException (java.io.IOException)10 ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps)10 ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException)10 NamedList (org.apache.solr.common.util.NamedList)10 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)9 SolrCore (org.apache.solr.core.SolrCore)8