Search in sources :

Example 1 with Type

Use of org.apache.solr.common.cloud.Replica.Type in the Apache lucene-solr project.

From the class ZkController, method register.

/**
 * Register shard with ZooKeeper.
 *
 * <p>The method proceeds in these steps:
 * <ol>
 *   <li>Joins leader election, unless the replica is of type {@code PULL}, in which case it
 *       starts replicating from the leader instead.</li>
 *   <li>Waits for a leader and determines whether this core is that leader.</li>
 *   <li>Optionally replays the local transaction log.</li>
 *   <li>Runs {@code checkRecovery}; if no recovery was started, publishes the core as
 *       {@code ACTIVE}.</li>
 *   <li>Forces a refresh of the collection's cluster state.</li>
 * </ol>
 *
 * @param coreName             name of the core; stored in the registration properties and used
 *                             to build this core's URL
 * @param desc                 descriptor of the core being registered; supplies the cloud
 *                             descriptor (collection, shard id, core node name)
 * @param recoverReloadedCores forwarded unchanged to {@code checkRecovery}
 * @param afterExpiration      forwarded to {@code joinElection} and {@code checkRecovery};
 *                             when {@code true} the local transaction-log replay is skipped
 * @param skipRecovery         forwarded unchanged to {@code checkRecovery}
 * @return the shardId for the SolrCore
 * @throws ZooKeeperException if joining the election (or starting replication) is interrupted
 *                            or fails with a {@code KeeperException}/{@code IOException}
 * @throws Exception          any other failure raised by the steps above
 */
public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, boolean afterExpiration, boolean skipRecovery) throws Exception {
    // Set up the logging context from the core; the core reference itself is released right away.
    try (SolrCore core = cc.getCore(desc.getName())) {
        MDCLoggingContext.setCore(core);
    }
    try {
        // pre register has published our down state
        final String baseUrl = getBaseUrl();
        final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
        final String collection = cloudDesc.getCollectionName();
        final String coreZkNodeName = desc.getCloudDescriptor().getCoreNodeName();
        assert coreZkNodeName != null : "we should have a coreNodeName by now";
        String shardId = cloudDesc.getShardId();
        Map<String, Object> props = new HashMap<>();
        // we only put a subset of props into the leader node
        props.put(ZkStateReader.BASE_URL_PROP, baseUrl);
        props.put(ZkStateReader.CORE_NAME_PROP, coreName);
        props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
        log.debug("Register replica - core:{} address:{} collection:{} shard:{}", coreName, baseUrl, cloudDesc.getCollectionName(), shardId);
        // NOTE(review): leaderProps is not read again in this method - confirm whether it
        // (and props) can be removed.
        ZkNodeProps leaderProps = new ZkNodeProps(props);
        try {
            // If we're a preferred leader, insert ourselves at the head of the queue
            boolean joinAtHead = false;
            Replica replica = zkStateReader.getClusterState().getReplica(collection, coreZkNodeName);
            if (replica != null) {
                joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
            }
            //TODO Why would replica be null?
            if (replica == null || replica.getType() != Type.PULL) {
                joinElection(desc, afterExpiration, joinAtHead);
            } else {
                // Only reachable for a non-null replica of type PULL: such replicas never
                // take part in leader election, they just replicate from the leader.
                if (joinAtHead) {
                    log.warn("Replica {} was designated as preferred leader but it's type is {}, It won't join election", coreZkNodeName, Type.PULL);
                }
                log.debug("Replica {} skipping election because it's type is {}", coreZkNodeName, Type.PULL);
                startReplicationFromLeader(coreName, false);
            }
        } catch (InterruptedException e) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
        } catch (KeeperException | IOException e) {
            throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
        }
        // in this case, we want to wait for the leader as long as the leader might
        // wait for a vote, at least - but also long enough that a large cluster has
        // time to get its act together
        // NOTE(review): 600000 is presumably milliseconds (10 extra minutes) - confirm
        // against getLeader's timeout unit.
        String leaderUrl = getLeader(cloudDesc, leaderVoteWait + 600000);
        String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
        log.debug("We are {} and leader is {}", ourUrl, leaderUrl);
        boolean isLeader = leaderUrl.equals(ourUrl);
        // NOTE(review): unlike the null-tolerant lookup before the election, this chain
        // dereferences the replica unconditionally and would throw a NullPointerException
        // if it is missing from the cluster state - confirm that cannot happen here.
        Replica.Type replicaType = zkStateReader.getClusterState().getCollection(collection).getReplica(coreZkNodeName).getType();
        assert !(isLeader && replicaType == Type.PULL) : "Pull replica became leader!";
        try (SolrCore core = cc.getCore(desc.getName())) {
            // recover from local transaction log and wait for it to complete before
            // going active
            // TODO: should this be moved to another thread? To recoveryStrat?
            // TODO: should this actually be done earlier, before (or as part of)
            // leader election perhaps?
            UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
            boolean isTlogReplicaAndNotLeader = replicaType == Replica.Type.TLOG && !isLeader;
            if (isTlogReplicaAndNotLeader) {
                String commitVersion = ReplicateFromLeader.getCommitVersion(core);
                if (commitVersion != null) {
                    // NOTE(review): ulog is null-checked further down but not here -
                    // presumably a TLOG replica always has an update log; confirm.
                    ulog.copyOverOldUpdates(Long.parseLong(commitVersion));
                }
            }
            // we will call register again after zk expiration and on reload
            if (!afterExpiration && !core.isReloaded() && ulog != null && !isTlogReplicaAndNotLeader) {
                // disable recovery in case shard is in construction state (for shard splits)
                Slice slice = getClusterState().getSlice(collection, shardId);
                if (slice.getState() != Slice.State.CONSTRUCTION || !isLeader) {
                    // Use the update log that was null-checked above rather than fetching it again.
                    Future<UpdateLog.RecoveryInfo> recoveryFuture = ulog.recoverFromLog();
                    if (recoveryFuture != null) {
                        log.info("Replaying tlog for {} during startup... NOTE: This can take a while.", ourUrl);
                        // NOTE: this could potentially block for minutes or more!
                        // TODO: publish as recovering in the mean time?
                        // TODO: in the future we could do peersync in parallel with recoverFromLog
                        recoveryFuture.get();
                    } else {
                        log.debug("No LogReplay needed for core={} baseURL={}", core.getName(), baseUrl);
                    }
                }
            }
            boolean didRecovery = checkRecovery(recoverReloadedCores, isLeader, skipRecovery, collection, coreZkNodeName, core, cc, afterExpiration);
            if (!didRecovery) {
                // No recovery was started, so this core can go active immediately; a TLOG
                // follower first starts replicating from the leader.
                if (isTlogReplicaAndNotLeader) {
                    startReplicationFromLeader(coreName, true);
                }
                publish(desc, Replica.State.ACTIVE);
            }
            core.getCoreDescriptor().getCloudDescriptor().setHasRegistered(true);
        }
        // make sure we have an update cluster state right away
        zkStateReader.forceUpdateCollection(collection);
        return shardId;
    } finally {
        // Always clear the logging context set at the top of the method.
        MDCLoggingContext.clear();
    }
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) SolrCore(org.apache.solr.core.SolrCore) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) IOException(java.io.IOException) Type(org.apache.solr.common.cloud.Replica.Type) Replica(org.apache.solr.common.cloud.Replica) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) Slice(org.apache.solr.common.cloud.Slice) UpdateLog(org.apache.solr.update.UpdateLog) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) KeeperException(org.apache.zookeeper.KeeperException)

Aggregations

IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 Replica (org.apache.solr.common.cloud.Replica)1 Type (org.apache.solr.common.cloud.Replica.Type)1 Slice (org.apache.solr.common.cloud.Slice)1 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)1 ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException)1 SolrCore (org.apache.solr.core.SolrCore)1 UpdateLog (org.apache.solr.update.UpdateLog)1 KeeperException (org.apache.zookeeper.KeeperException)1