Search in sources :

Example 6 with CloudDescriptor

use of org.apache.solr.cloud.CloudDescriptor in project lucene-solr by apache.

the class IndexFetcher method getLeaderReplica.

/**
 * Resolves the current leader replica of the shard this core belongs to.
 *
 * <p>The collection name and shard id are read from this core's
 * {@link CloudDescriptor}; the lookup itself is delegated to the ZooKeeper
 * state reader's {@code getLeaderRetry}.
 *
 * @return the leader replica as reported by {@code getLeaderRetry}
 * @throws InterruptedException propagated from {@code getLeaderRetry}
 */
private Replica getLeaderReplica() throws InterruptedException {
    final ZkController controller = solrCore.getCoreContainer().getZkController();
    final CloudDescriptor descriptor = solrCore.getCoreDescriptor().getCloudDescriptor();
    return controller.getZkStateReader()
            .getLeaderRetry(descriptor.getCollectionName(), descriptor.getShardId());
}
Also used : ZkController(org.apache.solr.cloud.ZkController) Replica(org.apache.solr.common.cloud.Replica) CloudDescriptor(org.apache.solr.cloud.CloudDescriptor)

Example 7 with CloudDescriptor

use of org.apache.solr.cloud.CloudDescriptor in project lucene-solr by apache.

the class HttpShardHandler method prepDistributed.

/**
 * Prepares the given {@link ResponseBuilder} for a distributed request by
 * populating {@code rb.shards} and {@code rb.slices}.
 *
 * <p>The targets come from one of two places:
 * <ul>
 *   <li>the explicit {@code shards} request parameter, if present; or</li>
 *   <li>the cluster state, when a {@link ZkController} is available (optionally
 *       restricted by the {@code collection} and {@code _route_} parameters).</li>
 * </ul>
 *
 * <p>When a {@link ZkController} is available the method additionally:
 * <ul>
 *   <li>short-circuits to a non-distributed request ({@code rb.isDistrib = false})
 *       and returns early if the only requested slice is the one hosted by this
 *       core, this core last published ACTIVE, and short-circuiting is permitted;</li>
 *   <li>otherwise rewrites every entry of {@code rb.shards} into the string
 *       produced by {@code createSliceShardsStr} for that entry's URLs.</li>
 * </ul>
 *
 * <p>Finally, {@code shards.rows} / {@code shards.start} style parameters
 * ({@link ShardParams#SHARDS_ROWS}, {@link ShardParams#SHARDS_START}) are parsed
 * into {@code rb.shards_rows} and {@code rb.shards_start} when present.
 *
 * @param rb the response builder to populate; its request supplies all parameters
 * @throws SolrException with {@code SERVICE_UNAVAILABLE} when a slice resolves to
 *         no replica URLs and {@link ShardParams#SHARDS_TOLERANT} is not enabled;
 *         the leader-lookup predicate may also throw it when invoked
 * @throws NumberFormatException if the rows/start parameters are not valid integers
 */
@Override
public void prepDistributed(ResponseBuilder rb) {
    final SolrQueryRequest req = rb.req;
    final SolrParams params = req.getParams();
    final String shards = params.get(ShardParams.SHARDS);
    // since the cost of grabbing cloud state is still up in the air, we grab it only
    // if we need it.
    ClusterState clusterState = null;
    Map<String, Slice> slices = null;
    CoreDescriptor coreDescriptor = req.getCore().getCoreDescriptor();
    CloudDescriptor cloudDescriptor = coreDescriptor.getCloudDescriptor();
    // zkController is null when not running in cloud mode; every cloud-only step below is guarded by it.
    ZkController zkController = req.getCore().getCoreContainer().getZkController();
    final ReplicaListTransformer replicaListTransformer = httpShardHandlerFactory.getReplicaListTransformer(req);
    if (shards != null) {
        // Explicit "shards" parameter: a comma-separated list of targets.
        List<String> lst = StrUtils.splitSmart(shards, ",", true);
        rb.shards = lst.toArray(new String[lst.size()]);
        rb.slices = new String[rb.shards.length];
        if (zkController != null) {
            // figure out which shards are slices
            for (int i = 0; i < rb.shards.length; i++) {
                if (rb.shards[i].indexOf('/') < 0) {
                    // this is a logical shard
                    // (no '/' in the entry): move it to rb.slices and null the
                    // rb.shards entry so it is resolved from cluster state further down.
                    rb.slices[i] = rb.shards[i];
                    rb.shards[i] = null;
                }
            }
        }
    } else if (zkController != null) {
        // we weren't provided with an explicit list of slices to query via "shards", so use the cluster state
        clusterState = zkController.getClusterState();
        String shardKeys = params.get(ShardParams._ROUTE_);
        // This will be the complete list of slices we need to query for this request.
        slices = new HashMap<>();
        // we need to find out what collections this request is for.
        // A comma-separated list of specified collections.
        // Eg: "collection1,collection2,collection3"
        String collections = params.get("collection");
        if (collections != null) {
            // If there were one or more collections specified in the query, split
            // each parameter and store as a separate member of a List.
            List<String> collectionList = StrUtils.splitSmart(collections, ",", true);
            // For each collection, let addSlices add its slices (taken from the
            // cloud state) to the Map 'slices'.
            for (String collectionName : collectionList) {
                // The original code produced <collection-name>_<shard-name> when the collections
                // parameter was specified (see ClientUtils.appendMap)
                // Is this necessary if only one collection is specified?
                // i.e. should we change multiCollection to collectionList.size() > 1?
                addSlices(slices, clusterState, params, collectionName, shardKeys, true);
            }
        } else {
            // just this collection
            String collectionName = cloudDescriptor.getCollectionName();
            addSlices(slices, clusterState, params, collectionName, shardKeys, false);
        }
        // Store the logical slices in the ResponseBuilder and create a new
        // String array to hold the physical shards (which will be mapped
        // later).
        rb.slices = slices.keySet().toArray(new String[slices.size()]);
        rb.shards = new String[rb.slices.length];
    }
    // Cloud-only: try to short-circuit, otherwise resolve each entry to replica URLs.
    if (zkController != null) {
        // Are we hosting the shard that this request is for, and are we active? If so, then handle it ourselves
        // and make it a non-distributed request.
        String ourSlice = cloudDescriptor.getShardId();
        String ourCollection = cloudDescriptor.getCollectionName();
        // Some requests may only be fulfilled by replicas of type Replica.Type.NRT
        // NOTE(review): this is a reference comparison, so it is true only when the context
        // holds the Boolean.TRUE instance itself — confirm callers never store another Boolean object.
        boolean onlyNrtReplicas = Boolean.TRUE == req.getContext().get(ONLY_NRT_REPLICAS);
        if (rb.slices.length == 1 && rb.slices[0] != null && // handle the <collection>_<slice> format
        (rb.slices[0].equals(ourSlice) || rb.slices[0].equals(ourCollection + "_" + ourSlice)) && cloudDescriptor.getLastPublished() == Replica.State.ACTIVE && (!onlyNrtReplicas || cloudDescriptor.getReplicaType() == Replica.Type.NRT)) {
            // currently just a debugging parameter to check distrib search on a single node
            boolean shortCircuit = params.getBool("shortCircuit", true);
            String targetHandler = params.get(ShardParams.SHARDS_QT);
            // if a different handler is specified, don't short-circuit
            shortCircuit = shortCircuit && targetHandler == null;
            if (shortCircuit) {
                // Serve locally; note the early return skips the shards.rows / shards.start parsing below.
                rb.isDistrib = false;
                rb.shortCircuitedURL = ZkCoreNodeProps.getCoreUrl(zkController.getBaseUrl(), coreDescriptor.getName());
                return;
            }
        // We shouldn't need to do anything to handle "shard.rows" since it was previously meant to be an optimization?
        }
        for (int i = 0; i < rb.shards.length; i++) {
            final List<String> shardUrls;
            if (rb.shards[i] != null) {
                // Explicit entry: a '|'-separated list of equivalent URLs; only reorder/filter it.
                shardUrls = StrUtils.splitSmart(rb.shards[i], "|", true);
                replicaListTransformer.transform(shardUrls);
            } else {
                // Logical slice: resolve its replicas from the cluster state.
                if (clusterState == null) {
                    // Reached only via the explicit "shards" branch, which never loaded the cluster
                    // state; slices are then looked up in this core's own collection.
                    clusterState = zkController.getClusterState();
                    slices = clusterState.getSlicesMap(cloudDescriptor.getCollectionName());
                }
                String sliceName = rb.slices[i];
                Slice slice = slices.get(sliceName);
                if (slice == null) {
                    // Treat this the same as "all servers down" for a slice, and let things continue
                    // if partial results are acceptable
                    rb.shards[i] = "";
                    continue;
                // throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "no such shard: " + sliceName);
                }
                // Predicate that reports whether a replica is the slice's leader. The leader is
                // looked up lazily on the first test() call and cached in shardLeader afterwards.
                final Predicate<Replica> isShardLeader = new Predicate<Replica>() {

                    private Replica shardLeader = null;

                    @Override
                    public boolean test(Replica replica) {
                        if (shardLeader == null) {
                            try {
                                shardLeader = zkController.getZkStateReader().getLeaderRetry(cloudDescriptor.getCollectionName(), slice.getName());
                            } catch (InterruptedException e) {
                                // Wrapped in an unchecked SolrException because test() cannot throw checked exceptions.
                                throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + slice.getName() + " in collection " + cloudDescriptor.getCollectionName(), e);
                            } catch (SolrException e) {
                                // Log the collection state for diagnosis (debug level only), then rethrow unchanged.
                                if (log.isDebugEnabled()) {
                                    log.debug("Exception finding leader for shard {} in collection {}. Collection State: {}", slice.getName(), cloudDescriptor.getCollectionName(), zkController.getZkStateReader().getClusterState().getCollectionOrNull(cloudDescriptor.getCollectionName()));
                                }
                                throw e;
                            }
                        }
                        // Replicas are matched against the leader by name.
                        return replica.getName().equals(shardLeader.getName());
                    }
                };
                // collectEligibleReplicas is defined outside this block; presumably it filters the
                // slice's replicas by liveness/state/type using the arguments passed — verify there.
                final List<Replica> eligibleSliceReplicas = collectEligibleReplicas(slice, clusterState, onlyNrtReplicas, isShardLeader);
                replicaListTransformer.transform(eligibleSliceReplicas);
                shardUrls = new ArrayList<>(eligibleSliceReplicas.size());
                for (Replica replica : eligibleSliceReplicas) {
                    String url = ZkCoreNodeProps.getCoreUrl(replica);
                    shardUrls.add(url);
                }
                if (shardUrls.isEmpty()) {
                    // No replica URL for this slice: fail unless the request opted into tolerance (default false).
                    boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false);
                    if (!tolerant) {
                        // stop the check when there are no replicas available for a shard
                        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "no servers hosting shard: " + rb.slices[i]);
                    }
                }
            }
            // And now recreate the | delimited list of equivalent servers
            rb.shards[i] = createSliceShardsStr(shardUrls);
        }
    }
    // Optional per-shard paging overrides; Integer.parseInt throws NumberFormatException on bad input.
    String shards_rows = params.get(ShardParams.SHARDS_ROWS);
    if (shards_rows != null) {
        rb.shards_rows = Integer.parseInt(shards_rows);
    }
    String shards_start = params.get(ShardParams.SHARDS_START);
    if (shards_start != null) {
        rb.shards_start = Integer.parseInt(shards_start);
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) HashMap(java.util.HashMap) CoreDescriptor(org.apache.solr.core.CoreDescriptor) Replica(org.apache.solr.common.cloud.Replica) CloudDescriptor(org.apache.solr.cloud.CloudDescriptor) Predicate(java.util.function.Predicate) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) Slice(org.apache.solr.common.cloud.Slice) ZkController(org.apache.solr.cloud.ZkController) SolrParams(org.apache.solr.common.params.SolrParams) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ArrayList(java.util.ArrayList) NamedList(org.apache.solr.common.util.NamedList) List(java.util.List) SolrException(org.apache.solr.common.SolrException)

Example 8 with CloudDescriptor

use of org.apache.solr.cloud.CloudDescriptor in project lucene-solr by apache.

the class PrepRecoveryOp method execute.

/**
 * Blocks until the replica named by the {@code coreNodeName} request parameter
 * is seen in the cluster state with the requested state, or gives up.
 *
 * <p>Request parameters read: {@link CoreAdminParams#CORE}, {@code nodeName},
 * {@code coreNodeName}, {@link ZkStateReader#STATE_PROP}, {@code checkLive},
 * {@code onlyIfLeader} and {@code onlyIfLeaderActive}.
 *
 * <p>The method polls in a loop, sleeping one second per iteration. The loop
 * ends successfully when the replica's state equals the requested state (or the
 * "leader does not need recovery" safeguard applies), the
 * {@code onlyIfLeaderActive} check does not veto it, and the {@code checkLive}
 * condition (if supplied) matches the node's liveness.
 *
 * @param it the call info carrying the request and the handler (and through it
 *        the {@link CoreContainer})
 * @throws SolrException with {@code BAD_REQUEST} when the core is not found
 *         after the allowed retries, when {@code onlyIfLeader} is set and this
 *         core is not the leader, when the requested state is not observed
 *         within {@code maxTries} iterations, or when Solr is shutting down
 * @throws Exception any other failure, e.g. {@link InterruptedException} from
 *         the sleep between iterations
 */
@Override
public void execute(CallInfo it) throws Exception {
    // Test-injection hook; being inside an assert, it only runs when JVM assertions are enabled.
    assert TestInjection.injectPrepRecoveryOpPauseForever();
    final SolrParams params = it.req.getParams();
    String cname = params.get(CoreAdminParams.CORE);
    if (cname == null) {
        cname = "";
    }
    String nodeName = params.get("nodeName");
    String coreNodeName = params.get("coreNodeName");
    Replica.State waitForState = Replica.State.getState(params.get(ZkStateReader.STATE_PROP));
    // These are boxed Booleans: null means the parameter was not supplied (hence the null checks below).
    Boolean checkLive = params.getBool("checkLive");
    Boolean onlyIfLeader = params.getBool("onlyIfLeader");
    Boolean onlyIfLeaderActive = params.getBool("onlyIfLeaderActive");
    CoreContainer coreContainer = it.handler.coreContainer;
    // wait long enough for the leader conflict to work itself out plus a little extra
    int conflictWaitMs = coreContainer.getZkController().getLeaderConflictResolveWait();
    // NOTE(review): conflictWaitMs / 1000 is integer division, so the value is already truncated
    // before Math.round sees it and the rounding has no effect — confirm truncation is intended.
    int maxTries = (int) Math.round(conflictWaitMs / 1000) + 3;
    log.info("Going to wait for coreNodeName: {}, state: {}, checkLive: {}, onlyIfLeader: {}, onlyIfLeaderActive: {}, maxTime: {} s", coreNodeName, waitForState, checkLive, onlyIfLeader, onlyIfLeaderActive, maxTries);
    Replica.State state = null;
    boolean live = false;
    int retry = 0;
    // One iteration per second (see Thread.sleep at the bottom), so retry doubles as elapsed seconds.
    while (true) {
        // The core is re-acquired every iteration and released by try-with-resources before sleeping.
        try (SolrCore core = coreContainer.getCore(cname)) {
            // Give up on a missing core once retry reaches min(30, maxTries).
            if (core == null && retry == Math.min(30, maxTries)) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:" + cname);
            }
            if (core != null) {
                if (onlyIfLeader != null && onlyIfLeader) {
                    if (!core.getCoreDescriptor().getCloudDescriptor().isLeader()) {
                        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "We are not the leader");
                    }
                }
                // wait until we are sure the recovering node is ready
                // to accept updates
                CloudDescriptor cloudDescriptor = core.getCoreDescriptor().getCloudDescriptor();
                String collectionName = cloudDescriptor.getCollectionName();
                // Every 15th iteration (including the very first, retry == 0) refresh the collection state.
                if (retry % 15 == 0) {
                    if (retry > 0 && log.isInfoEnabled())
                        log.info("After " + retry + " seconds, core " + cname + " (" + cloudDescriptor.getShardId() + " of " + cloudDescriptor.getCollectionName() + ") still does not have state: " + waitForState + "; forcing ClusterState update from ZooKeeper");
                    // force a cluster state update
                    coreContainer.getZkController().getZkStateReader().forceUpdateCollection(collectionName);
                }
                ClusterState clusterState = coreContainer.getZkController().getClusterState();
                DocCollection collection = clusterState.getCollection(collectionName);
                Slice slice = collection.getSlice(cloudDescriptor.getShardId());
                if (slice != null) {
                    final Replica replica = slice.getReplicasMap().get(coreNodeName);
                    if (replica != null) {
                        // state: what the cluster state says about the waited-on replica;
                        // localState: what this (local) core last published about itself.
                        state = replica.getState();
                        live = clusterState.liveNodesContain(nodeName);
                        final Replica.State localState = cloudDescriptor.getLastPublished();
                        // TODO: This is funky but I've seen this in testing where the replica asks the
                        // leader to be in recovery? Need to track down how that happens ... in the meantime,
                        // this is a safeguard
                        boolean leaderDoesNotNeedRecovery = (onlyIfLeader != null && onlyIfLeader && core.getName().equals(replica.getStr("core")) && waitForState == Replica.State.RECOVERING && localState == Replica.State.ACTIVE && state == Replica.State.ACTIVE);
                        if (leaderDoesNotNeedRecovery) {
                            log.warn("Leader " + core.getName() + " ignoring request to be in the recovering state because it is live and active.");
                        }
                        // True means "veto": onlyIfLeaderActive was requested but this core is not locally ACTIVE.
                        boolean onlyIfActiveCheckResult = onlyIfLeaderActive != null && onlyIfLeaderActive && localState != Replica.State.ACTIVE;
                        log.info("In WaitForState(" + waitForState + "): collection=" + collectionName + ", shard=" + slice.getName() + ", thisCore=" + core.getName() + ", leaderDoesNotNeedRecovery=" + leaderDoesNotNeedRecovery + ", isLeader? " + core.getCoreDescriptor().getCloudDescriptor().isLeader() + ", live=" + live + ", checkLive=" + checkLive + ", currentState=" + state.toString() + ", localState=" + localState + ", nodeName=" + nodeName + ", coreNodeName=" + coreNodeName + ", onlyIfActiveCheckResult=" + onlyIfActiveCheckResult + ", nodeProps: " + replica);
                        // NOTE(review): "replica != null" is always true here (we are inside that check).
                        if (!onlyIfActiveCheckResult && replica != null && (state == waitForState || leaderDoesNotNeedRecovery)) {
                            // Success: leave the loop if liveness is not being checked, or if the
                            // observed liveness equals the requested checkLive value.
                            if (checkLive == null) {
                                break;
                            } else if (checkLive && live) {
                                break;
                            } else if (!checkLive && !live) {
                                break;
                            }
                        }
                    }
                }
            }
            // Post-increment: the comparison uses the old value, so the timeout fires on the
            // iteration where retry equals maxTries; retry is incremented either way.
            if (retry++ == maxTries) {
                String collection = null;
                String leaderInfo = null;
                String shardId = null;
                try {
                    // NOTE(review): core may be null at this point; the resulting NullPointerException
                    // would be caught below and reported via leaderInfo rather than propagated.
                    CloudDescriptor cloudDescriptor = core.getCoreDescriptor().getCloudDescriptor();
                    collection = cloudDescriptor.getCollectionName();
                    shardId = cloudDescriptor.getShardId();
                    leaderInfo = coreContainer.getZkController().getZkStateReader().getLeaderUrl(collection, shardId, 5000);
                } catch (Exception exc) {
                    // Best effort only: the leader info is purely diagnostic for the error message.
                    leaderInfo = "Not available due to: " + exc;
                }
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "I was asked to wait on state " + waitForState + " for " + shardId + " in " + collection + " on " + nodeName + " but I still do not see the requested state. I see state: " + Objects.toString(state) + " live:" + live + " leader from ZK: " + leaderInfo);
            }
            if (coreContainer.isShutDown()) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Solr is shutting down");
            }
            // solrcloud_debug
            // Debug-only diagnostics. Note that this block issues a soft commit on the core as a
            // side effect whenever debug logging is enabled.
            if (log.isDebugEnabled() && core != null) {
                try {
                    LocalSolrQueryRequest r = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
                    CommitUpdateCommand commitCmd = new CommitUpdateCommand(r, false);
                    commitCmd.softCommit = true;
                    core.getUpdateHandler().commit(commitCmd);
                    RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
                    SolrIndexSearcher searcher = searchHolder.get();
                    try {
                        log.debug(core.getCoreContainer().getZkController().getNodeName() + " to replicate " + searcher.search(new MatchAllDocsQuery(), 1).totalHits + " gen:" + core.getDeletionPolicy().getLatestCommit().getGeneration() + " data:" + core.getDataDir());
                    } finally {
                        // Always release the searcher reference obtained from getNewestSearcher.
                        searchHolder.decref();
                    }
                } catch (Exception e) {
                    // Failures in the diagnostics must not affect the wait loop.
                    log.debug("Error in solrcloud_debug block", e);
                }
            }
        }
        // Pause between polls; an interrupt here propagates out as InterruptedException.
        Thread.sleep(1000);
    }
    log.info("Waited coreNodeName: " + coreNodeName + ", state: " + waitForState + ", checkLive: " + checkLive + ", onlyIfLeader: " + onlyIfLeader + " for: " + retry + " seconds.");
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) SolrCore(org.apache.solr.core.SolrCore) CommitUpdateCommand(org.apache.solr.update.CommitUpdateCommand) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Replica(org.apache.solr.common.cloud.Replica) CloudDescriptor(org.apache.solr.cloud.CloudDescriptor) SolrException(org.apache.solr.common.SolrException) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) CoreContainer(org.apache.solr.core.CoreContainer) Slice(org.apache.solr.common.cloud.Slice) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) SolrParams(org.apache.solr.common.params.SolrParams) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException)

Example 9 with CloudDescriptor

use of org.apache.solr.cloud.CloudDescriptor in project lucene-solr by apache.

the class SolrCore method initializeMetrics.

/**
 * Registers this core's metrics with the given metric manager.
 *
 * <p>Creates the "new searcher" counters and timers, then registers gauges for
 * core identity (name, start time, reference count, instance and index
 * directories, aliases, collection and shard), index size, and the total and
 * usable space of the file system holding the data directory.
 *
 * @param manager  metric manager that creates and registers the metrics
 * @param registry name of the registry the metrics are registered in
 * @param scope    not used by this implementation
 */
@Override
public void initializeMetrics(SolrMetricManager manager, String registry, String scope) {
    final String searcherCategory = Category.SEARCHER.toString();
    final String coreCategory = Category.CORE.toString();
    final String indexCategory = Category.INDEX.toString();

    // searcher lifecycle counters and timers
    newSearcherCounter = manager.counter(this, registry, "new", searcherCategory);
    newSearcherTimer = manager.timer(this, registry, "time", searcherCategory, "new");
    newSearcherWarmupTimer = manager.timer(this, registry, "warmup", searcherCategory, "new");
    newSearcherMaxReachedCounter = manager.counter(this, registry, "maxReached", searcherCategory, "new");
    newSearcherOtherErrorsCounter = manager.counter(this, registry, "errors", searcherCategory, "new");

    // core identity and index size gauges
    manager.registerGauge(this, registry, () -> name == null ? "(null)" : name, true, "coreName", coreCategory);
    manager.registerGauge(this, registry, () -> startTime, true, "startTime", coreCategory);
    manager.registerGauge(this, registry, () -> getOpenCount(), true, "refCount", coreCategory);
    manager.registerGauge(this, registry, () -> resourceLoader.getInstancePath().toString(), true, "instanceDir", coreCategory);
    manager.registerGauge(this, registry, () -> getIndexDir(), true, "indexDir", coreCategory);
    manager.registerGauge(this, registry, () -> getIndexSize(), true, "sizeInBytes", indexCategory);
    manager.registerGauge(this, registry, () -> NumberUtils.readableSize(getIndexSize()), true, "size", indexCategory);

    if (coreContainer != null) {
        manager.registerGauge(this, registry, () -> coreContainer.getCoreNames(this), true, "aliases", coreCategory);
        final CloudDescriptor cloud = getCoreDescriptor().getCloudDescriptor();
        if (cloud != null) {
            // fall back to placeholder values while collection / shard are still unset
            manager.registerGauge(this, registry,
                    () -> cloud.getCollectionName() != null ? cloud.getCollectionName() : "_notset_",
                    true, "collection", coreCategory);
            manager.registerGauge(this, registry,
                    () -> cloud.getShardId() != null ? cloud.getShardId() : "_auto_",
                    true, "shard", coreCategory);
        }
    }

    // initialize disk total / free metrics
    final File dataDirectory = Paths.get(dataDir).toFile();
    manager.registerGauge(this, registry, () -> dataDirectory.getTotalSpace(), true, "totalSpace", coreCategory, "fs");
    manager.registerGauge(this, registry, () -> dataDirectory.getUsableSpace(), true, "usableSpace", coreCategory, "fs");
}
Also used : Path(java.nio.file.Path) File(java.io.File) CloudDescriptor(org.apache.solr.cloud.CloudDescriptor)

Example 10 with CloudDescriptor

use of org.apache.solr.cloud.CloudDescriptor in project lucene-solr by apache.

the class DistributedUpdateProcessor method doDefensiveChecks.

/**
 * Sanity-checks the leadership assumptions of the current update request and
 * rejects it with {@code SERVICE_UNAVAILABLE} when they do not hold.
 *
 * <p>Nothing is checked for log-replay or peer-sync updates. Otherwise:
 * <ul>
 *   <li>a {@code FROMLEADER} update arriving at a core that locally believes it
 *       is the leader is accepted only if it comes from a parent shard (this
 *       slice not ACTIVE and its range, if any, inside the parent's range) or
 *       carries a source collection;</li>
 *   <li>a request for which this processor was flagged as leader or sub-shard
 *       leader is rejected when the core does not locally believe it is the
 *       leader.</li>
 * </ul>
 *
 * @param phase the distribution phase of the request being processed
 * @throws SolrException with {@code SERVICE_UNAVAILABLE} when a check fails
 */
private void doDefensiveChecks(DistribPhase phase) {
    final int replayOrPeerSyncMask = UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC;
    if ((updateCommand.getFlags() & replayOrPeerSyncMask) != 0) {
        return;
    }

    final String sender = req.getParams().get(DISTRIB_FROM);
    final ClusterState currentState = zkController.getClusterState();
    final CloudDescriptor localDescriptor = req.getCore().getCoreDescriptor().getCloudDescriptor();
    final DocCollection coll = currentState.getCollection(collection);
    final Slice ownSlice = coll.getSlice(localDescriptor.getShardId());
    final boolean locallyLeader = localDescriptor.isLeader();

    // sender will be null on log replay
    if (DistribPhase.FROMLEADER == phase && locallyLeader && sender != null) {
        final String parentShard = req.getParams().get(DISTRIB_FROM_PARENT);
        if (parentShard == null) {
            // not a shard split; the only other legitimate source is a routing rule
            final String sourceCollection = req.getParams().get(DISTRIB_FROM_COLLECTION);
            if (sourceCollection == null) {
                log.error("Request says it is coming from leader, but we are the leader: " + req.getParamString());
                final SolrException leaderChanged = new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Request says it is coming from leader, but we are the leader");
                leaderChanged.setMetadata("cause", "LeaderChanged");
                throw leaderChanged;
            }
        } else {
            if (ownSlice.getState() == Slice.State.ACTIVE) {
                throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Request says it is coming from parent shard leader but we are in active state");
            }
            // shard splitting case -- our range must lie within the parent's range
            DocRouter.Range parentRange = coll.getSlice(parentShard).getRange();
            if (parentRange == null) {
                // a parent without an explicit range is treated as covering the full hash space
                parentRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
            }
            final DocRouter.Range ownRange = ownSlice.getRange();
            if (ownRange != null && !ownRange.isSubsetOf(parentRange)) {
                throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Request says it is coming from parent shard leader but parent hash range is not superset of my range");
            }
        }
    }

    final boolean expectedToLead = isLeader || isSubShardLeader;
    if (expectedToLead && !locallyLeader) {
        log.error("ClusterState says we are the leader, but locally we don't think so");
        throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "ClusterState says we are the leader (" + zkController.getBaseUrl() + "/" + req.getCore().getName() + "), but locally we don't think so. Request came from " + sender);
    }
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) DocRouter(org.apache.solr.common.cloud.DocRouter) DocCollection(org.apache.solr.common.cloud.DocCollection) CloudDescriptor(org.apache.solr.cloud.CloudDescriptor) SolrException(org.apache.solr.common.SolrException)

Aggregations

CloudDescriptor (org.apache.solr.cloud.CloudDescriptor)15 Replica (org.apache.solr.common.cloud.Replica)8 SolrException (org.apache.solr.common.SolrException)6 ClusterState (org.apache.solr.common.cloud.ClusterState)6 Slice (org.apache.solr.common.cloud.Slice)6 ArrayList (java.util.ArrayList)5 ZkController (org.apache.solr.cloud.ZkController)5 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)5 DocCollection (org.apache.solr.common.cloud.DocCollection)4 SolrParams (org.apache.solr.common.params.SolrParams)4 NamedList (org.apache.solr.common.util.NamedList)4 HashMap (java.util.HashMap)3 List (java.util.List)3 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)3 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)3 File (java.io.File)2 IOException (java.io.IOException)2 Map (java.util.Map)2 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)2 SolrDocumentList (org.apache.solr.common.SolrDocumentList)2