Search in sources :

Example 21 with Slice

use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

the class MoveReplicaCmd method moveReplica.

/**
 * Moves a single replica of a collection to the node named by {@code "targetNode"}.
 *
 * <p>The replica to move is chosen in one of two ways:
 * <ul>
 *   <li>if the message contains {@code REPLICA_PROP}, the replica with that name is used;</li>
 *   <li>otherwise {@code SHARD_ID_PROP} and {@code "fromNode"} are required and a replica of
 *       that shard located on {@code fromNode} is used.</li>
 * </ul>
 * The move is delegated to {@code moveHdfsReplica} when the replica's {@code dataDir}
 * starts with {@code "hdfs:/"}, and to {@code moveNormalReplica} otherwise.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces and the prefixes of two
 * logging calls (the stray string-literal fragments below); those lines are kept as
 * found.
 *
 * @param clusterState cluster state used to look up the collection and its slices
 * @param message      request properties (collection, targetNode, replica or shard/fromNode, async)
 * @param results      passed through to the move helpers
 * @throws SolrException with {@code BAD_REQUEST} when the collection or the replica cannot be found
 */
private void moveReplica(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {"moveReplica() : {}", Utils.toJSONString(message));
    ocmh.checkRequired(message, COLLECTION_PROP, "targetNode");
    String collection = message.getStr(COLLECTION_PROP);
    String targetNode = message.getStr("targetNode");
    String async = message.getStr(ASYNC);
    DocCollection coll = clusterState.getCollection(collection);
    if (coll == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
    Replica replica = null;
    if (message.containsKey(REPLICA_PROP)) {
        // Explicit replica name given: look it up directly.
        String replicaName = message.getStr(REPLICA_PROP);
        replica = coll.getReplica(replicaName);
        if (replica == null) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " replica: " + replicaName + " does not exist");
    } else {
        // No replica name: select by shard + source node.
        ocmh.checkRequired(message, SHARD_ID_PROP, "fromNode");
        String fromNode = message.getStr("fromNode");
        String shardId = message.getStr(SHARD_ID_PROP);
        Slice slice = clusterState.getCollection(collection).getSlice(shardId);
        List<Replica> sliceReplicas = new ArrayList<>(slice.getReplicas());
        Collections.shuffle(sliceReplicas, RANDOM);
        // Iterate the shuffled copy (previously the unshuffled slice.getReplicas() was
        // iterated, which left the shuffle without any effect on the selection).
        for (Replica r : sliceReplicas) {
            if (r.getNodeName().equals(fromNode)) {
                replica = r;
        if (replica == null) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection + " node: " + fromNode + " do not have any replica belong to shard: " + shardId);
    }"Replica will be moved {}", replica);
    // Find the slice that owns the chosen replica.
    Slice slice = null;
    for (Slice s : coll.getSlices()) {
        if (s.getReplicas().contains(replica)) {
            slice = s;
    assert slice != null;
    // Replicas whose data directory is on HDFS take a dedicated move path.
    Object dataDir = replica.get("dataDir");
    if (dataDir != null && dataDir.toString().startsWith("hdfs:/")) {
        moveHdfsReplica(clusterState, results, dataDir.toString(), targetNode, async, coll, replica, slice);
    } else {
        moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice);
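Also used : Slice(org.apache.solr.common.cloud.Slice) ArrayList(java.util.ArrayList) DocCollection(org.apache.solr.common.cloud.DocCollection) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException)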

Example 22 with Slice

use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

the class OverseerAutoReplicaFailoverThread method doWork.

/**
 * Runs one pass over the cluster state looking for ACTIVE slices that have down
 * replicas and fewer good replicas than the collection's replicationFactor, and hands
 * those down replicas to {@code processBadReplicas}.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces, and several guard
 * {@code if} statements have no visible body (presumably early exits / skips were
 * stripped — confirm against the real source). Comments below describe only what is
 * visible.
 */
private void doWork() {
    // TODO: extract to configurable strategy class ??
    ClusterState clusterState = zkStateReader.getClusterState();
    //check if we have disabled autoAddReplicas cluster wide
    String autoAddReplicas = zkStateReader.getClusterProperty(ZkStateReader.AUTO_ADD_REPLICAS, (String) null);
    // NOTE(review): no statement is visible inside this guard in the excerpt.
    if (autoAddReplicas != null && autoAddReplicas.equals("false")) {
    if (clusterState != null) {
        // Detects "no change since last pass": same cluster-state version, no bad-node
        // URLs recorded, and the same set of live nodes.
        if (clusterState.getZkClusterStateVersion() != null && clusterState.getZkClusterStateVersion().equals(lastClusterStateVersion) && baseUrlForBadNodes.size() == 0 && liveNodes.equals(clusterState.getLiveNodes())) {
            // nothing has changed, no work to do
        // Remember what this pass observed so the next pass can compare against it.
        liveNodes = clusterState.getLiveNodes();
        lastClusterStateVersion = clusterState.getZkClusterStateVersion();
        Map<String, DocCollection> collections = clusterState.getCollectionsMap();
        for (Map.Entry<String, DocCollection> entry : collections.entrySet()) {
            log.debug("look at collection={}", entry.getKey());
            DocCollection docCollection = entry.getValue();
            if (!docCollection.getAutoAddReplicas()) {
                log.debug("Collection {} is not setup to use autoAddReplicas, skipping..", docCollection.getName());
            if (docCollection.getReplicationFactor() == null) {
                log.debug("Skipping collection because it has no defined replicationFactor, name={}", docCollection.getName());
            log.debug("Found collection, name={} replicationFactor={}", entry.getKey(), docCollection.getReplicationFactor());
            Collection<Slice> slices = docCollection.getSlices();
            for (Slice slice : slices) {
                // Only slices in the ACTIVE state are examined.
                if (slice.getState() == Slice.State.ACTIVE) {
                    // findDownReplicasInSlice receives this collection to fill and returns the
                    // number of good replicas.
                    final Collection<DownReplica> downReplicas = new ArrayList<DownReplica>();
                    int goodReplicas = findDownReplicasInSlice(clusterState, docCollection, slice, downReplicas);
                    log.debug("collection={} replicationFactor={} goodReplicaCount={}", docCollection.getName(), docCollection.getReplicationFactor(), goodReplicas);
                    // Act only when something is down AND the slice is under-replicated.
                    if (downReplicas.size() > 0 && goodReplicas < docCollection.getReplicationFactor()) {
                        // badReplicaMap.put(collection, badReplicas);
                        processBadReplicas(entry.getKey(), downReplicas);
                    } else if (goodReplicas > docCollection.getReplicationFactor()) {
                        // Over-replication is only logged here; nothing is removed.
                        log.debug("There are too many replicas");
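Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) ArrayList(java.util.ArrayList) DocCollection(org.apache.solr.common.cloud.DocCollection) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)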

Example 23 with Slice

use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

the class OverseerAutoReplicaFailoverThread method getBestCreateUrl.

/**
 * @return the best node to replace the badReplica on or null if there is no
 *         such node
 */
static String getBestCreateUrl(ZkStateReader zkStateReader, DownReplica badReplica, Integer maxCoreCount) {
    // Chooses a base URL on which to re-create badReplica. Walks every replica of every
    // ACTIVE slice in the cluster, accumulating a Counts entry per live base URL, filters
    // out nodes that fail the per-node limits, and returns the first key of the candidates
    // ordered by ValueComparator. Returns null when there is no candidate.
    //
    // zkStateReader: source of the cluster state and of node-name -> base-URL mapping.
    // badReplica:    the replica to replace; its collection and slice must be non-null.
    // maxCoreCount:  optional cap on cores per node; null disables that check.
    //
    // NOTE(review): this excerpt has lost its closing braces and some statements (several
    // `if` bodies are empty as shown). Only two defects that are unambiguous from the
    // visible code are fixed here; everything else is kept as found.
    assert badReplica != null;
    assert badReplica.collection != null;
    assert badReplica.slice != null;
    log.debug("getBestCreateUrl for " + badReplica.replica);
    Map<String, Counts> counts = new HashMap<>();
    Set<String> unsuitableHosts = new HashSet<>();
    // NOTE(review): getClusterState() is dereferenced here before the null check below.
    Set<String> liveNodes = new HashSet<>(zkStateReader.getClusterState().getLiveNodes());
    // Number of replicas seen per base URL; compared against maxCoreCount further down.
    Map<String, Integer> coresPerNode = new HashMap<>();
    ClusterState clusterState = zkStateReader.getClusterState();
    if (clusterState != null) {
        Map<String, DocCollection> collections = clusterState.getCollectionsMap();
        for (Map.Entry<String, DocCollection> entry : collections.entrySet()) {
            String collection = entry.getKey();
            log.debug("look at collection {} as possible create candidate", collection);
            DocCollection docCollection = entry.getValue();
            // TODO - only operate on collections with sharedfs failover = true ??
            Collection<Slice> slices = docCollection.getSlices();
            for (Slice slice : slices) {
                // only look at active shards
                if (slice.getState() == Slice.State.ACTIVE) {
                    log.debug("look at slice {} for collection {} as possible create candidate", slice.getName(), collection);
                    Collection<Replica> replicas = slice.getReplicas();
                    for (Replica replica : replicas) {
                        String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
                        if (coresPerNode.containsKey(baseUrl)) {
                            Integer nodeCount = coresPerNode.get(baseUrl);
                            // Store the incremented value. The previous `nodeCount++` wrote the
                            // old value back, so the count never rose above 1.
                            coresPerNode.put(baseUrl, nodeCount + 1);
                        } else {
                            coresPerNode.put(baseUrl, 1);
                        // NOTE(review): the body of this check (replica lives on the bad
                        // replica's own base URL) is not visible in this excerpt.
                        if (baseUrl.equals(badReplica.replica.getStr(ZkStateReader.BASE_URL_PROP))) {
                        // on a live node?
                        log.debug("collection={} nodename={} livenodes={}", collection, replica.getNodeName(), clusterState.getLiveNodes());
                        boolean live = clusterState.liveNodesContain(replica.getNodeName());
                        log.debug("collection={} look at replica {} as possible create candidate, live={}", collection, replica.getName(), live);
                        if (live) {
                            Counts cnt = counts.get(baseUrl);
                            if (cnt == null) {
                                cnt = new Counts();
                            // Replicas of the bad replica's own collection weigh more (3 vs 1)
                            // and also count toward the per-node shard limit.
                            if (badReplica.collection.getName().equals(collection)) {
                                cnt.negRankingWeight += 3;
                                cnt.collectionShardsOnNode += 1;
                            } else {
                                cnt.negRankingWeight += 1;
                            // NOTE(review): the body of this same-collection/same-slice check is
                            // not visible in this excerpt.
                            if (badReplica.collection.getName().equals(collection) && badReplica.slice.getName().equals(slice.getName())) {
                            Integer maxShardsPerNode = badReplica.collection.getMaxShardsPerNode();
                            if (maxShardsPerNode == null) {
                                log.warn("maxShardsPerNode is not defined for collection, name=" + badReplica.collection.getName());
                                maxShardsPerNode = Integer.MAX_VALUE;
                            log.debug("collection={} node={} maxShardsPerNode={} maxCoresPerNode={} potential hosts={}", collection, baseUrl, maxShardsPerNode, maxCoreCount, cnt);
                            // Replicas of the bad replica's slice, looked up from the current
                            // cluster state; stays null if the collection or slice is gone.
                            Collection<Replica> badSliceReplicas = null;
                            DocCollection c = clusterState.getCollection(badReplica.collection.getName());
                            if (c != null) {
                                Slice s = c.getSlice(badReplica.slice.getName());
                                if (s != null) {
                                    badSliceReplicas = s.getReplicas();
                            boolean alreadyExistsOnNode = replicaAlreadyExistsOnNode(zkStateReader.getClusterState(), badSliceReplicas, badReplica, baseUrl);
                            // A node is rejected if it was already marked unsuitable, already
                            // hosts this replica, or has reached the shard or core limit.
                            if (unsuitableHosts.contains(baseUrl) || alreadyExistsOnNode || cnt.collectionShardsOnNode >= maxShardsPerNode || (maxCoreCount != null && coresPerNode.get(baseUrl) >= maxCoreCount)) {
                                log.debug("not a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt);
                            } else {
                                counts.put(baseUrl, cnt);
                                log.debug("is a candidate node, collection={} node={} max shards per node={} good replicas={}", collection, baseUrl, maxShardsPerNode, cnt);
    // NOTE(review): nothing visible in this excerpt removes entries from liveNodes, so as
    // shown every live node is (re-)registered here with Counts(0, 0); the later
    // "empty nodes" log message suggests a removal step is missing — confirm.
    for (String node : liveNodes) {
        counts.put(zkStateReader.getBaseUrlForNodeName(node), new Counts(0, 0));
    if (counts.size() == 0) {
        log.debug("no suitable hosts found for getBestCreateUrl for collection={}", badReplica.collection.getName());
        return null;
    ValueComparator vc = new ValueComparator(counts);
    Map<String, Counts> sortedCounts = new TreeMap<String, Counts>(vc);
    // Populate the sorted view. Without this the map is empty and iterator().next()
    // below would throw NoSuchElementException.
    sortedCounts.putAll(counts);
    log.debug("empty nodes={} for collection={}", liveNodes, badReplica.collection.getName());
    log.debug("sorted hosts={} for collection={}", sortedCounts, badReplica.collection.getName());
    log.debug("unsuitable hosts={} for collection={}", unsuitableHosts, badReplica.collection.getName());
    return sortedCounts.keySet().iterator().next();
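Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Replica(org.apache.solr.common.cloud.Replica) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet)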

Example 24 with Slice

use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

the class OverseerCollectionMessageHandler method waitForCoreNodeName.

/**
 * Polls the cluster state until a replica of {@code collectionName} is found whose node
 * name equals {@code msgNodeName} and whose core name equals {@code msgCore}, and
 * returns that replica's name.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces, and the {@code try} body
 * is empty as shown (presumably a pause between retries was stripped — confirm against
 * the real source).
 *
 * @param collectionName collection whose slices are searched
 * @param msgNodeName    node name to match against each replica's NODE_NAME_PROP
 * @param msgCore        core name to match against each replica's CORE_NAME_PROP
 * @return the name of the matching replica
 * @throws SolrException with {@code SERVER_ERROR} when no match is found within the retry budget
 */
String waitForCoreNodeName(String collectionName, String msgNodeName, String msgCore) {
    // Up to 320 passes over the cluster state.
    int retryCount = 320;
    while (retryCount-- > 0) {
        // Cluster state is re-read on every pass.
        Map<String, Slice> slicesMap = zkStateReader.getClusterState().getSlicesMap(collectionName);
        if (slicesMap != null) {
            for (Slice slice : slicesMap.values()) {
                for (Replica replica : slice.getReplicas()) {
                    // TODO: for really large clusters, we could 'index' on this
                    String nodeName = replica.getStr(ZkStateReader.NODE_NAME_PROP);
                    String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);
                    // NOTE(review): if getStr can return null for a replica missing either
                    // property, these equals() calls would throw — verify.
                    if (nodeName.equals(msgNodeName) && core.equals(msgCore)) {
                        return replica.getName();
        try {
        } catch (InterruptedException e) {
    throw new SolrException(ErrorCode.SERVER_ERROR, "Could not find coreNodeName");
Also used : Slice(org.apache.solr.common.cloud.Slice) Replica(org.apache.solr.common.cloud.Replica) RemoteSolrException(org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException) SolrException(org.apache.solr.common.SolrException)

Example 25 with Slice

use of org.apache.solr.common.cloud.Slice in project lucene-solr by apache.

the class OverseerCollectionMessageHandler method waitToSeeReplicasInState.

/**
 * Repeatedly reads the cluster state until a replica has been found for every core
 * name in {@code coreNames}, then returns the core-name to replica mapping.
 *
 * <p>NOTE(review): this excerpt has lost its closing braces and apparently some
 * statements; no visible statement throws the declared {@code InterruptedException}
 * (presumably a pause between polls was stripped — confirm against the real source).
 *
 * @param collectionName collection whose slices are searched
 * @param coreNames      core names to look for (matched against CORE_NAME_PROP)
 * @return map from each core name to the replica found for it
 * @throws SolrException with {@code SERVER_ERROR} when the timeout created below elapses first
 */
Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
    Map<String, Replica> result = new HashMap<>();
    // Timeout constructed with 30 seconds; checked at the end of each pass.
    TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
    while (true) {
        // NOTE(review): coll is used below without a null check.
        DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName);
        for (String coreName : coreNames) {
            // NOTE(review): the statement guarded by this check is not visible in this
            // excerpt (presumably it skipped cores already found).
            if (result.containsKey(coreName))
            for (Slice slice : coll.getSlices()) {
                for (Replica replica : slice.getReplicas()) {
                    if (coreName.equals(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
                        result.put(coreName, replica);
        // Done once every requested core name has a replica recorded.
        if (result.size() == coreNames.size()) {
            return result;
        } else {
            log.debug("Expecting {} cores but found {}", coreNames.size(), result.size());
        if (timeout.hasTimedOut()) {
            throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas: " + coreNames + " in cluster state.");
Also used : HashMap(java.util.HashMap) TimeOut(org.apache.solr.util.TimeOut) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) Replica(org.apache.solr.common.cloud.Replica) RemoteSolrException(org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException) SolrException(org.apache.solr.common.SolrException)


Slice (org.apache.solr.common.cloud.Slice) Replica (org.apache.solr.common.cloud.Replica) DocCollection (org.apache.solr.common.cloud.DocCollection) ClusterState (org.apache.solr.common.cloud.ClusterState) ArrayList (java.util.ArrayList)79 HashMap (java.util.HashMap)68 ZkStateReader (org.apache.solr.common.cloud.ZkStateReader) SolrException (org.apache.solr.common.SolrException)49 Map (java.util.Map)47 Test (org.junit.Test)37 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)28 CloudSolrClient (org.apache.solr.client.solrj.impl.CloudSolrClient)25 HashSet (java.util.HashSet)24 SolrQuery (org.apache.solr.client.solrj.SolrQuery)24 IOException (java.io.IOException) NamedList (org.apache.solr.common.util.NamedList)23 List (java.util.List)22 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)22 DocRouter (org.apache.solr.common.cloud.DocRouter) ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps)