Search in sources :

Example 16 with DocRouter

use of org.apache.solr.common.cloud.DocRouter in project lucene-solr by apache.

the class TestHashPartitioner method testHashCodes.

/**
 * Looks up the router registered under {@code PlainIdRouter.NAME}, verifies it
 * is a {@code PlainIdRouter}, and runs the normal-id hashing checks against a
 * collection built by {@code createCollection(4, router)}.
 */
public void testHashCodes() throws Exception {
    final DocRouter plainRouter = DocRouter.getDocRouter(PlainIdRouter.NAME);
    assertTrue(plainRouter instanceof PlainIdRouter);
    // 4 is presumably the number of shards -- confirm against createCollection
    final DocCollection collection = createCollection(4, plainRouter);
    doNormalIdHashing(collection);
}
Also used : PlainIdRouter(org.apache.solr.common.cloud.PlainIdRouter) DocRouter(org.apache.solr.common.cloud.DocRouter) DocCollection(org.apache.solr.common.cloud.DocCollection)

Example 17 with DocRouter

use of org.apache.solr.common.cloud.DocRouter in project lucene-solr by apache.

the class TestHashPartitioner method testRandomCompositeIds.

/**
 * Routes 10000 randomly generated ids through the router registered under
 * {@code CompositeIdRouter.NAME} and checks that each one resolves to a
 * non-null target slice.
 *
 * <p>Each id is a concatenation of 1-30 random parts; a part is a {@code '!'},
 * a {@code '/'}, a random integer in [-100, 1000], or a random unicode string
 * of 1-10 chars. Any exception thrown while routing is rethrown wrapped with
 * the offending id in the message.
 */
public void testRandomCompositeIds() throws Exception {
    final DocRouter compositeRouter = DocRouter.getDocRouter(CompositeIdRouter.NAME);
    final int collectionSize = TestUtil.nextInt(random(), 1, 10);
    final DocCollection collection = createCollection(collectionSize, compositeRouter);
    // one buffer, reset at the top of every iteration
    final StringBuilder buffer = new StringBuilder();
    for (int iteration = 0; iteration < 10000; iteration++) {
        buffer.setLength(0);
        final int partCount = TestUtil.nextInt(random(), 1, 30);
        for (int p = 0; p < partCount; p++) {
            final int kind = random().nextInt(5);
            if (kind == 0) {
                buffer.append('!');
            } else if (kind == 1) {
                buffer.append('/');
            } else if (kind == 2) {
                buffer.append(TestUtil.nextInt(random(), -100, 1000));
            } else {
                // kinds 3 and 4: a random unicode string
                final int len = TestUtil.nextInt(random(), 1, 10);
                final char[] chars = new char[len];
                TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, len);
                buffer.append(chars);
            }
        }
        final String docId = buffer.toString();
        try {
            final Slice target = compositeRouter.getTargetSlice(docId, null, null, null, collection);
            assertNotNull(target);
        } catch (Exception cause) {
            // surface the id that failed so the failure can be reproduced
            throw new Exception("Exception routing id '" + docId + "'", cause);
        }
    }
}
Also used : Slice(org.apache.solr.common.cloud.Slice) DocRouter(org.apache.solr.common.cloud.DocRouter) DocCollection(org.apache.solr.common.cloud.DocCollection)

Example 18 with DocRouter

use of org.apache.solr.common.cloud.DocRouter in project lucene-solr by apache.

the class ClusterStatus method getClusterStatus.

/**
 * Assembles a cluster status report and adds it to {@code results} under the
 * key {@code "cluster"}.
 *
 * <p>The report always contains {@code "collections"} and {@code "live_nodes"};
 * {@code "properties"}, {@code "aliases"} and {@code "roles"} are added only
 * when the corresponding data is present. When the {@code collection} field is
 * set, only that collection is reported.
 *
 * @param results list that receives the {@code "cluster"} entry
 * @throws SolrException with {@code BAD_REQUEST} if a specific collection was
 *         requested but is not present in the cluster state
 */
@SuppressWarnings("unchecked")
public void getClusterStatus(NamedList results) throws KeeperException, InterruptedException {
    // build the reverse mapping: collection name -> aliases pointing at it
    Aliases aliases = zkStateReader.getAliases();
    Map<String, List<String>> aliasesByCollection = new HashMap<>();
    Map<String, String> aliasVsCollections = aliases.getCollectionAliasMap();
    if (aliasVsCollections != null) {
        for (Map.Entry<String, String> aliasEntry : aliasVsCollections.entrySet()) {
            String aliasName = aliasEntry.getKey();
            for (String target : StrUtils.splitSmart(aliasEntry.getValue(), ',')) {
                // when a single collection was requested, ignore aliases of other collections
                if (collection != null && !collection.equals(target)) {
                    continue;
                }
                List<String> aliasList = aliasesByCollection.get(target);
                if (aliasList == null) {
                    aliasList = new ArrayList<>();
                    aliasesByCollection.put(target, aliasList);
                }
                aliasList.add(aliasName);
            }
        }
    }

    // roles are optional: only read them when the znode exists
    Map roles = null;
    if (zkStateReader.getZkClient().exists(ZkStateReader.ROLES, true)) {
        byte[] rolesData = zkStateReader.getZkClient().getData(ZkStateReader.ROLES, null, null, true);
        roles = (Map) Utils.fromJSON(rolesData);
    }

    ClusterState clusterState = zkStateReader.getClusterState();
    // round-trip the cluster state through JSON to obtain plain maps
    Map<String, Object> stateMap = (Map<String, Object>) Utils.fromJSON(Utils.toJSON(clusterState));
    String routeKey = message.getStr(ShardParams._ROUTE_);
    String shard = message.getStr(ZkStateReader.SHARD_ID_PROP);

    Map<String, DocCollection> collectionsMap = (collection == null)
            ? clusterState.getCollectionsMap()
            : Collections.singletonMap(collection, clusterState.getCollectionOrNull(collection));

    NamedList<Object> collectionProps = new SimpleOrderedMap<>();
    for (Map.Entry<String, DocCollection> collEntry : collectionsMap.entrySet()) {
        String name = collEntry.getKey();
        DocCollection docColl = collEntry.getValue();
        if (docColl == null) {
            if (collection == null) {
                // listing everything: the collection might have got deleted at the same time
                continue;
            }
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + name + " not found");
        }

        // shards selected by the route key and/or the explicit shard parameter
        Set<String> requestedShards = new HashSet<>();
        if (routeKey != null) {
            DocRouter router = docColl.getRouter();
            Collection<Slice> routedSlices = router.getSearchSlices(routeKey, null, docColl);
            for (Slice routed : routedSlices) {
                requestedShards.add(routed.getName());
            }
        }
        if (shard != null) {
            requestedShards.add(shard);
        }

        // state format > 1: serialize the collection itself; otherwise use the
        // entry from the already-converted cluster state map
        Map<String, Object> rawCollection;
        if (docColl.getStateFormat() > 1) {
            rawCollection = (Map<String, Object>) Utils.fromJSON(Utils.toJSON(docColl));
        } else {
            rawCollection = (Map<String, Object>) stateMap.get(name);
        }
        Map<String, Object> collectionStatus = getCollectionStatus(rawCollection, name, requestedShards);

        collectionStatus.put("znodeVersion", docColl.getZNodeVersion());
        List<String> collAliases = aliasesByCollection.get(name);
        if (collAliases != null && !collAliases.isEmpty()) {
            collectionStatus.put("aliases", collAliases);
        }
        collectionStatus.put("configName", zkStateReader.readConfigName(name));
        collectionProps.add(name, collectionStatus);
    }

    List<String> liveNodes = zkStateReader.getZkClient().getChildren(ZkStateReader.LIVE_NODES_ZKNODE, null, true);
    // walk the collectionProps tree to cross-check replica state with live nodes
    crossCheckReplicaStateWithLiveNodes(liveNodes, collectionProps);

    NamedList<Object> clusterStatus = new SimpleOrderedMap<>();
    clusterStatus.add("collections", collectionProps);

    // cluster properties, only when there are any
    Map clusterProps = zkStateReader.getClusterProperties();
    if (clusterProps != null && !clusterProps.isEmpty()) {
        clusterStatus.add("properties", clusterProps);
    }
    // the raw alias -> collections map, only when there are any
    if (aliasVsCollections != null && !aliasVsCollections.isEmpty()) {
        clusterStatus.add("aliases", aliasVsCollections);
    }
    // the roles map, only when the roles znode existed
    if (roles != null) {
        clusterStatus.add("roles", roles);
    }
    clusterStatus.add("live_nodes", liveNodes);

    results.add("cluster", clusterStatus);
}
Also used : HashMap(java.util.HashMap) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) DocRouter(org.apache.solr.common.cloud.DocRouter) NamedList(org.apache.solr.common.util.NamedList) ArrayList(java.util.ArrayList) List(java.util.List) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException) HashSet(java.util.HashSet) ClusterState(org.apache.solr.common.cloud.ClusterState) Aliases(org.apache.solr.common.cloud.Aliases) Slice(org.apache.solr.common.cloud.Slice) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) HashMap(java.util.HashMap) Map(java.util.Map)

Example 19 with DocRouter

use of org.apache.solr.common.cloud.DocRouter in project lucene-solr by apache.

the class ChaosMonkeyShardSplitTest method test.

/**
 * Splits {@code SHARD1} of the default collection while documents are being
 * indexed from a background thread, after killing and restarting the shard
 * leader and while an {@code OverseerRestarter} runs in its own thread.
 * Finally verifies document counts and shard states and waits for recoveries.
 */
@Test
public void test() throws Exception {
    waitForThingsToLevelOut(15);

    final ClusterState state = cloudClient.getZkStateReader().getClusterState();
    final DocRouter router = state.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
    final Slice parentShard = state.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);

    // fall back to the router's full range when the shard has no explicit range
    final DocRouter.Range parentRange;
    if (parentShard.getRange() != null) {
        parentRange = parentShard.getRange();
    } else {
        parentRange = router.fullRange();
    }
    final List<DocRouter.Range> ranges = router.partitionRange(2, parentRange);
    final int[] docCounts = new int[ranges.size()];
    final int replicaCount = parentShard.getReplicas().size();

    Thread indexer = null;
    OverseerRestarter overseerKiller = null;
    Thread overseerKillerThread = null;
    final SolrClient firstClient = clients.get(0);
    try {
        del("*:*");
        for (int docId = 0; docId < 100; docId++) {
            indexAndUpdateCount(router, ranges, docCounts, String.valueOf(docId), docId);
        }
        commit();

        // keep indexing in the background while the cluster is being disturbed
        indexer = new Thread() {

            @Override
            public void run() {
                final int upperBound = atLeast(401);
                for (int docId = 101; docId < upperBound; docId++) {
                    try {
                        indexAndUpdateCount(router, ranges, docCounts, String.valueOf(docId), docId);
                        Thread.sleep(atLeast(25));
                    } catch (Exception e) {
                        // a failed add is logged and indexing continues
                        log.error("Exception while adding doc", e);
                    }
                }
            }
        };
        indexer.start();

        // kill the leader
        CloudJettyRunner currentLeader = shardToLeaderJetty.get("shard1");
        chaosMonkey.killJetty(currentLeader);
        Thread.sleep(2000);
        waitForThingsToLevelOut(90);
        Thread.sleep(1000);
        checkShardConsistency(false, true);

        // poll until a different jetty is reported as leader
        final CloudJettyRunner killedLeader = currentLeader;
        do {
            updateMappingsFromZk(this.jettys, this.clients);
            currentLeader = shardToLeaderJetty.get("shard1");
        } while (currentLeader == killedLeader);

        // bring the dead node back; it is not the leader anymore
        ChaosMonkey.start(killedLeader.jetty);
        waitTillRecovered();

        // Kill the overseer
        // TODO: Actually kill the Overseer instance
        overseerKiller = new OverseerRestarter(zkServer.getZkAddress());
        overseerKillerThread = new Thread(overseerKiller);
        overseerKillerThread.start();
        killCounter.incrementAndGet();

        splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1, null, null);
        log.info("Layout after split: \n");
        printLayout();
    } finally {
        if (indexer != null) {
            indexer.join();
        }
        if (firstClient != null) {
            // distributed commit on all shards
            firstClient.commit();
        }
        if (overseerKiller != null) {
            // ask the restarter to stop, then wait for its thread
            overseerKiller.run = false;
            if (overseerKillerThread != null) {
                overseerKillerThread.join();
            }
        }
    }

    checkDocCountsAndShardStates(docCounts, replicaCount);
    // todo - can't call waitForThingsToLevelOut because it looks for
    // jettys of all shards
    // and the new sub-shards don't have any.
    waitForRecoveriesToFinish(true);
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) SolrClient(org.apache.solr.client.solrj.SolrClient) Slice(org.apache.solr.common.cloud.Slice) DocRouter(org.apache.solr.common.cloud.DocRouter) Test(org.junit.Test)

Example 20 with DocRouter

use of org.apache.solr.common.cloud.DocRouter in project lucene-solr by apache.

the class SplitShardCmd method split.

/**
 * Splits a shard of a collection into sub-shards.
 *
 * <p>The parent shard is taken from {@code ZkStateReader.SHARD_ID_PROP} in
 * {@code message}, or, when that is absent, resolved from {@code "split.key"}
 * (only supported for {@code CompositeIdRouter}). Sub-ranges come from
 * {@code CoreAdminParams.RANGES} if given, else from partitioning by the split
 * key, else from an even two-way partition of the parent range.
 *
 * <p>The method then creates the sub-shard slices and their leader cores on the
 * parent leader's node, invokes the SPLIT core admin command, asks the
 * sub-shard leaders to apply buffered updates, registers and creates the
 * remaining replicas, and updates the shard states.
 *
 * @param clusterState cluster state used to look up the collection; refreshed
 *                     locally from {@code zkStateReader} as the split progresses
 * @param message      request properties: {@code "collection"}, shard id,
 *                     {@code "split.key"}, ranges, async id and any
 *                     {@code COLL_PROP_PREFIX}-prefixed properties
 * @param results      collector for sub-operation responses
 * @return {@code true} when the split completed
 * @throws SolrException {@code BAD_REQUEST} for invalid shard / split key /
 *         ranges or an already active sub-shard; {@code SERVER_ERROR} when the
 *         leader lookup is interrupted, the parent leader node is not live or
 *         changed its zk session, or any other failure occurs
 */
public boolean split(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
    String collectionName = message.getStr("collection");
    String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
    log.info("Split shard invoked");
    ZkStateReader zkStateReader = ocmh.zkStateReader;
    zkStateReader.forceUpdateCollection(collectionName);
    String splitKey = message.getStr("split.key");
    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
    DocCollection collection = clusterState.getCollection(collectionName);
    DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
    Slice parentSlice;
    if (slice == null) {
        // no shard given: resolve the parent shard from the split key
        if (router instanceof CompositeIdRouter) {
            Collection<Slice> searchSlices = router.getSearchSlicesSingle(splitKey, new ModifiableSolrParams(), collection);
            if (searchSlices.isEmpty()) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to find an active shard for split.key: " + splitKey);
            }
            if (searchSlices.size() > 1) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Splitting a split.key: " + splitKey + " which spans multiple shards is not supported");
            }
            parentSlice = searchSlices.iterator().next();
            slice = parentSlice.getName();
            log.info("Split by route.key: {}, parent shard is: {} ", splitKey, slice);
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Split by route key can only be used with CompositeIdRouter or subclass. Found router: " + router.getClass().getName());
        }
    } else {
        parentSlice = collection.getSlice(slice);
    }
    if (parentSlice == null) {
        // only reachable when an explicit shard name was given but does not exist
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice);
    }
    // find the leader for the shard
    Replica parentShardLeader;
    try {
        parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice, 10000);
    } catch (InterruptedException e) {
        // Restore the interrupt flag and abort. Previously execution fell through
        // with a null leader and failed with a NullPointerException just below.
        Thread.currentThread().interrupt();
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Interrupted while waiting for the leader of shard: " + slice, e);
    }
    // let's record the ephemeralOwner of the parent leader node
    Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
    if (leaderZnodeStat == null) {
        // we just got to know the leader but its live node is gone already!
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
    }
    DocRouter.Range range = parentSlice.getRange();
    if (range == null) {
        range = new PlainIdRouter().fullRange();
    }
    List<DocRouter.Range> subRanges = null;
    String rangesStr = message.getStr(CoreAdminParams.RANGES);
    if (rangesStr != null) {
        // explicit ranges: each must lie inside the parent range, and together
        // they must cover it exactly without gaps or overlaps
        String[] ranges = rangesStr.split(",");
        if (ranges.length == 0 || ranges.length == 1) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "There must be at least two ranges specified to split a shard");
        } else {
            subRanges = new ArrayList<>(ranges.length);
            for (int i = 0; i < ranges.length; i++) {
                String r = ranges[i];
                try {
                    subRanges.add(DocRouter.DEFAULT.fromString(r));
                } catch (Exception e) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Exception in parsing hexadecimal hash range: " + r, e);
                }
                if (!subRanges.get(i).isSubsetOf(range)) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash range: " + r + " is not a subset of parent shard's range: " + range.toString());
                }
            }
            // copy to preserve original order
            List<DocRouter.Range> temp = new ArrayList<>(subRanges);
            Collections.sort(temp);
            if (!range.equals(new DocRouter.Range(temp.get(0).min, temp.get(temp.size() - 1).max))) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash ranges: " + rangesStr + " do not cover the entire range of parent shard: " + range);
            }
            for (int i = 1; i < temp.size(); i++) {
                if (temp.get(i - 1).max + 1 != temp.get(i).min) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specified hash ranges: " + rangesStr + " either overlap with each other or " + "do not cover the entire range of parent shard: " + range);
                }
            }
        }
    } else if (splitKey != null) {
        if (router instanceof CompositeIdRouter) {
            CompositeIdRouter compositeIdRouter = (CompositeIdRouter) router;
            subRanges = compositeIdRouter.partitionRangeByKey(splitKey, range);
            if (subRanges.size() == 1) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " has a hash range that is exactly equal to hash range of shard: " + slice);
            }
            for (DocRouter.Range subRange : subRanges) {
                if (subRange.min == subRange.max) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " must be a compositeId");
                }
            }
            log.info("Partitioning parent shard " + slice + " range: " + parentSlice.getRange() + " yields: " + subRanges);
            // rebuild rangesStr so it can be passed to the SPLIT core admin command below
            rangesStr = "";
            for (int i = 0; i < subRanges.size(); i++) {
                DocRouter.Range subRange = subRanges.get(i);
                rangesStr += subRange.toString();
                if (i < subRanges.size() - 1)
                    rangesStr += ',';
            }
        } else {
            // A split key together with an explicit shard on a non-composite router
            // used to leave subRanges null and fail later with a NullPointerException.
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Split by route key can only be used with CompositeIdRouter or subclass. Found router: " + router.getClass().getName());
        }
    } else {
        // todo: fixed to two partitions?
        subRanges = router.partitionRange(2, range);
    }
    try {
        List<String> subSlices = new ArrayList<>(subRanges.size());
        List<String> subShardNames = new ArrayList<>(subRanges.size());
        String nodeName = parentShardLeader.getNodeName();
        for (int i = 0; i < subRanges.size(); i++) {
            String subSlice = slice + "_" + i;
            subSlices.add(subSlice);
            String subShardName = Assign.buildCoreName(collectionName, subSlice, Replica.Type.NRT, 1);
            subShardNames.add(subShardName);
        }
        // sub-shards left over in CONSTRUCTION or RECOVERY state are deleted;
        // an ACTIVE one aborts the split
        boolean oldShardsDeleted = false;
        for (String subSlice : subSlices) {
            Slice oSlice = collection.getSlice(subSlice);
            if (oSlice != null) {
                final Slice.State state = oSlice.getState();
                if (state == Slice.State.ACTIVE) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sub-shard: " + subSlice + " exists in active state. Aborting split shard.");
                } else if (state == Slice.State.CONSTRUCTION || state == Slice.State.RECOVERY) {
                    // delete the shards
                    log.info("Sub-shard: {} already exists therefore requesting its deletion", subSlice);
                    Map<String, Object> propMap = new HashMap<>();
                    propMap.put(Overseer.QUEUE_OPERATION, "deleteshard");
                    propMap.put(COLLECTION_PROP, collectionName);
                    propMap.put(SHARD_ID_PROP, subSlice);
                    ZkNodeProps m = new ZkNodeProps(propMap);
                    try {
                        ocmh.commandMap.get(DELETESHARD).call(clusterState, m, new NamedList());
                    } catch (Exception e) {
                        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to delete already existing sub shard: " + subSlice, e);
                    }
                    oldShardsDeleted = true;
                }
            }
        }
        if (oldShardsDeleted) {
            // refresh the locally cached cluster state
            // we know we have the latest because otherwise deleteshard would have failed
            clusterState = zkStateReader.getClusterState();
            collection = clusterState.getCollection(collectionName);
        }
        final String asyncId = message.getStr(ASYNC);
        Map<String, String> requestMap = new HashMap<>();
        // create each sub-shard slice and its first replica on the parent leader's node
        for (int i = 0; i < subRanges.size(); i++) {
            String subSlice = subSlices.get(i);
            String subShardName = subShardNames.get(i);
            DocRouter.Range subRange = subRanges.get(i);
            log.info("Creating slice " + subSlice + " of collection " + collectionName + " on " + nodeName);
            Map<String, Object> propMap = new HashMap<>();
            propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD.toLower());
            propMap.put(ZkStateReader.SHARD_ID_PROP, subSlice);
            propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
            propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
            propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.State.CONSTRUCTION.toString());
            propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
            propMap.put("shard_parent_node", parentShardLeader.getNodeName());
            propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
            DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
            inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
            // wait until we are able to see the new shard in cluster state
            ocmh.waitForNewShard(collectionName, subSlice);
            // refresh cluster state
            clusterState = zkStateReader.getClusterState();
            log.info("Adding replica " + subShardName + " as part of slice " + subSlice + " of collection " + collectionName + " on " + nodeName);
            propMap = new HashMap<>();
            propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
            propMap.put(COLLECTION_PROP, collectionName);
            propMap.put(SHARD_ID_PROP, subSlice);
            propMap.put("node", nodeName);
            propMap.put(CoreAdminParams.NAME, subShardName);
            // copy over property params:
            for (String key : message.keySet()) {
                if (key.startsWith(COLL_PROP_PREFIX)) {
                    propMap.put(key, message.getStr(key));
                }
            }
            // add async param
            if (asyncId != null) {
                propMap.put(ASYNC, asyncId);
            }
            ocmh.addReplica(clusterState, new ZkNodeProps(propMap), results, null);
        }
        ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard leaders", asyncId, requestMap);
        for (String subShardName : subShardNames) {
            // wait for parent leader to acknowledge the sub-shard core
            log.info("Asking parent leader to wait for: " + subShardName + " to be alive on: " + nodeName);
            String coreNodeName = ocmh.waitForCoreNodeName(collectionName, nodeName, subShardName);
            CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
            cmd.setCoreName(subShardName);
            cmd.setNodeName(nodeName);
            cmd.setCoreNodeName(coreNodeName);
            cmd.setState(Replica.State.ACTIVE);
            cmd.setCheckLive(true);
            cmd.setOnlyIfLeader(true);
            ModifiableSolrParams p = new ModifiableSolrParams(cmd.getParams());
            ocmh.sendShardRequest(nodeName, p, shardHandler, asyncId, requestMap);
        }
        ocmh.processResponses(results, shardHandler, true, "SPLITSHARD timed out waiting for subshard leaders to come up", asyncId, requestMap);
        log.info("Successfully created all sub-shards for collection " + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
        log.info("Splitting shard " + parentShardLeader.getName() + " as part of slice " + slice + " of collection " + collectionName + " on " + parentShardLeader);
        // ask the parent leader core to split its index into the sub-shard cores
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
        params.set(CoreAdminParams.CORE, parentShardLeader.getStr("core"));
        for (int i = 0; i < subShardNames.size(); i++) {
            String subShardName = subShardNames.get(i);
            params.add(CoreAdminParams.TARGET_CORE, subShardName);
        }
        // rangesStr is null here when the default two-way partition was used
        params.set(CoreAdminParams.RANGES, rangesStr);
        ocmh.sendShardRequest(parentShardLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
        ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to invoke SPLIT core admin command", asyncId, requestMap);
        log.info("Index on shard: " + nodeName + " split into two successfully");
        // apply buffered updates on sub-shards
        for (int i = 0; i < subShardNames.size(); i++) {
            String subShardName = subShardNames.get(i);
            log.info("Applying buffered updates on : " + subShardName);
            params = new ModifiableSolrParams();
            params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
            params.set(CoreAdminParams.NAME, subShardName);
            ocmh.sendShardRequest(nodeName, params, shardHandler, asyncId, requestMap);
        }
        ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed while asking sub shard leaders" + " to apply buffered updates", asyncId, requestMap);
        log.info("Successfully applied buffered updates on : " + subShardNames);
        // Replica creation for the new Slices
        // look at the replication factor and see if it matches reality
        // if it does not, find best nodes to create more cores
        // TODO: Have replication factor decided in some other way instead of numShards for the parent
        int repFactor = parentSlice.getReplicas().size();
        // we need to look at every node and see how many cores it serves
        // add our new cores to existing nodes serving the least number of cores
        // but (for now) require that each core goes on a distinct node.
        // TODO: add smarter options that look at the current number of cores per
        // node?
        // for now we just go random
        // NOTE(review): nodeList is built and pruned but never read; identifyNodes
        // below receives a fresh copy of the live nodes that still includes the
        // parent's node. Left unchanged because switching would alter replica
        // placement -- confirm the intended behavior.
        Set<String> nodes = clusterState.getLiveNodes();
        List<String> nodeList = new ArrayList<>(nodes.size());
        nodeList.addAll(nodes);
        // TODO: Have maxShardsPerNode param for this operation?
        // Remove the node that hosts the parent shard for replica creation.
        nodeList.remove(nodeName);
        // TODO: change this to handle sharding a slice into > 2 sub-shards.
        Map<ReplicaAssigner.Position, String> nodeMap = ocmh.identifyNodes(clusterState, new ArrayList<>(clusterState.getLiveNodes()), new ZkNodeProps(collection.getProperties()), subSlices, repFactor - 1, 0, 0);
        List<Map<String, Object>> replicas = new ArrayList<>((repFactor - 1) * 2);
        // register every additional replica in cluster state as DOWN now; the
        // cores themselves are created after the leader liveness check below
        for (Map.Entry<ReplicaAssigner.Position, String> entry : nodeMap.entrySet()) {
            String sliceName = entry.getKey().shard;
            String subShardNodeName = entry.getValue();
            String shardName = collectionName + "_" + sliceName + "_replica" + (entry.getKey().index);
            log.info("Creating replica shard " + shardName + " as part of slice " + sliceName + " of collection " + collectionName + " on " + subShardNodeName);
            ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, sliceName, ZkStateReader.CORE_NAME_PROP, shardName, ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(), ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName), ZkStateReader.NODE_NAME_PROP, subShardNodeName);
            Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
            HashMap<String, Object> propMap = new HashMap<>();
            propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
            propMap.put(COLLECTION_PROP, collectionName);
            propMap.put(SHARD_ID_PROP, sliceName);
            propMap.put("node", subShardNodeName);
            propMap.put(CoreAdminParams.NAME, shardName);
            // copy over property params:
            for (String key : message.keySet()) {
                if (key.startsWith(COLL_PROP_PREFIX)) {
                    propMap.put(key, message.getStr(key));
                }
            }
            // add async param
            if (asyncId != null) {
                propMap.put(ASYNC, asyncId);
            }
            // special flag param to instruct addReplica not to create the replica in cluster state again
            propMap.put(SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, "true");
            replicas.add(propMap);
        }
        assert TestInjection.injectSplitFailureBeforeReplicaCreation();
        long ephemeralOwner = leaderZnodeStat.getEphemeralOwner();
        // compare against the ephemeralOwner of the parent leader node
        leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
        if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
            // put sub-shards in recovery_failed state
            DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
            Map<String, Object> propMap = new HashMap<>();
            propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
            for (String subSlice : subSlices) {
                propMap.put(subSlice, Slice.State.RECOVERY_FAILED.toString());
            }
            propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
            ZkNodeProps m = new ZkNodeProps(propMap);
            inQueue.offer(Utils.toJSON(m));
            if (leaderZnodeStat == null) {
                // the leader is not live anymore, fail the split!
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
            } else if (ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
                // there's a new leader, fail the split!
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The zk session id for the shard leader node: " + parentShardLeader.getNodeName() + " has changed from " + ephemeralOwner + " to " + leaderZnodeStat.getEphemeralOwner() + ". This can cause data loss so we must abort the split");
            }
        }
        if (repFactor == 1) {
            // switch sub shard states to 'active'
            log.info("Replication factor is 1 so switching shard states");
            DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
            Map<String, Object> propMap = new HashMap<>();
            propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
            propMap.put(slice, Slice.State.INACTIVE.toString());
            for (String subSlice : subSlices) {
                propMap.put(subSlice, Slice.State.ACTIVE.toString());
            }
            propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
            ZkNodeProps m = new ZkNodeProps(propMap);
            inQueue.offer(Utils.toJSON(m));
        } else {
            log.info("Requesting shard state be set to 'recovery'");
            DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
            Map<String, Object> propMap = new HashMap<>();
            propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
            for (String subSlice : subSlices) {
                propMap.put(subSlice, Slice.State.RECOVERY.toString());
            }
            propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
            ZkNodeProps m = new ZkNodeProps(propMap);
            inQueue.offer(Utils.toJSON(m));
        }
        // now actually create replica cores on sub shard nodes
        for (Map<String, Object> replica : replicas) {
            ocmh.addReplica(clusterState, new ZkNodeProps(replica), results, null);
        }
        ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard replicas", asyncId, requestMap);
        log.info("Successfully created all replica shards for all sub-slices " + subSlices);
        ocmh.commit(results, slice, parentShardLeader);
        return true;
    } catch (SolrException e) {
        // already carries an error code and message; pass through untouched
        throw e;
    } catch (Exception e) {
        log.error("Error executing split operation for collection: " + collectionName + " parent shard: " + slice, e);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, null, e);
    }
}
Also used : HashMap(java.util.HashMap) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Stat(org.apache.zookeeper.data.Stat) DocRouter(org.apache.solr.common.cloud.DocRouter) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException) NamedList(org.apache.solr.common.util.NamedList) ShardHandler(org.apache.solr.handler.component.ShardHandler) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException) CompositeIdRouter(org.apache.solr.common.cloud.CompositeIdRouter) Slice(org.apache.solr.common.cloud.Slice) PlainIdRouter(org.apache.solr.common.cloud.PlainIdRouter) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

DocRouter (org.apache.solr.common.cloud.DocRouter): 21, Slice (org.apache.solr.common.cloud.Slice): 16, DocCollection (org.apache.solr.common.cloud.DocCollection): 12, ArrayList (java.util.ArrayList): 9, ClusterState (org.apache.solr.common.cloud.ClusterState): 9, HashMap (java.util.HashMap): 8, Map (java.util.Map): 6, SolrException (org.apache.solr.common.SolrException): 6, HashSet (java.util.HashSet): 5, IOException (java.io.IOException): 4, List (java.util.List): 4, HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 4, CompositeIdRouter (org.apache.solr.common.cloud.CompositeIdRouter): 4, Replica (org.apache.solr.common.cloud.Replica): 4, SolrQuery (org.apache.solr.client.solrj.SolrQuery): 3, SolrServerException (org.apache.solr.client.solrj.SolrServerException): 3, ImplicitDocRouter (org.apache.solr.common.cloud.ImplicitDocRouter): 3, ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 3, NamedList (org.apache.solr.common.util.NamedList): 3, SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap): 3