Search in sources :

Example 21 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class BasicDistributedZk2Test method test.

/**
 * Distributed scenario test, run with 4 fixed shards (see {@code @ShardsFixed}).
 *
 * <p>The steps below run strictly in order: index and verify documents, bring a shard
 * down and recover it, add a replica, update/delete, attempt to index an invalid
 * document, index to a leader whose other replicas are stopped, and finally expire a
 * ZooKeeper session before checking consistency of both shards.
 *
 * <p>If any step throws before the end is reached, {@code printLayoutOnTearDown} is set
 * to {@code true} in the {@code finally} block.
 *
 * @throws Exception if any step of the scenario fails
 */
@Test
@ShardsFixed(num = 4)
public void test() throws Exception {
    // Flipped to true only after the very last step; read in the finally block.
    boolean testFinished = false;
    try {
        handle.clear();
        // presumably makes response comparison ignore the "timestamp" field - confirm against the base test class
        handle.put("timestamp", SKIPVAL);
        testNodeWithoutCollectionForwarding();
        indexr(id, 1, i1, 100, tlong, 100, t1, "now is the time for all good men", "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
        commit();
        // make sure we are in a steady state...
        waitForRecoveriesToFinish(false);
        assertDocCounts(false);
        indexAbunchOfDocs();
        // check again 
        waitForRecoveriesToFinish(false);
        commit();
        assertDocCounts(VERBOSE);
        checkQueries();
        assertDocCounts(VERBOSE);
        // The same query is issued before and after the shard is brought down and recovered.
        query("q", "*:*", "sort", "n_tl1 desc");
        brindDownShardIndexSomeDocsAndRecover();
        query("q", "*:*", "sort", "n_tl1 desc");
        // test adding another replica to a shard - it should do a
        // recovery/replication to pick up the index from the leader
        addNewReplica();
        long docId = testUpdateAndDelete();
        // index a bad doc...
        try {
            indexr(t1, "a doc with no id");
            fail("this should fail");
        } catch (SolrException e) {
            // expected: the document has no id field
        }
        // TODO: bring this to its own method?
        // try indexing to a leader that has no replicas up
        ZkStateReader zkStateReader = cloudClient.getZkStateReader();
        ZkNodeProps leaderProps = zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, SHARD2);
        String nodeName = leaderProps.getStr(ZkStateReader.NODE_NAME_PROP);
        // Stop everything in SHARD2 except the leader's node, then talk to that node directly.
        chaosMonkey.stopShardExcept(SHARD2, nodeName);
        SolrClient client = getClient(nodeName);
        index_specific(client, "id", docId + 1, t1, "what happens here?");
        // expire a session...
        CloudJettyRunner cloudJetty = shardToJetty.get(SHARD1).get(0);
        chaosMonkey.expireSession(cloudJetty.jetty);
        // NOTE(review): this uses the same id (docId + 1) as the document indexed a few lines above - confirm the overwrite is intentional
        indexr("id", docId + 1, t1, "slip this doc in");
        waitForRecoveriesToFinish(false);
        checkShardConsistency(SHARD1);
        checkShardConsistency(SHARD2);
        testFinished = true;
    } finally {
        // Only request the layout print when the scenario did not run to completion.
        if (!testFinished) {
            printLayoutOnTearDown = true;
        }
    }
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) SolrClient(org.apache.solr.client.solrj.SolrClient) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) SolrException(org.apache.solr.common.SolrException) Test(org.junit.Test)

Example 22 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class BasicDistributedZkTest method getLeaderUrlFromZk.

/**
 * Looks up the leader of the given slice in the current cluster state and wraps it
 * in a {@link ZkCoreNodeProps}.
 *
 * @param collection name of the collection the slice belongs to
 * @param slice      name of the slice whose leader is wanted
 * @return the leader's properties wrapped as {@code ZkCoreNodeProps}
 * @throws RuntimeException if the cluster state reports no leader for the slice
 */
protected ZkCoreNodeProps getLeaderUrlFromZk(String collection, String slice) {
    ZkNodeProps leaderProps = getCommonCloudSolrClient()
            .getZkStateReader()
            .getClusterState()
            .getLeader(collection, slice);
    if (leaderProps != null) {
        return new ZkCoreNodeProps(leaderProps);
    }
    throw new RuntimeException("Could not find leader:" + collection + " " + slice);
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps)

Example 23 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class CreateCollectionCmd method call.

/**
 * Handles a "create collection" message.
 *
 * <p>Visible flow: validate the request (collection must not exist, a config name must
 * resolve, replica/slice counts must be positive), pick nodes for the replicas, create the
 * collection's ZK node, enqueue the message on the state update queue, poll the cluster state
 * for up to 30 seconds until the collection appears, then issue one core-admin CREATE request
 * per replica position and collect the responses into {@code results}. If the responses
 * contain a non-empty "failure" entry, {@code ocmh.cleanupCollection} is invoked.
 *
 * <p>When the node list is empty the method returns as soon as the collection is visible,
 * without creating any cores.
 *
 * @param clusterState cluster state used for the existence check, live nodes and node assignment
 * @param message      the create request; read for name, replica counts, router, shards, async id, etc.
 * @param results      receives the per-core responses via {@code ocmh.processResponses}
 * @throws SolrException with BAD_REQUEST for invalid input, or SERVER_ERROR if the collection
 *                       does not appear within the timeout; any other exception raised inside
 *                       the main try block is wrapped in a SERVER_ERROR SolrException
 */
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
    final String collectionName = message.getStr(NAME);
    log.info("Create collection {}", collectionName);
    if (clusterState.hasCollection(collectionName)) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "collection already exists: " + collectionName);
    }
    String configName = getConfigName(collectionName, message);
    if (configName == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No config set found to associate with the collection.");
    }
    ocmh.validateConfigOrThrowSolrException(configName);
    try {
        // look at the replication factor and see if it matches reality
        // if it does not, find best nodes to create more cores
        int numTlogReplicas = message.getInt(TLOG_REPLICAS, 0);
        // NRT count falls back to REPLICATION_FACTOR, whose own default is 0 when tlog replicas were requested and 1 otherwise.
        int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, numTlogReplicas > 0 ? 0 : 1));
        int numPullReplicas = message.getInt(PULL_REPLICAS, 0);
        ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
        final String async = message.getStr(ASYNC);
        Integer numSlices = message.getInt(NUM_SLICES, null);
        String router = message.getStr("router.name", DocRouter.DEFAULT_NAME);
        List<String> shardNames = new ArrayList<>();
        if (ImplicitDocRouter.NAME.equals(router)) {
            // Implicit router: shard names come from the "shards" property and determine the slice count.
            ClusterStateMutator.getShardNames(shardNames, message.getStr("shards", null));
            numSlices = shardNames.size();
        } else {
            if (numSlices == null) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NUM_SLICES + " is a required param (when using CompositeId router).");
            }
            ClusterStateMutator.getShardNames(numSlices, shardNames);
        }
        int maxShardsPerNode = message.getInt(MAX_SHARDS_PER_NODE, 1);
        if (numNrtReplicas + numTlogReplicas <= 0) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NRT_REPLICAS + " + " + TLOG_REPLICAS + " must be greater than 0");
        }
        if (numSlices <= 0) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NUM_SLICES + " must be > 0");
        }
        // we need to look at every node and see how many cores it serves
        // add our new cores to existing nodes serving the least number of cores
        // but (for now) require that each core goes on a distinct node.
        final List<String> nodeList = OverseerCollectionMessageHandler.getLiveOrLiveAndCreateNodeSetList(clusterState.getLiveNodes(), message, RANDOM);
        Map<ReplicaAssigner.Position, String> positionVsNodes;
        if (nodeList.isEmpty()) {
            log.warn("It is unusual to create a collection (" + collectionName + ") without cores.");
            positionVsNodes = new HashMap<>();
        } else {
            int totalNumReplicas = numNrtReplicas + numTlogReplicas + numPullReplicas;
            if (totalNumReplicas > nodeList.size()) {
                log.warn("Specified number of replicas of " + totalNumReplicas + " on collection " + collectionName + " is higher than the number of Solr instances currently live or live and part of your " + CREATE_NODE_SET + "(" + nodeList.size() + "). It's unusual to run two replica of the same slice on the same Solr-instance.");
            }
            // Capacity check: the candidate nodes together may host at most maxShardsPerNode * nodeCount cores.
            int maxShardsAllowedToCreate = maxShardsPerNode * nodeList.size();
            int requestedShardsToCreate = numSlices * totalNumReplicas;
            if (maxShardsAllowedToCreate < requestedShardsToCreate) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cannot create collection " + collectionName + ". Value of " + MAX_SHARDS_PER_NODE + " is " + maxShardsPerNode + ", and the number of nodes currently live or live and part of your " + CREATE_NODE_SET + " is " + nodeList.size() + ". This allows a maximum of " + maxShardsAllowedToCreate + " to be created. Value of " + NUM_SLICES + " is " + numSlices + ", value of " + NRT_REPLICAS + " is " + numNrtReplicas + ", value of " + TLOG_REPLICAS + " is " + numTlogReplicas + " and value of " + PULL_REPLICAS + " is " + numPullReplicas + ". This requires " + requestedShardsToCreate + " shards to be created (higher than the allowed number)");
            }
            positionVsNodes = ocmh.identifyNodes(clusterState, nodeList, message, shardNames, numNrtReplicas, numTlogReplicas, numPullReplicas);
        }
        ZkStateReader zkStateReader = ocmh.zkStateReader;
        boolean isLegacyCloud = Overseer.isLegacy(zkStateReader);
        ocmh.createConfNode(configName, collectionName, isLegacyCloud);
        // Copy message properties carrying the collection-param prefix (prefix stripped) into the params for the collection's ZK node.
        Map<String, String> collectionParams = new HashMap<>();
        Map<String, Object> collectionProps = message.getProperties();
        for (String propName : collectionProps.keySet()) {
            if (propName.startsWith(ZkController.COLLECTION_PARAM_PREFIX)) {
                collectionParams.put(propName.substring(ZkController.COLLECTION_PARAM_PREFIX.length()), (String) collectionProps.get(propName));
            }
        }
        createCollectionZkNode(zkClient, collectionName, collectionParams);
        Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
        // wait (up to 30 seconds, polling every 100 ms) until the collection shows up in the cluster state
        TimeOut waitUntil = new TimeOut(30, TimeUnit.SECONDS);
        boolean created = false;
        while (!waitUntil.hasTimedOut()) {
            Thread.sleep(100);
            created = zkStateReader.getClusterState().hasCollection(collectionName);
            if (created)
                break;
        }
        if (!created)
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully create collection: " + collectionName);
        if (nodeList.isEmpty()) {
            // No candidate nodes: the collection exists but there are no cores to create.
            log.debug("Finished create command for collection: {}", collectionName);
            return;
        }
        // For tracking async calls.
        // NOTE(review): keyed by node name, so a second core on the same node replaces the first async id - confirm this is acceptable.
        Map<String, String> requestMap = new HashMap<>();
        log.debug(formatString("Creating SolrCores for new collection {0}, shardNames {1} , nrtReplicas : {2}, tlogReplicas: {3}, pullReplicas: {4}", collectionName, shardNames, numNrtReplicas, numTlogReplicas, numPullReplicas));
        // Non-legacy mode only: requests are parked here (in insertion order) and submitted after the replica entries are visible.
        Map<String, ShardRequest> coresToCreate = new LinkedHashMap<>();
        for (Map.Entry<ReplicaAssigner.Position, String> e : positionVsNodes.entrySet()) {
            ReplicaAssigner.Position position = e.getKey();
            String nodeName = e.getValue();
            String coreName = Assign.buildCoreName(collectionName, position.shard, position.type, position.index + 1);
            log.debug(formatString("Creating core {0} as part of shard {1} of collection {2} on {3}", coreName, position.shard, collectionName, nodeName));
            String baseUrl = zkStateReader.getBaseUrlForNodeName(nodeName);
            // In non-legacy mode, enqueue an ADDREPLICA state update (replica in DOWN state) before the core is created.
            // Otherwise the core creation fails
            if (!isLegacyCloud) {
                ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, ADDREPLICA.toString(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, position.shard, ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.STATE_PROP, Replica.State.DOWN.toString(), ZkStateReader.BASE_URL_PROP, baseUrl, ZkStateReader.REPLICA_TYPE, position.type.name());
                Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
            }
            // Need to create new params for each request
            ModifiableSolrParams params = new ModifiableSolrParams();
            params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.CREATE.toString());
            params.set(CoreAdminParams.NAME, coreName);
            params.set(COLL_CONF, configName);
            params.set(CoreAdminParams.COLLECTION, collectionName);
            params.set(CoreAdminParams.SHARD, position.shard);
            params.set(ZkStateReader.NUM_SHARDS_PROP, numSlices);
            params.set(CoreAdminParams.NEW_COLLECTION, "true");
            params.set(CoreAdminParams.REPLICA_TYPE, position.type.name());
            if (async != null) {
                // Derive a per-core async id from the caller's id plus the current nano time.
                String coreAdminAsyncId = async + Math.abs(System.nanoTime());
                params.add(ASYNC, coreAdminAsyncId);
                requestMap.put(nodeName, coreAdminAsyncId);
            }
            ocmh.addPropertyParams(message, params);
            ShardRequest sreq = new ShardRequest();
            sreq.nodeName = nodeName;
            params.set("qt", ocmh.adminPath);
            sreq.purpose = 1;
            sreq.shards = new String[] { baseUrl };
            sreq.actualShards = sreq.shards;
            sreq.params = params;
            if (isLegacyCloud) {
                // Legacy mode submits immediately.
                shardHandler.submit(sreq, sreq.shards[0], sreq.params);
            } else {
                coresToCreate.put(coreName, sreq);
            }
        }
        if (!isLegacyCloud) {
            // wait for all replica entries to be created
            Map<String, Replica> replicas = ocmh.waitToSeeReplicasInState(collectionName, coresToCreate.keySet());
            for (Map.Entry<String, ShardRequest> e : coresToCreate.entrySet()) {
                ShardRequest sreq = e.getValue();
                // Attach the replica's name as the core node name before submitting the deferred request.
                sreq.params.set(CoreAdminParams.CORE_NODE_NAME, replicas.get(e.getKey()).getName());
                shardHandler.submit(sreq, sreq.shards[0], sreq.params);
            }
        }
        ocmh.processResponses(results, shardHandler, false, null, async, requestMap, Collections.emptySet());
        if (results.get("failure") != null && ((SimpleOrderedMap) results.get("failure")).size() > 0) {
            // Let's cleanup as we hit an exception
            // We shouldn't be passing 'results' here for the cleanup as the response would then contain 'success'
            // element, which may be interpreted by the user as a positive ack
            ocmh.cleanupCollection(collectionName, new NamedList());
            log.info("Cleaned up artifacts for failed create collection for [{}]", collectionName);
        } else {
            log.debug("Finished create command on all shards for collection: {}", collectionName);
        }
    } catch (SolrException ex) {
        // Already carries an error code; rethrow unchanged.
        throw ex;
    } catch (Exception ex) {
        // Anything else is reported as a server error with the original exception as cause.
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, null, ex);
    }
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TimeOut(org.apache.solr.util.TimeOut) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) StrUtils.formatString(org.apache.solr.common.util.StrUtils.formatString) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) LinkedHashMap(java.util.LinkedHashMap) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) SolrException(org.apache.solr.common.SolrException) NamedList(org.apache.solr.common.util.NamedList) ReplicaAssigner(org.apache.solr.cloud.rule.ReplicaAssigner) ShardHandler(org.apache.solr.handler.component.ShardHandler) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) ZooKeeperException(org.apache.solr.common.cloud.ZooKeeperException) NoNodeException(org.apache.zookeeper.KeeperException.NoNodeException) ShardRequest(org.apache.solr.handler.component.ShardRequest) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 24 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class DeleteCollectionCmd method call.

/**
 * Handles a "delete collection" message.
 *
 * <p>Cleans up collection-level snapshot metadata, sends a core-admin UNLOAD (deleting
 * instance and data directories) through {@code ocmh.collectionCmd}, enqueues a DELETE
 * state update and then polls the cluster state for up to 30 seconds until the collection
 * is gone. Whatever happens, the {@code finally} block removes the collection's znode if
 * it still exists.
 *
 * @param state   current cluster state (not read by this method)
 * @param message the delete request; read for the collection name and the async id
 * @param results receives the responses of the UNLOAD command
 * @throws SolrException with SERVER_ERROR if the collection is still present after the timeout
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
    ZkStateReader reader = ocmh.zkStateReader;
    final String collection = message.getStr(NAME);
    final String collectionZkPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
    try {
        // Remove the snapshots meta-data for this collection in ZK. Deleting actual index files
        // should be taken care of as part of collection delete operation.
        SolrZkClient zkClient = reader.getZkClient();
        SolrSnapshotManager.cleanupCollectionLevelSnapshots(zkClient, collection);

        // The collection is not in the clusterstate but its znode exists: stop here and
        // let the finally block below remove the znode.
        boolean missingFromClusterState = reader.getClusterState().getCollectionOrNull(collection) == null;
        if (missingFromClusterState && reader.getZkClient().exists(collectionZkPath, true)) {
            return;
        }

        ModifiableSolrParams unloadParams = new ModifiableSolrParams();
        unloadParams.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.UNLOAD.toString());
        unloadParams.set(CoreAdminParams.DELETE_INSTANCE_DIR, true);
        unloadParams.set(CoreAdminParams.DELETE_DATA_DIR, true);

        String asyncId = message.getStr(ASYNC);
        // A request map is only needed when the call is asynchronous.
        Map<String, String> requestMap = (asyncId != null) ? new HashMap<>() : null;

        Set<String> toleratedExceptions = new HashSet<>(1);
        toleratedExceptions.add(NonExistentCoreException.class.getName());
        ocmh.collectionCmd(message, unloadParams, results, null, asyncId, requestMap, toleratedExceptions);

        ZkNodeProps deleteMessage = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, collection);
        Overseer.getStateUpdateQueue(reader.getZkClient()).offer(Utils.toJSON(deleteMessage));

        // wait for a while until we don't see the collection
        TimeOut deadline = new TimeOut(30, TimeUnit.SECONDS);
        boolean gone = false;
        while (!gone && !deadline.hasTimedOut()) {
            Thread.sleep(100);
            gone = !reader.getClusterState().hasCollection(collection);
        }
        if (gone) {
            // just a bit of time so it's more likely other readers see on return
            Thread.sleep(500);
        } else {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully remove collection: " + collection);
        }
    } finally {
        // Best-effort removal of the collection znode; failures are logged, not rethrown.
        try {
            if (reader.getZkClient().exists(collectionZkPath, true)) {
                reader.getZkClient().clean(collectionZkPath);
            }
        } catch (InterruptedException interrupted) {
            SolrException.log(log, "Cleaning up collection in zk was interrupted:" + collection, interrupted);
            Thread.currentThread().interrupt();
        } catch (KeeperException zkFailure) {
            SolrException.log(log, "Problem cleaning up collection in zk:" + collection, zkFailure);
        }
    }
}
Also used : TimeOut(org.apache.solr.util.TimeOut) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) NonExistentCoreException(org.apache.solr.common.NonExistentCoreException) SolrZkClient(org.apache.solr.common.cloud.SolrZkClient) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) HashSet(java.util.HashSet)

Example 25 with ZkNodeProps

use of org.apache.solr.common.cloud.ZkNodeProps in project lucene-solr by apache.

the class DeleteShardCmd method getReplicasForSlice.

/**
 * Builds one {@link ZkNodeProps} per replica of the given slice, carrying the collection
 * name, shard name, core name, replica name and node name.
 *
 * @param collectionName name of the collection the slice belongs to
 * @param slice          slice whose replicas are described
 * @return a new list with one entry per replica, in the iteration order of {@code slice.getReplicas()}
 */
private List<ZkNodeProps> getReplicasForSlice(String collectionName, Slice slice) {
    List<ZkNodeProps> described = new ArrayList<>();
    for (Replica r : slice.getReplicas()) {
        described.add(new ZkNodeProps(
                COLLECTION_PROP, collectionName,
                SHARD_ID_PROP, slice.getName(),
                ZkStateReader.CORE_NAME_PROP, r.getCoreName(),
                ZkStateReader.REPLICA_PROP, r.getName(),
                CoreAdminParams.NODE, r.getNodeName()));
    }
    return described;
}
Also used : ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) Replica(org.apache.solr.common.cloud.Replica)

Aggregations

ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps): 91, SolrException (org.apache.solr.common.SolrException): 35, HashMap (java.util.HashMap): 28, Replica (org.apache.solr.common.cloud.Replica): 22, ZkStateReader (org.apache.solr.common.cloud.ZkStateReader): 20, ArrayList (java.util.ArrayList): 19, Slice (org.apache.solr.common.cloud.Slice): 19, KeeperException (org.apache.zookeeper.KeeperException): 19, ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 16, Test (org.junit.Test): 16, DocCollection (org.apache.solr.common.cloud.DocCollection): 15, SolrZkClient (org.apache.solr.common.cloud.SolrZkClient): 14, Map (java.util.Map): 13, ClusterState (org.apache.solr.common.cloud.ClusterState): 13, IOException (java.io.IOException): 10, ZkCoreNodeProps (org.apache.solr.common.cloud.ZkCoreNodeProps): 10, ZooKeeperException (org.apache.solr.common.cloud.ZooKeeperException): 10, NamedList (org.apache.solr.common.util.NamedList): 10, HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient): 9, SolrCore (org.apache.solr.core.SolrCore): 8