Search in sources :

Example 31 with TimeOut

use of org.apache.solr.util.TimeOut in project lucene-solr by apache.

the class CreateAliasCmd method checkForAlias.

/**
 * Waits until the alias {@code name}, as reported by {@code ocmh.zkStateReader}, maps to
 * {@code value}.
 *
 * <p>The aliases are re-read every 100 ms for at most 30 seconds. If the expected mapping is
 * never observed, a warning is logged and the method returns normally; no exception is thrown.
 * If the waiting thread is interrupted, the interrupt flag is restored and the wait is
 * abandoned.
 *
 * @param name  the alias to look up
 * @param value the collection list the alias is expected to resolve to
 */
private void checkForAlias(String name, String value) {
    TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
    while (!timeout.hasTimedOut()) {
        Aliases aliases = ocmh.zkStateReader.getAliases();
        String collections = aliases.getCollectionAlias(name);
        if (collections != null && collections.equals(value)) {
            return;
        }
        // Back off between polls instead of spinning on the state reader at full CPU.
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            // This method declares no checked exceptions, so preserve the interrupt for callers.
            Thread.currentThread().interrupt();
            log.warn("Interrupted while waiting to be notified of Alias change...");
            return;
        }
    }
    log.warn("Timeout waiting to be notified of Alias change...");
}
Also used : TimeOut(org.apache.solr.util.TimeOut) Aliases(org.apache.solr.common.cloud.Aliases)

Example 32 with TimeOut

use of org.apache.solr.util.TimeOut in project lucene-solr by apache.

the class LeaderFailureAfterFreshStartTest method waitTillNodesActive.

/**
 * Blocks until every replica of {@code collection1}/{@code shard1} that is not listed in
 * {@code nodesDown} is on a live node and in the ACTIVE state.
 *
 * <p>The cluster state is inspected up to 60 times, sleeping 3 seconds before each inspection.
 * If the condition is never met, the layout is printed and the test is failed.
 *
 * @throws Exception if the sleep is interrupted or reading the cluster state fails
 */
private void waitTillNodesActive() throws Exception {
    int attemptsLeft = 60;
    while (attemptsLeft-- > 0) {
        Thread.sleep(3000);
        ClusterState state = cloudClient.getZkStateReader().getClusterState();
        Collection<Replica> shardReplicas = state.getCollection("collection1").getSlice("shard1").getReplicas();
        // Replicas on nodes that were deliberately taken down are excluded from the check.
        Collection<String> downNames = nodesDown.stream().map(n -> n.coreNodeName).collect(Collectors.toList());
        boolean everyReplicaActive = shardReplicas.stream()
                .filter(r -> !downNames.contains(r.getName()))
                .allMatch(r -> state.liveNodesContain(r.getNodeName()) && r.getState() == Replica.State.ACTIVE);
        if (everyReplicaActive) {
            return;
        }
    }
    printLayout();
    fail("timeout waiting to see all nodes active");
}
Also used : ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Slow(org.apache.lucene.util.LuceneTestCase.Slow) DocCollection(org.apache.solr.common.cloud.DocCollection) ClusterState(org.apache.solr.common.cloud.ClusterState) LoggerFactory(org.slf4j.LoggerFactory) ArrayList(java.util.ArrayList) Collections.singletonList(java.util.Collections.singletonList) HashSet(java.util.HashSet) SolrServerException(org.apache.solr.client.solrj.SolrServerException) RandomStringUtils(org.apache.commons.lang.RandomStringUtils) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Slice(org.apache.solr.common.cloud.Slice) Logger(org.slf4j.Logger) Files(java.nio.file.Files) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) MethodHandles(java.lang.invoke.MethodHandles) Collection(java.util.Collection) Set(java.util.Set) IOException(java.io.IOException) Test(org.junit.Test) TimeOut(org.apache.solr.util.TimeOut) Collectors(java.util.stream.Collectors) Replica(org.apache.solr.common.cloud.Replica) List(java.util.List) Paths(java.nio.file.Paths) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) LimitViolationAction(org.apache.solr.cloud.ZkTestServer.LimitViolationAction) DigestUtils(org.apache.commons.codec.digest.DigestUtils) SECONDS(java.util.concurrent.TimeUnit.SECONDS) SolrInputDocument(org.apache.solr.common.SolrInputDocument) ClusterState(org.apache.solr.common.cloud.ClusterState) Slice(org.apache.solr.common.cloud.Slice) DocCollection(org.apache.solr.common.cloud.DocCollection) Replica(org.apache.solr.common.cloud.Replica)

Example 33 with TimeOut

use of org.apache.solr.util.TimeOut in project lucene-solr by apache.

the class OverseerRolesTest method waitForNewOverseer.

/**
 * Polls the current overseer leader node until it satisfies {@code state}.
 *
 * <p>The leader is looked up every 100 ms. If {@code seconds} elapse without the predicate
 * accepting the leader, the test is failed.
 *
 * @param seconds maximum time to wait, in seconds
 * @param state   condition the leader node name must satisfy
 * @throws Exception if the leader lookup fails or the sleep is interrupted
 */
private void waitForNewOverseer(int seconds, Predicate<String> state) throws Exception {
    final TimeOut deadline = new TimeOut(seconds, TimeUnit.SECONDS);
    while (!deadline.hasTimedOut()) {
        String leaderNode = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
        if (state.test(leaderNode)) {
            return;
        }
        Thread.sleep(100);
    }
    fail("Timed out waiting for overseer state change");
}
Also used : TimeOut(org.apache.solr.util.TimeOut)

Example 34 with TimeOut

use of org.apache.solr.util.TimeOut in project lucene-solr by apache.

the class MigrateCmd method migrateKey.

/**
 * Migrates the documents matching {@code splitKey} from one source shard to one target shard
 * by way of a temporary collection.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Delete a leftover temporary collection of the same name, if the cluster state has one
 *       (a failure here is only logged).</li>
 *   <li>Intersect the key's hash range with the source and target shard ranges; return early
 *       if there is no overlap.</li>
 *   <li>Ask the target leader to buffer updates.</li>
 *   <li>Queue an ADDROUTINGRULE message and wait up to 60 seconds for the rule to show up on
 *       the source slice.</li>
 *   <li>Create a one-replica temporary collection on the source leader's node and wait for it
 *       to become active.</li>
 *   <li>Ask the source leader to SPLIT the computed range into the temporary core.</li>
 *   <li>Add a replica of the temporary collection on the target leader's node and wait for it
 *       to become active.</li>
 *   <li>Ask the target leader to MERGEINDEXES from that replica, then to apply its buffered
 *       updates.</li>
 *   <li>Delete the temporary collection (a failure here is only logged).</li>
 * </ol>
 *
 * @param clusterState     cluster state used for the leftover check and the temp collection creation
 * @param sourceCollection collection the key is migrated from
 * @param sourceSlice      shard of the source collection to split from
 * @param targetCollection collection the key is migrated to
 * @param targetSlice      shard of the target collection to merge into
 * @param splitKey         route key whose documents are migrated
 * @param timeout          passed to {@code RoutingRule.makeExpiryAt} to compute the rule's "expireAt" value
 * @param results          accumulator handed to the sub-commands and to {@code processResponses}
 * @param asyncId          async request id, or {@code null} for synchronous processing
 * @param message          original request; properties starting with {@code COLL_PROP_PREFIX} are copied to the ADDREPLICA call
 * @throws Exception if the routing rule is not observed in time (as a {@link SolrException}) or any step fails
 */
private void migrateKey(ClusterState clusterState, DocCollection sourceCollection, Slice sourceSlice, DocCollection targetCollection, Slice targetSlice, String splitKey, int timeout, NamedList results, String asyncId, ZkNodeProps message) throws Exception {
    String tempSourceCollectionName = "split_" + sourceSlice.getName() + "_temp_" + targetSlice.getName();
    ZkStateReader zkStateReader = ocmh.zkStateReader;
    // Remove a temporary collection of the same name if one is already present; best effort only.
    if (clusterState.hasCollection(tempSourceCollectionName)) {
        log.info("Deleting temporary collection: " + tempSourceCollectionName);
        Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
        try {
            ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
            clusterState = zkStateReader.getClusterState();
        } catch (Exception e) {
            log.warn("Unable to clean up existing temporary collection: " + tempSourceCollectionName, e);
        }
    }
    CompositeIdRouter sourceRouter = (CompositeIdRouter) sourceCollection.getRouter();
    DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey);
    ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory;
    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
    log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange);
    // intersect source range, keyHashRange and target range
    // this is the range that has to be split from source and transferred to target
    DocRouter.Range splitRange = ocmh.intersect(targetSlice.getRange(), ocmh.intersect(sourceSlice.getRange(), keyHashRange));
    if (splitRange == null) {
        log.info("No common hashes between source shard: {} and target shard: {}", sourceSlice.getName(), targetSlice.getName());
        return;
    }
    log.info("Common hash range between source shard: {} and target shard: {} = " + splitRange, sourceSlice.getName(), targetSlice.getName());
    Replica targetLeader = zkStateReader.getLeaderRetry(targetCollection.getName(), targetSlice.getName(), 10000);
    // For tracking async calls.
    Map<String, String> requestMap = new HashMap<>();
    // Step: have the target leader buffer incoming updates.
    log.info("Asking target leader node: " + targetLeader.getNodeName() + " core: " + targetLeader.getStr("core") + " to buffer updates");
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTBUFFERUPDATES.toString());
    params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
    ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to buffer updates", asyncId, requestMap);
    // Step: queue a routing rule on the source slice for the route key ("<key>!") and split range.
    ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.ADDROUTINGRULE.toLower(), COLLECTION_PROP, sourceCollection.getName(), SHARD_ID_PROP, sourceSlice.getName(), "routeKey", SolrIndexSplitter.getRouteKey(splitKey) + "!", "range", splitRange.toString(), "targetCollection", targetCollection.getName(), "expireAt", RoutingRule.makeExpiryAt(timeout));
    log.info("Adding routing rule: " + m);
    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
    // wait for a while until we see the new rule
    log.info("Waiting to see routing rule updated in clusterstate");
    TimeOut waitUntil = new TimeOut(60, TimeUnit.SECONDS);
    boolean added = false;
    // Poll every 100 ms; sourceCollection and sourceSlice are refreshed from the latest cluster state.
    while (!waitUntil.hasTimedOut()) {
        Thread.sleep(100);
        sourceCollection = zkStateReader.getClusterState().getCollection(sourceCollection.getName());
        sourceSlice = sourceCollection.getSlice(sourceSlice.getName());
        Map<String, RoutingRule> rules = sourceSlice.getRoutingRules();
        if (rules != null) {
            RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + "!");
            if (rule != null && rule.getRouteRanges().contains(splitRange)) {
                added = true;
                break;
            }
        }
    }
    if (!added) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not add routing rule: " + m);
    }
    log.info("Routing rule added successfully");
    // Create temp core on source shard
    Replica sourceLeader = zkStateReader.getLeaderRetry(sourceCollection.getName(), sourceSlice.getName(), 10000);
    // create a temporary collection with just one node on the shard leader
    String configName = zkStateReader.readConfigName(sourceCollection.getName());
    Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, CREATE.toLower(), NAME, tempSourceCollectionName, NRT_REPLICAS, 1, NUM_SLICES, 1, COLL_CONF, configName, CREATE_NODE_SET, sourceLeader.getNodeName());
    if (asyncId != null) {
        // The nested CREATE gets its own async id, derived from the caller's id plus a nanoTime suffix.
        String internalAsyncId = asyncId + Math.abs(System.nanoTime());
        props.put(ASYNC, internalAsyncId);
    }
    log.info("Creating temporary collection: " + props);
    ocmh.commandMap.get(CREATE).call(clusterState, new ZkNodeProps(props), results);
    // refresh cluster state
    clusterState = zkStateReader.getClusterState();
    // The temporary collection was created with a single slice, so take the first (only) one.
    Slice tempSourceSlice = clusterState.getCollection(tempSourceCollectionName).getSlices().iterator().next();
    Replica tempSourceLeader = zkStateReader.getLeaderRetry(tempSourceCollectionName, tempSourceSlice.getName(), 120000);
    String tempCollectionReplica1 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 1);
    String coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, sourceLeader.getNodeName(), tempCollectionReplica1);
    // wait for the replicas to be seen as active on temp source leader
    log.info("Asking source leader to wait for: " + tempCollectionReplica1 + " to be alive on: " + sourceLeader.getNodeName());
    CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
    cmd.setCoreName(tempCollectionReplica1);
    cmd.setNodeName(sourceLeader.getNodeName());
    cmd.setCoreNodeName(coreNodeName);
    cmd.setState(Replica.State.ACTIVE);
    cmd.setCheckLive(true);
    cmd.setOnlyIfLeader(true);
    // we don't want this to happen asynchronously
    ocmh.sendShardRequest(tempSourceLeader.getNodeName(), new ModifiableSolrParams(cmd.getParams()), shardHandler, null, null);
    // NOTE(review): the request above is sent without an async id or request map, but the
    // responses are processed with asyncId/requestMap - confirm this combination is intended.
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection leader" + " or timed out waiting for it to come up", asyncId, requestMap);
    // Step: split the computed range out of the source leader core into the temporary core.
    log.info("Asking source leader to split index");
    params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
    params.set(CoreAdminParams.CORE, sourceLeader.getStr("core"));
    params.add(CoreAdminParams.TARGET_CORE, tempSourceLeader.getStr("core"));
    params.set(CoreAdminParams.RANGES, splitRange.toString());
    params.set("split.key", splitKey);
    String tempNodeName = sourceLeader.getNodeName();
    ocmh.sendShardRequest(tempNodeName, params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to invoke SPLIT core admin command", asyncId, requestMap);
    // Step: add a second replica of the temporary collection on the target leader's node.
    log.info("Creating a replica of temporary collection: {} on the target leader node: {}", tempSourceCollectionName, targetLeader.getNodeName());
    String tempCollectionReplica2 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 2);
    props = new HashMap<>();
    props.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
    props.put(COLLECTION_PROP, tempSourceCollectionName);
    props.put(SHARD_ID_PROP, tempSourceSlice.getName());
    props.put("node", targetLeader.getNodeName());
    props.put(CoreAdminParams.NAME, tempCollectionReplica2);
    // copy over property params:
    for (String key : message.keySet()) {
        if (key.startsWith(COLL_PROP_PREFIX)) {
            props.put(key, message.getStr(key));
        }
    }
    // add async param
    if (asyncId != null) {
        props.put(ASYNC, asyncId);
    }
    ((AddReplicaCmd) ocmh.commandMap.get(ADDREPLICA)).addReplica(clusterState, new ZkNodeProps(props), results, null);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create replica of " + "temporary collection in target leader node.", asyncId, requestMap);
    coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, targetLeader.getNodeName(), tempCollectionReplica2);
    // wait for the replicas to be seen as active on temp source leader
    log.info("Asking temp source leader to wait for: " + tempCollectionReplica2 + " to be alive on: " + targetLeader.getNodeName());
    cmd = new CoreAdminRequest.WaitForState();
    // The wait request names the temp source leader's core and is sent to the temp source leader's node.
    cmd.setCoreName(tempSourceLeader.getStr("core"));
    cmd.setNodeName(targetLeader.getNodeName());
    cmd.setCoreNodeName(coreNodeName);
    cmd.setState(Replica.State.ACTIVE);
    cmd.setCheckLive(true);
    cmd.setOnlyIfLeader(true);
    params = new ModifiableSolrParams(cmd.getParams());
    ocmh.sendShardRequest(tempSourceLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection" + " replica or timed out waiting for them to come up", asyncId, requestMap);
    log.info("Successfully created replica of temp source collection on target leader node");
    // Step: merge the temporary replica's index into the target leader core.
    log.info("Requesting merge of temp source collection replica to target leader");
    params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.MERGEINDEXES.toString());
    params.set(CoreAdminParams.CORE, targetLeader.getStr("core"));
    params.set(CoreAdminParams.SRC_CORE, tempCollectionReplica2);
    ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    String msg = "MIGRATE failed to merge " + tempCollectionReplica2 + " to " + targetLeader.getStr("core") + " on node: " + targetLeader.getNodeName();
    ocmh.processResponses(results, shardHandler, true, msg, asyncId, requestMap);
    // Step: have the target leader apply the updates it buffered since the start of the migration.
    log.info("Asking target leader to apply buffered updates");
    params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
    params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
    ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
    ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to apply buffered updates", asyncId, requestMap);
    // Final cleanup is best effort: a failure is logged and the migration still completes.
    try {
        log.info("Deleting temporary collection: " + tempSourceCollectionName);
        props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
        ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
    } catch (Exception e) {
        log.error("Unable to delete temporary collection: " + tempSourceCollectionName + ". Please remove it manually", e);
    }
}
Also used : HashMap(java.util.HashMap) TimeOut(org.apache.solr.util.TimeOut) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) CoreAdminRequest(org.apache.solr.client.solrj.request.CoreAdminRequest) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) DocRouter(org.apache.solr.common.cloud.DocRouter) RoutingRule(org.apache.solr.common.cloud.RoutingRule) SolrException(org.apache.solr.common.SolrException) ShardHandlerFactory(org.apache.solr.handler.component.ShardHandlerFactory) ShardHandler(org.apache.solr.handler.component.ShardHandler) Replica(org.apache.solr.common.cloud.Replica) SolrException(org.apache.solr.common.SolrException) CompositeIdRouter(org.apache.solr.common.cloud.CompositeIdRouter) Slice(org.apache.solr.common.cloud.Slice)

Example 35 with TimeOut

use of org.apache.solr.util.TimeOut in project lucene-solr by apache.

the class CreateShardCmd method call.

/**
 * Creates a new shard in an existing collection and one core per requested replica.
 *
 * <p>The request is validated, candidate nodes are selected, and the original message is
 * offered to the overseer state update queue. The method then polls the cluster state every
 * 100 ms, for at most 30 seconds, until the new slice is visible. Once it is, one CREATE core
 * admin request is sent per replica (NRT first, then TLOG, then PULL), cycling through the
 * selected nodes, and the responses are collected into {@code results}.
 *
 * @param clusterState cluster state used to look up the collection and choose nodes
 * @param message      request properties; must contain the collection and shard names
 * @param results      accumulator for the responses of the core creation requests
 * @throws SolrException with BAD_REQUEST if the collection or shard name is missing or if
 *                       NRT + TLOG replica counts are not positive; with SERVER_ERROR if the
 *                       shard does not appear in the cluster state within 30 seconds
 * @throws Exception     if queueing the message, sleeping, or sending requests fails
 */
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
    String collectionName = message.getStr(COLLECTION_PROP);
    String sliceName = message.getStr(SHARD_ID_PROP);
    log.info("Create shard invoked: {}", message);
    if (collectionName == null || sliceName == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'collection' and 'shard' are required parameters");
    }
    int numSlices = 1;
    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
    DocCollection collection = clusterState.getCollection(collectionName);
    // Replica counts: the request wins, then the collection's own setting, then the default.
    int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, collection.getInt(NRT_REPLICAS, collection.getInt(REPLICATION_FACTOR, 1))));
    int numPullReplicas = message.getInt(PULL_REPLICAS, collection.getInt(PULL_REPLICAS, 0));
    int numTlogReplicas = message.getInt(TLOG_REPLICAS, collection.getInt(TLOG_REPLICAS, 0));
    int totalReplicas = numNrtReplicas + numPullReplicas + numTlogReplicas;
    if (numNrtReplicas + numTlogReplicas <= 0) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NRT_REPLICAS + " + " + TLOG_REPLICAS + " must be greater than 0");
    }
    Object createNodeSetStr = message.get(OverseerCollectionMessageHandler.CREATE_NODE_SET);
    List<Assign.ReplicaCount> sortedNodeList = getNodesForNewReplicas(clusterState, collectionName, sliceName, totalReplicas, createNodeSetStr, ocmh.overseer.getZkController().getCoreContainer());
    ZkStateReader zkStateReader = ocmh.zkStateReader;
    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
    // wait for a while until we see the shard
    TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
    boolean created = false;
    while (!timeout.hasTimedOut()) {
        Thread.sleep(100);
        created = zkStateReader.getClusterState().getCollection(collectionName).getSlice(sliceName) != null;
        if (created) {
            break;
        }
    }
    if (!created) {
        // Report the shard we actually waited for; NAME is never read or validated by this command.
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully create shard: " + sliceName);
    }
    String configName = message.getStr(COLL_CONF);
    String async = message.getStr(ASYNC);
    // The request map is only needed to track async requests.
    Map<String, String> requestMap = null;
    if (async != null) {
        requestMap = new HashMap<>(totalReplicas, 1.0f);
    }
    int createdNrtReplicas = 0, createdTlogReplicas = 0, createdPullReplicas = 0;
    for (int j = 1; j <= totalReplicas; j++) {
        // Core names are numbered per replica type, so each type keeps its own counter.
        int coreNameNumber;
        Replica.Type typeToCreate;
        if (createdNrtReplicas < numNrtReplicas) {
            createdNrtReplicas++;
            coreNameNumber = createdNrtReplicas;
            typeToCreate = Replica.Type.NRT;
        } else if (createdTlogReplicas < numTlogReplicas) {
            createdTlogReplicas++;
            coreNameNumber = createdTlogReplicas;
            typeToCreate = Replica.Type.TLOG;
        } else {
            createdPullReplicas++;
            coreNameNumber = createdPullReplicas;
            typeToCreate = Replica.Type.PULL;
        }
        // Cycle through the candidate nodes when there are more replicas than nodes.
        String nodeName = sortedNodeList.get((j - 1) % sortedNodeList.size()).nodeName;
        String coreName = Assign.buildCoreName(collectionName, sliceName, typeToCreate, coreNameNumber);
        log.info("Creating replica {} as part of slice {} of collection {} on {}", coreName, sliceName, collectionName, nodeName);
        // Need to create new params for each request
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.CREATE.toString());
        params.set(CoreAdminParams.NAME, coreName);
        params.set(CoreAdminParams.REPLICA_TYPE, typeToCreate.name());
        params.set(COLL_CONF, configName);
        params.set(CoreAdminParams.COLLECTION, collectionName);
        params.set(CoreAdminParams.SHARD, sliceName);
        params.set(ZkStateReader.NUM_SHARDS_PROP, numSlices);
        ocmh.addPropertyParams(message, params);
        ocmh.sendShardRequest(nodeName, params, shardHandler, async, requestMap);
    }
    ocmh.processResponses(results, shardHandler, true, "Failed to create shard", async, requestMap, Collections.emptySet());
    log.info("Finished create command on all shards for collection: {}", collectionName);
}
Also used : TimeOut(org.apache.solr.util.TimeOut) ShardHandler(org.apache.solr.handler.component.ShardHandler) Replica(org.apache.solr.common.cloud.Replica) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) DocCollection(org.apache.solr.common.cloud.DocCollection) SolrException(org.apache.solr.common.SolrException)

Aggregations

TimeOut (org.apache.solr.util.TimeOut)48 SolrException (org.apache.solr.common.SolrException)15 Slice (org.apache.solr.common.cloud.Slice)15 DocCollection (org.apache.solr.common.cloud.DocCollection)14 Replica (org.apache.solr.common.cloud.Replica)13 SolrQuery (org.apache.solr.client.solrj.SolrQuery)11 ZkStateReader (org.apache.solr.common.cloud.ZkStateReader)8 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)8 HashMap (java.util.HashMap)7 Test (org.junit.Test)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)6 SolrInputDocument (org.apache.solr.common.SolrInputDocument)6 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)6 NamedList (org.apache.solr.common.util.NamedList)6 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)5 Map (java.util.Map)4 SolrServerException (org.apache.solr.client.solrj.SolrServerException)4 Collections.singletonList (java.util.Collections.singletonList)3 HashSet (java.util.HashSet)3