use of org.apache.solr.util.TimeOut in project lucene-solr by apache.
the class CreateAliasCmd method checkForAlias.
private void checkForAlias(String name, String value) {
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
boolean success = false;
Aliases aliases;
while (!timeout.hasTimedOut()) {
aliases = ocmh.zkStateReader.getAliases();
String collections = aliases.getCollectionAlias(name);
if (collections != null && collections.equals(value)) {
success = true;
break;
}
}
if (!success) {
log.warn("Timeout waiting to be notified of Alias change...");
}
}
use of org.apache.solr.util.TimeOut in project lucene-solr by apache.
the class LeaderFailureAfterFreshStartTest method waitTillNodesActive.
private void waitTillNodesActive() throws Exception {
for (int i = 0; i < 60; i++) {
Thread.sleep(3000);
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
ClusterState clusterState = zkStateReader.getClusterState();
DocCollection collection1 = clusterState.getCollection("collection1");
Slice slice = collection1.getSlice("shard1");
Collection<Replica> replicas = slice.getReplicas();
boolean allActive = true;
Collection<String> nodesDownNames = nodesDown.stream().map(n -> n.coreNodeName).collect(Collectors.toList());
Collection<Replica> replicasToCheck = null;
replicasToCheck = replicas.stream().filter(r -> !nodesDownNames.contains(r.getName())).collect(Collectors.toList());
for (Replica replica : replicasToCheck) {
if (!clusterState.liveNodesContain(replica.getNodeName()) || replica.getState() != Replica.State.ACTIVE) {
allActive = false;
break;
}
}
if (allActive) {
return;
}
}
printLayout();
fail("timeout waiting to see all nodes active");
}
use of org.apache.solr.util.TimeOut in project lucene-solr by apache.
the class OverseerRolesTest method waitForNewOverseer.
private void waitForNewOverseer(int seconds, Predicate<String> state) throws Exception {
TimeOut timeout = new TimeOut(seconds, TimeUnit.SECONDS);
String current = null;
while (timeout.hasTimedOut() == false) {
current = OverseerCollectionConfigSetProcessor.getLeaderNode(zkClient());
if (state.test(current))
return;
Thread.sleep(100);
}
fail("Timed out waiting for overseer state change");
}
use of org.apache.solr.util.TimeOut in project lucene-solr by apache.
the class MigrateCmd method migrateKey.
private void migrateKey(ClusterState clusterState, DocCollection sourceCollection, Slice sourceSlice, DocCollection targetCollection, Slice targetSlice, String splitKey, int timeout, NamedList results, String asyncId, ZkNodeProps message) throws Exception {
String tempSourceCollectionName = "split_" + sourceSlice.getName() + "_temp_" + targetSlice.getName();
ZkStateReader zkStateReader = ocmh.zkStateReader;
if (clusterState.hasCollection(tempSourceCollectionName)) {
log.info("Deleting temporary collection: " + tempSourceCollectionName);
Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
try {
ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
clusterState = zkStateReader.getClusterState();
} catch (Exception e) {
log.warn("Unable to clean up existing temporary collection: " + tempSourceCollectionName, e);
}
}
CompositeIdRouter sourceRouter = (CompositeIdRouter) sourceCollection.getRouter();
DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey);
ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory;
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange);
// intersect source range, keyHashRange and target range
// this is the range that has to be split from source and transferred to target
DocRouter.Range splitRange = ocmh.intersect(targetSlice.getRange(), ocmh.intersect(sourceSlice.getRange(), keyHashRange));
if (splitRange == null) {
log.info("No common hashes between source shard: {} and target shard: {}", sourceSlice.getName(), targetSlice.getName());
return;
}
log.info("Common hash range between source shard: {} and target shard: {} = " + splitRange, sourceSlice.getName(), targetSlice.getName());
Replica targetLeader = zkStateReader.getLeaderRetry(targetCollection.getName(), targetSlice.getName(), 10000);
// For tracking async calls.
Map<String, String> requestMap = new HashMap<>();
log.info("Asking target leader node: " + targetLeader.getNodeName() + " core: " + targetLeader.getStr("core") + " to buffer updates");
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTBUFFERUPDATES.toString());
params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to buffer updates", asyncId, requestMap);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.ADDROUTINGRULE.toLower(), COLLECTION_PROP, sourceCollection.getName(), SHARD_ID_PROP, sourceSlice.getName(), "routeKey", SolrIndexSplitter.getRouteKey(splitKey) + "!", "range", splitRange.toString(), "targetCollection", targetCollection.getName(), "expireAt", RoutingRule.makeExpiryAt(timeout));
log.info("Adding routing rule: " + m);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
// wait for a while until we see the new rule
log.info("Waiting to see routing rule updated in clusterstate");
TimeOut waitUntil = new TimeOut(60, TimeUnit.SECONDS);
boolean added = false;
while (!waitUntil.hasTimedOut()) {
Thread.sleep(100);
sourceCollection = zkStateReader.getClusterState().getCollection(sourceCollection.getName());
sourceSlice = sourceCollection.getSlice(sourceSlice.getName());
Map<String, RoutingRule> rules = sourceSlice.getRoutingRules();
if (rules != null) {
RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + "!");
if (rule != null && rule.getRouteRanges().contains(splitRange)) {
added = true;
break;
}
}
}
if (!added) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not add routing rule: " + m);
}
log.info("Routing rule added successfully");
// Create temp core on source shard
Replica sourceLeader = zkStateReader.getLeaderRetry(sourceCollection.getName(), sourceSlice.getName(), 10000);
// create a temporary collection with just one node on the shard leader
String configName = zkStateReader.readConfigName(sourceCollection.getName());
Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, CREATE.toLower(), NAME, tempSourceCollectionName, NRT_REPLICAS, 1, NUM_SLICES, 1, COLL_CONF, configName, CREATE_NODE_SET, sourceLeader.getNodeName());
if (asyncId != null) {
String internalAsyncId = asyncId + Math.abs(System.nanoTime());
props.put(ASYNC, internalAsyncId);
}
log.info("Creating temporary collection: " + props);
ocmh.commandMap.get(CREATE).call(clusterState, new ZkNodeProps(props), results);
// refresh cluster state
clusterState = zkStateReader.getClusterState();
Slice tempSourceSlice = clusterState.getCollection(tempSourceCollectionName).getSlices().iterator().next();
Replica tempSourceLeader = zkStateReader.getLeaderRetry(tempSourceCollectionName, tempSourceSlice.getName(), 120000);
String tempCollectionReplica1 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 1);
String coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, sourceLeader.getNodeName(), tempCollectionReplica1);
// wait for the replicas to be seen as active on temp source leader
log.info("Asking source leader to wait for: " + tempCollectionReplica1 + " to be alive on: " + sourceLeader.getNodeName());
CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
cmd.setCoreName(tempCollectionReplica1);
cmd.setNodeName(sourceLeader.getNodeName());
cmd.setCoreNodeName(coreNodeName);
cmd.setState(Replica.State.ACTIVE);
cmd.setCheckLive(true);
cmd.setOnlyIfLeader(true);
// we don't want this to happen asynchronously
ocmh.sendShardRequest(tempSourceLeader.getNodeName(), new ModifiableSolrParams(cmd.getParams()), shardHandler, null, null);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection leader" + " or timed out waiting for it to come up", asyncId, requestMap);
log.info("Asking source leader to split index");
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
params.set(CoreAdminParams.CORE, sourceLeader.getStr("core"));
params.add(CoreAdminParams.TARGET_CORE, tempSourceLeader.getStr("core"));
params.set(CoreAdminParams.RANGES, splitRange.toString());
params.set("split.key", splitKey);
String tempNodeName = sourceLeader.getNodeName();
ocmh.sendShardRequest(tempNodeName, params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to invoke SPLIT core admin command", asyncId, requestMap);
log.info("Creating a replica of temporary collection: {} on the target leader node: {}", tempSourceCollectionName, targetLeader.getNodeName());
String tempCollectionReplica2 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 2);
props = new HashMap<>();
props.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
props.put(COLLECTION_PROP, tempSourceCollectionName);
props.put(SHARD_ID_PROP, tempSourceSlice.getName());
props.put("node", targetLeader.getNodeName());
props.put(CoreAdminParams.NAME, tempCollectionReplica2);
// copy over property params:
for (String key : message.keySet()) {
if (key.startsWith(COLL_PROP_PREFIX)) {
props.put(key, message.getStr(key));
}
}
// add async param
if (asyncId != null) {
props.put(ASYNC, asyncId);
}
((AddReplicaCmd) ocmh.commandMap.get(ADDREPLICA)).addReplica(clusterState, new ZkNodeProps(props), results, null);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create replica of " + "temporary collection in target leader node.", asyncId, requestMap);
coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, targetLeader.getNodeName(), tempCollectionReplica2);
// wait for the replicas to be seen as active on temp source leader
log.info("Asking temp source leader to wait for: " + tempCollectionReplica2 + " to be alive on: " + targetLeader.getNodeName());
cmd = new CoreAdminRequest.WaitForState();
cmd.setCoreName(tempSourceLeader.getStr("core"));
cmd.setNodeName(targetLeader.getNodeName());
cmd.setCoreNodeName(coreNodeName);
cmd.setState(Replica.State.ACTIVE);
cmd.setCheckLive(true);
cmd.setOnlyIfLeader(true);
params = new ModifiableSolrParams(cmd.getParams());
ocmh.sendShardRequest(tempSourceLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection" + " replica or timed out waiting for them to come up", asyncId, requestMap);
log.info("Successfully created replica of temp source collection on target leader node");
log.info("Requesting merge of temp source collection replica to target leader");
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.MERGEINDEXES.toString());
params.set(CoreAdminParams.CORE, targetLeader.getStr("core"));
params.set(CoreAdminParams.SRC_CORE, tempCollectionReplica2);
ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
String msg = "MIGRATE failed to merge " + tempCollectionReplica2 + " to " + targetLeader.getStr("core") + " on node: " + targetLeader.getNodeName();
ocmh.processResponses(results, shardHandler, true, msg, asyncId, requestMap);
log.info("Asking target leader to apply buffered updates");
params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to apply buffered updates", asyncId, requestMap);
try {
log.info("Deleting temporary collection: " + tempSourceCollectionName);
props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
} catch (Exception e) {
log.error("Unable to delete temporary collection: " + tempSourceCollectionName + ". Please remove it manually", e);
}
}
use of org.apache.solr.util.TimeOut in project lucene-solr by apache.
the class CreateShardCmd method call.
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr(COLLECTION_PROP);
String sliceName = message.getStr(SHARD_ID_PROP);
log.info("Create shard invoked: {}", message);
if (collectionName == null || sliceName == null)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'collection' and 'shard' are required parameters");
int numSlices = 1;
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
DocCollection collection = clusterState.getCollection(collectionName);
// int repFactor = message.getInt(REPLICATION_FACTOR, collection.getInt(REPLICATION_FACTOR, 1));
int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, collection.getInt(NRT_REPLICAS, collection.getInt(REPLICATION_FACTOR, 1))));
int numPullReplicas = message.getInt(PULL_REPLICAS, collection.getInt(PULL_REPLICAS, 0));
int numTlogReplicas = message.getInt(TLOG_REPLICAS, collection.getInt(TLOG_REPLICAS, 0));
int totalReplicas = numNrtReplicas + numPullReplicas + numTlogReplicas;
if (numNrtReplicas + numTlogReplicas <= 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, NRT_REPLICAS + " + " + TLOG_REPLICAS + " must be greater than 0");
}
Object createNodeSetStr = message.get(OverseerCollectionMessageHandler.CREATE_NODE_SET);
List<Assign.ReplicaCount> sortedNodeList = getNodesForNewReplicas(clusterState, collectionName, sliceName, totalReplicas, createNodeSetStr, ocmh.overseer.getZkController().getCoreContainer());
ZkStateReader zkStateReader = ocmh.zkStateReader;
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
// wait for a while until we see the shard
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
boolean created = false;
while (!timeout.hasTimedOut()) {
Thread.sleep(100);
created = zkStateReader.getClusterState().getCollection(collectionName).getSlice(sliceName) != null;
if (created)
break;
}
if (!created)
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully create shard: " + message.getStr(NAME));
String configName = message.getStr(COLL_CONF);
String async = message.getStr(ASYNC);
Map<String, String> requestMap = null;
if (async != null) {
requestMap = new HashMap<>(totalReplicas, 1.0f);
}
int createdNrtReplicas = 0, createdTlogReplicas = 0, createdPullReplicas = 0;
for (int j = 1; j <= totalReplicas; j++) {
int coreNameNumber;
Replica.Type typeToCreate;
if (createdNrtReplicas < numNrtReplicas) {
createdNrtReplicas++;
coreNameNumber = createdNrtReplicas;
typeToCreate = Replica.Type.NRT;
} else if (createdTlogReplicas < numTlogReplicas) {
createdTlogReplicas++;
coreNameNumber = createdTlogReplicas;
typeToCreate = Replica.Type.TLOG;
} else {
createdPullReplicas++;
coreNameNumber = createdPullReplicas;
typeToCreate = Replica.Type.PULL;
}
String nodeName = sortedNodeList.get(((j - 1)) % sortedNodeList.size()).nodeName;
String coreName = Assign.buildCoreName(collectionName, sliceName, typeToCreate, coreNameNumber);
// String coreName = collectionName + "_" + sliceName + "_replica" + j;
log.info("Creating replica " + coreName + " as part of slice " + sliceName + " of collection " + collectionName + " on " + nodeName);
// Need to create new params for each request
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.CREATE.toString());
params.set(CoreAdminParams.NAME, coreName);
params.set(CoreAdminParams.REPLICA_TYPE, typeToCreate.name());
params.set(COLL_CONF, configName);
params.set(CoreAdminParams.COLLECTION, collectionName);
params.set(CoreAdminParams.SHARD, sliceName);
params.set(ZkStateReader.NUM_SHARDS_PROP, numSlices);
ocmh.addPropertyParams(message, params);
ocmh.sendShardRequest(nodeName, params, shardHandler, async, requestMap);
}
ocmh.processResponses(results, shardHandler, true, "Failed to create shard", async, requestMap, Collections.emptySet());
log.info("Finished create command on all shards for collection: " + collectionName);
}
Aggregations