Use of org.apache.solr.common.cloud.CompositeIdRouter in project lucene-solr by apache.
From the class SolrIndexSplitterTest, method testSplitByRouteKey:
@Test
public void testSplitByRouteKey() throws Exception {
  File indexDir = createTempDir().toFile();
  CompositeIdRouter r1 = new CompositeIdRouter();
  String splitKey = "sea-line!";
  String key2 = "soul-raising!";
  // murmur2 has a collision on the above two keys
  assertEquals(r1.keyHashRange(splitKey), r1.keyHashRange(key2));
  for (int i = 0; i < 10; i++) {
    assertU(adoc("id", splitKey + i));
    assertU(adoc("id", key2 + i));
  }
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==20");
  DocRouter.Range splitKeyRange = r1.keyHashRange(splitKey);
  LocalSolrQueryRequest request = null;
  Directory directory = null;
  try {
    request = lrf.makeRequest("q", "dummy");
    SplitIndexCommand command = new SplitIndexCommand(request, Lists.newArrayList(indexDir.getAbsolutePath()), null, Lists.newArrayList(splitKeyRange), new CompositeIdRouter(), null, splitKey);
    new SolrIndexSplitter(command).split();
    directory = h.getCore().getDirectoryFactory().get(indexDir.getAbsolutePath(), DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("split index has wrong number of documents", 10, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
    directory = null;
  } finally {
    if (request != null) {
      request.close();
    }
    if (directory != null) {
      h.getCore().getDirectoryFactory().release(directory);
    }
  }
}
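The test hinges on CompositeIdRouter.keyHashRange mapping an id's route-key prefix to a hash range, so that both colliding keys (and all ids sharing either prefix) land in the same range. A minimal standalone sketch of that behavior, assuming only solr-solrj on the classpath (the class name KeyHashRangeDemo is invented for illustration):

import org.apache.solr.common.cloud.CompositeIdRouter;
import org.apache.solr.common.cloud.DocRouter;

public class KeyHashRangeDemo {
  public static void main(String[] args) {
    CompositeIdRouter router = new CompositeIdRouter();
    // The range is derived from the prefix before '!' only, so every id with the
    // same prefix maps to the same range; these two prefixes collide, per the test.
    DocRouter.Range seaLine = router.keyHashRange("sea-line!");
    DocRouter.Range soulRaising = router.keyHashRange("soul-raising!");
    System.out.println("sea-line!     -> " + seaLine);
    System.out.println("soul-raising! -> " + soulRaising);
    System.out.println("equal ranges: " + seaLine.equals(soulRaising));
  }
}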
Use of org.apache.solr.common.cloud.CompositeIdRouter in project lucene-solr by apache.
From the class ShardSplitTest, method splitByRouteKeyTest:
private void splitByRouteKeyTest() throws Exception {
  log.info("Starting splitByRouteKeyTest");
  String collectionName = "splitByRouteKeyTest";
  int numShards = 4;
  int replicationFactor = 2;
  int maxShardsPerNode = (((numShards * replicationFactor) / getCommonCloudSolrClient().getZkStateReader().getClusterState().getLiveNodes().size())) + 1;
  HashMap<String, List<Integer>> collectionInfos = new HashMap<>();
  try (CloudSolrClient client = createCloudClient(null)) {
    Map<String, Object> props = Utils.makeMap(REPLICATION_FACTOR, replicationFactor, MAX_SHARDS_PER_NODE, maxShardsPerNode, NUM_SLICES, numShards);
    createCollection(collectionInfos, collectionName, props, client);
  }
  List<Integer> list = collectionInfos.get(collectionName);
  checkForCollection(collectionName, list, null);
  waitForRecoveriesToFinish(false);
  String url = getUrlFromZk(getCommonCloudSolrClient().getZkStateReader().getClusterState(), collectionName);
  try (HttpSolrClient collectionClient = getHttpSolrClient(url)) {
    String splitKey = "b!";
    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    final DocRouter router = clusterState.getCollection(collectionName).getRouter();
    Slice shard1 = clusterState.getSlice(collectionName, SHARD1);
    DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
    final List<DocRouter.Range> ranges = ((CompositeIdRouter) router).partitionRangeByKey(splitKey, shard1Range);
    final int[] docCounts = new int[ranges.size()];
    int uniqIdentifier = (1 << 12);
    int splitKeyDocCount = 0;
    for (int i = 100; i <= 200; i++) {
      // See comment in ShardRoutingTest for hash distribution
      String shardKey = "" + (char) ('a' + (i % 26));
      String idStr = shardKey + "!" + i;
      collectionClient.add(getDoc(id, idStr, "n_ti", (shardKey + "!").equals(splitKey) ? uniqIdentifier : i));
      int idx = getHashRangeIdx(router, ranges, idStr);
      if (idx != -1) {
        docCounts[idx]++;
      }
      if (splitKey.equals(shardKey + "!"))
        splitKeyDocCount++;
    }
    for (int i = 0; i < docCounts.length; i++) {
      int docCount = docCounts[i];
      log.info("Shard {} docCount = {}", "shard1_" + i, docCount);
    }
    log.info("Route key doc count = {}", splitKeyDocCount);
    collectionClient.commit();
    for (int i = 0; i < 3; i++) {
      try {
        splitShard(collectionName, null, null, splitKey);
        break;
      } catch (HttpSolrClient.RemoteSolrException e) {
        if (e.code() != 500) {
          throw e;
        }
log.error("SPLITSHARD failed. " + (i < 2 ? " Retring split" : ""), e);
        if (i == 2) {
          fail("SPLITSHARD was not successful even after three tries");
        }
      }
    }
    waitForRecoveriesToFinish(collectionName, false);
    SolrQuery solrQuery = new SolrQuery("*:*");
    assertEquals("DocCount on shard1_0 does not match", docCounts[0], collectionClient.query(solrQuery.setParam("shards", "shard1_0")).getResults().getNumFound());
    assertEquals("DocCount on shard1_1 does not match", docCounts[1], collectionClient.query(solrQuery.setParam("shards", "shard1_1")).getResults().getNumFound());
    assertEquals("DocCount on shard1_2 does not match", docCounts[2], collectionClient.query(solrQuery.setParam("shards", "shard1_2")).getResults().getNumFound());
    solrQuery = new SolrQuery("n_ti:" + uniqIdentifier);
    assertEquals("shard1_0 must have 0 docs for route key: " + splitKey, 0, collectionClient.query(solrQuery.setParam("shards", "shard1_0")).getResults().getNumFound());
    assertEquals("Wrong number of docs on shard1_1 for route key: " + splitKey, splitKeyDocCount, collectionClient.query(solrQuery.setParam("shards", "shard1_1")).getResults().getNumFound());
    assertEquals("shard1_2 must have 0 docs for route key: " + splitKey, 0, collectionClient.query(solrQuery.setParam("shards", "shard1_2")).getResults().getNumFound());
  }
}
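The call to partitionRangeByKey above is what determines how many sub-ranges (and thus sub-shards shard1_0, shard1_1, shard1_2) a split on a route key produces. A minimal sketch, again assuming only solr-solrj on the classpath:

CompositeIdRouter router = new CompositeIdRouter();
// Partition the full hash range around the range owned by route key "b!".
// For a key strictly inside the shard's range this typically yields three parts:
// everything below the key's range, the key's range itself, and everything above it.
List<DocRouter.Range> parts = router.partitionRangeByKey("b!", router.fullRange());
for (DocRouter.Range part : parts) {
  System.out.println(part);
}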
Use of org.apache.solr.common.cloud.CompositeIdRouter in project lucene-solr by apache.
From the class MigrateCmd, method migrateKey:
private void migrateKey(ClusterState clusterState, DocCollection sourceCollection, Slice sourceSlice, DocCollection targetCollection, Slice targetSlice, String splitKey, int timeout, NamedList results, String asyncId, ZkNodeProps message) throws Exception {
  String tempSourceCollectionName = "split_" + sourceSlice.getName() + "_temp_" + targetSlice.getName();
  ZkStateReader zkStateReader = ocmh.zkStateReader;
  if (clusterState.hasCollection(tempSourceCollectionName)) {
    log.info("Deleting temporary collection: " + tempSourceCollectionName);
    Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
    try {
      ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
      clusterState = zkStateReader.getClusterState();
    } catch (Exception e) {
      log.warn("Unable to clean up existing temporary collection: " + tempSourceCollectionName, e);
    }
  }
  CompositeIdRouter sourceRouter = (CompositeIdRouter) sourceCollection.getRouter();
  DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey);
  ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory;
  ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
  log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange);
  // intersect source range, keyHashRange and target range
  // this is the range that has to be split from source and transferred to target
  DocRouter.Range splitRange = ocmh.intersect(targetSlice.getRange(), ocmh.intersect(sourceSlice.getRange(), keyHashRange));
  if (splitRange == null) {
    log.info("No common hashes between source shard: {} and target shard: {}", sourceSlice.getName(), targetSlice.getName());
    return;
  }
  log.info("Common hash range between source shard: {} and target shard: {} = " + splitRange, sourceSlice.getName(), targetSlice.getName());
  Replica targetLeader = zkStateReader.getLeaderRetry(targetCollection.getName(), targetSlice.getName(), 10000);
  // For tracking async calls.
  Map<String, String> requestMap = new HashMap<>();
  log.info("Asking target leader node: " + targetLeader.getNodeName() + " core: " + targetLeader.getStr("core") + " to buffer updates");
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTBUFFERUPDATES.toString());
  params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
  ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
  ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to buffer updates", asyncId, requestMap);
  ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.ADDROUTINGRULE.toLower(), COLLECTION_PROP, sourceCollection.getName(), SHARD_ID_PROP, sourceSlice.getName(), "routeKey", SolrIndexSplitter.getRouteKey(splitKey) + "!", "range", splitRange.toString(), "targetCollection", targetCollection.getName(), "expireAt", RoutingRule.makeExpiryAt(timeout));
  log.info("Adding routing rule: " + m);
  Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
  // wait for a while until we see the new rule
  log.info("Waiting to see routing rule updated in clusterstate");
  TimeOut waitUntil = new TimeOut(60, TimeUnit.SECONDS);
  boolean added = false;
  while (!waitUntil.hasTimedOut()) {
    Thread.sleep(100);
    sourceCollection = zkStateReader.getClusterState().getCollection(sourceCollection.getName());
    sourceSlice = sourceCollection.getSlice(sourceSlice.getName());
    Map<String, RoutingRule> rules = sourceSlice.getRoutingRules();
    if (rules != null) {
      RoutingRule rule = rules.get(SolrIndexSplitter.getRouteKey(splitKey) + "!");
      if (rule != null && rule.getRouteRanges().contains(splitRange)) {
        added = true;
        break;
      }
    }
  }
  if (!added) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not add routing rule: " + m);
  }
  log.info("Routing rule added successfully");
  // Create temp core on source shard
  Replica sourceLeader = zkStateReader.getLeaderRetry(sourceCollection.getName(), sourceSlice.getName(), 10000);
  // create a temporary collection with just one node on the shard leader
  String configName = zkStateReader.readConfigName(sourceCollection.getName());
  Map<String, Object> props = makeMap(Overseer.QUEUE_OPERATION, CREATE.toLower(), NAME, tempSourceCollectionName, NRT_REPLICAS, 1, NUM_SLICES, 1, COLL_CONF, configName, CREATE_NODE_SET, sourceLeader.getNodeName());
  if (asyncId != null) {
    String internalAsyncId = asyncId + Math.abs(System.nanoTime());
    props.put(ASYNC, internalAsyncId);
  }
  log.info("Creating temporary collection: " + props);
  ocmh.commandMap.get(CREATE).call(clusterState, new ZkNodeProps(props), results);
  // refresh cluster state
  clusterState = zkStateReader.getClusterState();
  Slice tempSourceSlice = clusterState.getCollection(tempSourceCollectionName).getSlices().iterator().next();
  Replica tempSourceLeader = zkStateReader.getLeaderRetry(tempSourceCollectionName, tempSourceSlice.getName(), 120000);
  String tempCollectionReplica1 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 1);
  String coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, sourceLeader.getNodeName(), tempCollectionReplica1);
  // wait for the replicas to be seen as active on temp source leader
  log.info("Asking source leader to wait for: " + tempCollectionReplica1 + " to be alive on: " + sourceLeader.getNodeName());
  CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
  cmd.setCoreName(tempCollectionReplica1);
  cmd.setNodeName(sourceLeader.getNodeName());
  cmd.setCoreNodeName(coreNodeName);
  cmd.setState(Replica.State.ACTIVE);
  cmd.setCheckLive(true);
  cmd.setOnlyIfLeader(true);
  // we don't want this to happen asynchronously
  ocmh.sendShardRequest(tempSourceLeader.getNodeName(), new ModifiableSolrParams(cmd.getParams()), shardHandler, null, null);
  ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection leader or timed out waiting for it to come up", asyncId, requestMap);
  log.info("Asking source leader to split index");
  params = new ModifiableSolrParams();
  params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.SPLIT.toString());
  params.set(CoreAdminParams.CORE, sourceLeader.getStr("core"));
  params.add(CoreAdminParams.TARGET_CORE, tempSourceLeader.getStr("core"));
  params.set(CoreAdminParams.RANGES, splitRange.toString());
  params.set("split.key", splitKey);
  String tempNodeName = sourceLeader.getNodeName();
  ocmh.sendShardRequest(tempNodeName, params, shardHandler, asyncId, requestMap);
  ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to invoke SPLIT core admin command", asyncId, requestMap);
  log.info("Creating a replica of temporary collection: {} on the target leader node: {}", tempSourceCollectionName, targetLeader.getNodeName());
  String tempCollectionReplica2 = Assign.buildCoreName(tempSourceCollectionName, tempSourceSlice.getName(), Replica.Type.NRT, 2);
  props = new HashMap<>();
  props.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
  props.put(COLLECTION_PROP, tempSourceCollectionName);
  props.put(SHARD_ID_PROP, tempSourceSlice.getName());
  props.put("node", targetLeader.getNodeName());
  props.put(CoreAdminParams.NAME, tempCollectionReplica2);
  // copy over property params:
  for (String key : message.keySet()) {
    if (key.startsWith(COLL_PROP_PREFIX)) {
      props.put(key, message.getStr(key));
    }
  }
  // add async param
  if (asyncId != null) {
    props.put(ASYNC, asyncId);
  }
  ((AddReplicaCmd) ocmh.commandMap.get(ADDREPLICA)).addReplica(clusterState, new ZkNodeProps(props), results, null);
  ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create replica of temporary collection in target leader node.", asyncId, requestMap);
  coreNodeName = ocmh.waitForCoreNodeName(tempSourceCollectionName, targetLeader.getNodeName(), tempCollectionReplica2);
  // wait for the replicas to be seen as active on temp source leader
  log.info("Asking temp source leader to wait for: " + tempCollectionReplica2 + " to be alive on: " + targetLeader.getNodeName());
  cmd = new CoreAdminRequest.WaitForState();
  cmd.setCoreName(tempSourceLeader.getStr("core"));
  cmd.setNodeName(targetLeader.getNodeName());
  cmd.setCoreNodeName(coreNodeName);
  cmd.setState(Replica.State.ACTIVE);
  cmd.setCheckLive(true);
  cmd.setOnlyIfLeader(true);
  params = new ModifiableSolrParams(cmd.getParams());
  ocmh.sendShardRequest(tempSourceLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
  ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to create temp collection replica or timed out waiting for them to come up", asyncId, requestMap);
  log.info("Successfully created replica of temp source collection on target leader node");
  log.info("Requesting merge of temp source collection replica to target leader");
  params = new ModifiableSolrParams();
  params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.MERGEINDEXES.toString());
  params.set(CoreAdminParams.CORE, targetLeader.getStr("core"));
  params.set(CoreAdminParams.SRC_CORE, tempCollectionReplica2);
  ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
  String msg = "MIGRATE failed to merge " + tempCollectionReplica2 + " to " + targetLeader.getStr("core") + " on node: " + targetLeader.getNodeName();
  ocmh.processResponses(results, shardHandler, true, msg, asyncId, requestMap);
  log.info("Asking target leader to apply buffered updates");
  params = new ModifiableSolrParams();
  params.set(CoreAdminParams.ACTION, CoreAdminParams.CoreAdminAction.REQUESTAPPLYUPDATES.toString());
  params.set(CoreAdminParams.NAME, targetLeader.getStr("core"));
  ocmh.sendShardRequest(targetLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
  ocmh.processResponses(results, shardHandler, true, "MIGRATE failed to request node to apply buffered updates", asyncId, requestMap);
  try {
    log.info("Deleting temporary collection: " + tempSourceCollectionName);
    props = makeMap(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, tempSourceCollectionName);
    ocmh.commandMap.get(DELETE).call(zkStateReader.getClusterState(), new ZkNodeProps(props), results);
  } catch (Exception e) {
    log.error("Unable to delete temporary collection: " + tempSourceCollectionName + ". Please remove it manually", e);
  }
}
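A note on SolrIndexSplitter.getRouteKey, used above when naming the routing rule: it extracts the route-key portion of a composite id without the trailing separator, which is why the code re-appends "!" before registering and looking up the rule. A small sketch of the expected behavior (the example ids are made up):

// "a!doc1" routes on the prefix "a"; the rule is registered under "a!".
String routeKey = SolrIndexSplitter.getRouteKey("a!doc1"); // "a"
String ruleName = routeKey + "!"; // matches the "routeKey" property sent to the Overseer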
Use of org.apache.solr.common.cloud.CompositeIdRouter in project lucene-solr by apache.
From the class MigrateCmd, method call:
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
  String sourceCollectionName = message.getStr("collection");
  String splitKey = message.getStr("split.key");
  String targetCollectionName = message.getStr("target.collection");
  int timeout = message.getInt("forward.timeout", 10 * 60) * 1000;
  DocCollection sourceCollection = clusterState.getCollection(sourceCollectionName);
  if (sourceCollection == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown source collection: " + sourceCollectionName);
  }
  DocCollection targetCollection = clusterState.getCollection(targetCollectionName);
  if (targetCollection == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown target collection: " + sourceCollectionName);
  }
  if (!(sourceCollection.getRouter() instanceof CompositeIdRouter)) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Source collection must use a compositeId router");
  }
  if (!(targetCollection.getRouter() instanceof CompositeIdRouter)) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target collection must use a compositeId router");
  }
  CompositeIdRouter sourceRouter = (CompositeIdRouter) sourceCollection.getRouter();
  CompositeIdRouter targetRouter = (CompositeIdRouter) targetCollection.getRouter();
  Collection<Slice> sourceSlices = sourceRouter.getSearchSlicesSingle(splitKey, null, sourceCollection);
  if (sourceSlices.isEmpty()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No active slices available in source collection: " + sourceCollection + " for given split.key: " + splitKey);
  }
  Collection<Slice> targetSlices = targetRouter.getSearchSlicesSingle(splitKey, null, targetCollection);
  if (targetSlices.isEmpty()) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No active slices available in target collection: " + targetCollection + " for given split.key: " + splitKey);
  }
  String asyncId = null;
  if (message.containsKey(ASYNC) && message.get(ASYNC) != null)
    asyncId = message.getStr(ASYNC);
  for (Slice sourceSlice : sourceSlices) {
    for (Slice targetSlice : targetSlices) {
      log.info("Migrating source shard: {} to target shard: {} for split.key = " + splitKey, sourceSlice, targetSlice);
      migrateKey(clusterState, sourceCollection, sourceSlice, targetCollection, targetSlice, splitKey, timeout, results, asyncId, message);
    }
  }
}
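The message keys read at the top of call() correspond to the public Collections API parameters for MIGRATE. A hedged sketch of issuing the command from SolrJ with a generic request (the collection names and split key are placeholders):

ModifiableSolrParams params = new ModifiableSolrParams();
params.set("action", "MIGRATE");
params.set("collection", "sourceCollection");        // read above as message.getStr("collection")
params.set("target.collection", "targetCollection"); // message.getStr("target.collection")
params.set("split.key", "a!");                       // message.getStr("split.key")
params.set("forward.timeout", "60");                 // seconds; defaults to 10 * 60 above
QueryRequest request = new QueryRequest(params);
request.setPath("/admin/collections");
solrClient.request(request); // any SolrClient pointed at the cluster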
Use of org.apache.solr.common.cloud.CompositeIdRouter in project lucene-solr by apache.
From the class DistributedUpdateProcessor, method getNodesByRoutingRules:
private List<Node> getNodesByRoutingRules(ClusterState cstate, DocCollection coll, String id, SolrInputDocument doc) {
  DocRouter router = coll.getRouter();
  List<Node> nodes = null;
  if (router instanceof CompositeIdRouter) {
    CompositeIdRouter compositeIdRouter = (CompositeIdRouter) router;
    String myShardId = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
    Slice slice = coll.getSlice(myShardId);
    Map<String, RoutingRule> routingRules = slice.getRoutingRules();
    if (routingRules != null) {
      // delete by query case
      if (id == null) {
        for (Entry<String, RoutingRule> entry : routingRules.entrySet()) {
          String targetCollectionName = entry.getValue().getTargetCollectionName();
          Collection<Slice> activeSlices = cstate.getActiveSlices(targetCollectionName);
          if (activeSlices != null && !activeSlices.isEmpty()) {
            Slice any = activeSlices.iterator().next();
            if (nodes == null)
              nodes = new ArrayList<>();
            nodes.add(new StdNode(new ZkCoreNodeProps(any.getLeader())));
          }
        }
        return nodes;
      }
      String routeKey = SolrIndexSplitter.getRouteKey(id);
      if (routeKey != null) {
        RoutingRule rule = routingRules.get(routeKey + "!");
        if (rule != null) {
          if (!rule.isExpired()) {
            List<DocRouter.Range> ranges = rule.getRouteRanges();
            if (ranges != null && !ranges.isEmpty()) {
              int hash = compositeIdRouter.sliceHash(id, doc, null, coll);
              for (DocRouter.Range range : ranges) {
                if (range.includes(hash)) {
                  DocCollection targetColl = cstate.getCollection(rule.getTargetCollectionName());
                  Collection<Slice> activeSlices = targetColl.getRouter().getSearchSlicesSingle(id, null, targetColl);
                  if (activeSlices == null || activeSlices.isEmpty()) {
                    throw new SolrException(ErrorCode.SERVER_ERROR, "No active slices serving " + id + " found for target collection: " + rule.getTargetCollectionName());
                  }
                  Replica targetLeader = targetColl.getLeader(activeSlices.iterator().next().getName());
                  nodes = new ArrayList<>(1);
                  nodes.add(new StdNode(new ZkCoreNodeProps(targetLeader)));
                  break;
                }
              }
            }
          } else {
            ReentrantLock ruleExpiryLock = req.getCore().getRuleExpiryLock();
            if (!ruleExpiryLock.isLocked()) {
              try {
                if (ruleExpiryLock.tryLock(10, TimeUnit.MILLISECONDS)) {
                  log.info("Going to expire routing rule");
                  try {
                    Map<String, Object> map = Utils.makeMap(Overseer.QUEUE_OPERATION, OverseerAction.REMOVEROUTINGRULE.toLower(), ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.SHARD_ID_PROP, myShardId, "routeKey", routeKey + "!");
                    SolrZkClient zkClient = req.getCore().getCoreContainer().getZkController().getZkClient();
                    DistributedQueue queue = Overseer.getStateUpdateQueue(zkClient);
                    queue.offer(Utils.toJSON(map));
                  } catch (KeeperException e) {
                    log.warn("Exception while removing routing rule for route key: " + routeKey, e);
                  } catch (Exception e) {
                    log.error("Exception while removing routing rule for route key: " + routeKey, e);
                  } finally {
                    ruleExpiryLock.unlock();
                  }
                }
              } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
              }
            }
          }
        }
      }
    }
  }
  return nodes;
}
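The hash test in the middle of this method can be reproduced in isolation: sliceHash computes the 32-bit routing hash for an id, and Range.includes checks membership, mirroring the loop over rule.getRouteRanges(). A minimal sketch, assuming the id-only code path where the document, params, and collection arguments may be null (the example id is made up):

CompositeIdRouter router = new CompositeIdRouter();
int hash = router.sliceHash("a!doc1", null, null, null);
DocRouter.Range keyRange = router.keyHashRange("a!");
// Every id with the route-key prefix "a!" hashes into the range for "a!".
System.out.println(keyRange.includes(hash)); // expected: true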