Use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.
The class ShardSplitTest, method splitByRouteFieldTest.
public void splitByRouteFieldTest() throws Exception {
  log.info("Starting testSplitWithRouteField");
  String collectionName = "routeFieldColl";
  int numShards = 4;
  int replicationFactor = 2;
  int maxShardsPerNode = (((numShards * replicationFactor) / getCommonCloudSolrClient().getZkStateReader().getClusterState().getLiveNodes().size())) + 1;
  HashMap<String, List<Integer>> collectionInfos = new HashMap<>();
  String shard_fld = "shard_s";
  try (CloudSolrClient client = createCloudClient(null)) {
    Map<String, Object> props = Utils.makeMap(REPLICATION_FACTOR, replicationFactor, MAX_SHARDS_PER_NODE, maxShardsPerNode, NUM_SLICES, numShards, "router.field", shard_fld);
    createCollection(collectionInfos, collectionName, props, client);
  }
  List<Integer> list = collectionInfos.get(collectionName);
  checkForCollection(collectionName, list, null);
  waitForRecoveriesToFinish(false);
  String url = getUrlFromZk(getCommonCloudSolrClient().getZkStateReader().getClusterState(), collectionName);
  try (HttpSolrClient collectionClient = getHttpSolrClient(url)) {
    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    final DocRouter router = clusterState.getCollection(collectionName).getRouter();
    Slice shard1 = clusterState.getSlice(collectionName, SHARD1);
    DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
    final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
    final int[] docCounts = new int[ranges.size()];
    for (int i = 100; i <= 200; i++) {
      // See comment in ShardRoutingTest for hash distribution
      String shardKey = "" + (char) ('a' + (i % 26));
      collectionClient.add(getDoc(id, i, "n_ti", i, shard_fld, shardKey));
      int idx = getHashRangeIdx(router, ranges, shardKey);
      if (idx != -1) {
        docCounts[idx]++;
      }
    }
    for (int i = 0; i < docCounts.length; i++) {
      int docCount = docCounts[i];
      log.info("Shard {} docCount = {}", "shard1_" + i, docCount);
    }
    collectionClient.commit();
    for (int i = 0; i < 3; i++) {
      try {
        splitShard(collectionName, SHARD1, null, null);
        break;
      } catch (HttpSolrClient.RemoteSolrException e) {
        if (e.code() != 500) {
          throw e;
        }
        log.error("SPLITSHARD failed. " + (i < 2 ? "Retrying split" : ""), e);
        if (i == 2) {
          fail("SPLITSHARD was not successful even after three tries");
        }
      }
    }
    waitForRecoveriesToFinish(collectionName, false);
    assertEquals(docCounts[0], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_0")).getResults().getNumFound());
    assertEquals(docCounts[1], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_1")).getResults().getNumFound());
  }
}
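The ClusterState usage above boils down to three calls: read the state from the client's ZkStateReader, look up the collection's DocRouter, and partition the target shard's hash range. Below is a minimal sketch of just that part, assuming a running SolrCloud cluster and a connected CloudSolrClient; the class and helper names and parameters are illustrative and not part of ShardSplitTest.

import java.util.List;

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.Slice;

public class ClusterStateRangeExample {

  // Hypothetical helper: compute the sub-ranges a split of "shardName" would produce.
  static List<DocRouter.Range> subRangesForShard(CloudSolrClient client, String collectionName,
                                                 String shardName, int parts) {
    // ClusterState is the cached view of collections, slices and replicas held by the ZkStateReader.
    ClusterState clusterState = client.getZkStateReader().getClusterState();
    DocCollection collection = clusterState.getCollection(collectionName);
    DocRouter router = collection.getRouter();
    Slice shard = collection.getSlice(shardName);
    // A slice created without an explicit range (e.g. with the implicit router) reports null.
    DocRouter.Range range = shard.getRange() != null ? shard.getRange() : router.fullRange();
    // Cut the 32-bit hash range into "parts" contiguous sub-ranges.
    return router.partitionRange(parts, range);
  }
}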
Use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.
The class ShardSplitTest, method testSplitAfterFailedSplit.
/**
* Used to test that we can split a shard when a previous split event
* left sub-shards in construction or recovery state.
*
* See SOLR-9439
*/
@Test
public void testSplitAfterFailedSplit() throws Exception {
  waitForThingsToLevelOut(15);
  // we definitely want the split to fail
  TestInjection.splitFailureBeforeReplicaCreation = "true:100";
  try {
    try {
      CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
      splitShard.setShardName(SHARD1);
      splitShard.process(cloudClient);
      fail("Shard split was not supposed to succeed after failure injection!");
    } catch (Exception e) {
      // expected
    }
    // assert that the sub-shard cores exist and the sub-shards are in construction state
    ZkStateReader zkStateReader = cloudClient.getZkStateReader();
    zkStateReader.forceUpdateCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
    ClusterState state = zkStateReader.getClusterState();
    DocCollection collection = state.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
    Slice shard10 = collection.getSlice(SHARD1_0);
    assertEquals(Slice.State.CONSTRUCTION, shard10.getState());
    assertEquals(1, shard10.getReplicas().size());
    Slice shard11 = collection.getSlice(SHARD1_1);
    assertEquals(Slice.State.CONSTRUCTION, shard11.getState());
    assertEquals(1, shard11.getReplicas().size());
    // now retry the split and let it succeed
    TestInjection.reset();
    try {
      CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
      splitShard.setShardName(SHARD1);
      splitShard.process(cloudClient);
      // Yay!
    } catch (Exception e) {
      log.error("Shard split failed", e);
      fail("Shard split did not succeed after a previous failed split attempt left sub-shards in construction state");
    }
  } finally {
    TestInjection.reset();
  }
}
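The ClusterState checks in this test follow one pattern: force a refresh of the collection from ZooKeeper, then read the slice's state from the refreshed ClusterState. A small sketch of that pattern under the same assumptions; the assertSliceState helper and its signature are illustrative, not part of the Solr test framework.

import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;

public class SliceStateExample {

  // Hypothetical helper: fail fast if a slice is not in the expected state.
  static void assertSliceState(ZkStateReader zkStateReader, String collectionName,
                               String sliceName, Slice.State expected) throws Exception {
    // Pull the latest collection state from ZooKeeper instead of relying only on the cached watch.
    zkStateReader.forceUpdateCollection(collectionName);
    ClusterState clusterState = zkStateReader.getClusterState();
    DocCollection collection = clusterState.getCollection(collectionName);
    Slice slice = collection.getSlice(sliceName);
    if (slice == null || slice.getState() != expected) {
      throw new AssertionError("Expected " + sliceName + " in state " + expected
          + " but found " + (slice == null ? "no slice" : slice.getState()));
    }
  }
}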
Use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.
The class ShardSplitTest, method checkDocCountsAndShardStates.
protected void checkDocCountsAndShardStates(int[] docCounts, int numReplicas) throws Exception {
  ClusterState clusterState = null;
  Slice slice1_0 = null, slice1_1 = null;
  int i = 0;
  for (i = 0; i < 10; i++) {
    ZkStateReader zkStateReader = cloudClient.getZkStateReader();
    clusterState = zkStateReader.getClusterState();
    slice1_0 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, "shard1_0");
    slice1_1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, "shard1_1");
    if (slice1_0.getState() == Slice.State.ACTIVE && slice1_1.getState() == Slice.State.ACTIVE) {
      break;
    }
    Thread.sleep(500);
  }
  log.info("ShardSplitTest waited for {} ms for shard state to be set to active", i * 500);
  assertNotNull("Cluster state does not contain shard1_0", slice1_0);
  assertNotNull("Cluster state does not contain shard1_1", slice1_1);
  assertSame("shard1_0 is not active", Slice.State.ACTIVE, slice1_0.getState());
  assertSame("shard1_1 is not active", Slice.State.ACTIVE, slice1_1.getState());
  assertEquals("Wrong number of replicas created for shard1_0", numReplicas, slice1_0.getReplicas().size());
  assertEquals("Wrong number of replicas created for shard1_1", numReplicas, slice1_1.getReplicas().size());
  commit();
  // can't use checkShardConsistency because it insists on jettys and clients for each shard
  checkSubShardConsistency(SHARD1_0);
  checkSubShardConsistency(SHARD1_1);
  SolrQuery query = new SolrQuery("*:*").setRows(1000).setFields("id", "_version_");
  query.set("distrib", false);
  ZkCoreNodeProps shard1_0 = getLeaderUrlFromZk(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1_0);
  QueryResponse response;
  try (HttpSolrClient shard1_0Client = getHttpSolrClient(shard1_0.getCoreUrl())) {
    response = shard1_0Client.query(query);
  }
  long shard10Count = response.getResults().getNumFound();
  ZkCoreNodeProps shard1_1 = getLeaderUrlFromZk(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1_1);
  QueryResponse response2;
  try (HttpSolrClient shard1_1Client = getHttpSolrClient(shard1_1.getCoreUrl())) {
    response2 = shard1_1Client.query(query);
  }
  long shard11Count = response2.getResults().getNumFound();
  logDebugHelp(docCounts, response, shard10Count, response2, shard11Count);
  assertEquals("Wrong doc count on shard1_0. See SOLR-5309", docCounts[0], shard10Count);
  assertEquals("Wrong doc count on shard1_1. See SOLR-5309", docCounts[1], shard11Count);
}
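checkDocCountsAndShardStates polls the ClusterState until both sub-shards report ACTIVE. The same polling loop, extracted into a reusable sketch; waitForSliceActive, its timeout parameter, and the 500 ms poll interval are illustrative choices, not Solr API.

import java.util.concurrent.TimeUnit;

import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;

public class WaitForActiveExample {

  // Hypothetical helper: poll ClusterState until the slice is ACTIVE or the timeout expires.
  static boolean waitForSliceActive(ZkStateReader zkStateReader, String collectionName,
                                    String sliceName, long timeoutMs) throws InterruptedException {
    long deadline = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(timeoutMs);
    while (System.nanoTime() < deadline) {
      // getClusterState() returns the reader's current cached view; ZooKeeper watches keep it fresh.
      ClusterState clusterState = zkStateReader.getClusterState();
      DocCollection collection = clusterState.getCollectionOrNull(collectionName);
      Slice slice = collection == null ? null : collection.getSlice(sliceName);
      if (slice != null && slice.getState() == Slice.State.ACTIVE) {
        return true;
      }
      Thread.sleep(500);
    }
    return false;
  }
}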
Use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.
The class ShardSplitTest, method splitByUniqueKeyTest.
private void splitByUniqueKeyTest() throws Exception {
  ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
  final DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
  Slice shard1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
  DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
  List<DocRouter.Range> subRanges = new ArrayList<>();
  if (usually()) {
    List<DocRouter.Range> ranges = router.partitionRange(4, shard1Range);
    // 75% of the range goes to shard1_0 and the rest to shard1_1
    subRanges.add(new DocRouter.Range(ranges.get(0).min, ranges.get(2).max));
    subRanges.add(ranges.get(3));
  } else {
    subRanges = router.partitionRange(2, shard1Range);
  }
  final List<DocRouter.Range> ranges = subRanges;
  final int[] docCounts = new int[ranges.size()];
  int numReplicas = shard1.getReplicas().size();
  del("*:*");
  for (int id = 0; id <= 100; id++) {
    // See comment in ShardRoutingTest for hash distribution
    String shardKey = "" + (char) ('a' + (id % 26));
    indexAndUpdateCount(router, ranges, docCounts, shardKey + "!" + String.valueOf(id), id);
  }
  commit();
  Thread indexThread = new Thread() {
    @Override
    public void run() {
      Random random = random();
      int max = atLeast(random, 401);
      int sleep = atLeast(random, 25);
      log.info("SHARDSPLITTEST: Going to add " + max + " docs at 1 doc per " + sleep + "ms");
      Set<String> deleted = new HashSet<>();
      for (int id = 101; id < max; id++) {
        try {
          indexAndUpdateCount(router, ranges, docCounts, String.valueOf(id), id);
          Thread.sleep(sleep);
          if (usually(random)) {
            String delId = String.valueOf(random.nextInt(id - 101 + 1) + 101);
            if (deleted.contains(delId))
              continue;
            try {
              deleteAndUpdateCount(router, ranges, docCounts, delId);
              deleted.add(delId);
            } catch (Exception e) {
              log.error("Exception while deleting docs", e);
            }
          }
        } catch (Exception e) {
          log.error("Exception while adding doc id = " + id, e);
          // do not select this id for deletion ever
          deleted.add(String.valueOf(id));
        }
      }
    }
  };
  indexThread.start();
  try {
    for (int i = 0; i < 3; i++) {
      try {
        splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1, subRanges, null);
        log.info("Layout after split: \n");
        printLayout();
        break;
      } catch (HttpSolrClient.RemoteSolrException e) {
        if (e.code() != 500) {
          throw e;
        }
        log.error("SPLITSHARD failed. " + (i < 2 ? "Retrying split" : ""), e);
        if (i == 2) {
          fail("SPLITSHARD was not successful even after three tries");
        }
      }
    }
  } finally {
    try {
      indexThread.join();
    } catch (InterruptedException e) {
      log.error("Indexing thread interrupted", e);
    }
  }
  waitForRecoveriesToFinish(true);
  checkDocCountsAndShardStates(docCounts, numReplicas);
}
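The interesting ClusterState detail here is that DocRouter.Range exposes its min and max hash bounds, so the test can stitch its own uneven 75/25 split out of the equal partitions. A standalone sketch of that computation, with unevenTwoWaySplit as an illustrative name rather than anything defined in ShardSplitTest:

import java.util.ArrayList;
import java.util.List;

import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.Slice;

public class CustomSubRangesExample {

  // Hypothetical helper: split a shard's hash range 75/25 instead of the default 50/50.
  static List<DocRouter.Range> unevenTwoWaySplit(ClusterState clusterState,
                                                 String collectionName, String shardName) {
    DocCollection collection = clusterState.getCollection(collectionName);
    DocRouter router = collection.getRouter();
    Slice shard = collection.getSlice(shardName);
    DocRouter.Range range = shard.getRange() != null ? shard.getRange() : router.fullRange();
    // Cut the range into four equal pieces, then merge the first three into one sub-range.
    List<DocRouter.Range> quarters = router.partitionRange(4, range);
    List<DocRouter.Range> subRanges = new ArrayList<>();
    subRanges.add(new DocRouter.Range(quarters.get(0).min, quarters.get(2).max));
    subRanges.add(quarters.get(3));
    return subRanges;
  }
}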
Use of org.apache.solr.common.cloud.ClusterState in project lucene-solr by apache.
The class ShardSplitTest, method checkSubShardConsistency.
protected void checkSubShardConsistency(String shard) throws SolrServerException, IOException {
  SolrQuery query = new SolrQuery("*:*").setRows(1000).setFields("id", "_version_");
  query.set("distrib", false);
  ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
  Slice slice = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, shard);
  long[] numFound = new long[slice.getReplicasMap().size()];
  int c = 0;
  for (Replica replica : slice.getReplicas()) {
    String coreUrl = new ZkCoreNodeProps(replica).getCoreUrl();
    QueryResponse response;
    try (HttpSolrClient client = getHttpSolrClient(coreUrl)) {
      response = client.query(query);
    }
    numFound[c++] = response.getResults().getNumFound();
    log.info("Shard: " + shard + " Replica: {} has {} docs", coreUrl, String.valueOf(response.getResults().getNumFound()));
    assertTrue("Shard: " + shard + " Replica: " + coreUrl + " has 0 docs", response.getResults().getNumFound() > 0);
  }
  for (int i = 0; i < slice.getReplicasMap().size(); i++) {
    assertEquals(shard + " is not consistent", numFound[0], numFound[i]);
  }
}
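checkSubShardConsistency walks every replica of a slice through ClusterState and turns each Replica into a per-core URL via ZkCoreNodeProps. That navigation on its own, with replicaCoreUrls as an illustrative helper name:

import java.util.ArrayList;
import java.util.List;

import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkCoreNodeProps;

public class ReplicaUrlsExample {

  // Hypothetical helper: collect the core URL of every replica of one slice.
  static List<String> replicaCoreUrls(ClusterState clusterState, String collectionName, String sliceName) {
    DocCollection collection = clusterState.getCollection(collectionName);
    Slice slice = collection.getSlice(sliceName);
    List<String> urls = new ArrayList<>();
    for (Replica replica : slice.getReplicas()) {
      // ZkCoreNodeProps resolves base_url plus core name into the per-core HTTP endpoint.
      urls.add(new ZkCoreNodeProps(replica).getCoreUrl());
    }
    return urls;
  }
}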