use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class TopicStream method constructStreams.
/**
 * Builds one {@link SolrStream} per slice of {@code collection} and appends it to
 * {@code solrStreams}.
 *
 * <p>Each stream is sent non-distributed to a randomly chosen replica that is both in the
 * ACTIVE state and hosted on a live node. The request is sorted by {@code _version_ asc},
 * the version field is appended to {@code fl} when it is not already present, and the stream
 * is given the slice's checkpoint taken from {@code checkpoints}.
 *
 * @throws IOException if the {@code fl} parameter is missing, if a slice has no checkpoint or
 *     no active replica on a live node, or if any other failure occurs while reading cluster
 *     state or creating the streams (non-IO failures are wrapped as the cause)
 */
protected void constructStreams() throws IOException {
  try {
    ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
    Collection<Slice> slices = CloudSolrStream.getSlices(this.collection, zkStateReader, false);

    ModifiableSolrParams mParams = new ModifiableSolrParams(params);
    // We are the aggregator.
    mParams.set(DISTRIB, "false");
    String fl = mParams.get("fl");
    if (fl == null) {
      // Fail with a readable message instead of a NullPointerException on fl.contains(...).
      throw new IOException("Parameter 'fl' is required to construct streams for collection " + this.collection);
    }
    mParams.set(SORT, "_version_ asc");
    if (!fl.contains(VERSION_FIELD)) {
      fl += ",_version_";
    }
    mParams.set("fl", fl);

    Random random = new Random();
    ClusterState clusterState = zkStateReader.getClusterState();
    Set<String> liveNodes = clusterState.getLiveNodes();

    for (Slice slice : slices) {
      ModifiableSolrParams localParams = new ModifiableSolrParams(mParams);

      Long checkpoint = checkpoints.get(slice.getName());
      if (checkpoint == null) {
        // Previously surfaced as a NullPointerException while unboxing.
        throw new IOException("No checkpoint found for slice " + slice.getName() + " of collection " + this.collection);
      }

      // Only replicas that are ACTIVE and whose node is live are candidates.
      List<Replica> shuffler = new ArrayList<>();
      for (Replica replica : slice.getReplicas()) {
        if (replica.getState() == Replica.State.ACTIVE && liveNodes.contains(replica.getNodeName())) {
          shuffler.add(replica);
        }
      }
      if (shuffler.isEmpty()) {
        // Random.nextInt(0) would otherwise throw an opaque IllegalArgumentException.
        throw new IOException("No active replicas found for slice " + slice.getName() + " of collection " + this.collection);
      }

      Replica rep = shuffler.get(random.nextInt(shuffler.size()));
      ZkCoreNodeProps zkProps = new ZkCoreNodeProps(rep);
      String url = zkProps.getCoreUrl();

      SolrStream solrStream = new SolrStream(url, localParams);
      solrStream.setSlice(slice.getName());
      solrStream.setCheckpoint(checkpoint);
      solrStream.setTrace(true);
      if (streamContext != null) {
        solrStream.setStreamContext(streamContext);
      }
      solrStreams.add(solrStream);
    }
  } catch (IOException e) {
    // Already the declared type; rethrow so the message is not buried one level deeper.
    throw e;
  } catch (Exception e) {
    throw new IOException(e);
  }
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class TopicStream method getPersistedCheckpoints.
/**
 * Loads previously persisted checkpoints for this topic into {@code this.checkpoints}.
 *
 * <p>The first replica of {@code checkpointCollection} that is ACTIVE and hosted on a live
 * node is asked for the document with this topic's {@code id}. Only that one replica is
 * queried. Each value of the document's {@code checkpoint_ss} field is expected to look like
 * {@code <key>~<long>} and is stored as a key/value pair. A missing document, or a document
 * without a {@code checkpoint_ss} value, leaves the checkpoints untouched.
 *
 * @throws IOException if the lookup fails or a persisted entry cannot be parsed (non-IO
 *     failures are wrapped as the cause)
 */
private void getPersistedCheckpoints() throws IOException {
  ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
  Collection<Slice> slices = CloudSolrStream.getSlices(checkpointCollection, zkStateReader, false);
  ClusterState clusterState = zkStateReader.getClusterState();
  Set<String> liveNodes = clusterState.getLiveNodes();

  for (Slice slice : slices) {
    for (Replica replica : slice.getReplicas()) {
      if (replica.getState() != Replica.State.ACTIVE || !liveNodes.contains(replica.getNodeName())) {
        continue;
      }
      HttpSolrClient httpClient = streamContext.getSolrClientCache().getHttpSolrClient(replica.getCoreUrl());
      try {
        SolrDocument doc = httpClient.getById(id);
        if (doc != null) {
          @SuppressWarnings("unchecked") // checkpoint_ss is read back as a list of strings
          List<String> persisted = (List<String>) doc.getFieldValue("checkpoint_ss");
          // A document without the field carries no checkpoints; treat it like a missing document.
          if (persisted != null) {
            for (String entry : persisted) {
              String[] pair = entry.split("~");
              if (pair.length < 2) {
                throw new IOException("Malformed persisted checkpoint '" + entry + "' for id " + id);
              }
              this.checkpoints.put(pair[0], Long.parseLong(pair[1]));
            }
          }
        }
      } catch (IOException e) {
        // Already the declared type; do not wrap it a second time.
        throw e;
      } catch (Exception e) {
        throw new IOException(e);
      }
      // Stop after the first usable replica has been queried.
      return;
    }
  }
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class ChaosMonkeyNothingIsSafeWithPullReplicasTest method test.
/**
 * Runs indexing, search and (usually) commit threads against the cluster while the chaos
 * monkey is active, restarts ZooKeeper, and then checks that the cluster settles: every shard
 * has a leader, the regular indexing threads stayed within {@code FAIL_TOLERANCE}, replicas
 * have replicated, shards are consistent, documents were indexed, and a new collection can
 * still be created.
 *
 * <p>On failure the replica-type replication info and the ZooKeeper layout are printed to
 * help with diagnosis.
 */
@Test
public void test() throws Exception {
  cloudClient.setSoTimeout(clientSoTimeout);

  // Verify the replica layout of shard1 before any chaos starts.
  DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION);
  assertEquals(this.sliceCount, docCollection.getSlices().size());
  Slice s = docCollection.getSlice("shard1");
  assertNotNull(s);
  assertEquals("Unexpected number of replicas. Collection: " + docCollection, numRealtimeOrTlogReplicas + numPullReplicas, s.getReplicas().size());
  assertEquals("Unexpected number of pull replicas. Collection: " + docCollection, numPullReplicas, s.getReplicas(EnumSet.of(Replica.Type.PULL)).size());
  assertEquals(useTlogReplicas() ? 0 : numRealtimeOrTlogReplicas, s.getReplicas(EnumSet.of(Replica.Type.NRT)).size());
  assertEquals(useTlogReplicas() ? numRealtimeOrTlogReplicas : 0, s.getReplicas(EnumSet.of(Replica.Type.TLOG)).size());

  boolean testSuccessful = false;
  try {
    handle.clear();
    handle.put("timestamp", SKIPVAL);
    ZkStateReader zkStateReader = cloudClient.getZkStateReader();
    // make sure we have leaders for each shard; shards are addressed as "shard" + j starting
    // at 1, so the bound has to be inclusive to reach the last one
    for (int j = 1; j <= sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 10000);
    }
    // wait for any recoveries to finish before indexing starts
    waitForRecoveriesToFinish(false);
    // we cannot do delete by query
    // as it's not supported for recovery
    del("*:*");

    List<StoppableThread> threads = new ArrayList<>();
    List<StoppableIndexingThread> indexThreads = new ArrayList<>();
    int indexThreadCount = TEST_NIGHTLY ? 3 : 1;
    for (int i = 0; i < indexThreadCount; i++) {
      StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
      threads.add(indexThread);
      indexThreads.add(indexThread);
      indexThread.start();
    }
    int searchThreadCount = 1;
    for (int i = 0; i < searchThreadCount; i++) {
      StoppableSearchThread searchThread = new StoppableSearchThread(cloudClient);
      threads.add(searchThread);
      searchThread.start();
    }
    if (usually()) {
      StoppableCommitThread commitThread = new StoppableCommitThread(cloudClient, 1000, false);
      threads.add(commitThread);
      commitThread.start();
    }
    // TODO: we only do this sometimes so that we can sometimes compare against control,
    // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
    boolean runFullThrottle = random().nextBoolean();
    if (runFullThrottle) {
      FullThrottleStoppableIndexingThread ftIndexThread = new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
      threads.add(ftIndexThread);
      ftIndexThread.start();
    }

    chaosMonkey.startTheMonkey(true, 10000);
    try {
      long runLength;
      if (RUN_LENGTH != -1) {
        runLength = RUN_LENGTH;
      } else {
        int[] runTimes;
        if (TEST_NIGHTLY) {
          runTimes = new int[] { 5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000, 120000 };
        } else {
          runTimes = new int[] { 5000, 7000, 15000 };
        }
        // nextInt's bound is exclusive: pass the full length so the last entry can be chosen too
        runLength = runTimes[random().nextInt(runTimes.length)];
      }
      ChaosMonkey.wait(runLength, DEFAULT_COLLECTION, zkStateReader);
    } finally {
      chaosMonkey.stopTheMonkey();
    }

    // ideally this should go into chaosMonkey
    restartZk(1000 * (5 + random().nextInt(4)));
    for (StoppableThread thread : threads) {
      thread.safeStop();
    }
    // wait for stop...
    for (StoppableThread thread : threads) {
      thread.join();
    }
    // try and wait for any replications and what not to finish...
    ChaosMonkey.wait(2000, DEFAULT_COLLECTION, zkStateReader);
    // wait until there are no recoveries...
    waitForThingsToLevelOut(Integer.MAX_VALUE);
    // make sure we again have leaders for each shard (inclusive bound, see above)
    for (int j = 1; j <= sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 30000);
    }
    commit();

    // TODO: assert we didnt kill everyone
    zkStateReader.updateLiveNodes();
    assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);

    // we expect full throttle fails, but cloud client should not easily fail
    for (StoppableThread thread : threads) {
      if (thread instanceof StoppableIndexingThread && !(thread instanceof FullThrottleStoppableIndexingThread)) {
        int failCount = ((StoppableIndexingThread) thread).getFailCount();
        assertFalse("There were too many update fails (" + failCount + " > " + FAIL_TOLERANCE + ") - we expect it can happen, but shouldn't easily", failCount > FAIL_TOLERANCE);
      }
    }

    waitForReplicationFromReplicas(DEFAULT_COLLECTION, zkStateReader, new TimeOut(30, TimeUnit.SECONDS));
    Set<String> addFails = getAddFails(indexThreads);
    Set<String> deleteFails = getDeleteFails(indexThreads);
    // full throttle thread can
    // have request fails
    checkShardConsistency(!runFullThrottle, true, addFails, deleteFails);

    long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    // ensure we have added more than 0 docs
    long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    // report the value that is actually asserted on, alongside the control count
    assertTrue("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", cloudClientDocs > 0);
    if (VERBOSE) {
      System.out.println("control docs:" + ctrlDocs + "\n\n");
    }

    // sometimes we restart zookeeper as well
    if (random().nextBoolean()) {
      restartZk(1000 * (5 + random().nextInt(4)));
    }
    try (CloudSolrClient client = createCloudClient("collection1")) {
      // We don't really know how many live nodes we have at this point, so "maxShardsPerNode" needs to be > 1
      createCollection(null, "testcollection", 1, 1, 10, client, null, "conf1");
    }
    List<Integer> numShardsNumReplicas = new ArrayList<>(2);
    numShardsNumReplicas.add(1);
    numShardsNumReplicas.add(1 + getPullReplicaCount());
    checkForCollection("testcollection", numShardsNumReplicas, null);
    testSuccessful = true;
  } finally {
    if (!testSuccessful) {
      logReplicaTypesReplicationInfo(DEFAULT_COLLECTION, cloudClient.getZkStateReader());
      printLayout();
    }
  }
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class ChaosMonkeyShardSplitTest method electNewOverseer.
/**
 * Creates a fresh ZooKeeper client for {@code address}, builds an {@link Overseer} on top of
 * it and enters that overseer into the overseer election.
 *
 * <p>The {@link ZkStateReader} created here is closed before returning; the ZooKeeper client
 * is handed back to the caller.
 *
 * @param address ZooKeeper address to connect to; its {@code /} characters are replaced by
 *     {@code _} to form the id passed to the election context
 * @return the {@link SolrZkClient} created for the election
 */
private SolrZkClient electNewOverseer(String address) throws KeeperException, InterruptedException, IOException {
  SolrZkClient client = new SolrZkClient(address, TIMEOUT);
  ZkStateReader stateReader = new ZkStateReader(client);
  LeaderElector elector = new LeaderElector(client);
  UpdateShardHandler shardUpdates = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT);

  Overseer candidate = new Overseer(
      new HttpShardHandlerFactory().getShardHandler(),
      shardUpdates,
      "/admin/cores",
      stateReader,
      null,
      new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build());
  // NOTE(review): the overseer is closed before it is handed to the election context;
  // this ordering is preserved as-is -- confirm it is intentional.
  candidate.close();

  String electionId = address.replace("/", "_");
  ElectionContext context = new OverseerElectionContext(client, candidate, electionId);
  elector.setup(context);
  elector.joinElection(context, false);

  stateReader.close();
  return client;
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class ChaosMonkeyNothingIsSafeTest method test.
/**
 * Runs indexing and search threads against the cluster while the chaos monkey is active,
 * restarts ZooKeeper, and then checks that the cluster settles: every shard has a leader, the
 * regular indexing threads stayed within {@code FAIL_TOLERANCE}, shards are consistent,
 * documents were indexed, and a new collection can still be created.
 *
 * <p>On failure the ZooKeeper layout is printed to help with diagnosis.
 */
@Test
public void test() throws Exception {
  cloudClient.setSoTimeout(clientSoTimeout);
  boolean testSuccessful = false;
  try {
    handle.clear();
    handle.put("timestamp", SKIPVAL);
    ZkStateReader zkStateReader = cloudClient.getZkStateReader();
    // make sure we have leaders for each shard; shards are addressed as "shard" + j starting
    // at 1, so the bound has to be inclusive to reach the last one
    for (int j = 1; j <= sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 10000);
    }
    // wait for any recoveries to finish before indexing starts
    waitForRecoveriesToFinish(false);
    // we cannot do delete by query
    // as it's not supported for recovery
    del("*:*");

    List<StoppableThread> threads = new ArrayList<>();
    List<StoppableIndexingThread> indexThreads = new ArrayList<>();
    int indexThreadCount = TEST_NIGHTLY ? 3 : 1;
    for (int i = 0; i < indexThreadCount; i++) {
      StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
      threads.add(indexThread);
      indexThreads.add(indexThread);
      indexThread.start();
    }
    int searchThreadCount = 1;
    for (int i = 0; i < searchThreadCount; i++) {
      StoppableSearchThread searchThread = new StoppableSearchThread(cloudClient);
      threads.add(searchThread);
      searchThread.start();
    }
    // TODO: we only do this sometimes so that we can sometimes compare against control,
    // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
    boolean runFullThrottle = random().nextBoolean();
    if (runFullThrottle) {
      FullThrottleStoppableIndexingThread ftIndexThread = new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
      threads.add(ftIndexThread);
      ftIndexThread.start();
    }

    chaosMonkey.startTheMonkey(true, 10000);
    try {
      long runLength;
      if (RUN_LENGTH != -1) {
        runLength = RUN_LENGTH;
      } else {
        int[] runTimes;
        if (TEST_NIGHTLY) {
          runTimes = new int[] { 5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000, 120000 };
        } else {
          runTimes = new int[] { 5000, 7000, 15000 };
        }
        // nextInt's bound is exclusive: pass the full length so the last entry can be chosen too
        runLength = runTimes[random().nextInt(runTimes.length)];
      }
      Thread.sleep(runLength);
    } finally {
      chaosMonkey.stopTheMonkey();
    }

    // ideally this should go into chaosMonkey
    restartZk(1000 * (5 + random().nextInt(4)));
    for (StoppableThread thread : threads) {
      thread.safeStop();
    }
    // wait for stop...
    for (StoppableThread thread : threads) {
      thread.join();
    }
    // try and wait for any replications and what not to finish...
    Thread.sleep(2000);
    // wait until there are no recoveries...
    waitForThingsToLevelOut(Integer.MAX_VALUE);
    // make sure we again have leaders for each shard (inclusive bound, see above)
    for (int j = 1; j <= sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 30000);
    }
    commit();

    // TODO: assert we didnt kill everyone
    zkStateReader.updateLiveNodes();
    assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);

    // we expect full throttle fails, but cloud client should not easily fail
    for (StoppableThread thread : threads) {
      if (thread instanceof StoppableIndexingThread && !(thread instanceof FullThrottleStoppableIndexingThread)) {
        int failCount = ((StoppableIndexingThread) thread).getFailCount();
        assertFalse("There were too many update fails (" + failCount + " > " + FAIL_TOLERANCE + ") - we expect it can happen, but shouldn't easily", failCount > FAIL_TOLERANCE);
      }
    }

    Set<String> addFails = getAddFails(indexThreads);
    Set<String> deleteFails = getDeleteFails(indexThreads);
    // full throttle thread can
    // have request fails
    checkShardConsistency(!runFullThrottle, true, addFails, deleteFails);

    long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    // ensure we have added more than 0 docs
    long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    // report the value that is actually asserted on, alongside the control count
    assertTrue("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", cloudClientDocs > 0);
    if (VERBOSE) {
      System.out.println("control docs:" + ctrlDocs + "\n\n");
    }

    // sometimes we restart zookeeper as well
    if (random().nextBoolean()) {
      restartZk(1000 * (5 + random().nextInt(4)));
    }
    try (CloudSolrClient client = createCloudClient("collection1")) {
      createCollection(null, "testcollection", 1, 1, 1, client, null, "conf1");
    }
    List<Integer> numShardsNumReplicas = new ArrayList<>(2);
    numShardsNumReplicas.add(1);
    numShardsNumReplicas.add(1);
    checkForCollection("testcollection", numShardsNumReplicas, null);
    testSuccessful = true;
  } finally {
    if (!testSuccessful) {
      printLayout();
    }
  }
}
Aggregations