use of org.apache.solr.client.solrj.impl.CloudSolrClient in project lucene-solr by apache.
the class ChaosMonkeyNothingIsSafeTest method test.
@Test
public void test() throws Exception {
  cloudClient.setSoTimeout(clientSoTimeout);
  boolean testSuccessful = false;
  try {
    handle.clear();
    handle.put("timestamp", SKIPVAL);
    ZkStateReader zkStateReader = cloudClient.getZkStateReader();
    // make sure we have leaders for each shard
    for (int j = 1; j < sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 10000);
    }
    // make sure we again have leaders for each shard
    waitForRecoveriesToFinish(false);
    // we cannot do delete by query
    // as it's not supported for recovery
    del("*:*");
    List<StoppableThread> threads = new ArrayList<>();
    List<StoppableIndexingThread> indexTreads = new ArrayList<>();
    int threadCount = TEST_NIGHTLY ? 3 : 1;
    int i = 0;
    for (i = 0; i < threadCount; i++) {
      StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
      threads.add(indexThread);
      indexTreads.add(indexThread);
      indexThread.start();
    }
    threadCount = 1;
    i = 0;
    for (i = 0; i < threadCount; i++) {
      StoppableSearchThread searchThread = new StoppableSearchThread(cloudClient);
      threads.add(searchThread);
      searchThread.start();
    }
    // TODO: we only do this sometimes so that we can sometimes compare against control,
    // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
    boolean runFullThrottle = random().nextBoolean();
    if (runFullThrottle) {
      FullThrottleStoppableIndexingThread ftIndexThread = new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
      threads.add(ftIndexThread);
      ftIndexThread.start();
    }
    chaosMonkey.startTheMonkey(true, 10000);
    try {
      long runLength;
      if (RUN_LENGTH != -1) {
        runLength = RUN_LENGTH;
      } else {
        int[] runTimes;
        if (TEST_NIGHTLY) {
          runTimes = new int[] { 5000, 6000, 10000, 15000, 25000, 30000, 30000, 45000, 90000, 120000 };
        } else {
          runTimes = new int[] { 5000, 7000, 15000 };
        }
        runLength = runTimes[random().nextInt(runTimes.length - 1)];
      }
      Thread.sleep(runLength);
    } finally {
      chaosMonkey.stopTheMonkey();
    }
    // ideally this should go into chaosMonkey
    restartZk(1000 * (5 + random().nextInt(4)));
    for (StoppableThread indexThread : threads) {
      indexThread.safeStop();
    }
    // wait for stop...
    for (StoppableThread indexThread : threads) {
      indexThread.join();
    }
    // try and wait for any replications and what not to finish...
    Thread.sleep(2000);
    // wait until there are no recoveries...
    //Math.round((runLength / 1000.0f / 3.0f)));
    waitForThingsToLevelOut(Integer.MAX_VALUE);
    // make sure we again have leaders for each shard
    for (int j = 1; j < sliceCount; j++) {
      zkStateReader.getLeaderRetry(DEFAULT_COLLECTION, "shard" + j, 30000);
    }
    commit();
    // TODO: assert we didnt kill everyone
    zkStateReader.updateLiveNodes();
    assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);
    // we expect full throttle fails, but cloud client should not easily fail
    for (StoppableThread indexThread : threads) {
      if (indexThread instanceof StoppableIndexingThread && !(indexThread instanceof FullThrottleStoppableIndexingThread)) {
        int failCount = ((StoppableIndexingThread) indexThread).getFailCount();
        assertFalse("There were too many update fails (" + failCount + " > " + FAIL_TOLERANCE + ") - we expect it can happen, but shouldn't easily", failCount > FAIL_TOLERANCE);
      }
    }
    Set<String> addFails = getAddFails(indexTreads);
    Set<String> deleteFails = getDeleteFails(indexTreads);
    // full throttle thread can
    // have request fails
    checkShardConsistency(!runFullThrottle, true, addFails, deleteFails);
    long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    // ensure we have added more than 0 docs
    long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);
    if (VERBOSE)
      System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
    // sometimes we restart zookeeper as well
    if (random().nextBoolean()) {
      restartZk(1000 * (5 + random().nextInt(4)));
    }
    try (CloudSolrClient client = createCloudClient("collection1")) {
      createCollection(null, "testcollection", 1, 1, 1, client, null, "conf1");
    }
    List<Integer> numShardsNumReplicas = new ArrayList<>(2);
    numShardsNumReplicas.add(1);
    numShardsNumReplicas.add(1);
    checkForCollection("testcollection", numShardsNumReplicas, null);
    testSuccessful = true;
  } finally {
    if (!testSuccessful) {
      printLayout();
    }
  }
}
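The test above leans on the cloudClient field and createCloudClient helper from the Solr test framework. As a rough standalone sketch (not the test framework's code), a CloudSolrClient from the same SolrJ era could be built along these lines, assuming a reachable ZooKeeper ensemble at zkHost and an existing collection named collection1:
import java.io.IOException;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;

// Minimal sketch, not part of the test: zkHost and "collection1" are assumed values.
public static long countDocs(String zkHost) throws SolrServerException, IOException {
  try (CloudSolrClient client = new CloudSolrClient.Builder().withZkHost(zkHost).build()) {
    client.setDefaultCollection("collection1");
    client.connect();
    return client.query(new SolrQuery("*:*")).getResults().getNumFound();
  }
}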
use of org.apache.solr.client.solrj.impl.CloudSolrClient in project lucene-solr by apache.
the class CollectionsAPIAsyncDistributedZkTest method testSolrJAPICalls.
@Test
public void testSolrJAPICalls() throws Exception {
  final CloudSolrClient client = cluster.getSolrClient();
  RequestStatusState state = CollectionAdminRequest.createCollection("testasynccollectioncreation", "conf1", 1, 1).processAndWait(client, MAX_TIMEOUT_SECONDS);
  assertSame("CreateCollection task did not complete!", RequestStatusState.COMPLETED, state);
  state = CollectionAdminRequest.createCollection("testasynccollectioncreation", "conf1", 1, 1).processAndWait(client, MAX_TIMEOUT_SECONDS);
  assertSame("Recreating a collection with the same name should have failed.", RequestStatusState.FAILED, state);
  state = CollectionAdminRequest.addReplicaToShard("testasynccollectioncreation", "shard1").processAndWait(client, MAX_TIMEOUT_SECONDS);
  assertSame("Add replica did not complete", RequestStatusState.COMPLETED, state);
  state = CollectionAdminRequest.splitShard("testasynccollectioncreation").setShardName("shard1").processAndWait(client, MAX_TIMEOUT_SECONDS * 2);
  assertEquals("Shard split did not complete. Last recorded state: " + state, RequestStatusState.COMPLETED, state);
}
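The same processAndWait pattern applies to the other asynchronous Collections API calls. For example, a cleanup step for the collection created above might look like the sketch below; deleteCollection is standard SolrJ, but this line is not part of the original test:
// Hypothetical cleanup step, not present in the original test.
RequestStatusState deleteState = CollectionAdminRequest
    .deleteCollection("testasynccollectioncreation")
    .processAndWait(client, MAX_TIMEOUT_SECONDS);
assertSame("Delete collection did not complete", RequestStatusState.COMPLETED, deleteState);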
use of org.apache.solr.client.solrj.impl.CloudSolrClient in project lucene-solr by apache.
the class CdcrReplicationHandlerTest method testPartialReplicationWithTruncatedTlog.
/**
* Test the scenario where the slave is killed before receiving a commit. This creates a truncated tlog
* file on the slave node. The replication strategy should detect this truncated file, and fetch the
* non-truncated file from the leader.
*/
@Test
@ShardsFixed(num = 2)
public void testPartialReplicationWithTruncatedTlog() throws Exception {
  CloudSolrClient client = createCloudClient(SOURCE_COLLECTION);
  List<CloudJettyRunner> slaves = this.getShardToSlaveJetty(SOURCE_COLLECTION, SHARD1);
  try {
    for (int i = 0; i < 10; i++) {
      for (int j = i * 20; j < (i * 20) + 20; j++) {
        client.add(getDoc(id, Integer.toString(j)));
        // Stop the slave in the middle of a batch to create a truncated tlog on the slave
        if (j == 45) {
          ChaosMonkey.stop(slaves.get(0).jetty);
        }
      }
      commit(SOURCE_COLLECTION);
    }
  } finally {
    client.close();
  }
  assertNumDocs(200, SOURCE_COLLECTION);
  // Restart the slave node to trigger Replication recovery
  this.restartServer(slaves.get(0));
  // at this stage, the slave should have replicated the 5 missing tlog files
  this.assertUpdateLogsEquals(SOURCE_COLLECTION, 10);
}
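assertNumDocs is a helper from the CDCR test base class and its implementation is not shown here; conceptually it boils down to a match-all count against the collection, roughly like the sketch below (the real helper may retry or otherwise differ in detail):
// Rough sketch of a doc-count check; the real assertNumDocs helper may retry and differ in detail.
private void assertNumDocs(int expected, String collection) throws Exception {
  CloudSolrClient client = createCloudClient(collection);
  try {
    client.commit();
    long found = client.query(new SolrQuery("*:*")).getResults().getNumFound();
    assertEquals(expected, found);
  } finally {
    client.close();
  }
}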
use of org.apache.solr.client.solrj.impl.CloudSolrClient in project incubator-atlas by apache.
the class Solr5Index method clearStorage.
@Override
public void clearStorage() throws BackendException {
  try {
    if (mode != Mode.CLOUD)
      throw new UnsupportedOperationException("Operation only supported for SolrCloud");
    logger.debug("Clearing storage from Solr: {}", solrClient);
    ZkStateReader zkStateReader = ((CloudSolrClient) solrClient).getZkStateReader();
    zkStateReader.updateClusterState();
    ClusterState clusterState = zkStateReader.getClusterState();
    for (String collection : clusterState.getCollections()) {
      logger.debug("Clearing collection [{}] in Solr", collection);
      UpdateRequest deleteAll = newUpdateRequest();
      deleteAll.deleteByQuery("*:*");
      solrClient.request(deleteAll, collection);
    }
  } catch (SolrServerException e) {
    logger.error("Unable to clear storage from index due to server error on Solr.", e);
    throw new PermanentBackendException(e);
  } catch (IOException e) {
    logger.error("Unable to clear storage from index due to low-level I/O error.", e);
    throw new PermanentBackendException(e);
  } catch (Exception e) {
    logger.error("Unable to clear storage from index due to general error.", e);
    throw new PermanentBackendException(e);
  }
}
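newUpdateRequest() above is a Solr5Index helper (presumably attaching commit parameters from the backend configuration). A plain-SolrJ equivalent of the per-collection wipe, with an explicit commit, could look like this sketch; it uses only standard SolrJ calls and assumes the collection name comes from the same ClusterState loop:
import java.io.IOException;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.UpdateRequest;

// Sketch only: delete everything in one collection and commit explicitly.
static void wipeCollection(SolrClient solrClient, String collection) throws SolrServerException, IOException {
  UpdateRequest wipe = new UpdateRequest();
  wipe.deleteByQuery("*:*");
  wipe.process(solrClient, collection);
  solrClient.commit(collection);
}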
use of org.apache.solr.client.solrj.impl.CloudSolrClient in project ddf by codice.
the class SolrCloudClientFactory method isCollectionReady.
private static boolean isCollectionReady(CloudSolrClient client, String collection) {
  RetryPolicy retryPolicy = new RetryPolicy()
      .retryWhen(false)
      .withMaxRetries(30)
      .withDelay(1, TimeUnit.SECONDS);
  boolean collectionCreated = Failsafe.with(retryPolicy)
      .onFailure(failure -> LOGGER.debug(
          "All attempts failed to read Zookeeper state for collection existence (" + collection + ")", failure))
      .get(() -> client.getZkStateReader().getClusterState().hasCollection(collection));
  if (!collectionCreated) {
    LOGGER.debug("Timeout while waiting for collection to be created: " + collection);
    return false;
  }
  boolean shardsStarted = Failsafe.with(retryPolicy)
      .onFailure(failure -> LOGGER.debug(
          "All attempts failed to read Zookeeper state for collection's shard count (" + collection + ")", failure))
      .get(() -> client.getZkStateReader().getClusterState().getSlices(collection).size() == SHARD_COUNT);
  if (!shardsStarted) {
    LOGGER.debug("Timeout while waiting for collection shards to start: " + collection);
  }
  return shardsStarted;
}