use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class ZkStateReaderTest method testStateFormatUpdate.
/**
 * Writes collection "c1" first with stateFormat 1 (stored inside the shared
 * /clusterstate.json) and then with stateFormat 2 (stored in its own state.json). After each
 * write the test checks where ZooKeeper holds the state and which state format the reader reports.
 *
 * @param explicitRefresh if true the reader is refreshed through forceUpdateCollection;
 *                        otherwise the test blocks in waitForState until the expected state shows up
 * @param isInteresting   if true "c1" is registered with the reader via registerCore before any write
 */
public void testStateFormatUpdate(boolean explicitRefresh, boolean isInteresting) throws Exception {
  String zkDir = createTempDir("testStateFormatUpdate").toFile().getAbsolutePath();
  ZkTestServer server = new ZkTestServer(zkDir);
  SolrZkClient zkClient = null;
  ZkStateReader reader = null;
  try {
    server.run();
    AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
    AbstractZkTestCase.makeSolrZkNode(server.getZkHost());

    zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
    ZkController.createClusterZkNodes(zkClient);

    reader = new ZkStateReader(zkClient);
    reader.createClusterStateWatchersAndUpdate();
    if (isInteresting) {
      reader.registerCore("c1");
    }

    ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());
    final String c1Path = ZkStateReader.COLLECTIONS_ZKNODE + "/c1";
    final String c1StatePath = c1Path + "/state.json";
    zkClient.makePath(c1Path, true);

    // --- phase 1: create the collection with stateFormat = 1 ---
    DocCollection legacyState = new DocCollection("c1", new HashMap<>(), new HashMap<>(), DocRouter.DEFAULT, 0, ZkStateReader.CLUSTER_STATE);
    ZkWriteCommand createLegacy = new ZkWriteCommand("c1", legacyState);
    writer.enqueueUpdate(reader.getClusterState(), createLegacy, null);
    writer.writePendingUpdates();

    // the collection must live in the shared cluster state and have no state.json of its own
    Map<?, ?> sharedAfterV1 = (Map<?, ?>) Utils.fromJSON(zkClient.getData("/clusterstate.json", null, null, true));
    assertNotNull(sharedAfterV1.get("c1"));
    assertFalse(zkClient.exists(c1StatePath, true));

    if (!explicitRefresh) {
      reader.waitForState("c1", TIMEOUT, TimeUnit.SECONDS, (liveNodes, state) -> state != null);
    } else {
      reader.forceUpdateCollection("c1");
    }
    DocCollection seenAfterV1 = reader.getClusterState().getCollection("c1");
    assertEquals(1, seenAfterV1.getStateFormat());

    // --- phase 2: rewrite the same collection with stateFormat = 2 ---
    DocCollection perCollectionState = new DocCollection("c1", new HashMap<>(), new HashMap<>(), DocRouter.DEFAULT, 0, c1StatePath);
    ZkWriteCommand migrateToV2 = new ZkWriteCommand("c1", perCollectionState);
    writer.enqueueUpdate(reader.getClusterState(), migrateToV2, null);
    writer.writePendingUpdates();

    // now the shared cluster state must no longer list the collection, and state.json must exist
    Map<?, ?> sharedAfterV2 = (Map<?, ?>) Utils.fromJSON(zkClient.getData("/clusterstate.json", null, null, true));
    assertNull(sharedAfterV2.get("c1"));
    assertTrue(zkClient.exists(c1StatePath, true));

    if (!explicitRefresh) {
      reader.waitForState("c1", TIMEOUT, TimeUnit.SECONDS, (liveNodes, state) -> state != null && state.getStateFormat() == 2);
    } else {
      reader.forceUpdateCollection("c1");
    }
    DocCollection seenAfterV2 = reader.getClusterState().getCollection("c1");
    assertEquals(2, seenAfterV2.getStateFormat());
  } finally {
    IOUtils.close(reader, zkClient);
    server.shutdown();
  }
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class ZkStateWriterTest method testZkStateWriterBatching.
/**
 * Checks the batching decisions reported by ZkStateWriter#maybeFlushBefore and
 * ZkStateWriter#maybeFlushAfter: deletes, repeated updates to one collection, and updates to
 * collections of the same stateFormat are expected to batch (no flush), whereas interleaving
 * stateFormat=1 and stateFormat=2 collections is expected to force a flush.
 */
public void testZkStateWriterBatching() throws Exception {
  String zkDir = createTempDir("testZkStateWriterBatching").toFile().getAbsolutePath();
  ZkTestServer server = new ZkTestServer(zkDir);
  SolrZkClient zkClient = null;
  try {
    server.run();
    AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
    AbstractZkTestCase.makeSolrZkNode(server.getZkHost());

    zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
    ZkController.createClusterZkNodes(zkClient);

    try (ZkStateReader reader = new ZkStateReader(zkClient)) {
      reader.createClusterStateWatchersAndUpdate();
      ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());

      // a command with a null collection is a delete
      assertFalse("Deletes can always be batched", writer.maybeFlushBefore(new ZkWriteCommand("xyz", null)));
      assertFalse("Deletes can always be batched", writer.maybeFlushAfter(new ZkWriteCommand("xyz", null)));

      final String c1Path = ZkStateReader.COLLECTIONS_ZKNODE + "/c1";
      final String c2Path = ZkStateReader.COLLECTIONS_ZKNODE + "/c2";
      zkClient.makePath(c1Path, true);
      zkClient.makePath(c2Path, true);

      // create new collection with stateFormat = 2
      ZkWriteCommand cmdC1 = new ZkWriteCommand("c1", new DocCollection("c1", new HashMap<>(), new HashMap<>(), DocRouter.DEFAULT, 0, c1Path));
      assertFalse("First requests can always be batched", writer.maybeFlushBefore(cmdC1));
      ClusterState pendingState = writer.enqueueUpdate(reader.getClusterState(), cmdC1, null);

      ZkWriteCommand cmdC2 = new ZkWriteCommand("c2", new DocCollection("c2", new HashMap<>(), new HashMap<>(), DocRouter.DEFAULT, 0, c2Path));
      assertFalse("Different (new) collection create can be batched together with another create", writer.maybeFlushBefore(cmdC2));

      // simulate three state changes on same collection, all should be batched together before
      for (int attempt = 0; attempt < 3; attempt++) {
        assertFalse(writer.maybeFlushBefore(cmdC1));
      }
      // and after too
      for (int attempt = 0; attempt < 3; attempt++) {
        assertFalse(writer.maybeFlushAfter(cmdC1));
      }

      // simulate three state changes on two different collections with stateFormat=2, all should be batched;
      // flushAfter has to be called each time as it updates the internal batching related info
      for (ZkWriteCommand sameFormatCmd : new ZkWriteCommand[] {cmdC1, cmdC2, cmdC1}) {
        assertFalse(writer.maybeFlushBefore(sameFormatCmd));
        assertFalse(writer.maybeFlushAfter(sameFormatCmd));
      }

      // create a collection in stateFormat = 1 i.e. inside the main cluster state
      ZkWriteCommand cmdC3 = new ZkWriteCommand("c3", new DocCollection("c3", new HashMap<>(), new HashMap<>(), DocRouter.DEFAULT, 0, ZkStateReader.CLUSTER_STATE));
      writer.enqueueUpdate(pendingState, cmdC3, null);

      // simulate three state changes in c3, all should be batched
      for (int attempt = 0; attempt < 3; attempt++) {
        assertFalse(writer.maybeFlushBefore(cmdC3));
        assertFalse(writer.maybeFlushAfter(cmdC3));
      }

      // simulate state change in c3 (stateFormat=1) interleaved with state changes from c1,c2 (stateFormat=2)
      // none should be batched together
      assertFalse(writer.maybeFlushBefore(cmdC3));
      assertFalse(writer.maybeFlushAfter(cmdC3));
      for (ZkWriteCommand alternatingCmd : new ZkWriteCommand[] {cmdC1, cmdC3, cmdC2}) {
        assertTrue("different stateFormat, should be flushed", writer.maybeFlushBefore(alternatingCmd));
        assertFalse(writer.maybeFlushAfter(alternatingCmd));
      }
    }
  } finally {
    IOUtils.close(zkClient);
    server.shutdown();
  }
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class ZkStateWriterTest method testSingleLegacyCollection.
/**
 * Creates a single stateFormat=1 collection "c1" through ZkStateWriter and verifies that it
 * ends up inside /clusterstate.json and that no per-collection state.json znode was created.
 */
public void testSingleLegacyCollection() throws Exception {
  String zkDir = createTempDir("testSingleLegacyCollection").toFile().getAbsolutePath();
  ZkTestServer server = new ZkTestServer(zkDir);
  SolrZkClient zkClient = null;
  try {
    server.run();
    AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
    AbstractZkTestCase.makeSolrZkNode(server.getZkHost());

    zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
    ZkController.createClusterZkNodes(zkClient);

    try (ZkStateReader reader = new ZkStateReader(zkClient)) {
      reader.createClusterStateWatchersAndUpdate();
      ZkStateWriter writer = new ZkStateWriter(reader, new Overseer.Stats());

      final String c1Path = ZkStateReader.COLLECTIONS_ZKNODE + "/c1";
      zkClient.makePath(c1Path, true);

      // create new collection with stateFormat = 1
      DocCollection legacyCollection = new DocCollection("c1", new HashMap<String, Slice>(), new HashMap<String, Object>(), DocRouter.DEFAULT, 0, ZkStateReader.CLUSTER_STATE);
      ZkWriteCommand createLegacy = new ZkWriteCommand("c1", legacyCollection);
      writer.enqueueUpdate(reader.getClusterState(), createLegacy, null);
      writer.writePendingUpdates();

      // the collection is expected in the shared cluster state only
      Map<?, ?> sharedState = (Map<?, ?>) Utils.fromJSON(zkClient.getData("/clusterstate.json", null, null, true));
      assertNotNull(sharedState.get("c1"));
      assertFalse(zkClient.exists(c1Path + "/state.json", true));
    }
  } finally {
    IOUtils.close(zkClient);
    server.shutdown();
  }
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class ShardSplitTest method testSplitWithChaosMonkey.
/**
 * Splits SHARD1 of the default collection while a "monkey" collection-state watcher kills the
 * SHARD1 leader jetty as soon as sub-shard SHARD1_0 shows more than one replica.
 *
 * <p>Flow: index briefly, commit, start the monkey, issue an async split and wait for its status,
 * restart the killed jetty, and then verify that all indexed documents are present and, if the
 * sub-shards became active, that their replicas are consistent. The split itself is allowed to fail.
 */
@Test
public void testSplitWithChaosMonkey() throws Exception {
  waitForThingsToLevelOut(15);

  List<StoppableIndexingThread> indexers = new ArrayList<>();
  try {
    for (int i = 0; i < 1; i++) {
      StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, cloudClient, String.valueOf(i), true);
      indexers.add(thread);
      thread.start();
    }
    // give the indexers some time to do their work
    Thread.sleep(1000);
  } catch (Exception e) {
    log.error("Error in test", e);
  } finally {
    for (StoppableIndexingThread indexer : indexers) {
      indexer.safeStop();
      indexer.join();
    }
  }
  cloudClient.commit();
  controlClient.commit();

  AtomicBoolean stop = new AtomicBoolean();
  AtomicBoolean killed = new AtomicBoolean(false);
  Runnable monkey = new Runnable() {
    @Override
    public void run() {
      ZkStateReader zkStateReader = cloudClient.getZkStateReader();
      zkStateReader.registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
        @Override
        public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
          if (stop.get()) {
            // abort and remove the watch
            return true;
          }
          Slice slice = collectionState.getSlice(SHARD1_0);
          if (slice != null && slice.getReplicas().size() > 1) {
            // ensure that only one watcher invocation thread can kill!
            if (killed.compareAndSet(false, true)) {
              log.info("Monkey thread found 2 replicas for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
              CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
              try {
                Thread.sleep(1000 + random().nextInt(500));
                ChaosMonkey.kill(cjetty);
                stop.set(true);
                return true;
              } catch (Exception e) {
                log.error("Monkey unable to kill jetty at port " + cjetty.jetty.getLocalPort(), e);
              }
            }
          }
          log.info("Monkey thread found only one replica for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
          return false;
        }
      });
    }
  };

  /*
  somehow the cluster state object inside this zk state reader has static copy of the collection which is never updated
  so any call to waitForRecoveriesToFinish just keeps looping until timeout.
  We workaround by explicitly registering the collection as an interesting one so that it is watched by ZkStateReader
  see SOLR-9440. Todo remove this hack after SOLR-9440 is fixed.
  */
  cloudClient.getZkStateReader().registerCore(AbstractDistribZkTestBase.DEFAULT_COLLECTION);

  Thread monkeyThread = new Thread(monkey);
  monkeyThread.start();
  try {
    CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
    splitShard.setShardName(SHARD1);
    String asyncId = splitShard.processAsync(cloudClient);
    RequestStatusState splitStatus = null;
    try {
      splitStatus = CollectionAdminRequest.requestStatus(asyncId).waitFor(cloudClient, 120);
    } catch (Exception e) {
      log.warn("Failed to get request status, maybe because the overseer node was shutdown by monkey", e);
    }

    // we don't care if the split failed because we are injecting faults and it is likely
    // that the split has failed but in any case we want to assert that all docs that got
    // indexed are available in SolrCloud and if the split succeeded then all replicas of the sub-shard
    // must be consistent (i.e. have same numdocs)

    // Report the status that was actually observed; it stays null when the status request itself failed.
    log.info("Shard split request state is {}", splitStatus == null ? "unknown" : splitStatus);
    stop.set(true);
    monkeyThread.join();

    Set<String> addFails = new HashSet<>();
    Set<String> deleteFails = new HashSet<>();
    for (StoppableIndexingThread indexer : indexers) {
      addFails.addAll(indexer.getAddFails());
      deleteFails.addAll(indexer.getDeleteFails());
    }

    CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
    log.info("Starting shard1 leader jetty at port {}", cjetty.jetty.getLocalPort());
    ChaosMonkey.start(cjetty.jetty);
    cloudClient.getZkStateReader().forceUpdateCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
    log.info("Current collection state: {}", printClusterStateInfo(AbstractDistribZkTestBase.DEFAULT_COLLECTION));

    boolean replicaCreationsFailed = false;
    if (splitStatus == RequestStatusState.FAILED) {
      // either one or more replica creation failed (because it may have been created on the same parent shard leader node)
      // or the split may have failed while trying to soft-commit *after* all replicas have been created
      // the latter counts as a successful switch even if the API doesn't say so
      // so we must find a way to distinguish between the two
      // an easy way to do that is to look at the sub-shard replicas and check if the replica core actually exists
      // instead of existing solely inside the cluster state
      DocCollection collectionState = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
      Slice slice10 = collectionState.getSlice(SHARD1_0);
      Slice slice11 = collectionState.getSlice(SHARD1_1);
      if (slice10 != null && slice11 != null) {
        for (Replica replica : slice10) {
          if (!doesReplicaCoreExist(replica)) {
            replicaCreationsFailed = true;
            break;
          }
        }
        for (Replica replica : slice11) {
          if (!doesReplicaCoreExist(replica)) {
            replicaCreationsFailed = true;
            break;
          }
        }
      }
    }

    // true if sub-shard states switch to 'active' eventually
    AtomicBoolean areSubShardsActive = new AtomicBoolean(false);
    if (!replicaCreationsFailed) {
      // all sub-shard replicas were created successfully so all cores must recover eventually
      waitForRecoveriesToFinish(AbstractDistribZkTestBase.DEFAULT_COLLECTION, true);
      // let's wait for the overseer to switch shard states
      CountDownLatch latch = new CountDownLatch(1);
      cloudClient.getZkStateReader().registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
        @Override
        public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
          Slice parent = collectionState.getSlice(SHARD1);
          Slice slice10 = collectionState.getSlice(SHARD1_0);
          Slice slice11 = collectionState.getSlice(SHARD1_1);
          if (slice10 != null && slice11 != null && parent.getState() == Slice.State.INACTIVE && slice10.getState() == Slice.State.ACTIVE && slice11.getState() == Slice.State.ACTIVE) {
            areSubShardsActive.set(true);
            latch.countDown();
            // removes the watch
            return true;
          } else if (slice10 != null && slice11 != null && parent.getState() == Slice.State.ACTIVE && slice10.getState() == Slice.State.RECOVERY_FAILED && slice11.getState() == Slice.State.RECOVERY_FAILED) {
            areSubShardsActive.set(false);
            latch.countDown();
            return true;
          }
          return false;
        }
      });

      // await() returns false when the timeout elapses before the latch reaches zero
      if (!latch.await(2, TimeUnit.MINUTES)) {
        // sanity check
        fail("We think that split was successful but sub-shard states were not updated even after 2 minutes.");
      }
    }

    // for visibility of results on sub-shards
    cloudClient.commit();
    checkShardConsistency(true, true, addFails, deleteFails);

    long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    // ensure we have added more than 0 docs
    long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
    assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);
    assertEquals("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", ctrlDocs, cloudClientDocs);

    // check the consistency of the sub-shard replicas explicitly; presumably the generic
    // checkShardConsistency call above does not handle new shards/replicas so well
    if (areSubShardsActive.get()) {
      ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
      DocCollection collection = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
      int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
      assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
      numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_1));
      assertEquals("We should have checked consistency for exactly 2 replicas of shard1_1", 2, numReplicasChecked);
    }
  } finally {
    // make sure the watcher unregisters itself and the monkey thread has finished, even on failure
    stop.set(true);
    monkeyThread.join();
  }
}
use of org.apache.solr.common.cloud.ZkStateReader in project lucene-solr by apache.
the class TestAuthenticationFramework method collectionCreateSearchDelete.
/**
 * Runs a create / index / query / delete / re-create cycle for collection "testcollection"
 * against the given mini cluster, asserting that the re-created collection starts out empty.
 *
 * @param miniCluster the cluster to run against; all its jetty runners are expected to be running
 */
public void collectionCreateSearchDelete(MiniSolrCloudCluster miniCluster) throws Exception {
  final String collectionName = "testcollection";
  final CloudSolrClient client = miniCluster.getSolrClient();

  // sanity-check the cluster before touching it
  assertNotNull(miniCluster.getZkServer());
  List<JettySolrRunner> runners = miniCluster.getJettySolrRunners();
  assertEquals(NUM_SERVERS, runners.size());
  for (JettySolrRunner runner : runners) {
    assertTrue(runner.isRunning());
  }

  // create collection
  log.info("#### Creating a collection");
  final String firstAsyncId;
  if (random().nextBoolean()) {
    firstAsyncId = null;
  } else {
    firstAsyncId = "asyncId(" + collectionName + ".create)=" + random().nextInt();
  }
  createCollection(miniCluster, collectionName, firstAsyncId);

  ZkStateReader zkStateReader = miniCluster.getSolrClient().getZkStateReader();
  AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);

  // modify/query collection
  log.info("#### updating a querying collection");
  client.setDefaultCollection(collectionName);
  SolrInputDocument sampleDoc = new SolrInputDocument();
  sampleDoc.setField("id", "1");
  client.add(sampleDoc);
  client.commit();

  SolrQuery matchAll = new SolrQuery();
  matchAll.setQuery("*:*");
  QueryResponse response = client.query(matchAll);
  assertEquals(1, response.getResults().getNumFound());

  // delete the collection we created earlier
  CollectionAdminRequest.deleteCollection(collectionName).process(miniCluster.getSolrClient());

  // create it again
  final String secondAsyncId;
  if (random().nextBoolean()) {
    secondAsyncId = null;
  } else {
    secondAsyncId = "asyncId(" + collectionName + ".create)=" + random().nextInt();
  }
  createCollection(miniCluster, collectionName, secondAsyncId);
  AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);

  // check that there's no left-over state
  assertEquals(0, client.query(new SolrQuery("*:*")).getResults().getNumFound());
  client.add(sampleDoc);
  client.commit();
  assertEquals(1, client.query(new SolrQuery("*:*")).getResults().getNumFound());
}
Aggregations