use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class CoreSorterTest method getMockContainer.
/**
 * Builds a mocked {@link CoreContainer} that reports itself as ZooKeeper-aware
 * and whose {@link ZkController} exposes a mocked {@link ClusterState} returning
 * this test's {@code liveNodes}.
 *
 * @return the stubbed container mock
 */
private CoreContainer getMockContainer() {
  // Innermost collaborator first: the cluster state that knows the live nodes.
  ClusterState clusterState = mock(ClusterState.class);
  when(clusterState.getLiveNodes()).thenReturn(liveNodes);

  // The ZK controller hands out that cluster state.
  ZkController zkController = mock(ZkController.class);
  when(zkController.getClusterState()).thenReturn(clusterState);

  // The container is cloud-enabled and hands out the controller.
  CoreContainer container = mock(CoreContainer.class);
  when(container.isZooKeeperAware()).thenReturn(true);
  when(container.getZkController()).thenReturn(zkController);
  return container;
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class TestInPlaceUpdatesDistrib method delayedReorderingFetchesMissingUpdateFromLeaderTest.
/**
 * Sends a full-document update followed by two in-place increments for doc id 1 while a
 * delay is injected on the second jetty of SHARD1, then asserts that all three replicas of
 * SHARD1 stay ACTIVE and that every client sees the final field values.
 * <p>
 * A second round repeats the same updates with a trailing delete and asserts that the
 * document is gone on the leader and both non-leaders.
 * </p>
 *
 * @throws Exception if any of the Solr/ZooKeeper calls or the sleeps fail
 */
private void delayedReorderingFetchesMissingUpdateFromLeaderTest() throws Exception {
// Start from an empty, committed index.
clearIndex();
commit();
float inplace_updatable_float = 1F;
buildRandomIndex(inplace_updatable_float, Collections.singletonList(1));
float newinplace_updatable_float = 100F;
// Round 1 updates: one regular update setting the float to 100, then two "inc 1" updates,
// so the expected final value is newinplace_updatable_float + 2.
List<UpdateRequest> updates = new ArrayList<>();
updates.add(regularUpdateRequest("id", 1, "title_s", "title1_new", "id_i", 1, "inplace_updatable_float", newinplace_updatable_float));
updates.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));
updates.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));
// The next request to replica2 will be delayed by 6 secs (timeout is 5s)
// NOTE(review): presumably addDelay(reason, requestOrdinal, delayMillis) -- confirm against the debug filter
shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 6000);
ExecutorService threadpool = ExecutorUtil.newMDCAwareFixedThreadPool(updates.size() + 1, new DefaultSolrThreadFactory(getTestName()));
for (UpdateRequest update : updates) {
AsyncUpdateWithRandomCommit task = new AsyncUpdateWithRandomCommit(update, cloudClient, random().nextLong());
threadpool.submit(task);
// We can't guarantee/trust what order the updates are executed in, since multiple threads
// are involved, but we're trying to bias the thread scheduling to run them in the order submitted
Thread.sleep(100);
}
// No more tasks will be submitted; wait for the in-flight updates to finish.
threadpool.shutdown();
assertTrue("Thread pool didn't terminate within 15 secs", threadpool.awaitTermination(15, TimeUnit.SECONDS));
commit();
// Check every 10ms, 100 times, for a replica to go down (& assert that it doesn't)
for (int i = 0; i < 100; i++) {
Thread.sleep(10);
cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
ClusterState state = cloudClient.getZkStateReader().getClusterState();
// Count the replicas of SHARD1 currently in the ACTIVE state; all 3 are expected to be.
int numActiveReplicas = 0;
for (Replica rep : state.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) if (rep.getState().equals(Replica.State.ACTIVE))
numActiveReplicas++;
assertEquals("The replica receiving reordered updates must not have gone down", 3, numActiveReplicas);
}
// Every client must see the fully applied result: 100 + 1 + 1 and the new title.
for (SolrClient client : clients) {
log.info("Testing client (Fetch missing test): " + ((HttpSolrClient) client).getBaseURL());
log.info("Version at " + ((HttpSolrClient) client).getBaseURL() + " is: " + getReplicaValue(client, 1, "_version_"));
assertReplicaValue(client, 1, "inplace_updatable_float", (newinplace_updatable_float + 2.0f), "inplace_updatable_float didn't match for replica at client: " + ((HttpSolrClient) client).getBaseURL());
assertReplicaValue(client, 1, "title_s", "title1_new", "Title didn't match for replica at client: " + ((HttpSolrClient) client).getBaseURL());
}
// Try another round of these updates, this time with a delete request at the end.
// This is to ensure that the fetch missing update from leader doesn't bomb out if the
// document has been deleted on the leader later on
{
clearIndex();
commit();
// Drop the delay configured for round 1 before installing the new ones.
shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().unsetDelay();
// The update list now holds the three round-1 updates plus a delete of doc id 1.
updates.add(regularDeleteRequest(1));
// the first update
shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 5999);
// the delete update
shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 4, 5998);
// The pool from round 1 was shut down above, so a fresh one is created here.
threadpool = ExecutorUtil.newMDCAwareFixedThreadPool(updates.size() + 1, new DefaultSolrThreadFactory(getTestName()));
for (UpdateRequest update : updates) {
AsyncUpdateWithRandomCommit task = new AsyncUpdateWithRandomCommit(update, cloudClient, random().nextLong());
threadpool.submit(task);
// We can't guarantee/trust what order the updates are executed in, since multiple threads
// are involved, but we're trying to bias the thread scheduling to run them in the order submitted
Thread.sleep(100);
}
threadpool.shutdown();
assertTrue("Thread pool didn't terminate within 15 secs", threadpool.awaitTermination(15, TimeUnit.SECONDS));
commit();
// Assert that no znode exists at the leader-initiated-recovery (LIR) path of SHARD1,
// i.e. that LIR has not kicked in.
// NOTE(review): the path is built from DEFAULT_TEST_COLLECTION_NAME while the rest of this
// method uses DEFAULT_COLLECTION -- confirm both name the same collection.
ZkController zkController = shardToLeaderJetty.get(SHARD1).jetty.getCoreContainer().getZkController();
String lirPath = zkController.getLeaderInitiatedRecoveryZnodePath(DEFAULT_TEST_COLLECTION_NAME, SHARD1);
assertFalse(zkController.getZkClient().exists(lirPath, true));
// Check every 10ms, 100 times, for a replica to go down (& assert that it doesn't)
for (int i = 0; i < 100; i++) {
Thread.sleep(10);
cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
ClusterState state = cloudClient.getZkStateReader().getClusterState();
// Count the replicas of SHARD1 currently in the ACTIVE state; all 3 are expected to be.
int numActiveReplicas = 0;
for (Replica rep : state.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) if (rep.getState().equals(Replica.State.ACTIVE))
numActiveReplicas++;
assertEquals("The replica receiving reordered updates must not have gone down", 3, numActiveReplicas);
}
// The delete must be visible on each core individually, hence the non-distributed lookup.
for (SolrClient client : new SolrClient[] { LEADER, NONLEADERS.get(0), NONLEADERS.get(1) }) {
// nonleader 0 re-ordered replica, nonleader 1 well-ordered replica
SolrDocument doc = client.getById(String.valueOf(1), params("distrib", "false"));
assertNull("This doc was supposed to have been deleted, but was: " + doc, doc);
}
}
log.info("delayedReorderingFetchesMissingUpdateFromLeaderTest: This test passed fine...");
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class CdcrRequestHandler method handleCollectionCheckpointAction.
/**
 * This action is generally executed on the target cluster in order to retrieve the latest update checkpoint.
 * This checkpoint is used on the source cluster to setup the
 * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader} of a shard leader.
 * <p>
 * This method will execute in parallel one
 * {@link org.apache.solr.handler.CdcrParams.CdcrAction#SHARDCHECKPOINT} request per shard leader. It will
 * then pick the lowest version number as checkpoint. Picking the lowest amongst all shards will ensure that we do not
 * pick a checkpoint that is ahead of the source cluster. This can occur when other shard leaders are sending new
 * updates to the target cluster while we are currently instantiating the
 * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader}.
 * This solution only works in scenarios where the topology of the source and target clusters are identical.
 * </p>
 * <p>
 * The result is added to {@code rsp} under {@link CdcrParams#CHECKPOINT}. If the collection has no
 * active slices, no shard is queried and the reported checkpoint stays at {@link Long#MAX_VALUE}.
 * </p>
 *
 * @param req the incoming request (not read by this method)
 * @param rsp the response that receives the computed checkpoint
 * @throws SolrException with {@code SERVER_ERROR} if a shard request fails or the thread is
 *         interrupted while the shard checkpoints are being collected
 */
private void handleCollectionCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
  ZkController zkController = core.getCoreContainer().getZkController();
  try {
    zkController.getZkStateReader().forceUpdateCollection(collection);
  } catch (Exception e) {
    // Refreshing the cluster state is best effort, so a failure is only logged. An interrupt,
    // however, must not be lost: restore the flag so the blocking calls below (and our callers)
    // still observe it, consistent with the InterruptedException handling further down.
    if (e instanceof InterruptedException) {
      Thread.currentThread().interrupt();
    }
    log.warn("Error when updating cluster state", e);
  }

  ClusterState cstate = zkController.getClusterState();
  Collection<Slice> shards = cstate.getActiveSlices(collection);

  ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
  long checkpoint = Long.MAX_VALUE;
  try {
    // One checkpoint request per shard leader.
    List<Callable<Long>> callables = new ArrayList<>();
    for (Slice shard : shards) {
      ZkNodeProps leaderProps = zkController.getZkStateReader().getLeaderRetry(collection, shard.getName());
      ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps);
      callables.add(new SliceCheckpointCallable(nodeProps.getCoreUrl(), path));
    }

    // invokeAll blocks until every callable has completed.
    for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
      long version = future.get();
      if (version < checkpoint) {
        // we must take the lowest checkpoint from all the shards
        checkpoint = version;
      }
    }
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
  } catch (ExecutionException e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
  } finally {
    parallelExecutor.shutdown();
  }

  rsp.add(CdcrParams.CHECKPOINT, checkpoint);
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class DocExpirationUpdateProcessorFactory method iAmInChargeOfPeriodicDeletes.
/**
 * <p>
 * Tells the Runnable managed by this factory whether it is the one that should
 * perform the periodic deletes right now.
 * </p>
 * <p>
 * Without a ZkController (simple standalone installations) the answer is always
 * true. In cloud mode the answer is true if and only if this core is currently the
 * leader of the active slice that sorts first according to
 * {@code COMPARE_SLICES_BY_NAME}.
 * </p>
 * <p>
 * When the answer is false a message explaining why may have been logged; the
 * "not in charge" message is only emitted on the transition from being in charge
 * to not being in charge, so the log is not flooded.
 * </p>
 *
 * @return true if this core should trigger the periodic delete, false otherwise
 */
private boolean iAmInChargeOfPeriodicDeletes() {
  final ZkController zkController = core.getCoreContainer().getZkController();
  if (zkController == null) {
    // not running in cloud mode: nobody else could be in charge
    return true;
  }

  // Re-using the existing per-shard leader is much simpler than running our own
  // "leader" election across all replicas of all shards, because:
  //  a) every shard already has a leader
  //  b) shard names must be unique
  //  c) ClusterState is already being "watched" by ZkController, so no extra zk hits
  //  d) several instances of this factory (in multiple chains) may exist per
  //     collection, so choosing an ephemeral node name for an election would be tricky
  final CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
  final String collectionName = cloudDesc.getCollectionName();

  final List<Slice> activeSlices = new ArrayList<>(zkController.getClusterState().getActiveSlices(collectionName));
  Collections.sort(activeSlices, COMPARE_SLICES_BY_NAME);
  if (activeSlices.isEmpty()) {
    log.error("Collection {} has no active Slices?", collectionName);
    return false;
  }

  final Replica leaderOfFirstSlice = activeSlices.get(0).getLeader();
  if (leaderOfFirstSlice == null) {
    log.warn("Slice in charge of periodic deletes for {} does not currently have a leader", collectionName);
    return false;
  }

  final boolean inChargeNow = leaderOfFirstSlice.getName().equals(cloudDesc.getCoreNodeName());
  if (!inChargeNow && previouslyInChargeOfDeletes) {
    // avoid spamming the logs: only report when the previous state was "in charge"
    // and that is no longer the case
    log.info("Not currently in charge of periodic deletes for this collection, " + "will not trigger delete or log again until this changes");
  }
  previouslyInChargeOfDeletes = inChargeNow;
  return inChargeNow;
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class PeerSync method msg.
// Prefix used at the start of PeerSync-related debug messages; it carries the core name
// (and the node's base URL when a ZkController is available) for correlation.
private String msg() {
  final ZkController zk = uhandler.core.getCoreContainer().getZkController();
  // No ZkController means no base URL to report, so the url part stays empty.
  final String baseUrl = (zk == null) ? "" : zk.getBaseUrl();
  // TODO: core name turns up blank in many tests - find URL if cloud enabled?
  return "PeerSync: core=" + uhandler.core.getName() + " url=" + baseUrl + " ";
}
Aggregations