use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class QueryElevationComponent method getElevationMap.
/**
 * Returns the elevation map for the given reader, loading and caching it on first use.
 * <p>
 * A map stored in {@code elevationCache} under the {@code null} key takes precedence over
 * any per-reader entry. Otherwise the entry cached for {@code reader} is returned; when there
 * is none, the file named by the {@code CONFIG_FILE} init arg is parsed via
 * {@code loadElevationMap} and the result is cached under {@code reader}. The whole lookup
 * runs while holding the {@code elevationCache} monitor.
 *
 * @param reader the index reader used as the cache key
 * @param core the core that supplies the resource loader, data dir and core container
 * @return the cached or freshly loaded elevation map
 * @throws SolrException if the {@code CONFIG_FILE} init arg is not set
 * @throws Exception if the configuration cannot be opened or parsed
 */
Map<String, ElevationObj> getElevationMap(IndexReader reader, SolrCore core) throws Exception {
  synchronized (elevationCache) {
    Map<String, ElevationObj> map = elevationCache.get(null);
    if (map != null) {
      return map;
    }
    map = elevationCache.get(reader);
    if (map != null) {
      return map;
    }
    String f = initArgs.get(CONFIG_FILE);
    if (f == null) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "QueryElevationComponent must specify argument: " + CONFIG_FILE);
    }
    log.info("Loading QueryElevation from data dir: " + f);
    Config cfg;
    ZkController zkController = core.getCoreContainer().getZkController();
    if (zkController != null) {
      // No explicit input source is passed when a ZkController is present;
      // presumably Config then resolves f through the resource loader -- confirm against Config.
      cfg = new Config(core.getResourceLoader(), f, null, null);
    } else {
      // Close the data-dir stream here even if Config construction fails part-way.
      // NOTE(review): assumes Config fully consumes the InputSource inside its constructor.
      try (InputStream is = VersionedFile.getLatestFile(core.getDataDir(), f)) {
        cfg = new Config(core.getResourceLoader(), f, new InputSource(is), null);
      }
    }
    map = loadElevationMap(cfg);
    elevationCache.put(reader, map);
    return map;
  }
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class TestInPlaceUpdatesDistrib method delayedReorderingFetchesMissingUpdateFromLeaderTest.
/**
 * Exercises re-ordered in-place updates for doc 1 while a delay is registered on the jetty at
 * index 1 of SHARD1.
 * <p>
 * Round one submits one regular update followed by two "inc" updates, then asserts that three
 * replicas of SHARD1 stay ACTIVE and that every client reports the expected float value and
 * title. Round two clears the index, repeats the same updates with a delete appended, asserts
 * that the leader-initiated-recovery znode does not exist, that the replicas stay ACTIVE, and
 * that the doc is absent on LEADER and on both NONLEADERS.
 */
private void delayedReorderingFetchesMissingUpdateFromLeaderTest() throws Exception {
  clearIndex();
  commit();

  final float initialValue = 1F;
  buildRandomIndex(initialValue, Collections.singletonList(1));

  final float replacementValue = 100F;
  List<UpdateRequest> pending = new ArrayList<>();
  pending.add(regularUpdateRequest("id", 1, "title_s", "title1_new", "id_i", 1, "inplace_updatable_float", replacementValue));
  pending.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));
  pending.add(regularUpdateRequest("id", 1, "inplace_updatable_float", map("inc", 1)));

  // The next request to replica2 will be delayed by 6 secs (timeout is 5s)
  shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 6000);

  ExecutorService executor = ExecutorUtil.newMDCAwareFixedThreadPool(pending.size() + 1, new DefaultSolrThreadFactory(getTestName()));
  for (UpdateRequest request : pending) {
    executor.submit(new AsyncUpdateWithRandomCommit(request, cloudClient, random().nextLong()));
    // Execution order across threads cannot be guaranteed; the short pause only
    // biases the scheduler towards running the updates in submission order.
    Thread.sleep(100);
  }
  executor.shutdown();
  assertTrue("Thread pool didn't terminate within 15 secs", executor.awaitTermination(15, TimeUnit.SECONDS));
  commit();

  // Poll 100 times, 10ms apart, asserting on each pass that no replica has gone down
  for (int poll = 0; poll < 100; poll++) {
    Thread.sleep(10);
    cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    int activeCount = 0;
    for (Replica replica : clusterState.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) {
      if (replica.getState().equals(Replica.State.ACTIVE)) {
        activeCount++;
      }
    }
    assertEquals("The replica receiving reordered updates must not have gone down", 3, activeCount);
  }

  for (SolrClient client : clients) {
    final String baseUrl = ((HttpSolrClient) client).getBaseURL();
    log.info("Testing client (Fetch missing test): " + baseUrl);
    log.info("Version at " + baseUrl + " is: " + getReplicaValue(client, 1, "_version_"));
    // the replacement value plus the two increments of 1
    assertReplicaValue(client, 1, "inplace_updatable_float", (replacementValue + 2.0f), "inplace_updatable_float didn't match for replica at client: " + baseUrl);
    assertReplicaValue(client, 1, "title_s", "title1_new", "Title didn't match for replica at client: " + baseUrl);
  }

  // Second round: the same updates, now followed by a delete request.
  // This ensures that fetching the missing update from the leader doesn't bomb out
  // if the document has been deleted on the leader later on.
  {
    clearIndex();
    commit();

    shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().unsetDelay();
    pending.add(regularDeleteRequest(1));

    // the first update
    shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 1, 5999);
    // the delete update
    shardToJetty.get(SHARD1).get(1).jetty.getDebugFilter().addDelay("Waiting for dependant update to timeout", 4, 5998);

    executor = ExecutorUtil.newMDCAwareFixedThreadPool(pending.size() + 1, new DefaultSolrThreadFactory(getTestName()));
    for (UpdateRequest request : pending) {
      executor.submit(new AsyncUpdateWithRandomCommit(request, cloudClient, random().nextLong()));
      // Same scheduling bias as in the first round.
      Thread.sleep(100);
    }
    executor.shutdown();
    assertTrue("Thread pool didn't terminate within 15 secs", executor.awaitTermination(15, TimeUnit.SECONDS));
    commit();

    // The leader-initiated-recovery znode for SHARD1 must not exist
    ZkController leaderZkController = shardToLeaderJetty.get(SHARD1).jetty.getCoreContainer().getZkController();
    String lirZnode = leaderZkController.getLeaderInitiatedRecoveryZnodePath(DEFAULT_TEST_COLLECTION_NAME, SHARD1);
    assertFalse(leaderZkController.getZkClient().exists(lirZnode, true));

    // Poll 100 times, 10ms apart, asserting on each pass that no replica has gone down
    for (int poll = 0; poll < 100; poll++) {
      Thread.sleep(10);
      cloudClient.getZkStateReader().forceUpdateCollection(DEFAULT_COLLECTION);
      ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
      int activeCount = 0;
      for (Replica replica : clusterState.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1).getReplicas()) {
        if (replica.getState().equals(Replica.State.ACTIVE)) {
          activeCount++;
        }
      }
      assertEquals("The replica receiving reordered updates must not have gone down", 3, activeCount);
    }

    // nonleader 0 re-ordered replica, nonleader 1 well-ordered replica
    SolrClient[] targets = new SolrClient[] { LEADER, NONLEADERS.get(0), NONLEADERS.get(1) };
    for (SolrClient client : targets) {
      SolrDocument fetched = client.getById(String.valueOf(1), params("distrib", "false"));
      assertNull("This doc was supposed to have been deleted, but was: " + fetched, fetched);
    }
  }

  log.info("delayedReorderingFetchesMissingUpdateFromLeaderTest: This test passed fine...");
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class ManagedIndexSchema method persistManagedSchemaToZooKeeper.
/**
 * Writes the serialized managed schema to its znode under the config set path.
 * <p>
 * When {@code createOnly} is true the znode is created and {@code schemaZkVersion} is reset
 * to 0; a znode that already exists is left untouched and still counts as success.
 * <p>
 * When {@code createOnly} is false the znode data is replaced, passing the current
 * {@code schemaZkVersion} as the expected version; on success {@code schemaZkVersion} is
 * updated from the returned {@code Stat}.
 *
 * @param createOnly true to create the znode only, false to overwrite an existing znode
 * @return true on success; every failure path throws instead of returning
 * @throws SchemaChangedInZkException if ZooKeeper rejects the write with a bad version
 * @throws SolrException on any other error while persisting
 */
boolean persistManagedSchemaToZooKeeper(boolean createOnly) {
  final ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader) loader;
  final SolrZkClient zkClient = zkLoader.getZkController().getZkClient();
  final String managedSchemaPath = zkLoader.getConfigSetZkPath() + "/" + managedSchemaResourceName;

  // The conflict is only recorded inside the try block and raised after it, so that the
  // generic catch below does not wrap it into a SERVER_ERROR.
  boolean versionConflict = false;
  try {
    // Serialize the managed schema
    StringWriter buffer = new StringWriter();
    persist(buffer);
    final byte[] payload = buffer.toString().getBytes(StandardCharsets.UTF_8);

    if (!createOnly) {
      try {
        // Assumption: the path exists
        Stat written = zkClient.setData(managedSchemaPath, payload, schemaZkVersion, true);
        schemaZkVersion = written.getVersion();
        log.info("Persisted managed schema version " + schemaZkVersion + " at " + managedSchemaPath);
      } catch (KeeperException.BadVersionException e) {
        log.error("Bad version when trying to persist schema using " + schemaZkVersion + " due to: " + e);
        versionConflict = true;
      }
    } else {
      try {
        zkClient.create(managedSchemaPath, payload, CreateMode.PERSISTENT, true);
        schemaZkVersion = 0;
        log.info("Created and persisted managed schema znode at " + managedSchemaPath);
      } catch (KeeperException.NodeExistsException e) {
        // An existing znode is fine - nothing to do
        log.info("Managed schema znode at " + managedSchemaPath + " already exists - no need to create it");
      }
    }
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // Restore the interrupted status
      Thread.currentThread().interrupt();
    }
    final String msg = "Error persisting managed schema at " + managedSchemaPath;
    log.error(msg, e);
    throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
  }

  if (versionConflict) {
    String msg = "Failed to persist managed schema at " + managedSchemaPath + " - version mismatch";
    log.info(msg);
    throw new SchemaChangedInZkException(ErrorCode.CONFLICT, msg + ", retry.");
  }
  return true;
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class CdcrRequestHandler method handleCollectionCheckpointAction.
/**
 * This action is generally executed on the target cluster in order to retrieve the latest update checkpoint.
 * This checkpoint is used on the source cluster to setup the
 * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader} of a shard leader. <br>
 * This method will execute in parallel one
 * {@link org.apache.solr.handler.CdcrParams.CdcrAction#SHARDCHECKPOINT} request per shard leader. It will
 * then pick the lowest version number as checkpoint. Picking the lowest amongst all shards will ensure that we do not
 * pick a checkpoint that is ahead of the source cluster. This can occur when other shard leaders are sending new
 * updates to the target cluster while we are currently instantiating the
 * {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader}.
 * This solution only works in scenarios where the topology of the source and target clusters are identical.
 * <p>
 * The result is added to {@code rsp} under {@link CdcrParams#CHECKPOINT}. If no shard reports a
 * version, the value stays at {@link Long#MAX_VALUE}.
 *
 * @param req the incoming request (not read by this method)
 * @param rsp the response that receives the checkpoint
 * @throws SolrException if waiting for the shard checkpoints is interrupted or one of the
 *         per-shard requests fails
 */
private void handleCollectionCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
  ZkController zkController = core.getCoreContainer().getZkController();
  try {
    zkController.getZkStateReader().forceUpdateCollection(collection);
  } catch (Exception e) {
    if (e instanceof InterruptedException) {
      // The refresh is best effort, but the interrupt must not be lost: restore the
      // status so the blocking calls below can observe it.
      Thread.currentThread().interrupt();
    }
    log.warn("Error when updating cluster state", e);
  }

  ClusterState cstate = zkController.getClusterState();
  Collection<Slice> shards = cstate.getActiveSlices(collection);

  ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
  long checkpoint = Long.MAX_VALUE;
  try {
    // One checkpoint request per shard leader
    List<Callable<Long>> callables = new ArrayList<>();
    for (Slice shard : shards) {
      ZkNodeProps leaderProps = zkController.getZkStateReader().getLeaderRetry(collection, shard.getName());
      ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps);
      callables.add(new SliceCheckpointCallable(nodeProps.getCoreUrl(), path));
    }

    for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
      // we must take the lowest checkpoint from all the shards
      checkpoint = Math.min(checkpoint, future.get());
    }
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
  } catch (ExecutionException e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
  } finally {
    parallelExecutor.shutdown();
  }

  rsp.add(CdcrParams.CHECKPOINT, checkpoint);
}
use of org.apache.solr.cloud.ZkController in project lucene-solr by apache.
the class DocExpirationUpdateProcessorFactory method iAmInChargeOfPeriodicDeletes.
/**
 * <p>
 * Decides whether the Runnable managed by this factory should currently perform the
 * periodic deletes.
 * </p>
 * <p>
 * When the core container has no ZkController the answer is always true. Otherwise the
 * answer is true only when this core's node name equals the name of the leader of the
 * active slice that sorts first under {@code COMPARE_SLICES_BY_NAME}.
 * </p>
 * <p>
 * A false result may be accompanied by a log message explaining why no periodic deletion
 * is attempted; the "not in charge" message is only emitted on the transition from being
 * in charge to not being in charge, to avoid flooding the log.
 * </p>
 *
 * @return true if this core should run the periodic deletes right now
 */
private boolean iAmInChargeOfPeriodicDeletes() {
  final ZkController zkController = core.getCoreContainer().getZkController();
  if (zkController == null) {
    return true;
  }

  // Reusing the per-shard leaders is much simpler than running our own "leader" election
  // across all replicas of all shards, because:
  // a) every shard already has a leader
  // b) shard names must be unique
  // c) ZkController already "watches" ClusterState, so there are no additional zk hits
  // d) several instances of this factory (in multiple chains) may exist per collection,
  //    which would make picking an ephemeral node name for an election tricky
  final CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
  final String collectionName = cloudDesc.getCollectionName();

  final List<Slice> activeSlices = new ArrayList<>(zkController.getClusterState().getActiveSlices(collectionName));
  Collections.sort(activeSlices, COMPARE_SLICES_BY_NAME);
  if (activeSlices.isEmpty()) {
    log.error("Collection {} has no active Slices?", collectionName);
    return false;
  }

  final Replica leaderOfFirstSlice = activeSlices.get(0).getLeader();
  if (leaderOfFirstSlice == null) {
    log.warn("Slice in charge of periodic deletes for {} does not currently have a leader", collectionName);
    return false;
  }

  final String leaderNodeName = leaderOfFirstSlice.getName();
  final boolean inChargeNow = leaderNodeName.equals(cloudDesc.getCoreNodeName());

  final boolean justLostCharge = previouslyInChargeOfDeletes && !inChargeNow;
  if (justLostCharge) {
    // only log when the previous check said we were in charge and this one says we are not
    log.info("Not currently in charge of periodic deletes for this collection, "
        + "will not trigger delete or log again until this changes");
  }

  previouslyInChargeOfDeletes = inChargeNow;
  return inChargeNow;
}
Aggregations