Use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.
The class ReplicationHandler, method setupPolling().
// void refreshCommitpoint() {
//   IndexCommit commitPoint = core.getDeletionPolicy().getLatestCommit();
//   if(replicateOnCommit || (replicateOnOptimize && commitPoint.getSegmentCount() == 1)) {
//     indexCommitPoint = commitPoint;
//   }
// }
private void setupPolling(String intervalStr) {
  pollIntervalStr = intervalStr;
  pollIntervalNs = readIntervalNs(pollIntervalStr);
  if (pollIntervalNs == null || pollIntervalNs <= 0) {
    LOG.info(" No value set for 'pollInterval'. Timer Task not started.");
    return;
  }
  Runnable task = () -> {
    if (pollDisabled.get()) {
      LOG.info("Poll disabled");
      return;
    }
    try {
      LOG.debug("Polling for index modifications");
      markScheduledExecutionStart();
      boolean pollSuccess = doFetch(null, false).getSuccessful();
      if (pollListener != null)
        pollListener.onComplete(core, pollSuccess);
    } catch (Exception e) {
      LOG.error("Exception in fetching index", e);
    }
  };
  executorService = Executors.newSingleThreadScheduledExecutor(new DefaultSolrThreadFactory("indexFetcher"));
  // Randomize initial delay, with a minimum of 1ms
  long initialDelayNs = new Random().nextLong() % pollIntervalNs + TimeUnit.NANOSECONDS.convert(1, TimeUnit.MILLISECONDS);
  executorService.scheduleAtFixedRate(task, initialDelayNs, pollIntervalNs, TimeUnit.NANOSECONDS);
  LOG.info("Poll scheduled at an interval of {}ms", TimeUnit.MILLISECONDS.convert(pollIntervalNs, TimeUnit.NANOSECONDS));
}
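
For reference, a minimal standalone sketch of the same scheduling pattern follows: a single-threaded scheduled executor whose thread is named by DefaultSolrThreadFactory, a randomized initial delay within one interval, and a fixed-rate poll. It assumes solr-core is on the classpath; the 5-second interval, the printed message, and the non-negative clamp on the random delay are illustrative choices, not taken from ReplicationHandler.

import java.util.Random;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import org.apache.solr.util.DefaultSolrThreadFactory;

public class PollingSketch {
  public static void main(String[] args) throws InterruptedException {
    // Hypothetical poll interval; ReplicationHandler derives it from the 'pollInterval' config string.
    long pollIntervalNs = TimeUnit.SECONDS.toNanos(5);
    ScheduledExecutorService executor =
        Executors.newSingleThreadScheduledExecutor(new DefaultSolrThreadFactory("indexFetcher"));
    // Randomize the initial delay within one interval (clamped to be non-negative here)
    // so that many pollers do not all fire at the same instant.
    long initialDelayNs = Math.abs(new Random().nextLong() % pollIntervalNs)
        + TimeUnit.MILLISECONDS.toNanos(1);
    executor.scheduleAtFixedRate(() -> System.out.println("polling for index modifications"),
        initialDelayNs, pollIntervalNs, TimeUnit.NANOSECONDS);
    TimeUnit.SECONDS.sleep(12);   // let the task fire a couple of times
    executor.shutdownNow();
  }
}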
Use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.
The class SolrConfigHandler, method waitForAllReplicasState().
/**
* Block up to a specified maximum time until we see agreement on the schema
* version in ZooKeeper across all replicas for a collection.
*/
private static void waitForAllReplicasState(String collection, ZkController zkController, String prop, int expectedVersion, int maxWaitSecs) {
  final RTimer timer = new RTimer();
  // get a list of active replica cores to query for the schema zk version (skipping this core of course)
  List<PerReplicaCallable> concurrentTasks = new ArrayList<>();
  for (String coreUrl : getActiveReplicaCoreUrls(zkController, collection)) {
    PerReplicaCallable e = new PerReplicaCallable(coreUrl, prop, expectedVersion, maxWaitSecs);
    concurrentTasks.add(e);
  }
  // nothing to wait for ...
  if (concurrentTasks.isEmpty())
    return;
  log.info(formatString("Waiting up to {0} secs for {1} replicas to set the property {2} to be of version {3} for collection {4}",
      maxWaitSecs, concurrentTasks.size(), prop, expectedVersion, collection));
  // use an executor service to invoke schema zk version requests in parallel with a max wait time
  int poolSize = Math.min(concurrentTasks.size(), 10);
  ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(poolSize, new DefaultSolrThreadFactory("solrHandlerExecutor"));
  try {
    List<Future<Boolean>> results = parallelExecutor.invokeAll(concurrentTasks, maxWaitSecs, TimeUnit.SECONDS);
    // determine whether all replicas have the update
    // lazily init'd
    List<String> failedList = null;
    for (int f = 0; f < results.size(); f++) {
      Boolean success = false;
      Future<Boolean> next = results.get(f);
      if (next.isDone() && !next.isCancelled()) {
        // looks to have finished, but need to check if it succeeded
        try {
          success = next.get();
        } catch (ExecutionException e) {
          // shouldn't happen since we checked isCancelled
        }
      }
      if (!success) {
        String coreUrl = concurrentTasks.get(f).coreUrl;
        log.warn("Core " + coreUrl + " could not get the expected version " + expectedVersion);
        if (failedList == null)
          failedList = new ArrayList<>();
        failedList.add(coreUrl);
      }
    }
    // if any tasks haven't completed within the specified timeout, it's an error
    if (failedList != null)
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          formatString("{0} out of {1} replicas failed to set the property {2} to be of version {3} within {4} seconds! Failed cores: {5}",
              failedList.size(), concurrentTasks.size() + 1, prop, expectedVersion, maxWaitSecs, failedList));
  } catch (InterruptedException ie) {
    log.warn(formatString("Core was interrupted. Trying to set the property {0} to version {1} to propagate to {2} replicas for collection {3}",
        prop, expectedVersion, concurrentTasks.size(), collection));
    Thread.currentThread().interrupt();
  } finally {
    ExecutorUtil.shutdownAndAwaitTermination(parallelExecutor);
  }
  log.info("Took {}ms to set the property {} to be of version {} for collection {}", timer.getTime(), prop, expectedVersion, collection);
}
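
The heart of this method is the invokeAll-with-deadline pattern: submit every replica check at once, give the whole batch a single timeout, then treat anything that is cancelled or failed as a miss. A reduced sketch of that pattern, using the same ExecutorUtil and DefaultSolrThreadFactory helpers but hypothetical Callable tasks in place of PerReplicaCallable:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.util.DefaultSolrThreadFactory;

public class ParallelCheckSketch {
  public static void main(String[] args) throws InterruptedException {
    // Hypothetical stand-ins for PerReplicaCallable: each "replica check" just sleeps and succeeds.
    List<Callable<Boolean>> tasks = new ArrayList<>();
    for (int i = 0; i < 4; i++) {
      final long delayMs = 100L * i;
      tasks.add(() -> { TimeUnit.MILLISECONDS.sleep(delayMs); return true; });
    }
    int poolSize = Math.min(tasks.size(), 10);
    ExecutorService pool = ExecutorUtil.newMDCAwareFixedThreadPool(poolSize, new DefaultSolrThreadFactory("solrHandlerExecutor"));
    try {
      // invokeAll blocks until every task finishes or the shared deadline expires; late tasks are cancelled.
      List<Future<Boolean>> results = pool.invokeAll(tasks, 5, TimeUnit.SECONDS);
      for (Future<Boolean> f : results) {
        boolean ok = f.isDone() && !f.isCancelled() && getQuietly(f);
        System.out.println(ok ? "replica updated" : "replica failed or timed out");
      }
    } finally {
      ExecutorUtil.shutdownAndAwaitTermination(pool);
    }
  }

  private static boolean getQuietly(Future<Boolean> f) {
    try { return Boolean.TRUE.equals(f.get()); } catch (Exception e) { return false; }
  }
}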
Use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.
The class CdcrRequestHandler, method handleCollectionCheckpointAction().
/**
* This action is generally executed on the target cluster in order to retrieve the latest update checkpoint.
* This checkpoint is used on the source cluster to setup the
* {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader} of a shard leader. <br/>
* This method will execute in parallel one
* {@link org.apache.solr.handler.CdcrParams.CdcrAction#SHARDCHECKPOINT} request per shard leader. It will
* then pick the lowest version number as checkpoint. Picking the lowest amongst all shards will ensure that we do not
* pick a checkpoint that is ahead of the source cluster. This can occur when other shard leaders are sending new
* updates to the target cluster while we are currently instantiating the
* {@link org.apache.solr.update.CdcrUpdateLog.CdcrLogReader}.
* This solution only works in scenarios where the topology of the source and target clusters are identical.
*/
private void handleCollectionCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
  ZkController zkController = core.getCoreContainer().getZkController();
  try {
    zkController.getZkStateReader().forceUpdateCollection(collection);
  } catch (Exception e) {
    log.warn("Error when updating cluster state", e);
  }
  ClusterState cstate = zkController.getClusterState();
  Collection<Slice> shards = cstate.getActiveSlices(collection);
  ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
  long checkpoint = Long.MAX_VALUE;
  try {
    List<Callable<Long>> callables = new ArrayList<>();
    for (Slice shard : shards) {
      ZkNodeProps leaderProps = zkController.getZkStateReader().getLeaderRetry(collection, shard.getName());
      ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(leaderProps);
      callables.add(new SliceCheckpointCallable(nodeProps.getCoreUrl(), path));
    }
    for (final Future<Long> future : parallelExecutor.invokeAll(callables)) {
      long version = future.get();
      if (version < checkpoint) {
        // we must take the lowest checkpoint from all the shards
        checkpoint = version;
      }
    }
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
  } catch (ExecutionException e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", e);
  } finally {
    parallelExecutor.shutdown();
  }
  rsp.add(CdcrParams.CHECKPOINT, checkpoint);
}
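
Stripped of the ZooKeeper plumbing, the action is a parallel fan-out followed by a minimum reduction, matching the "pick the lowest version" rule in the javadoc above. A sketch with hypothetical per-shard values standing in for real SHARDCHECKPOINT responses:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.util.DefaultSolrThreadFactory;

public class CollectionCheckpointSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical per-shard checkpoints; the handler obtains these from each shard leader.
    List<Callable<Long>> callables = Arrays.asList(() -> 42L, () -> 17L, () -> 99L);
    ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
    long checkpoint = Long.MAX_VALUE;
    try {
      for (Future<Long> future : parallelExecutor.invokeAll(callables)) {
        long version = future.get();
        if (version < checkpoint) {
          checkpoint = version;   // the collection checkpoint is the minimum over all shards
        }
      }
    } finally {
      parallelExecutor.shutdown();
    }
    System.out.println("collection checkpoint = " + checkpoint);   // prints 17
  }
}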
Use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.
The class OverseerTaskProcessor, method run().
@Override
public void run() {
  log.debug("Process current queue of overseer operations");
  LeaderStatus isLeader = amILeader();
  while (isLeader == LeaderStatus.DONT_KNOW) {
    log.debug("am_i_leader unclear {}", isLeader);
    // not a no, not a yes, try asking again
    isLeader = amILeader();
  }
  String oldestItemInWorkQueue = null;
  // hasLeftOverItems - used for avoiding re-execution of async tasks that were processed by a previous Overseer.
  // This variable is set in case there's any task found on the workQueue when the OCP starts up and
  // the id for the queue tail is used as a marker to check for the task in completed/failed map in zk.
  // Beyond the marker, all tasks can safely be assumed to have never been executed.
  boolean hasLeftOverItems = true;
  try {
    oldestItemInWorkQueue = workQueue.getTailId();
  } catch (KeeperException e) {
    // We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed
    // async calls.
    SolrException.log(log, "", e);
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
  }
  if (oldestItemInWorkQueue == null)
    hasLeftOverItems = false;
  else
    log.debug("Found already existing elements in the work-queue. Last element: {}", oldestItemInWorkQueue);
  try {
    prioritizer.prioritizeOverseerNodes(myId);
  } catch (Exception e) {
    if (!zkStateReader.getZkClient().isClosed()) {
      log.error("Unable to prioritize overseer ", e);
    }
  }
  // TODO: Make maxThreads configurable.
  this.tpe = new ExecutorUtil.MDCAwareThreadPoolExecutor(5, MAX_PARALLEL_TASKS, 0L, TimeUnit.MILLISECONDS,
      new SynchronousQueue<Runnable>(), new DefaultSolrThreadFactory("OverseerThreadFactory"));
  try {
    while (!this.isClosed) {
      try {
        isLeader = amILeader();
        if (LeaderStatus.NO == isLeader) {
          break;
        } else if (LeaderStatus.YES != isLeader) {
          log.debug("am_i_leader unclear {}", isLeader);
          // not a no, not a yes, try asking again
          continue;
        }
        log.debug("Cleaning up work-queue. #Running tasks: {}", runningTasks.size());
        cleanUpWorkQueue();
        printTrackingMaps();
        boolean waited = false;
        while (runningTasks.size() > MAX_PARALLEL_TASKS) {
          synchronized (waitLock) {
            // wait for 100 ms or till a task is complete
            waitLock.wait(100);
          }
          waited = true;
        }
        if (waited)
          cleanUpWorkQueue();
        ArrayList<QueueEvent> heads = new ArrayList<>(blockedTasks.size() + MAX_PARALLEL_TASKS);
        heads.addAll(blockedTasks.values());
        // to clear out at least a few items in the queue before we read more items
        if (heads.size() < MAX_BLOCKED_TASKS) {
          // instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute
          int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasks.size());
          List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L);
          log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks);
          heads.addAll(newTasks);
        } else {
          // Prevent free-spinning this loop.
          Thread.sleep(1000);
        }
        if (isClosed)
          break;
        if (heads.isEmpty()) {
          continue;
        }
        // clear it now; may get refilled below.
        blockedTasks.clear();
        taskBatch.batchId++;
        boolean tooManyTasks = false;
        for (QueueEvent head : heads) {
          if (!tooManyTasks) {
            synchronized (runningTasks) {
              tooManyTasks = runningTasks.size() >= MAX_PARALLEL_TASKS;
            }
          }
          if (tooManyTasks) {
            // Too many tasks are running, just shove the rest into the "blocked" queue.
            if (blockedTasks.size() < MAX_BLOCKED_TASKS)
              blockedTasks.put(head.getId(), head);
            continue;
          }
          if (runningZKTasks.contains(head.getId()))
            continue;
          final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
          OverseerMessageHandler messageHandler = selector.selectOverseerMessageHandler(message);
          final String asyncId = message.getStr(ASYNC);
          if (hasLeftOverItems) {
            if (head.getId().equals(oldestItemInWorkQueue))
              hasLeftOverItems = false;
            if (asyncId != null && (completedMap.contains(asyncId) || failureMap.contains(asyncId))) {
              log.debug("Found already processed task in workQueue, cleaning up. AsyncId [{}]", asyncId);
              workQueue.remove(head);
              continue;
            }
          }
          String operation = message.getStr(Overseer.QUEUE_OPERATION);
          OverseerMessageHandler.Lock lock = messageHandler.lockTask(message, taskBatch);
          if (lock == null) {
            log.debug("Exclusivity check failed for [{}]", message.toString());
            // we may end up crossing the size of MAX_BLOCKED_TASKS; that is fine.
            if (blockedTasks.size() < MAX_BLOCKED_TASKS)
              blockedTasks.put(head.getId(), head);
            continue;
          }
          try {
            markTaskAsRunning(head, asyncId);
            log.debug("Marked task [{}] as running", head.getId());
          } catch (KeeperException.NodeExistsException e) {
            lock.unlock();
            // This should never happen
            log.error("Tried to pick up task [{}] when it was already running!", head.getId());
            continue;
          } catch (InterruptedException e) {
            lock.unlock();
            log.error("Thread interrupted while trying to pick task [{}] for execution.", head.getId());
            Thread.currentThread().interrupt();
            continue;
          }
          log.debug(messageHandler.getName() + ": Get the message id:" + head.getId() + " message:" + message.toString());
          Runner runner = new Runner(messageHandler, message, operation, head, lock);
          tpe.execute(runner);
        }
      } catch (KeeperException e) {
        if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
          log.warn("Overseer cannot talk to ZK");
          return;
        }
        SolrException.log(log, "", e);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      } catch (Exception e) {
        SolrException.log(log, "", e);
      }
    }
  } finally {
    this.close();
  }
}
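
The executor built near the top of run() is a direct-handoff pool: with a SynchronousQueue each submitted task is handed straight to a worker thread, and the pool grows toward its maximum instead of queueing. A minimal sketch of that construction, where the 5/100 sizes and the print task are illustrative stand-ins for the real core size and MAX_PARALLEL_TASKS:

import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit;

import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.util.DefaultSolrThreadFactory;

public class OverseerPoolSketch {
  public static void main(String[] args) throws InterruptedException {
    ExecutorUtil.MDCAwareThreadPoolExecutor tpe = new ExecutorUtil.MDCAwareThreadPoolExecutor(5, 100, 0L, TimeUnit.MILLISECONDS,
        new SynchronousQueue<Runnable>(), new DefaultSolrThreadFactory("OverseerThreadFactory"));
    for (int i = 0; i < 10; i++) {
      final int id = i;
      // Each execute() either reuses an idle worker or starts a new one, up to the maximum pool size.
      tpe.execute(() -> System.out.println(Thread.currentThread().getName() + " ran task " + id));
    }
    tpe.shutdown();
    tpe.awaitTermination(10, TimeUnit.SECONDS);
  }
}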
Use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.
The class SolrCores, method close().
// We are shutting down. You can't hold the lock on the various lists of cores while they shut down, so we need to
// make a temporary copy of the names and shut them down outside the lock.
protected void close() {
  waitForLoadingCoresToFinish(30 * 1000);
  Collection<SolrCore> coreList = new ArrayList<>();
  TransientSolrCoreCache transientSolrCoreCache = container.getTransientCacheHandler();
  // Release observer
  if (transientSolrCoreCache != null) {
    transientSolrCoreCache.close();
  }
  // Cores can move from one list to another while we close them (for example from the transient
  // cache to the pendingCloses list), so keep looping until every list is empty.
  do {
    coreList.clear();
    synchronized (modifyLock) {
      // make a copy of the cores then clear the map so the core isn't handed out to a request again
      coreList.addAll(cores.values());
      cores.clear();
      if (transientSolrCoreCache != null) {
        coreList.addAll(transientSolrCoreCache.prepareForShutdown());
      }
      coreList.addAll(pendingCloses);
      pendingCloses.clear();
    }
    ExecutorService coreCloseExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(Integer.MAX_VALUE, new DefaultSolrThreadFactory("coreCloseExecutor"));
    try {
      for (SolrCore core : coreList) {
        coreCloseExecutor.submit(() -> {
          MDCLoggingContext.setCore(core);
          try {
            core.close();
          } catch (Throwable e) {
            SolrException.log(log, "Error shutting down core", e);
            if (e instanceof Error) {
              throw (Error) e;
            }
          } finally {
            MDCLoggingContext.clear();
          }
          return core;
        });
      }
    } finally {
      ExecutorUtil.shutdownAndAwaitTermination(coreCloseExecutor);
    }
  } while (coreList.size() > 0);
}
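
The shutdown pattern reduces to: copy the resources out from under the lock, fan the close() calls out to a pool named by DefaultSolrThreadFactory, and block in shutdownAndAwaitTermination until they are all done. A sketch with hypothetical AutoCloseable stand-ins for the SolrCore instances:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;

import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.util.DefaultSolrThreadFactory;

public class ParallelCloseSketch {
  public static void main(String[] args) {
    // Hypothetical resources standing in for the copied list of SolrCore instances.
    List<AutoCloseable> resources = Arrays.asList(
        () -> System.out.println("closed A"),
        () -> System.out.println("closed B"));
    ExecutorService closeExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(resources.size(), new DefaultSolrThreadFactory("coreCloseExecutor"));
    try {
      for (AutoCloseable r : resources) {
        closeExecutor.submit(() -> {
          try {
            r.close();
          } catch (Exception e) {
            e.printStackTrace();   // log the failure and keep closing the rest
          }
        });
      }
    } finally {
      // Blocks until every submitted close task has finished.
      ExecutorUtil.shutdownAndAwaitTermination(closeExecutor);
    }
  }
}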