Search in sources :

Example 26 with DefaultSolrThreadFactory

use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.

the class ReplicationHandler method setupPolling.

//  void refreshCommitpoint() {
//    IndexCommit commitPoint = core.getDeletionPolicy().getLatestCommit();
//    if(replicateOnCommit || (replicateOnOptimize && commitPoint.getSegmentCount() == 1)) {
//      indexCommitPoint = commitPoint;
//    }
//  }
/**
   * Schedules the periodic index-fetch task on a fresh single-threaded scheduled executor.
   * <p>
   * The interval string is parsed with {@code readIntervalNs}; when that yields {@code null} or a
   * non-positive value nothing is scheduled. Otherwise the task runs at a fixed rate of
   * {@code pollIntervalNs} nanoseconds after a randomized initial delay in the range
   * [1ms, pollIntervalNs + 1ms).
   *
   * @param intervalStr the configured poll interval, stored in {@code pollIntervalStr} as given
   */
private void setupPolling(String intervalStr) {
    pollIntervalStr = intervalStr;
    pollIntervalNs = readIntervalNs(pollIntervalStr);
    if (pollIntervalNs == null || pollIntervalNs <= 0) {
        LOG.info(" No value set for 'pollInterval'. Timer Task not started.");
        return;
    }
    Runnable task = () -> {
        // Checked on every run, so polling can be switched off without cancelling the schedule.
        if (pollDisabled.get()) {
            LOG.info("Poll disabled");
            return;
        }
        try {
            LOG.debug("Polling for index modifications");
            markScheduledExecutionStart();
            boolean pollSuccess = doFetch(null, false).getSuccessful();
            if (pollListener != null)
                pollListener.onComplete(core, pollSuccess);
        } catch (Exception e) {
            // Swallow after logging: an exception escaping a fixed-rate task would suppress all later runs.
            LOG.error("Exception in fetching index", e);
        }
    };
    executorService = Executors.newSingleThreadScheduledExecutor(new DefaultSolrThreadFactory("indexFetcher"));
    // Randomize initial delay, with a minimum of 1ms.
    // nextLong() may be negative and Java's % keeps the sign of the dividend, so a plain remainder
    // could produce a negative delay (i.e. an immediate first run). floorMod keeps the random
    // offset in [0, pollIntervalNs).
    long intervalNs = pollIntervalNs;
    long initialDelayNs = Math.floorMod(new Random().nextLong(), intervalNs) + TimeUnit.NANOSECONDS.convert(1, TimeUnit.MILLISECONDS);
    executorService.scheduleAtFixedRate(task, initialDelayNs, pollIntervalNs, TimeUnit.NANOSECONDS);
    LOG.info("Poll scheduled at an interval of {}ms", TimeUnit.MILLISECONDS.convert(pollIntervalNs, TimeUnit.NANOSECONDS));
}
Also used : Random(java.util.Random) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) NoSuchFileException(java.nio.file.NoSuchFileException) SolrException(org.apache.solr.common.SolrException) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException)

Example 27 with DefaultSolrThreadFactory

use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.

the class SolrConfigHandler method waitForAllReplicasState.

/**
   * Block up to a specified maximum time until every active replica of a collection reports the
   * expected version of the given property.
   * <p>
   * One {@code PerReplicaCallable} is created per URL returned by
   * {@code getActiveReplicaCoreUrls} and all of them are invoked in parallel on a bounded pool
   * (at most 10 threads). If there are no such URLs the method returns immediately.
   * <p>
   * If the calling thread is interrupted while waiting, a warning is logged, the interrupt flag is
   * restored and the method returns without throwing.
   *
   * @param collection      name of the collection whose replicas are checked
   * @param zkController    controller used to look up the active replica core URLs
   * @param prop            name of the property whose version is awaited
   * @param expectedVersion version every replica is expected to report
   * @param maxWaitSecs     timeout, in seconds, applied to the parallel invocation
   * @throws SolrException with {@code SERVER_ERROR} if at least one replica task did not complete
   *                       successfully within the timeout
   */
private static void waitForAllReplicasState(String collection, ZkController zkController, String prop, int expectedVersion, int maxWaitSecs) {
    final RTimer timer = new RTimer();
    // get a list of active replica cores to query for the schema zk version (skipping this core of course)
    List<PerReplicaCallable> concurrentTasks = new ArrayList<>();
    for (String coreUrl : getActiveReplicaCoreUrls(zkController, collection)) {
        PerReplicaCallable e = new PerReplicaCallable(coreUrl, prop, expectedVersion, maxWaitSecs);
        concurrentTasks.add(e);
    }
    // nothing to wait for ...
    if (concurrentTasks.isEmpty())
        return;
    log.info(formatString("Waiting up to {0} secs for {1} replicas to set the property {2} to be of version {3} for collection {4}", maxWaitSecs, concurrentTasks.size(), prop, expectedVersion, collection));
    // use an executor service to invoke schema zk version requests in parallel with a max wait time
    int poolSize = Math.min(concurrentTasks.size(), 10);
    ExecutorService parallelExecutor = ExecutorUtil.newMDCAwareFixedThreadPool(poolSize, new DefaultSolrThreadFactory("solrHandlerExecutor"));
    try {
        // invokeAll returns the futures in the same order as concurrentTasks, so index f maps 1:1.
        List<Future<Boolean>> results = parallelExecutor.invokeAll(concurrentTasks, maxWaitSecs, TimeUnit.SECONDS);
        // determine whether all replicas have the update
        // lazily init'd
        List<String> failedList = null;
        for (int f = 0; f < results.size(); f++) {
            boolean success = false;
            Future<Boolean> next = results.get(f);
            if (next.isDone() && !next.isCancelled()) {
                // looks to have finished, but need to check if it succeeded
                try {
                    // A null result is treated as "not successful" instead of failing on unboxing.
                    success = Boolean.TRUE.equals(next.get());
                } catch (ExecutionException e) {
                    // The task itself threw; record why and count the replica as failed.
                    log.warn("Version check for core " + concurrentTasks.get(f).coreUrl + " threw an exception", e);
                }
            }
            if (!success) {
                String coreUrl = concurrentTasks.get(f).coreUrl;
                log.warn("Core " + coreUrl + " could not get the expected version " + expectedVersion);
                if (failedList == null)
                    failedList = new ArrayList<>();
                failedList.add(coreUrl);
            }
        }
        // if any tasks haven't completed within the specified timeout, it's an error
        if (failedList != null)
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, formatString("{0} out of {1} the property {2} to be of version {3} within {4} seconds! Failed cores: {5}", failedList.size(), concurrentTasks.size() + 1, prop, expectedVersion, maxWaitSecs, failedList));
    } catch (InterruptedException ie) {
        // Placeholders are zero-based; they must line up with the four arguments supplied.
        log.warn(formatString("Core  was interrupted . trying to set the property {0} to version {1} to propagate to {2} replicas for collection {3}", prop, expectedVersion, concurrentTasks.size(), collection));
        Thread.currentThread().interrupt();
    } finally {
        ExecutorUtil.shutdownAndAwaitTermination(parallelExecutor);
    }
    log.info("Took {}ms to set the property {} to be of version {} for collection {}", timer.getTime(), prop, expectedVersion, collection);
}
Also used : ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) StrUtils.formatString(org.apache.solr.common.util.StrUtils.formatString) RTimer(org.apache.solr.util.RTimer) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) ExecutionException(java.util.concurrent.ExecutionException) SolrException(org.apache.solr.common.SolrException)

Example 28 with DefaultSolrThreadFactory

use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.

the class CdcrRequestHandler method handleCollectionCheckpointAction.

/**
   * Computes the collection-wide checkpoint and adds it to the response under
   * {@link CdcrParams#CHECKPOINT}.
   * <p>
   * For every active slice of the collection the shard leader is looked up and one
   * {@code SliceCheckpointCallable} is created for its core URL. All callables are executed in
   * parallel and the smallest version returned is used as the checkpoint; with no callables the
   * value stays at {@link Long#MAX_VALUE}. Taking the minimum across shards avoids choosing a
   * checkpoint that is ahead of any shard.
   * <p>
   * A failure to refresh the cluster state beforehand is only logged; the request then proceeds
   * with whatever state the controller currently holds.
   *
   * @param req the incoming request (not read by this method)
   * @param rsp the response that receives the checkpoint value
   * @throws SolrException with {@code SERVER_ERROR} if the thread is interrupted (the interrupt
   *                       flag is restored first) or if any shard request fails
   */
private void handleCollectionCheckpointAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, SolrServerException {
    final ZkController zk = core.getCoreContainer().getZkController();
    try {
        zk.getZkStateReader().forceUpdateCollection(collection);
    } catch (Exception refreshFailure) {
        log.warn("Error when updating cluster state", refreshFailure);
    }
    final Collection<Slice> activeSlices = zk.getClusterState().getActiveSlices(collection);
    final ExecutorService pool = ExecutorUtil.newMDCAwareCachedThreadPool(new DefaultSolrThreadFactory("parallelCdcrExecutor"));
    long lowest = Long.MAX_VALUE;
    try {
        // One checkpoint request per shard leader.
        final List<Callable<Long>> requests = new ArrayList<>();
        for (Slice slice : activeSlices) {
            final ZkNodeProps leader = zk.getZkStateReader().getLeaderRetry(collection, slice.getName());
            final String leaderUrl = new ZkCoreNodeProps(leader).getCoreUrl();
            requests.add(new SliceCheckpointCallable(leaderUrl, path));
        }
        // Keep the smallest version reported by any shard.
        for (final Future<Long> pending : pool.invokeAll(requests)) {
            final long reported = pending.get();
            lowest = Math.min(lowest, reported);
        }
    } catch (InterruptedException interrupted) {
        Thread.currentThread().interrupt();
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", interrupted);
    } catch (ExecutionException failed) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error while requesting shard's checkpoints", failed);
    } finally {
        pool.shutdown();
    }
    rsp.add(CdcrParams.CHECKPOINT, lowest);
}
Also used : ClusterState(org.apache.solr.common.cloud.ClusterState) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) SolrServerException(org.apache.solr.client.solrj.SolrServerException) SolrException(org.apache.solr.common.SolrException) CancellationException(java.util.concurrent.CancellationException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Callable(java.util.concurrent.Callable) ZkController(org.apache.solr.cloud.ZkController) Slice(org.apache.solr.common.cloud.Slice) ExecutorService(java.util.concurrent.ExecutorService) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) ExecutionException(java.util.concurrent.ExecutionException) SolrException(org.apache.solr.common.SolrException)

Example 29 with DefaultSolrThreadFactory

use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.

the class OverseerTaskProcessor method run.

/**
   * Main processing loop: while this instance is not closed and {@code amILeader()} reports
   * {@code YES}, reads tasks from the work queue, acquires a lock per task via the selected
   * message handler, marks each as running and hands it to the thread pool.
   * <p>
   * The loop ends when leadership is reported as {@code NO}, the instance is closed, the
   * ZooKeeper session expires, or the thread is interrupted. {@code close()} is always invoked
   * on exit from the loop.
   */
@Override
public void run() {
    log.debug("Process current queue of overseer operations");
    LeaderStatus isLeader = amILeader();
    while (isLeader == LeaderStatus.DONT_KNOW) {
        log.debug("am_i_leader unclear {}", isLeader);
        // not a no, not a yes, try ask again
        isLeader = amILeader();
    }
    String oldestItemInWorkQueue = null;
    // hasLeftOverItems - used for avoiding re-execution of async tasks that were processed by a previous Overseer.
    // This variable is set in case there's any task found on the workQueue when the OCP starts up and
    // the id for the queue tail is used as a marker to check for the task in completed/failed map in zk.
    // Beyond the marker, all tasks can safely be assumed to have never been executed.
    boolean hasLeftOverItems = true;
    try {
        oldestItemInWorkQueue = workQueue.getTailId();
    } catch (KeeperException e) {
        // We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed
        // async calls.
        SolrException.log(log, "", e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
    }
    if (oldestItemInWorkQueue == null)
        hasLeftOverItems = false;
    else
        log.debug("Found already existing elements in the work-queue. Last element: {}", oldestItemInWorkQueue);
    try {
        prioritizer.prioritizeOverseerNodes(myId);
    } catch (Exception e) {
        // Only report the failure when the ZK client is still open.
        if (!zkStateReader.getZkClient().isClosed()) {
            log.error("Unable to prioritize overseer ", e);
        }
    }
    // TODO: Make maxThreads configurable.
    this.tpe = new ExecutorUtil.MDCAwareThreadPoolExecutor(5, MAX_PARALLEL_TASKS, 0L, TimeUnit.MILLISECONDS, new SynchronousQueue<Runnable>(), new DefaultSolrThreadFactory("OverseerThreadFactory"));
    try {
        while (!this.isClosed) {
            try {
                isLeader = amILeader();
                if (LeaderStatus.NO == isLeader) {
                    break;
                } else if (LeaderStatus.YES != isLeader) {
                    log.debug("am_i_leader unclear {}", isLeader);
                    // not a no, not a yes, try asking again
                    continue;
                }
                log.debug("Cleaning up work-queue. #Running tasks: {}", runningTasks.size());
                cleanUpWorkQueue();
                printTrackingMaps();
                boolean waited = false;
                while (runningTasks.size() > MAX_PARALLEL_TASKS) {
                    synchronized (waitLock) {
                        //wait for 100 ms or till a task is complete
                        waitLock.wait(100);
                    }
                    waited = true;
                }
                // Clean up again after waiting.
                if (waited)
                    cleanUpWorkQueue();
                // Previously blocked tasks are considered first, ahead of anything newly fetched.
                ArrayList<QueueEvent> heads = new ArrayList<>(blockedTasks.size() + MAX_PARALLEL_TASKS);
                heads.addAll(blockedTasks.values());
                // to clear out at least a few items in the queue before we read more items
                if (heads.size() < MAX_BLOCKED_TASKS) {
                    //instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute
                    int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasks.size());
                    List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L);
                    log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks);
                    heads.addAll(newTasks);
                } else {
                    // Prevent free-spinning this loop.
                    Thread.sleep(1000);
                }
                if (isClosed)
                    break;
                if (heads.isEmpty()) {
                    continue;
                }
                // clear it now; may get refilled below.
                blockedTasks.clear();
                taskBatch.batchId++;
                boolean tooManyTasks = false;
                for (QueueEvent head : heads) {
                    // Once the limit has been hit it is not re-checked for the rest of this batch.
                    if (!tooManyTasks) {
                        synchronized (runningTasks) {
                            tooManyTasks = runningTasks.size() >= MAX_PARALLEL_TASKS;
                        }
                    }
                    if (tooManyTasks) {
                        // Too many tasks are running, just shove the rest into the "blocked" queue.
                        if (blockedTasks.size() < MAX_BLOCKED_TASKS)
                            blockedTasks.put(head.getId(), head);
                        continue;
                    }
                    // Skip tasks that are already tracked as running.
                    if (runningZKTasks.contains(head.getId()))
                        continue;
                    final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
                    OverseerMessageHandler messageHandler = selector.selectOverseerMessageHandler(message);
                    final String asyncId = message.getStr(ASYNC);
                    if (hasLeftOverItems) {
                        // Reaching the marker means everything after it was never seen by a previous run.
                        if (head.getId().equals(oldestItemInWorkQueue))
                            hasLeftOverItems = false;
                        if (asyncId != null && (completedMap.contains(asyncId) || failureMap.contains(asyncId))) {
                            log.debug("Found already processed task in workQueue, cleaning up. AsyncId [{}]", asyncId);
                            workQueue.remove(head);
                            continue;
                        }
                    }
                    String operation = message.getStr(Overseer.QUEUE_OPERATION);
                    OverseerMessageHandler.Lock lock = messageHandler.lockTask(message, taskBatch);
                    if (lock == null) {
                        log.debug("Exclusivity check failed for [{}]", message.toString());
                        //we may end crossing the size of the MAX_BLOCKED_TASKS. They are fine
                        if (blockedTasks.size() < MAX_BLOCKED_TASKS)
                            blockedTasks.put(head.getId(), head);
                        continue;
                    }
                    try {
                        markTaskAsRunning(head, asyncId);
                        log.debug("Marked task [{}] as running", head.getId());
                    } catch (KeeperException.NodeExistsException e) {
                        // The lock was acquired above; release it since the task will not be run here.
                        lock.unlock();
                        // This should never happen
                        log.error("Tried to pick up task [{}] when it was already running!", head.getId());
                        continue;
                    } catch (InterruptedException e) {
                        lock.unlock();
                        // The message needs a placeholder, otherwise the task id argument is silently dropped.
                        log.error("Thread interrupted while trying to pick task [{}] for execution.", head.getId());
                        Thread.currentThread().interrupt();
                        continue;
                    }
                    log.debug(messageHandler.getName() + ": Get the message id:" + head.getId() + " message:" + message.toString());
                    Runner runner = new Runner(messageHandler, message, operation, head, lock);
                    tpe.execute(runner);
                }
            } catch (KeeperException e) {
                // An expired session ends the loop; other ZK errors are logged and the loop retries.
                if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
                    log.warn("Overseer cannot talk to ZK");
                    return;
                }
                SolrException.log(log, "", e);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                SolrException.log(log, "", e);
            }
        }
    } finally {
        this.close();
    }
}
Also used : ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) LeaderStatus(org.apache.solr.cloud.Overseer.LeaderStatus) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) ExecutorUtil(org.apache.solr.common.util.ExecutorUtil) SynchronousQueue(java.util.concurrent.SynchronousQueue) QueueEvent(org.apache.solr.cloud.OverseerTaskQueue.QueueEvent) KeeperException(org.apache.zookeeper.KeeperException)

Example 30 with DefaultSolrThreadFactory

use of org.apache.solr.util.DefaultSolrThreadFactory in project lucene-solr by apache.

the class SolrCores method close.

// Shutdown path. Cores cannot be closed while modifyLock is held, so each pass snapshots the
// tracked cores under the lock and then closes that snapshot outside of it.
protected void close() {
    // NOTE(review): argument is presumably a timeout in milliseconds (30s) - confirm against the callee.
    waitForLoadingCoresToFinish(30 * 1000);
    final Collection<SolrCore> batch = new ArrayList<>();
    final TransientSolrCoreCache transientCache = container.getTransientCacheHandler();
    // Release observer
    if (transientCache != null) {
        transientCache.close();
    }
    // Keep draining until a pass finds nothing left to close: the regular map, the transient
    // cache and pendingCloses are all re-read on every iteration.
    do {
        batch.clear();
        synchronized (modifyLock) {
            // Snapshot and empty the core map so none of these cores is handed out to a request again.
            batch.addAll(cores.values());
            cores.clear();
            if (transientCache != null) {
                batch.addAll(transientCache.prepareForShutdown());
            }
            batch.addAll(pendingCloses);
            pendingCloses.clear();
        }
        final ExecutorService closer = ExecutorUtil.newMDCAwareFixedThreadPool(Integer.MAX_VALUE, new DefaultSolrThreadFactory("coreCloseExecutor"));
        try {
            for (SolrCore candidate : batch) {
                closer.submit(() -> {
                    MDCLoggingContext.setCore(candidate);
                    try {
                        candidate.close();
                    } catch (Throwable failure) {
                        SolrException.log(log, "Error shutting down core", failure);
                        // Errors are rethrown after logging; anything else is only logged.
                        if (failure instanceof Error) {
                            throw (Error) failure;
                        }
                    } finally {
                        MDCLoggingContext.clear();
                    }
                    return candidate;
                });
            }
        } finally {
            // Wait for every submitted close before looking for more cores.
            ExecutorUtil.shutdownAndAwaitTermination(closer);
        }
    } while (!batch.isEmpty());
}
Also used : ArrayList(java.util.ArrayList) ExecutorService(java.util.concurrent.ExecutorService) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory)

Aggregations

DefaultSolrThreadFactory (org.apache.solr.util.DefaultSolrThreadFactory)32 ExecutorService (java.util.concurrent.ExecutorService)19 ArrayList (java.util.ArrayList)18 Future (java.util.concurrent.Future)10 SolrException (org.apache.solr.common.SolrException)9 SolrClient (org.apache.solr.client.solrj.SolrClient)8 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)8 IOException (java.io.IOException)7 UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)7 Test (org.junit.Test)7 ExecutionException (java.util.concurrent.ExecutionException)6 SolrDocument (org.apache.solr.common.SolrDocument)6 UpdateResponse (org.apache.solr.client.solrj.response.UpdateResponse)5 ExecutorUtil (org.apache.solr.common.util.ExecutorUtil)5 Callable (java.util.concurrent.Callable)4 SynchronousQueue (java.util.concurrent.SynchronousQueue)4 SolrServerException (org.apache.solr.client.solrj.SolrServerException)4 ClusterState (org.apache.solr.common.cloud.ClusterState)4 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)3 ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor)3