Search in sources :

Example 1 with LeaderStatus

Use of org.apache.solr.cloud.Overseer.LeaderStatus in project lucene-solr by Apache.

The run method of the class OverseerTaskProcessor.

/**
 * Main loop of the task processor: repeatedly reads queued overseer operations from the
 * work queue and hands each runnable one to the thread pool.
 *
 * <p>The loop ends when this processor is closed, when {@code amILeader()} reports
 * {@link LeaderStatus#NO}, when the ZooKeeper session has expired, or when the thread is
 * interrupted. {@link #close()} is always invoked on the way out.
 */
@Override
public void run() {
    log.debug("Process current queue of overseer operations");
    LeaderStatus isLeader = amILeader();
    while (isLeader == LeaderStatus.DONT_KNOW) {
        log.debug("am_i_leader unclear {}", isLeader);
        // not a no, not a yes, try ask again
        isLeader = amILeader();
    }
    String oldestItemInWorkQueue = null;
    try {
        oldestItemInWorkQueue = workQueue.getTailId();
    } catch (KeeperException e) {
        // We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed
        // async calls.
        SolrException.log(log, "", e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
    }
    // hasLeftOverItems - used for avoiding re-execution of async tasks that were processed by a previous Overseer.
    // This variable is set in case there's any task found on the workQueue when the OCP starts up and
    // the id for the queue tail is used as a marker to check for the task in completed/failed map in zk.
    // Beyond the marker, all tasks can safely be assumed to have never been executed.
    boolean hasLeftOverItems = oldestItemInWorkQueue != null;
    if (hasLeftOverItems) {
        log.debug("Found already existing elements in the work-queue. Last element: {}", oldestItemInWorkQueue);
    }
    try {
        prioritizer.prioritizeOverseerNodes(myId);
    } catch (Exception e) {
        // Only report the failure while the ZK client is still open.
        if (!zkStateReader.getZkClient().isClosed()) {
            log.error("Unable to prioritize overseer ", e);
        }
    }
    // TODO: Make maxThreads configurable.
    this.tpe = new ExecutorUtil.MDCAwareThreadPoolExecutor(5, MAX_PARALLEL_TASKS, 0L, TimeUnit.MILLISECONDS, new SynchronousQueue<Runnable>(), new DefaultSolrThreadFactory("OverseerThreadFactory"));
    try {
        while (!this.isClosed) {
            try {
                isLeader = amILeader();
                if (LeaderStatus.NO == isLeader) {
                    break;
                } else if (LeaderStatus.YES != isLeader) {
                    log.debug("am_i_leader unclear {}", isLeader);
                    // not a no, not a yes, try asking again
                    continue;
                }
                log.debug("Cleaning up work-queue. #Running tasks: {}", runningTasks.size());
                cleanUpWorkQueue();
                printTrackingMaps();
                boolean waited = false;
                while (runningTasks.size() > MAX_PARALLEL_TASKS) {
                    synchronized (waitLock) {
                        //wait for 100 ms or till a task is complete
                        waitLock.wait(100);
                    }
                    waited = true;
                }
                if (waited) {
                    cleanUpWorkQueue();
                }
                // Previously blocked tasks are considered first, followed by freshly fetched ones.
                List<QueueEvent> heads = new ArrayList<>(blockedTasks.size() + MAX_PARALLEL_TASKS);
                heads.addAll(blockedTasks.values());
                // to clear out at least a few items in the queue before we read more items
                if (heads.size() < MAX_BLOCKED_TASKS) {
                    //instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute
                    int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasks.size());
                    List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L);
                    log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks);
                    heads.addAll(newTasks);
                } else {
                    // Prevent free-spinning this loop.
                    Thread.sleep(1000);
                }
                if (isClosed) {
                    break;
                }
                if (heads.isEmpty()) {
                    continue;
                }
                // clear it now; may get refilled below.
                blockedTasks.clear();
                taskBatch.batchId++;
                boolean tooManyTasks = false;
                for (QueueEvent head : heads) {
                    // Once the limit has been hit in this batch it is not re-checked.
                    if (!tooManyTasks) {
                        synchronized (runningTasks) {
                            tooManyTasks = runningTasks.size() >= MAX_PARALLEL_TASKS;
                        }
                    }
                    if (tooManyTasks) {
                        // Too many tasks are running, just shove the rest into the "blocked" queue.
                        if (blockedTasks.size() < MAX_BLOCKED_TASKS) {
                            blockedTasks.put(head.getId(), head);
                        }
                        continue;
                    }
                    if (runningZKTasks.contains(head.getId())) {
                        continue;
                    }
                    final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
                    OverseerMessageHandler messageHandler = selector.selectOverseerMessageHandler(message);
                    final String asyncId = message.getStr(ASYNC);
                    if (hasLeftOverItems) {
                        // Reaching the tail marker means every later item is new to this processor.
                        if (head.getId().equals(oldestItemInWorkQueue)) {
                            hasLeftOverItems = false;
                        }
                        if (asyncId != null && (completedMap.contains(asyncId) || failureMap.contains(asyncId))) {
                            log.debug("Found already processed task in workQueue, cleaning up. AsyncId [{}]", asyncId);
                            workQueue.remove(head);
                            continue;
                        }
                    }
                    String operation = message.getStr(Overseer.QUEUE_OPERATION);
                    OverseerMessageHandler.Lock lock = messageHandler.lockTask(message, taskBatch);
                    if (lock == null) {
                        log.debug("Exclusivity check failed for [{}]", message.toString());
                        //we may end crossing the size of the MAX_BLOCKED_TASKS. They are fine
                        if (blockedTasks.size() < MAX_BLOCKED_TASKS) {
                            blockedTasks.put(head.getId(), head);
                        }
                        continue;
                    }
                    try {
                        markTaskAsRunning(head, asyncId);
                        log.debug("Marked task [{}] as running", head.getId());
                    } catch (KeeperException.NodeExistsException e) {
                        lock.unlock();
                        // This should never happen
                        log.error("Tried to pick up task [{}] when it was already running!", head.getId());
                        continue;
                    } catch (InterruptedException e) {
                        lock.unlock();
                        // The message needs a placeholder, otherwise the task id argument is silently dropped.
                        log.error("Thread interrupted while trying to pick task [{}] for execution.", head.getId());
                        Thread.currentThread().interrupt();
                        continue;
                    }
                    // Guarded so the message is only rendered when debug logging is on.
                    if (log.isDebugEnabled()) {
                        log.debug("{}: Get the message id:{} message:{}", messageHandler.getName(), head.getId(), message.toString());
                    }
                    Runner runner = new Runner(messageHandler, message, operation, head, lock);
                    tpe.execute(runner);
                }
            } catch (KeeperException e) {
                if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
                    log.warn("Overseer cannot talk to ZK");
                    return;
                }
                SolrException.log(log, "", e);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                // Any other failure is logged and the loop carries on with the next iteration.
                SolrException.log(log, "", e);
            }
        }
    } finally {
        this.close();
    }
}
Also used : ZkNodeProps(org.apache.solr.common.cloud.ZkNodeProps) ArrayList(java.util.ArrayList) DefaultSolrThreadFactory(org.apache.solr.util.DefaultSolrThreadFactory) LeaderStatus(org.apache.solr.cloud.Overseer.LeaderStatus) SolrException(org.apache.solr.common.SolrException) KeeperException(org.apache.zookeeper.KeeperException) ExecutorUtil(org.apache.solr.common.util.ExecutorUtil) SynchronousQueue(java.util.concurrent.SynchronousQueue) QueueEvent(org.apache.solr.cloud.OverseerTaskQueue.QueueEvent)

Aggregations

ArrayList (java.util.ArrayList)1 SynchronousQueue (java.util.concurrent.SynchronousQueue)1 LeaderStatus (org.apache.solr.cloud.Overseer.LeaderStatus)1 QueueEvent (org.apache.solr.cloud.OverseerTaskQueue.QueueEvent)1 SolrException (org.apache.solr.common.SolrException)1 ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps)1 ExecutorUtil (org.apache.solr.common.util.ExecutorUtil)1 DefaultSolrThreadFactory (org.apache.solr.util.DefaultSolrThreadFactory)1 KeeperException (org.apache.zookeeper.KeeperException)1