
Example 1 with QueueEvent

Use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.

From the class CollectionsHandler, method handleResponse:

private SolrResponse handleResponse(String operation, ZkNodeProps m, SolrQueryResponse rsp, long timeout) throws KeeperException, InterruptedException {
    long time = System.nanoTime();
    if (m.containsKey(ASYNC) && m.get(ASYNC) != null) {
        String asyncId = m.getStr(ASYNC);
        if (asyncId.equals("-1")) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "requestid can not be -1. It is reserved for cleanup purposes.");
        }
        NamedList<String> r = new NamedList<>();
        if (coreContainer.getZkController().getOverseerCompletedMap().contains(asyncId)
                || coreContainer.getZkController().getOverseerFailureMap().contains(asyncId)
                || coreContainer.getZkController().getOverseerRunningMap().contains(asyncId)
                || overseerCollectionQueueContains(asyncId)) {
            r.add("error", "Task with the same requestid already exists.");
        } else {
            coreContainer.getZkController().getOverseerCollectionQueue().offer(Utils.toJSON(m));
        }
        r.add(CoreAdminParams.REQUESTID, (String) m.get(ASYNC));
        SolrResponse response = new OverseerSolrResponse(r);
        rsp.getValues().addAll(response.getResponse());
        return response;
    }
    QueueEvent event = coreContainer.getZkController().getOverseerCollectionQueue().offer(Utils.toJSON(m), timeout);
    if (event.getBytes() != null) {
        SolrResponse response = SolrResponse.deserialize(event.getBytes());
        rsp.getValues().addAll(response.getResponse());
        SimpleOrderedMap exp = (SimpleOrderedMap) response.getResponse().get("exception");
        if (exp != null) {
            Integer code = (Integer) exp.get("rspCode");
            rsp.setException(new SolrException(code != null && code != -1 ? ErrorCode.getErrorCode(code) : ErrorCode.SERVER_ERROR, (String) exp.get("msg")));
        }
        return response;
    } else {
        if (System.nanoTime() - time >= TimeUnit.NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS)) {
            throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the collection time out:" + timeout / 1000 + "s");
        } else if (event.getWatchedEvent() != null) {
            throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the collection error [Watcher fired on path: " + event.getWatchedEvent().getPath() + " state: " + event.getWatchedEvent().getState() + " type " + event.getWatchedEvent().getType() + "]");
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the collection unknown case");
        }
    }
}
Also used: OverseerSolrResponse (org.apache.solr.cloud.OverseerSolrResponse), NamedList (org.apache.solr.common.util.NamedList), QueueEvent (org.apache.solr.cloud.OverseerTaskQueue.QueueEvent), StrUtils.formatString (org.apache.solr.common.util.StrUtils.formatString), SolrResponse (org.apache.solr.client.solrj.SolrResponse), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap), SolrException (org.apache.solr.common.SolrException)
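For context, here is a minimal sketch of the kind of ZkNodeProps message this handler receives on the async path. It is illustrative only, not taken from the source: the collection name and request id are hypothetical, and ASYNC refers to the same statically imported constant the handler checks above. It assumes the same imports listed for this example.

// Illustrative sketch (not from the source): building a CREATE message that carries an async
// request id, using the same Utils/ZkNodeProps helpers shown elsewhere on this page.
Map<String, Object> props = Utils.makeMap(
        Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(),
        "name", "myCollection",             // hypothetical collection name
        ASYNC, "create-myCollection-001");  // hypothetical request id; "-1" is rejected above
ZkNodeProps m = new ZkNodeProps(props);

With a non-null async id the handler only validates and enqueues the message; without one it blocks on offer(..., timeout) and deserializes the returned QueueEvent bytes into a SolrResponse.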

Example 2 with QueueEvent

Use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.

From the class ConfigSetsHandler, method handleResponse:

private void handleResponse(String operation, ZkNodeProps m, SolrQueryResponse rsp, long timeout) throws KeeperException, InterruptedException {
    long time = System.nanoTime();
    QueueEvent event = coreContainer.getZkController().getOverseerConfigSetQueue().offer(Utils.toJSON(m), timeout);
    if (event.getBytes() != null) {
        SolrResponse response = SolrResponse.deserialize(event.getBytes());
        rsp.getValues().addAll(response.getResponse());
        SimpleOrderedMap exp = (SimpleOrderedMap) response.getResponse().get("exception");
        if (exp != null) {
            Integer code = (Integer) exp.get("rspCode");
            rsp.setException(new SolrException(code != null && code != -1 ? ErrorCode.getErrorCode(code) : ErrorCode.SERVER_ERROR, (String) exp.get("msg")));
        }
    } else {
        if (System.nanoTime() - time >= TimeUnit.NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS)) {
            throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the configset time out:" + timeout / 1000 + "s");
        } else if (event.getWatchedEvent() != null) {
            throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the configset error [Watcher fired on path: " + event.getWatchedEvent().getPath() + " state: " + event.getWatchedEvent().getState() + " type " + event.getWatchedEvent().getType() + "]");
        } else {
            throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the configset unknown case");
        }
    }
}
Also used: QueueEvent (org.apache.solr.cloud.OverseerTaskQueue.QueueEvent), OverseerSolrResponse (org.apache.solr.cloud.OverseerSolrResponse), SolrResponse (org.apache.solr.client.solrj.SolrResponse), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap), SolrException (org.apache.solr.common.SolrException)
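Both handlers unwrap the same response shape. The following is a sketch of a failure payload that would drive the setException branch above; it is an assumption about the format implied by the unwrapping code, not an excerpt from the Overseer, and the error message is made up.

// Illustrative sketch: a response carrying an "exception" entry with "msg" and "rspCode",
// which is what the deserialized SolrResponse is probed for above.
NamedList<Object> results = new NamedList<>();
SimpleOrderedMap<Object> err = new SimpleOrderedMap<>();
err.add("msg", "ConfigSet already exists");       // hypothetical error message
err.add("rspCode", ErrorCode.BAD_REQUEST.code);   // 400; non-null and != -1, so it is propagated
results.add("exception", err);
SolrResponse overseerResponse = new OverseerSolrResponse(results);
byte[] payload = SolrResponse.serializable(overseerResponse);  // stands in for event.getBytes()
// SolrResponse.deserialize(payload) reproduces the response, and the handler copies the
// ErrorCode and message onto the SolrQueryResponse.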

Example 3 with QueueEvent

Use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.

From the class OverseerCollectionConfigSetProcessorTest, method issueCreateJob:

protected void issueCreateJob(Integer numberOfSlices, Integer replicationFactor, Integer maxShardsPerNode, List<String> createNodeList, boolean sendCreateNodeList, boolean createNodeSetShuffle) {
    Map<String, Object> propMap = Utils.makeMap(
            Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(),
            ZkStateReader.REPLICATION_FACTOR, replicationFactor.toString(),
            "name", COLLECTION_NAME,
            "collection.configName", CONFIG_NAME,
            OverseerCollectionMessageHandler.NUM_SLICES, numberOfSlices.toString(),
            ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode.toString());
    if (sendCreateNodeList) {
        propMap.put(OverseerCollectionMessageHandler.CREATE_NODE_SET, (createNodeList != null) ? StrUtils.join(createNodeList, ',') : null);
        if (OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE_DEFAULT != createNodeSetShuffle || random().nextBoolean()) {
            propMap.put(OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE, createNodeSetShuffle);
        }
    }
    ZkNodeProps props = new ZkNodeProps(propMap);
    QueueEvent qe = new QueueEvent("id", Utils.toJSON(props), null) {

        @Override
        public void setBytes(byte[] bytes) {
            lastProcessMessageResult = SolrResponse.deserialize(bytes);
        }
    };
    queue.add(qe);
}
Also used: ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps), QueueEvent (org.apache.solr.cloud.OverseerTaskQueue.QueueEvent)
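As a quick illustration (not part of the test), the payload handed to a QueueEvent round-trips through the same ZkNodeProps.load call the processor uses in Example 4. The snippet assumes the same package-level access to the QueueEvent constructor that the test above relies on.

// Illustrative sketch: the bytes stored in the QueueEvent decode back to the original message.
QueueEvent qe2 = new QueueEvent("id", Utils.toJSON(props), null);  // props as built above
ZkNodeProps roundTripped = ZkNodeProps.load(qe2.getBytes());
assert CollectionParams.CollectionAction.CREATE.toLower()
        .equals(roundTripped.getStr(Overseer.QUEUE_OPERATION));

The test's setBytes override, by contrast, is how it captures the serialized SolrResponse that the task processor writes back into the queue entry once the task completes.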

Example 4 with QueueEvent

Use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.

From the class OverseerTaskProcessor, method run:

@Override
public void run() {
    log.debug("Process current queue of overseer operations");
    LeaderStatus isLeader = amILeader();
    while (isLeader == LeaderStatus.DONT_KNOW) {
        log.debug("am_i_leader unclear {}", isLeader);
        // not a no, not a yes, try asking again
        isLeader = amILeader();
    }
    String oldestItemInWorkQueue = null;
    // hasLeftOverItems - used for avoiding re-execution of async tasks that were processed by a previous Overseer.
    // This variable is set in case there's any task found on the workQueue when the OCP starts up and
    // the id for the queue tail is used as a marker to check for the task in completed/failed map in zk.
    // Beyond the marker, all tasks can safely be assumed to have never been executed.
    boolean hasLeftOverItems = true;
    try {
        oldestItemInWorkQueue = workQueue.getTailId();
    } catch (KeeperException e) {
        // We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed
        // async calls.
        SolrException.log(log, "", e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
    }
    if (oldestItemInWorkQueue == null)
        hasLeftOverItems = false;
    else
        log.debug("Found already existing elements in the work-queue. Last element: {}", oldestItemInWorkQueue);
    try {
        prioritizer.prioritizeOverseerNodes(myId);
    } catch (Exception e) {
        if (!zkStateReader.getZkClient().isClosed()) {
            log.error("Unable to prioritize overseer ", e);
        }
    }
    // TODO: Make maxThreads configurable.
    this.tpe = new ExecutorUtil.MDCAwareThreadPoolExecutor(5, MAX_PARALLEL_TASKS, 0L, TimeUnit.MILLISECONDS, new SynchronousQueue<Runnable>(), new DefaultSolrThreadFactory("OverseerThreadFactory"));
    try {
        while (!this.isClosed) {
            try {
                isLeader = amILeader();
                if (LeaderStatus.NO == isLeader) {
                    break;
                } else if (LeaderStatus.YES != isLeader) {
                    log.debug("am_i_leader unclear {}", isLeader);
                    // not a no, not a yes, try asking again
                    continue;
                }
                log.debug("Cleaning up work-queue. #Running tasks: {}", runningTasks.size());
                cleanUpWorkQueue();
                printTrackingMaps();
                boolean waited = false;
                while (runningTasks.size() > MAX_PARALLEL_TASKS) {
                    synchronized (waitLock) {
                        //wait for 100 ms or till a task is complete
                        waitLock.wait(100);
                    }
                    waited = true;
                }
                if (waited)
                    cleanUpWorkQueue();
                ArrayList<QueueEvent> heads = new ArrayList<>(blockedTasks.size() + MAX_PARALLEL_TASKS);
                heads.addAll(blockedTasks.values());
                // Only fetch more work when the blocked list has room; otherwise let some of it clear out first.
                if (heads.size() < MAX_BLOCKED_TASKS) {
                    //instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute
                    int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasks.size());
                    List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L);
                    log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks);
                    heads.addAll(newTasks);
                } else {
                    // Prevent free-spinning this loop.
                    Thread.sleep(1000);
                }
                if (isClosed)
                    break;
                if (heads.isEmpty()) {
                    continue;
                }
                // clear it now; may get refilled below.
                blockedTasks.clear();
                taskBatch.batchId++;
                boolean tooManyTasks = false;
                for (QueueEvent head : heads) {
                    if (!tooManyTasks) {
                        synchronized (runningTasks) {
                            tooManyTasks = runningTasks.size() >= MAX_PARALLEL_TASKS;
                        }
                    }
                    if (tooManyTasks) {
                        // Too many tasks are running, just shove the rest into the "blocked" queue.
                        if (blockedTasks.size() < MAX_BLOCKED_TASKS)
                            blockedTasks.put(head.getId(), head);
                        continue;
                    }
                    if (runningZKTasks.contains(head.getId()))
                        continue;
                    final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
                    OverseerMessageHandler messageHandler = selector.selectOverseerMessageHandler(message);
                    final String asyncId = message.getStr(ASYNC);
                    if (hasLeftOverItems) {
                        if (head.getId().equals(oldestItemInWorkQueue))
                            hasLeftOverItems = false;
                        if (asyncId != null && (completedMap.contains(asyncId) || failureMap.contains(asyncId))) {
                            log.debug("Found already processed task in workQueue, cleaning up. AsyncId [{}]", asyncId);
                            workQueue.remove(head);
                            continue;
                        }
                    }
                    String operation = message.getStr(Overseer.QUEUE_OPERATION);
                    OverseerMessageHandler.Lock lock = messageHandler.lockTask(message, taskBatch);
                    if (lock == null) {
                        log.debug("Exclusivity check failed for [{}]", message.toString());
                        // we may end up exceeding MAX_BLOCKED_TASKS here; that's fine
                        if (blockedTasks.size() < MAX_BLOCKED_TASKS)
                            blockedTasks.put(head.getId(), head);
                        continue;
                    }
                    try {
                        markTaskAsRunning(head, asyncId);
                        log.debug("Marked task [{}] as running", head.getId());
                    } catch (KeeperException.NodeExistsException e) {
                        lock.unlock();
                        // This should never happen
                        log.error("Tried to pick up task [{}] when it was already running!", head.getId());
                        continue;
                    } catch (InterruptedException e) {
                        lock.unlock();
                        log.error("Thread interrupted while trying to pick task for execution.", head.getId());
                        Thread.currentThread().interrupt();
                        continue;
                    }
                    log.debug(messageHandler.getName() + ": Get the message id:" + head.getId() + " message:" + message.toString());
                    Runner runner = new Runner(messageHandler, message, operation, head, lock);
                    tpe.execute(runner);
                }
            } catch (KeeperException e) {
                if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
                    log.warn("Overseer cannot talk to ZK");
                    return;
                }
                SolrException.log(log, "", e);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            } catch (Exception e) {
                SolrException.log(log, "", e);
            }
        }
    } finally {
        this.close();
    }
}
Also used: ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps), ArrayList (java.util.ArrayList), DefaultSolrThreadFactory (org.apache.solr.util.DefaultSolrThreadFactory), LeaderStatus (org.apache.solr.cloud.Overseer.LeaderStatus), SolrException (org.apache.solr.common.SolrException), KeeperException (org.apache.zookeeper.KeeperException), ExecutorUtil (org.apache.solr.common.util.ExecutorUtil), SynchronousQueue (java.util.concurrent.SynchronousQueue), QueueEvent (org.apache.solr.cloud.OverseerTaskQueue.QueueEvent)
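One detail worth calling out in run() is the fetch-size calculation: the processor reads no more items than the blocked list can still hold and no more than it has free execution slots for. A worked example with assumed values follows; the real MAX_PARALLEL_TASKS and MAX_BLOCKED_TASKS constants are defined in OverseerTaskProcessor and may differ.

// Assumed values, for illustration only.
int maxParallelTasks = 10;   // stands in for MAX_PARALLEL_TASKS
int maxBlockedTasks = 100;   // stands in for MAX_BLOCKED_TASKS
int blockedCarriedOver = 7;  // heads already copied over from blockedTasks
int runningNow = 4;          // tasks currently executing
int toFetch = Math.min(maxBlockedTasks - blockedCarriedOver, maxParallelTasks - runningNow);
// toFetch == 6: bounded by the six free execution slots, not by the blocked-list capacity.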

Aggregations

QueueEvent (org.apache.solr.cloud.OverseerTaskQueue.QueueEvent): 4 usages
SolrException (org.apache.solr.common.SolrException): 3 usages
SolrResponse (org.apache.solr.client.solrj.SolrResponse): 2 usages
OverseerSolrResponse (org.apache.solr.cloud.OverseerSolrResponse): 2 usages
ZkNodeProps (org.apache.solr.common.cloud.ZkNodeProps): 2 usages
SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap): 2 usages
ArrayList (java.util.ArrayList): 1 usage
SynchronousQueue (java.util.concurrent.SynchronousQueue): 1 usage
LeaderStatus (org.apache.solr.cloud.Overseer.LeaderStatus): 1 usage
ExecutorUtil (org.apache.solr.common.util.ExecutorUtil): 1 usage
NamedList (org.apache.solr.common.util.NamedList): 1 usage
StrUtils.formatString (org.apache.solr.common.util.StrUtils.formatString): 1 usage
DefaultSolrThreadFactory (org.apache.solr.util.DefaultSolrThreadFactory): 1 usage
KeeperException (org.apache.zookeeper.KeeperException): 1 usage