use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.
the class CollectionsHandler method handleResponse.
/**
 * Hands a collection operation to the Overseer collection queue and copies the outcome into {@code rsp}.
 *
 * <p>When the message carries a non-null {@code ASYNC} value the message is only enqueued (unless the same
 * request id is already present in the completed, failure or running maps, or in the queue itself) and a
 * response holding the request id is returned immediately. Otherwise the call waits up to {@code timeout}
 * for the queue event to carry result bytes.
 *
 * @param operation name of the operation, used only as a prefix for error messages
 * @param m         the message to serialize and enqueue
 * @param rsp       response object that receives the returned values and, if reported, an exception
 * @param timeout   wait limit passed to the queue; compared against elapsed time as milliseconds
 * @return the response built for the async case, or the response deserialized from the queue event
 * @throws SolrException        if the async id is {@code "-1"}, or if no result bytes came back
 *                              (timeout, watcher fired, or unknown reason)
 * @throws KeeperException      declared by the signature; presumably raised by the ZooKeeper-backed calls
 * @throws InterruptedException declared by the signature; presumably raised while waiting on the queue
 */
private SolrResponse handleResponse(String operation, ZkNodeProps m, SolrQueryResponse rsp, long timeout) throws KeeperException, InterruptedException {
  final long startNanos = System.nanoTime();

  // Asynchronous request: enqueue (if the id is new) and answer right away with the request id.
  if (m.containsKey(ASYNC) && m.get(ASYNC) != null) {
    final String requestId = m.getStr(ASYNC);
    if (requestId.equals("-1")) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "requestid can not be -1. It is reserved for cleanup purposes.");
    }

    final NamedList<String> result = new NamedList<>();
    final boolean idAlreadyKnown =
        coreContainer.getZkController().getOverseerCompletedMap().contains(requestId)
            || coreContainer.getZkController().getOverseerFailureMap().contains(requestId)
            || coreContainer.getZkController().getOverseerRunningMap().contains(requestId)
            || overseerCollectionQueueContains(requestId);
    if (idAlreadyKnown) {
      result.add("error", "Task with the same requestid already exists.");
    } else {
      coreContainer.getZkController().getOverseerCollectionQueue().offer(Utils.toJSON(m));
    }
    result.add(CoreAdminParams.REQUESTID, (String) m.get(ASYNC));

    final SolrResponse asyncResponse = new OverseerSolrResponse(result);
    rsp.getValues().addAll(asyncResponse.getResponse());
    return asyncResponse;
  }

  // Synchronous request: wait for the queue event, then inspect what it carries.
  final QueueEvent event = coreContainer.getZkController().getOverseerCollectionQueue().offer(Utils.toJSON(m), timeout);
  final byte[] payload = event.getBytes();

  if (payload == null) {
    // No result bytes: work out which failure to report.
    final long elapsedNanos = System.nanoTime() - startNanos;
    if (elapsedNanos >= TimeUnit.MILLISECONDS.toNanos(timeout)) {
      throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the collection time out:" + timeout / 1000 + "s");
    }
    if (event.getWatchedEvent() != null) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
          operation + " the collection error [Watcher fired on path: " + event.getWatchedEvent().getPath()
              + " state: " + event.getWatchedEvent().getState()
              + " type " + event.getWatchedEvent().getType() + "]");
    }
    throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the collection unknown case");
  }

  final SolrResponse response = SolrResponse.deserialize(payload);
  rsp.getValues().addAll(response.getResponse());

  final SimpleOrderedMap<?> failure = (SimpleOrderedMap<?>) response.getResponse().get("exception");
  if (failure != null) {
    final Integer rspCode = (Integer) failure.get("rspCode");
    final ErrorCode errorCode;
    if (rspCode == null || rspCode == -1) {
      // A missing code and -1 are both reported as a generic server error.
      errorCode = ErrorCode.SERVER_ERROR;
    } else {
      errorCode = ErrorCode.getErrorCode(rspCode);
    }
    rsp.setException(new SolrException(errorCode, (String) failure.get("msg")));
  }
  return response;
}
use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.
the class ConfigSetsHandler method handleResponse.
/**
 * Hands a configset operation to the Overseer configset queue, waits for the outcome and copies it into
 * {@code rsp}.
 *
 * @param operation name of the operation, used only as a prefix for error messages
 * @param m         the message to serialize and enqueue
 * @param rsp       response object that receives the returned values and, if reported, an exception
 * @param timeout   wait limit passed to the queue; compared against elapsed time as milliseconds
 * @throws SolrException        if the queue event carries no result bytes (timeout, watcher fired,
 *                              or unknown reason)
 * @throws KeeperException      declared by the signature; presumably raised by the ZooKeeper-backed calls
 * @throws InterruptedException declared by the signature; presumably raised while waiting on the queue
 */
private void handleResponse(String operation, ZkNodeProps m, SolrQueryResponse rsp, long timeout) throws KeeperException, InterruptedException {
  final long startNanos = System.nanoTime();
  final QueueEvent event = coreContainer.getZkController().getOverseerConfigSetQueue().offer(Utils.toJSON(m), timeout);
  final byte[] payload = event.getBytes();

  if (payload == null) {
    // No result bytes: work out which failure to report.
    final long elapsedNanos = System.nanoTime() - startNanos;
    if (elapsedNanos >= TimeUnit.MILLISECONDS.toNanos(timeout)) {
      throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the configset time out:" + timeout / 1000 + "s");
    }
    if (event.getWatchedEvent() != null) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
          operation + " the configset error [Watcher fired on path: " + event.getWatchedEvent().getPath()
              + " state: " + event.getWatchedEvent().getState()
              + " type " + event.getWatchedEvent().getType() + "]");
    }
    throw new SolrException(ErrorCode.SERVER_ERROR, operation + " the configset unknown case");
  }

  final SolrResponse response = SolrResponse.deserialize(payload);
  rsp.getValues().addAll(response.getResponse());

  final SimpleOrderedMap<?> failure = (SimpleOrderedMap<?>) response.getResponse().get("exception");
  if (failure == null) {
    return;
  }
  final Integer rspCode = (Integer) failure.get("rspCode");
  final ErrorCode errorCode;
  if (rspCode == null || rspCode == -1) {
    // A missing code and -1 are both reported as a generic server error.
    errorCode = ErrorCode.SERVER_ERROR;
  } else {
    errorCode = ErrorCode.getErrorCode(rspCode);
  }
  rsp.setException(new SolrException(errorCode, (String) failure.get("msg")));
}
use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.
the class OverseerCollectionConfigSetProcessorTest method issueCreateJob.
/**
 * Builds a collection CREATE message from the given settings and adds it to {@code queue} as a
 * {@link QueueEvent} whose result bytes are deserialized into {@code lastProcessMessageResult}.
 *
 * @param numberOfSlices       number of slices, sent as its string form
 * @param replicationFactor    replication factor, sent as its string form
 * @param maxShardsPerNode     max shards per node, sent as its string form
 * @param createNodeList       nodes to join with ',' as the create-node-set; {@code null} sends a null value
 * @param sendCreateNodeList   whether the create-node-set property (and possibly the shuffle flag) is sent
 * @param createNodeSetShuffle shuffle flag; always sent when it differs from the default, otherwise sent
 *                             only when the test's random source says so
 */
protected void issueCreateJob(Integer numberOfSlices, Integer replicationFactor, Integer maxShardsPerNode, List<String> createNodeList, boolean sendCreateNodeList, boolean createNodeSetShuffle) {
  final Map<String, Object> createMessage = Utils.makeMap(
      Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.CREATE.toLower(),
      ZkStateReader.REPLICATION_FACTOR, replicationFactor.toString(),
      "name", COLLECTION_NAME,
      "collection.configName", CONFIG_NAME,
      OverseerCollectionMessageHandler.NUM_SLICES, numberOfSlices.toString(),
      ZkStateReader.MAX_SHARDS_PER_NODE, maxShardsPerNode.toString());

  if (sendCreateNodeList) {
    String nodeSetValue = null;
    if (createNodeList != null) {
      nodeSetValue = StrUtils.join(createNodeList, ',');
    }
    createMessage.put(OverseerCollectionMessageHandler.CREATE_NODE_SET, nodeSetValue);

    // The random draw only happens when the flag equals the default (short-circuit), as before.
    final boolean differsFromDefault = OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE_DEFAULT != createNodeSetShuffle;
    if (differsFromDefault || random().nextBoolean()) {
      createMessage.put(OverseerCollectionMessageHandler.CREATE_NODE_SET_SHUFFLE, createNodeSetShuffle);
    }
  }

  final byte[] payload = Utils.toJSON(new ZkNodeProps(createMessage));
  queue.add(new QueueEvent("id", payload, null) {
    // Capture whatever result is written back so the test can inspect it.
    @Override
    public void setBytes(byte[] bytes) {
      lastProcessMessageResult = SolrResponse.deserialize(bytes);
    }
  });
}
use of org.apache.solr.cloud.OverseerTaskQueue.QueueEvent in project lucene-solr by apache.
the class OverseerTaskProcessor method run.
/**
 * Main loop of the task processor: repeatedly reads tasks from the work queue and dispatches them to a
 * thread pool until this processor is closed or leadership is lost.
 *
 * <p>Outline of what the method does:
 * <ol>
 *   <li>Polls {@code amILeader()} until the answer is no longer {@code DONT_KNOW}.</li>
 *   <li>Remembers the tail id of the work queue (if any) as a marker for tasks left over from before
 *       this run, so that async tasks already recorded in the completed/failure maps are removed
 *       instead of re-executed.</li>
 *   <li>Asks the prioritizer to prioritize overseer nodes; failures are logged unless the ZooKeeper
 *       client is already closed.</li>
 *   <li>Creates the executor and loops: cleans up the work queue, waits while more than
 *       {@code MAX_PARALLEL_TASKS} tasks are running, gathers previously blocked tasks plus newly peeked
 *       ones, and for each either re-blocks it, skips it, removes it, or locks it, marks it as running and
 *       submits a {@code Runner}.</li>
 * </ol>
 *
 * <p>The loop exits when leadership is {@code NO}, the processor is closed, the ZooKeeper session has
 * expired, or the thread is interrupted. {@code close()} is always invoked on exit.
 */
@Override
public void run() {
  log.debug("Process current queue of overseer operations");
  LeaderStatus isLeader = amILeader();
  while (isLeader == LeaderStatus.DONT_KNOW) {
    log.debug("am_i_leader unclear {}", isLeader);
    // not a no, not a yes, try ask again
    isLeader = amILeader();
  }
  String oldestItemInWorkQueue = null;
  // hasLeftOverItems - used for avoiding re-execution of async tasks that were processed by a previous Overseer.
  // This variable is set in case there's any task found on the workQueue when the OCP starts up and
  // the id for the queue tail is used as a marker to check for the task in completed/failed map in zk.
  // Beyond the marker, all tasks can safely be assumed to have never been executed.
  boolean hasLeftOverItems = true;
  try {
    oldestItemInWorkQueue = workQueue.getTailId();
  } catch (KeeperException e) {
    // We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed
    // async calls.
    SolrException.log(log, "", e);
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
  }
  if (oldestItemInWorkQueue == null) {
    hasLeftOverItems = false;
  } else {
    log.debug("Found already existing elements in the work-queue. Last element: {}", oldestItemInWorkQueue);
  }
  try {
    prioritizer.prioritizeOverseerNodes(myId);
  } catch (Exception e) {
    // Only worth reporting while the ZooKeeper client is still open.
    if (!zkStateReader.getZkClient().isClosed()) {
      log.error("Unable to prioritize overseer ", e);
    }
  }
  // TODO: Make maxThreads configurable.
  this.tpe = new ExecutorUtil.MDCAwareThreadPoolExecutor(5, MAX_PARALLEL_TASKS, 0L, TimeUnit.MILLISECONDS, new SynchronousQueue<Runnable>(), new DefaultSolrThreadFactory("OverseerThreadFactory"));
  try {
    while (!this.isClosed) {
      try {
        isLeader = amILeader();
        if (LeaderStatus.NO == isLeader) {
          break;
        } else if (LeaderStatus.YES != isLeader) {
          log.debug("am_i_leader unclear {}", isLeader);
          // not a no, not a yes, try asking again
          continue;
        }
        log.debug("Cleaning up work-queue. #Running tasks: {}", runningTasks.size());
        cleanUpWorkQueue();
        printTrackingMaps();
        boolean waited = false;
        while (runningTasks.size() > MAX_PARALLEL_TASKS) {
          synchronized (waitLock) {
            //wait for 100 ms or till a task is complete
            waitLock.wait(100);
          }
          waited = true;
        }
        if (waited) {
          cleanUpWorkQueue();
        }
        // Start from the tasks that were blocked in the previous pass, then top up from the work queue.
        ArrayList<QueueEvent> heads = new ArrayList<>(blockedTasks.size() + MAX_PARALLEL_TASKS);
        heads.addAll(blockedTasks.values());
        // to clear out at least a few items in the queue before we read more items
        if (heads.size() < MAX_BLOCKED_TASKS) {
          //instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute
          int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasks.size());
          List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L);
          log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks);
          heads.addAll(newTasks);
        } else {
          // Prevent free-spinning this loop.
          Thread.sleep(1000);
        }
        if (isClosed) {
          break;
        }
        if (heads.isEmpty()) {
          continue;
        }
        // clear it now; may get refilled below.
        blockedTasks.clear();
        taskBatch.batchId++;
        boolean tooManyTasks = false;
        for (QueueEvent head : heads) {
          // Once the limit has been hit it stays hit for the remainder of this batch.
          if (!tooManyTasks) {
            synchronized (runningTasks) {
              tooManyTasks = runningTasks.size() >= MAX_PARALLEL_TASKS;
            }
          }
          if (tooManyTasks) {
            // Too many tasks are running, just shove the rest into the "blocked" queue.
            if (blockedTasks.size() < MAX_BLOCKED_TASKS) {
              blockedTasks.put(head.getId(), head);
            }
            continue;
          }
          if (runningZKTasks.contains(head.getId())) {
            continue;
          }
          final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
          OverseerMessageHandler messageHandler = selector.selectOverseerMessageHandler(message);
          final String asyncId = message.getStr(ASYNC);
          if (hasLeftOverItems) {
            // Reaching the marker means every later task was enqueued after start-up.
            if (head.getId().equals(oldestItemInWorkQueue)) {
              hasLeftOverItems = false;
            }
            if (asyncId != null && (completedMap.contains(asyncId) || failureMap.contains(asyncId))) {
              log.debug("Found already processed task in workQueue, cleaning up. AsyncId [{}]", asyncId);
              workQueue.remove(head);
              continue;
            }
          }
          String operation = message.getStr(Overseer.QUEUE_OPERATION);
          OverseerMessageHandler.Lock lock = messageHandler.lockTask(message, taskBatch);
          if (lock == null) {
            log.debug("Exclusivity check failed for [{}]", message);
            //we may end crossing the size of the MAX_BLOCKED_TASKS. They are fine
            if (blockedTasks.size() < MAX_BLOCKED_TASKS) {
              blockedTasks.put(head.getId(), head);
            }
            continue;
          }
          try {
            markTaskAsRunning(head, asyncId);
            log.debug("Marked task [{}] as running", head.getId());
          } catch (KeeperException.NodeExistsException e) {
            lock.unlock();
            // This should never happen
            log.error("Tried to pick up task [{}] when it was already running!", head.getId());
            continue;
          } catch (InterruptedException e) {
            lock.unlock();
            // The placeholder is required; without it the task id argument is silently dropped.
            log.error("Thread interrupted while trying to pick task [{}] for execution.", head.getId());
            Thread.currentThread().interrupt();
            continue;
          }
          // Parameterized so the message is only built when debug logging is enabled.
          log.debug("{}: Get the message id:{} message:{}", messageHandler.getName(), head.getId(), message);
          Runner runner = new Runner(messageHandler, message, operation, head, lock);
          tpe.execute(runner);
        }
      } catch (KeeperException e) {
        if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
          log.warn("Overseer cannot talk to ZK");
          return;
        }
        SolrException.log(log, "", e);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      } catch (Exception e) {
        // Any other failure is logged and the loop carries on with the next pass.
        SolrException.log(log, "", e);
      }
    }
  } finally {
    this.close();
  }
}
Aggregations