use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.data.Stat in project lucene-solr by apache.
the class OverseerStatusCmd method call.
/**
 * Fills {@code results} with a snapshot of the overseer's status.
 *
 * <p>The snapshot contains the current overseer leader, the child count of each of the three
 * overseer queue nodes in ZooKeeper, and the per-key statistics held in {@code ocmh.stats},
 * grouped by key prefix into overseer operations, collection operations and the three queues.
 *
 * @param state not referenced by this method
 * @param message not referenced by this method
 * @param results receives all status entries
 * @throws Exception propagated from the ZooKeeper and stats calls made here
 */
@Override
@SuppressWarnings("unchecked")
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  final ZkStateReader reader = ocmh.zkStateReader;
  results.add("leader", OverseerTaskProcessor.getLeaderNode(reader.getZkClient()));

  // ZooKeeper path of each queue paired with the key its child count is reported under.
  final String[][] queueSizeKeys = {
    {"/overseer/queue", "overseer_queue_size"},
    {"/overseer/queue-work", "overseer_work_queue_size"},
    {"/overseer/collection-queue-work", "overseer_collection_queue_size"}
  };
  for (String[] pathAndKey : queueSizeKeys) {
    Stat queueStat = new Stat();
    reader.getZkClient().getData(pathAndKey[0], null, queueStat, true);
    results.add(pathAndKey[1], queueStat.getNumChildren());
  }

  // Stat keys are bucketed by these prefixes; the prefix is stripped from the reported name.
  final String collectionPrefix = "collection_";
  final String stateQueuePrefix = "/overseer/queue_";
  final String workQueuePrefix = "/overseer/queue-work_";
  final String collectionQueuePrefix = "/overseer/collection-queue-work_";

  final NamedList byOverseerOp = new NamedList();
  final NamedList byCollectionOp = new NamedList();
  final NamedList byStateQueue = new NamedList();
  final NamedList byWorkQueue = new NamedList();
  final NamedList byCollectionQueue = new NamedList();

  final Overseer.Stats opStats = ocmh.stats;
  for (Map.Entry<String, Overseer.Stat> statEntry : opStats.getStats().entrySet()) {
    final String key = statEntry.getKey();
    final NamedList<Object> details = new SimpleOrderedMap<>();

    // Decide which group this key belongs to and under what name it is listed.
    final boolean collectionOp = key.startsWith(collectionPrefix);
    final NamedList group;
    final String label;
    boolean withCounts = collectionOp;
    if (collectionOp) {
      group = byCollectionOp;
      label = key.substring(collectionPrefix.length());
    } else if (key.startsWith(stateQueuePrefix)) {
      group = byStateQueue;
      label = key.substring(stateQueuePrefix.length());
    } else if (key.startsWith(workQueuePrefix)) {
      group = byWorkQueue;
      label = key.substring(workQueuePrefix.length());
    } else if (key.startsWith(collectionQueuePrefix)) {
      group = byCollectionQueue;
      label = key.substring(collectionQueuePrefix.length());
    } else {
      // Anything without a known prefix is an overseer operation, listed under its full key.
      group = byOverseerOp;
      label = key;
      withCounts = true;
    }
    group.add(label, details);

    // Only collection and overseer operations report request/error counters.
    if (withCounts) {
      int successes = opStats.getSuccessCount(key);
      int errors = opStats.getErrorCount(key);
      details.add("requests", successes);
      details.add("errors", errors);
    }

    // Collection operations additionally list their recorded failures, when there are any.
    if (collectionOp) {
      List<Overseer.FailedOp> failureDetails = opStats.getFailureDetails(key);
      if (failureDetails != null) {
        List<SimpleOrderedMap<Object>> recentFailures = new ArrayList<>();
        for (Overseer.FailedOp failed : failureDetails) {
          SimpleOrderedMap<Object> failure = new SimpleOrderedMap<>();
          failure.add("request", failed.req.getProperties());
          failure.add("response", failed.resp.getResponse());
          recentFailures.add(failure);
        }
        details.add("recent_failures", recentFailures);
      }
    }

    // Timer metrics are appended for every key, whatever its group.
    Timer requestTimer = statEntry.getValue().requestTime;
    MetricUtils.addMetrics(details, requestTimer);
  }

  results.add("overseer_operations", byOverseerOp);
  results.add("collection_operations", byCollectionOp);
  results.add("overseer_queue", byStateQueue);
  results.add("overseer_internal_queue", byWorkQueue);
  results.add("collection_queue", byCollectionQueue);
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.data.Stat in project lucene-solr by apache.
the class OverseerTaskQueue method offer.
/**
 * Offer the data and wait for the response.
 *
 * <p>A response node is created and watched first, then the request node carrying
 * {@code data} is created. If the response node exists and its watcher has not fired yet,
 * the call waits on the watcher using {@code timeout}. Afterwards the response node's data
 * is read, wrapped in a {@link QueueEvent} together with the watcher's event, and the
 * response node is deleted.
 *
 * <p>The whole call is timed under the stats key {@code dir + "_offer"}; the timer is
 * stopped in a {@code finally} block, so it is stopped on exceptional exits as well.
 *
 * @param data payload handed to {@code createRequestNode}
 * @param timeout value handed to {@code LatchWatcher.await}; NOTE(review): presumably
 *     milliseconds - confirm against LatchWatcher
 * @return the event built from the response node id, its data and the watcher's event
 * @throws KeeperException propagated from the calls made here
 * @throws InterruptedException propagated from the calls made here
 */
public QueueEvent offer(byte[] data, long timeout) throws KeeperException, InterruptedException {
Timer.Context time = stats.time(dir + "_offer");
try {
// Create and watch the response node before creating the request node;
// otherwise we may miss the response.
String watchID = createResponseNode();
// The watcher is constructed with this lock, and the wait below runs while holding it.
Object lock = new Object();
LatchWatcher watcher = new LatchWatcher(lock);
// A null stat means the response node was not found; in that case we do not wait below.
Stat stat = zookeeper.exists(watchID, watcher, true);
// create the request node
createRequestNode(data, watchID);
synchronized (lock) {
// Only wait when the node exists and the watcher has not already recorded an event.
if (stat != null && watcher.getWatchedEvent() == null) {
watcher.await(timeout);
}
}
// NOTE(review): the watched event is presumably still null here if the wait timed out - confirm.
byte[] bytes = zookeeper.getData(watchID, null, null, true);
// create the event before deleting the node, otherwise we can get the deleted
// event from the watcher.
QueueEvent event = new QueueEvent(watchID, bytes, watcher.getWatchedEvent());
zookeeper.delete(watchID, -1, true);
return event;
} finally {
time.stop();
}
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.data.Stat in project lucene-solr by apache.
the class ZkController method init.
/**
 * Runs the ZooKeeper start-up sequence for this controller.
 *
 * <p>In order: creates the cluster nodes, sets up the cluster-state watchers, resolves this
 * node's base URL, checks for an existing ephemeral node, joins the overseer election (unless
 * {@code zkRunOnly} is set), publishes down states when the live-nodes node already has
 * children, and finally creates this node's ephemeral live node.
 *
 * @param registerOnReconnect not referenced inside this method body
 * @throws SolrException wrapping an {@link IOException} raised during start-up
 * @throws ZooKeeperException wrapping a {@code KeeperException} or
 *     {@link InterruptedException} raised during start-up
 */
private void init(CurrentCoreDescriptorProvider registerOnReconnect) {
  try {
    createClusterZkNodes(zkClient);
    zkStateReader.createClusterStateWatchersAndUpdate();
    this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName);
    checkForExistingEphemeralNode();

    // The overseer is started before anything else because later steps may rely on it.
    if (!zkRunOnly) {
      overseerElector = new LeaderElector(zkClient);
      this.overseer =
          new Overseer(
              cc.getShardHandlerFactory().getShardHandler(),
              cc.getUpdateShardHandler(),
              CommonParams.CORES_HANDLER_PATH,
              zkStateReader,
              this,
              cloudConfig);
      ElectionContext electionContext =
          new OverseerElectionContext(zkClient, overseer, getNodeName());
      overseerElector.setup(electionContext);
      overseerElector.joinElection(electionContext, false);
    }

    // Down states are only published when the live-nodes node exists and has children.
    Stat liveNodesStat = zkClient.exists(ZkStateReader.LIVE_NODES_ZKNODE, null, true);
    boolean liveNodesPresent = liveNodesStat != null && liveNodesStat.getNumChildren() > 0;
    if (liveNodesPresent) {
      publishAndWaitForDownStates();
    }

    // Registering the live node is the final step: it signals that this node is up.
    createEphemeralLiveNode();
  } catch (KeeperException e) {
    log.error("", e);
    throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
  } catch (InterruptedException e) {
    // Put the interrupt flag back before translating the exception.
    Thread.currentThread().interrupt();
    log.error("", e);
    throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
  } catch (IOException e) {
    log.error("", e);
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't create ZooKeeperController", e);
  }
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.data.Stat in project lucene-solr by apache.
the class ZkController method checkOverseerDesignate.
/**
 * Re-requests the overseer role for this node when it is listed as an overseer designate.
 *
 * <p>Reads the node at {@code ZkStateReader.ROLES}, parses it as a JSON map and looks up the
 * {@code "overseer"} list. If that list contains {@link #getNodeName()}, an {@code ADDROLE}
 * message for this node is offered to the overseer collection queue.
 *
 * <p>The method returns silently when the roles node is missing, has no data, does not parse
 * to a map, or has no {@code "overseer"} entry. Any other failure is logged at warn level and
 * not propagated. If the thread is interrupted while talking to ZooKeeper, the interrupt
 * status is restored so callers can still observe it.
 */
public void checkOverseerDesignate() {
  try {
    byte[] data = zkClient.getData(ZkStateReader.ROLES, null, new Stat(), true);
    if (data == null) {
      return;
    }
    Map<?, ?> roles = (Map<?, ?>) Utils.fromJSON(data);
    if (roles == null) {
      return;
    }
    List<?> nodeList = (List<?>) roles.get("overseer");
    if (nodeList == null) {
      return;
    }
    if (nodeList.contains(getNodeName())) {
      ZkNodeProps props = new ZkNodeProps(Overseer.QUEUE_OPERATION, CollectionParams.CollectionAction.ADDROLE.toString().toLowerCase(Locale.ROOT), "node", getNodeName(), "role", "overseer");
      log.info("Going to add role {} ", props);
      getOverseerCollectionQueue().offer(Utils.toJSON(props));
    }
  } catch (NoNodeException nne) {
    // No roles node means no designates are configured; nothing to do.
    return;
  } catch (InterruptedException e) {
    // Keep the best-effort contract (no rethrow), but do not lose the interrupt.
    Thread.currentThread().interrupt();
    log.warn("could not read the overseer designate ", e);
  } catch (Exception e) {
    log.warn("could not read the overseer designate ", e);
  }
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.data.Stat in project lucene-solr by apache.
the class OverseerRoleCmd method call.
/**
 * Adds a node to, or removes it from, the list stored for a role in the roles node, then
 * triggers overseer re-prioritization in the background.
 *
 * <p>The roles map is loaded from {@code ZkStateReader.ROLES} when that node exists, otherwise
 * a fresh map is used. The list for the requested role is updated according to
 * {@code operation} and the map is written back: {@code setData} when the node already
 * existed, {@code create} as a persistent node otherwise.
 *
 * @param state not referenced by this method
 * @param message supplies the {@code "node"} and {@code "role"} properties
 * @param results not referenced by this method
 * @throws Exception propagated from the ZooKeeper and JSON calls made here
 */
@Override
@SuppressWarnings("unchecked")
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  final SolrZkClient zkClient = ocmh.zkStateReader.getZkClient();
  final String node = message.getStr("node");
  final String roleName = message.getStr("role");

  // Load the current role assignments, or start from an empty map if none are stored yet.
  final boolean nodeExists = zkClient.exists(ZkStateReader.ROLES, true);
  final Map roles;
  if (nodeExists) {
    byte[] storedRoles = zkClient.getData(ZkStateReader.ROLES, null, new Stat(), true);
    roles = (Map) Utils.fromJSON(storedRoles);
  } else {
    roles = new LinkedHashMap(1);
  }

  List nodeList = (List) roles.get(roleName);
  if (nodeList == null) {
    nodeList = new ArrayList();
    roles.put(roleName, nodeList);
  }

  if (operation == ADDROLE) {
    log.info("Overseer role added to {}", node);
    if (!nodeList.contains(node)) {
      nodeList.add(node);
    }
  } else if (operation == REMOVEROLE) {
    log.info("Overseer role removed from {}", node);
    nodeList.remove(node);
  }

  // Write the updated map back, creating the roles node on first use.
  final byte[] rolesJson = Utils.toJSON(roles);
  if (nodeExists) {
    zkClient.setData(ZkStateReader.ROLES, rolesJson, true);
  } else {
    zkClient.create(ZkStateReader.ROLES, rolesJson, CreateMode.PERSISTENT, true);
  }

  // Prioritization can be slow enough on large clusters to time this command out, and
  // dedicated overseers are most likely on exactly such clusters, so it runs on its own thread.
  final Runnable prioritizeTask = () -> {
    try {
      overseerPrioritizer.prioritizeOverseerNodes(ocmh.myId);
    } catch (Exception e) {
      log.error("Error in prioritizing Overseer", e);
    }
  };
  new Thread(prioritizeTask).start();
}
Aggregations