use of org.apache.solr.common.util.NamedList in project lucene-solr by apache.
the class ReplicateFromLeader method startReplication.
/**
 * Starts a replication handler that will periodically pull indices from the shard leader.
 *
 * <p>The poll interval defaults to {@code "00:00:03"}. If a hard auto-commit max time is
 * configured, the interval is derived from half of it; otherwise, if a soft auto-commit max time
 * is configured, from half of that.
 *
 * <p>If the core cannot be found and the core container is shut down, this method returns
 * without doing anything.
 *
 * @param switchTransactionLog if true, ReplicationHandler will rotate the transaction log once
 *     the replication is done
 * @throws SolrException if the core is not found while the core container is still running
 * @throws InterruptedException declared by the signature; nothing in the visible body throws it
 *     directly
 */
public void startReplication(boolean switchTransactionLog) throws InterruptedException {
  try (SolrCore core = cc.getCore(coreName)) {
    if (core == null) {
      if (cc.isShutDown()) {
        return;
      }
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
    }

    SolrConfig.UpdateHandlerInfo uinfo = core.getSolrConfig().getUpdateHandlerInfo();
    String pollIntervalStr = "00:00:03";
    if (uinfo.autoCommmitMaxTime != -1) {
      pollIntervalStr = toPollIntervalStr(uinfo.autoCommmitMaxTime / 2);
    } else if (uinfo.autoSoftCommmitMaxTime != -1) {
      pollIntervalStr = toPollIntervalStr(uinfo.autoSoftCommmitMaxTime / 2);
    }
    LOG.info("Will start replication from leader with poll interval: {}", pollIntervalStr);

    NamedList<Object> slaveConfig = new NamedList<>();
    slaveConfig.add("fetchFromLeader", true);
    slaveConfig.add("pollInterval", pollIntervalStr);
    NamedList<Object> replicationConfig = new NamedList<>();
    replicationConfig.add("slave", slaveConfig);

    String lastCommitVersion = getCommitVersion(core);
    if (lastCommitVersion != null) {
      lastVersion = Long.parseLong(lastCommitVersion);
    }

    replicationProcess = new ReplicationHandler();
    if (switchTransactionLog) {
      // The listener runs after this method has returned, i.e. after the try-with-resources has
      // released the `core` reference taken above. It therefore works only with the core
      // instance that is handed to it on each poll.
      replicationProcess.setPollListener((solrCore, pollSuccess) -> {
        if (!pollSuccess) {
          return;
        }
        String commitVersion = getCommitVersion(solrCore);
        if (commitVersion == null) {
          return;
        }
        long version = Long.parseLong(commitVersion);
        if (version == lastVersion) {
          // Nothing new was committed since the last poll.
          return;
        }
        UpdateLog updateLog = solrCore.getUpdateHandler().getUpdateLog();
        SolrQueryRequest req = new LocalSolrQueryRequest(solrCore, new ModifiableSolrParams());
        try {
          CommitUpdateCommand cuc = new CommitUpdateCommand(req, false);
          cuc.setVersion(version);
          updateLog.copyOverOldUpdates(cuc);
          lastVersion = version;
        } finally {
          // Release whatever the local request may be holding.
          req.close();
        }
      });
    }
    replicationProcess.init(replicationConfig);
    replicationProcess.inform(core);
  }
}
use of org.apache.solr.common.util.NamedList in project lucene-solr by apache.
the class OverseerStatusCmd method call.
/**
 * Collects overseer status information into {@code results}: the current leader node, the child
 * counts of the three overseer queue znodes, and per-operation statistics grouped into overseer
 * operations, collection operations and the three queues.
 *
 * @param state not read by this method
 * @param message not read by this method
 * @param results receives the collected status entries
 * @throws Exception propagated from the ZooKeeper client or the statistics helpers
 */
@Override
@SuppressWarnings("unchecked")
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  // Key prefixes that decide which group a statistic belongs to; the prefix is stripped from
  // the name under which the statistic is reported.
  final String collectionPrefix = "collection_";
  final String stateQueuePrefix = "/overseer/queue_";
  final String workQueuePrefix = "/overseer/queue-work_";
  final String collectionQueuePrefix = "/overseer/collection-queue-work_";

  ZkStateReader reader = ocmh.zkStateReader;
  results.add("leader", OverseerTaskProcessor.getLeaderNode(reader.getZkClient()));

  // {znode path, result key} pairs, reported in this order.
  final String[][] queueSizes = {
    {"/overseer/queue", "overseer_queue_size"},
    {"/overseer/queue-work", "overseer_work_queue_size"},
    {"/overseer/collection-queue-work", "overseer_collection_queue_size"},
  };
  for (String[] queue : queueSizes) {
    Stat znodeStat = new Stat();
    reader.getZkClient().getData(queue[0], null, znodeStat, true);
    results.add(queue[1], znodeStat.getNumChildren());
  }

  NamedList<Object> overseerOps = new NamedList<>();
  NamedList<Object> collectionOps = new NamedList<>();
  NamedList<Object> stateQueue = new NamedList<>();
  NamedList<Object> workQueue = new NamedList<>();
  NamedList<Object> collectionQueue = new NamedList<>();

  Overseer.Stats allStats = ocmh.stats;
  for (Map.Entry<String, Overseer.Stat> statEntry : allStats.getStats().entrySet()) {
    String name = statEntry.getKey();
    NamedList<Object> details = new SimpleOrderedMap<>();

    if (name.startsWith(collectionPrefix)) {
      collectionOps.add(name.substring(collectionPrefix.length()), details);
      int okCount = allStats.getSuccessCount(name);
      int errorCount = allStats.getErrorCount(name);
      details.add("requests", okCount);
      details.add("errors", errorCount);

      List<Overseer.FailedOp> failedOps = allStats.getFailureDetails(name);
      if (failedOps != null) {
        List<SimpleOrderedMap<Object>> recentFailures = new ArrayList<>();
        for (Overseer.FailedOp op : failedOps) {
          SimpleOrderedMap<Object> failure = new SimpleOrderedMap<>();
          failure.add("request", op.req.getProperties());
          failure.add("response", op.resp.getResponse());
          recentFailures.add(failure);
        }
        details.add("recent_failures", recentFailures);
      }
    } else if (name.startsWith(stateQueuePrefix)) {
      stateQueue.add(name.substring(stateQueuePrefix.length()), details);
    } else if (name.startsWith(workQueuePrefix)) {
      workQueue.add(name.substring(workQueuePrefix.length()), details);
    } else if (name.startsWith(collectionQueuePrefix)) {
      collectionQueue.add(name.substring(collectionQueuePrefix.length()), details);
    } else {
      // Anything without a known prefix is an overseer operation.
      overseerOps.add(name, details);
      int okCount = allStats.getSuccessCount(name);
      int errorCount = allStats.getErrorCount(name);
      details.add("requests", okCount);
      details.add("errors", errorCount);
    }

    // Timer metrics are appended for every statistic, whatever its group.
    MetricUtils.addMetrics(details, statEntry.getValue().requestTime);
  }

  results.add("overseer_operations", overseerOps);
  results.add("collection_operations", collectionOps);
  results.add("overseer_queue", stateQueue);
  results.add("overseer_internal_queue", workQueue);
  results.add("collection_queue", collectionQueue);
}
use of org.apache.solr.common.util.NamedList in project lucene-solr by apache.
the class ReplaceNodeCmd method call.
/**
 * Replaces a node: creates a replica on the {@code target} node for every replica hosted on the
 * {@code source} node and, once all of them were created, cleans up the replicas on the source
 * node.
 *
 * <p>If any replica could not be created, or the creations do not finish within five minutes,
 * the replicas already created on the target node are deleted again, the source node is left
 * untouched and no {@code "success"} entry is added to {@code results}.
 *
 * @param state cluster state handed to the source-node cleanup
 * @param message must contain {@code "source"} and {@code "target"}; {@code "async"} and
 *     {@code "parallel"} (default {@code false}) are optional
 * @param results receives {@code "failure"} entries for every problem, or a single
 *     {@code "success"} entry when the replacement completed
 * @throws SolrException with {@code BAD_REQUEST} if the source or target node is not live
 * @throws Exception propagated from replica creation, deletion or the source cleanup
 */
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  ZkStateReader zkStateReader = ocmh.zkStateReader;
  ocmh.checkRequired(message, "source", "target");
  String source = message.getStr("source");
  String target = message.getStr("target");
  String async = message.getStr("async");
  boolean parallel = message.getBool("parallel", false);
  ClusterState clusterState = zkStateReader.getClusterState();
  if (!clusterState.liveNodesContain(source)) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Source Node: " + source + " is not live");
  }
  if (!clusterState.liveNodesContain(target)) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target Node: " + target + " is not live");
  }

  List<ZkNodeProps> sourceReplicas = getReplicasOfNode(source, clusterState);
  List<ZkNodeProps> createdReplicas = new ArrayList<>();
  AtomicBoolean anyOneFailed = new AtomicBoolean(false);
  CountDownLatch countDownLatch = new CountDownLatch(sourceReplicas.size());

  for (ZkNodeProps sourceReplica : sourceReplicas) {
    NamedList<Object> nl = new NamedList<>();
    log.info("Going to create replica for collection={} shard={} on node={}", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target);
    ZkNodeProps msg = sourceReplica.plus("parallel", String.valueOf(parallel)).plus(CoreAdminParams.NODE, target);
    if (async != null) {
      msg.getProperties().put(ASYNC, async);
    }
    final ZkNodeProps addedReplica = ocmh.addReplica(clusterState, msg, nl, () -> {
      try {
        if (nl.get("failure") != null) {
          String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s shard=%s" + " on node=%s", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target);
          log.warn(errorString);
          synchronized (results) {
            results.add("failure", errorString);
            anyOneFailed.set(true);
          }
        } else {
          log.debug("Successfully created replica for collection={} shard={} on node={}", sourceReplica.getStr(COLLECTION_PROP), sourceReplica.getStr(SHARD_ID_PROP), target);
        }
      } finally {
        // Count down only after the outcome has been recorded, so the waiting thread cannot
        // wake up and read the failure flag before it was set.
        countDownLatch.countDown();
      }
    });
    if (addedReplica != null) {
      createdReplicas.add(addedReplica);
    }
  }

  log.debug("Waiting for replace node action to complete");
  if (countDownLatch.await(5, TimeUnit.MINUTES)) {
    log.debug("Finished waiting for replace node action to complete");
  } else {
    // A timeout means we cannot know that all replicas exist; treat it as a failure rather
    // than going on to delete the source replicas.
    String timeoutMessage = "Timed out waiting for replicas to be created on node=" + target;
    log.warn(timeoutMessage);
    synchronized (results) {
      results.add("failure", timeoutMessage);
    }
    anyOneFailed.set(true);
  }

  if (anyOneFailed.get()) {
    log.info("Failed to create some replicas. Cleaning up all replicas on target node");
    CountDownLatch cleanupLatch = new CountDownLatch(createdReplicas.size());
    for (ZkNodeProps createdReplica : createdReplicas) {
      NamedList<Object> deleteResult = new NamedList<>();
      try {
        ocmh.deleteReplica(zkStateReader.getClusterState(), createdReplica.plus("parallel", "true"), deleteResult, () -> {
          try {
            if (deleteResult.get("failure") != null) {
              synchronized (results) {
                results.add("failure", "Could not cleanup, because of : " + deleteResult.get("failure"));
              }
            }
          } finally {
            cleanupLatch.countDown();
          }
        });
      } catch (KeeperException e) {
        // Best effort: keep cleaning up the remaining replicas.
        cleanupLatch.countDown();
        log.warn("Error deleting replica ", e);
      } catch (Exception e) {
        log.warn("Error deleting replica ", e);
        cleanupLatch.countDown();
        throw e;
      }
    }
    if (!cleanupLatch.await(5, TimeUnit.MINUTES)) {
      log.warn("Timed out waiting for cleanup of replicas on node={}", target);
    }
    // The replacement failed: the source replicas must stay, and no success is reported.
    return;
  }

  // Reaching this point means all replicas could be recreated,
  // so now clean up the replicas on the source node.
  DeleteNodeCmd.cleanupReplicas(results, state, sourceReplicas, ocmh, source, async);
  results.add("success", "REPLACENODE action completed successfully from : " + source + " to : " + target);
}
use of org.apache.solr.common.util.NamedList in project lucene-solr by apache.
the class OverseerConfigSetMessageHandler method deleteConfigSet.
/**
 * Deletes the named ConfigSet from ZooKeeper.
 *
 * @param configSetName name of the ConfigSet to delete
 * @param force when true, a ConfigSet flagged as immutable is deleted anyway
 * @throws SolrException with {@code BAD_REQUEST} if the ConfigSet does not exist, is used by a
 *     collection, or is immutable and {@code force} is false
 * @throws IOException declared by the signature; presumably raised by the ZooKeeper-backed
 *     helpers called here
 */
private void deleteConfigSet(String configSetName, boolean force) throws IOException {
  ZkConfigManager zkConfigs = new ZkConfigManager(zkStateReader.getZkClient());
  if (!zkConfigs.configExists(configSetName)) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "ConfigSet does not exist to delete: " + configSetName);
  }

  // Refuse to delete a ConfigSet that any collection is still configured with.
  for (String collectionName : zkStateReader.getClusterState().getCollectionsMap().keySet()) {
    if (configSetName.equals(zkStateReader.readConfigName(collectionName))) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Can not delete ConfigSet as it is currently being used by collection [" + collectionName + "]");
    }
  }

  NamedList<?> configSetProps =
      getConfigSetProperties(getPropertyPath(configSetName, ConfigSetProperties.DEFAULT_FILENAME));
  if (configSetProps != null) {
    Object immutableFlag = configSetProps.get(ConfigSetProperties.IMMUTABLE_CONFIGSET_ARG);
    boolean immutableConfigSet =
        immutableFlag != null && Boolean.parseBoolean(immutableFlag.toString());
    if (immutableConfigSet && !force) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Requested delete of immutable ConfigSet: " + configSetName);
    }
  }

  zkConfigs.deleteConfigDir(configSetName);
}
use of org.apache.solr.common.util.NamedList in project lucene-solr by apache.
the class OverseerRoleCmd method call.
/**
 * Adds a node to, or removes a node from, the node list of a role stored in the roles znode,
 * then triggers overseer prioritization on a separate thread.
 *
 * @param state not read by this method
 * @param message supplies the {@code "node"} and {@code "role"} values
 * @param results not written by this method
 * @throws Exception propagated from the ZooKeeper client
 */
@Override
@SuppressWarnings("unchecked")
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
  ZkStateReader zkStateReader = ocmh.zkStateReader;
  SolrZkClient zkClient = zkStateReader.getZkClient();
  String node = message.getStr("node");
  String roleName = message.getStr("role");

  // Load the current role assignments, or start from an empty map if the znode is missing.
  final boolean rolesZnodeExists = zkClient.exists(ZkStateReader.ROLES, true);
  Map<String, List<String>> roles;
  if (rolesZnodeExists) {
    roles = (Map<String, List<String>>) Utils.fromJSON(zkClient.getData(ZkStateReader.ROLES, null, new Stat(), true));
  } else {
    roles = new LinkedHashMap<>(1);
  }

  List<String> nodesWithRole = roles.get(roleName);
  if (nodesWithRole == null) {
    nodesWithRole = new ArrayList<>();
    roles.put(roleName, nodesWithRole);
  }

  if (ADDROLE == operation) {
    log.info("Overseer role added to {}", node);
    if (!nodesWithRole.contains(node)) {
      nodesWithRole.add(node);
    }
  } else if (REMOVEROLE == operation) {
    log.info("Overseer role removed from {}", node);
    nodesWithRole.remove(node);
  }

  // Write the assignments back: update the znode if it was there, create it otherwise.
  if (rolesZnodeExists) {
    zkClient.setData(ZkStateReader.ROLES, Utils.toJSON(roles), true);
  } else {
    zkClient.create(ZkStateReader.ROLES, Utils.toJSON(roles), CreateMode.PERSISTENT, true);
  }

  // If there are too many nodes this command may time out, and dedicated overseers are most
  // likely created exactly when there are many nodes. So do this operation in a separate thread.
  Thread prioritizer = new Thread(() -> {
    try {
      overseerPrioritizer.prioritizeOverseerNodes(ocmh.myId);
    } catch (Exception e) {
      log.error("Error in prioritizing Overseer", e);
    }
  });
  prioritizer.start();
}
Aggregations