use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class ZkClientClusterStateProvider method connect.
@Override
public void connect() {
if (zkStateReader == null) {
synchronized (this) {
if (zkStateReader == null) {
ZkStateReader zk = null;
try {
zk = new ZkStateReader(zkHost, zkClientTimeout, zkConnectTimeout);
zk.createClusterStateWatchersAndUpdate();
zkStateReader = zk;
log.info("Cluster at {} ready", zkHost);
} catch (InterruptedException e) {
zk.close();
Thread.currentThread().interrupt();
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (KeeperException e) {
zk.close();
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (Exception e) {
if (zk != null)
zk.close();
// do not wrap because clients may be relying on the underlying exception being thrown
throw e;
}
}
}
}
}
use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class LeaderElector method joinElection.
/**
* Begin participating in the election process. Gets a new sequential number
* and begins watching the node with the sequence number before it, unless it
* is the lowest number, in which case, initiates the leader process. If the
* node that is watched goes down, check if we are the new lowest node, else
* watch the next lowest numbered node.
*
* @return sequential node number
*/
public int joinElection(ElectionContext context, boolean replacement, boolean joinAtHead) throws KeeperException, InterruptedException, IOException {
context.joinedElectionFired();
final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE;
long sessionId = zkClient.getSolrZooKeeper().getSessionId();
String id = sessionId + "-" + context.id;
String leaderSeqPath = null;
boolean cont = true;
int tries = 0;
while (cont) {
try {
if (joinAtHead) {
log.debug("Node {} trying to join election at the head", id);
List<String> nodes = OverseerTaskProcessor.getSortedElectionNodes(zkClient, shardsElectZkPath);
if (nodes.size() < 2) {
leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null, CreateMode.EPHEMERAL_SEQUENTIAL, false);
} else {
String firstInLine = nodes.get(1);
log.debug("The current head: {}", firstInLine);
Matcher m = LEADER_SEQ.matcher(firstInLine);
if (!m.matches()) {
throw new IllegalStateException("Could not find regex match in:" + firstInLine);
}
leaderSeqPath = shardsElectZkPath + "/" + id + "-n_" + m.group(1);
zkClient.create(leaderSeqPath, null, CreateMode.EPHEMERAL, false);
}
} else {
leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null, CreateMode.EPHEMERAL_SEQUENTIAL, false);
}
log.debug("Joined leadership election with path: {}", leaderSeqPath);
context.leaderSeqPath = leaderSeqPath;
cont = false;
} catch (ConnectionLossException e) {
// we don't know if we made our node or not...
List<String> entries = zkClient.getChildren(shardsElectZkPath, null, true);
boolean foundId = false;
for (String entry : entries) {
String nodeId = getNodeId(entry);
if (id.equals(nodeId)) {
// we did create our node...
foundId = true;
break;
}
}
if (!foundId) {
cont = true;
if (tries++ > 20) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
try {
Thread.sleep(50);
} catch (InterruptedException e2) {
Thread.currentThread().interrupt();
}
}
} catch (KeeperException.NoNodeException e) {
// be working on it, lets try again
if (tries++ > 20) {
context = null;
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
cont = true;
try {
Thread.sleep(50);
} catch (InterruptedException e2) {
Thread.currentThread().interrupt();
}
}
}
checkIfIamLeader(context, replacement);
return getSeq(context.leaderSeqPath);
}
use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class CreateCollectionCmd method getConfName.
private static void getConfName(SolrZkClient zkClient, String collection, String collectionPath, Map<String, Object> collectionProps) throws KeeperException, InterruptedException {
// check for configName
log.debug("Looking for collection configName");
if (collectionProps.containsKey("configName")) {
log.info("configName was passed as a param {}", collectionProps.get("configName"));
return;
}
List<String> configNames = null;
int retry = 1;
int retryLimt = 6;
for (; retry < retryLimt; retry++) {
if (zkClient.exists(collectionPath, true)) {
ZkNodeProps cProps = ZkNodeProps.load(zkClient.getData(collectionPath, null, null, true));
if (cProps.containsKey(ZkController.CONFIGNAME_PROP)) {
break;
}
}
// if there is only one conf, use that
try {
configNames = zkClient.getChildren(ZkConfigManager.CONFIGS_ZKNODE, null, true);
} catch (NoNodeException e) {
// just keep trying
}
if (configNames != null && configNames.size() == 1) {
// no config set named, but there is only 1 - use it
log.info("Only one config set found in zk - using it:" + configNames.get(0));
collectionProps.put(ZkController.CONFIGNAME_PROP, configNames.get(0));
break;
}
if (configNames != null && configNames.contains(collection)) {
log.info("Could not find explicit collection configName, but found config name matching collection name - using that set.");
collectionProps.put(ZkController.CONFIGNAME_PROP, collection);
break;
}
log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry);
Thread.sleep(3000);
}
if (retry == retryLimt) {
log.error("Could not find configName for collection " + collection);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "Could not find configName for collection " + collection + " found:" + configNames);
}
}
use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class ZkController method preRegister.
public void preRegister(CoreDescriptor cd) {
String coreNodeName = getCoreNodeName(cd);
// this also gets us our assigned shard id if it was not specified
try {
checkStateInZk(cd);
CloudDescriptor cloudDesc = cd.getCloudDescriptor();
// make sure the node name is set on the descriptor
if (cloudDesc.getCoreNodeName() == null) {
cloudDesc.setCoreNodeName(coreNodeName);
}
publish(cd, Replica.State.DOWN, false, true);
String collectionName = cd.getCloudDescriptor().getCollectionName();
DocCollection collection = zkStateReader.getClusterState().getCollectionOrNull(collectionName);
log.debug(collection == null ? "Collection {} not visible yet, but flagging it so a watch is registered when it becomes visible" : "Registering watch for collection {}", collectionName);
zkStateReader.registerCore(collectionName);
} catch (KeeperException e) {
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
if (cd.getCloudDescriptor().getShardId() == null && needsToBeAssignedShardId(cd, zkStateReader.getClusterState(), coreNodeName)) {
doGetShardIdAndNodeNameProcess(cd);
} else {
// still wait till we see us in local state
doGetShardIdAndNodeNameProcess(cd);
}
}
use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class ZkController method init.
private void init(CurrentCoreDescriptorProvider registerOnReconnect) {
try {
createClusterZkNodes(zkClient);
zkStateReader.createClusterStateWatchersAndUpdate();
this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName);
checkForExistingEphemeralNode();
// start the overseer first as following code may need it's processing
if (!zkRunOnly) {
overseerElector = new LeaderElector(zkClient);
this.overseer = new Overseer(cc.getShardHandlerFactory().getShardHandler(), cc.getUpdateShardHandler(), CommonParams.CORES_HANDLER_PATH, zkStateReader, this, cloudConfig);
ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName());
overseerElector.setup(context);
overseerElector.joinElection(context, false);
}
Stat stat = zkClient.exists(ZkStateReader.LIVE_NODES_ZKNODE, null, true);
if (stat != null && stat.getNumChildren() > 0) {
publishAndWaitForDownStates();
}
// Do this last to signal we're up.
createEphemeralLiveNode();
} catch (IOException e) {
log.error("", e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't create ZooKeeperController", e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (KeeperException e) {
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
}
Aggregations