use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.
the class DeleteShardCmd method call.
@Override
public void call(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
String collectionName = message.getStr(ZkStateReader.COLLECTION_PROP);
String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
log.info("Delete shard invoked");
Slice slice = clusterState.getSlice(collectionName, sliceId);
if (slice == null) {
if (clusterState.hasCollection(collectionName)) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shard with name " + sliceId + " exists for collection " + collectionName);
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
}
}
// For now, only allow for deletions of Inactive slices or custom hashes (range==null).
// TODO: Add check for range gaps on Slice deletion
final Slice.State state = slice.getState();
if (!(slice.getRange() == null || state == Slice.State.INACTIVE || state == Slice.State.RECOVERY || state == Slice.State.CONSTRUCTION) || state == Slice.State.RECOVERY_FAILED) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The slice: " + slice.getName() + " is currently " + state + ". Only non-active (or custom-hashed) slices can be deleted.");
}
if (state == Slice.State.RECOVERY) {
// mark the slice as 'construction' and only then try to delete the cores
// see SOLR-9455
DistributedQueue inQueue = Overseer.getStateUpdateQueue(ocmh.zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(sliceId, Slice.State.CONSTRUCTION.toString());
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
}
String asyncId = message.getStr(ASYNC);
try {
List<ZkNodeProps> replicas = getReplicasForSlice(collectionName, slice);
CountDownLatch cleanupLatch = new CountDownLatch(replicas.size());
for (ZkNodeProps r : replicas) {
final ZkNodeProps replica = r.plus(message.getProperties()).plus("parallel", "true").plus(ASYNC, asyncId);
log.info("Deleting replica for collection={} shard={} on node={}", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(CoreAdminParams.NODE));
NamedList deleteResult = new NamedList();
try {
((DeleteReplicaCmd) ocmh.commandMap.get(DELETEREPLICA)).deleteReplica(clusterState, replica, deleteResult, () -> {
cleanupLatch.countDown();
if (deleteResult.get("failure") != null) {
synchronized (results) {
results.add("failure", String.format(Locale.ROOT, "Failed to delete replica for collection=%s shard=%s" + " on node=%s", replica.getStr(COLLECTION_PROP), replica.getStr(SHARD_ID_PROP), replica.getStr(NODE_NAME_PROP)));
}
}
SimpleOrderedMap success = (SimpleOrderedMap) deleteResult.get("success");
if (success != null) {
synchronized (results) {
results.add("success", success);
}
}
});
} catch (KeeperException e) {
log.warn("Error deleting replica: " + r, e);
cleanupLatch.countDown();
} catch (Exception e) {
log.warn("Error deleting replica: " + r, e);
cleanupLatch.countDown();
throw e;
}
}
log.debug("Waiting for delete shard action to complete");
cleanupLatch.await(5, TimeUnit.MINUTES);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.SHARD_ID_PROP, sliceId);
ZkStateReader zkStateReader = ocmh.zkStateReader;
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
// wait for a while until we don't see the shard
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS);
boolean removed = false;
while (!timeout.hasTimedOut()) {
Thread.sleep(100);
DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
removed = collection.getSlice(sliceId) == null;
if (removed) {
// just a bit of time so it's more likely other readers see on return
Thread.sleep(100);
break;
}
}
if (!removed) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Could not fully remove collection: " + collectionName + " shard: " + sliceId);
}
log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId);
} catch (SolrException e) {
throw e;
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collectionName + " shard: " + sliceId, e);
}
}
use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.
the class LeaderElector method joinElection.
/**
* Begin participating in the election process. Gets a new sequential number
* and begins watching the node with the sequence number before it, unless it
* is the lowest number, in which case, initiates the leader process. If the
* node that is watched goes down, check if we are the new lowest node, else
* watch the next lowest numbered node.
*
* @return sequential node number
*/
public int joinElection(ElectionContext context, boolean replacement, boolean joinAtHead) throws KeeperException, InterruptedException, IOException {
context.joinedElectionFired();
final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE;
long sessionId = zkClient.getSolrZooKeeper().getSessionId();
String id = sessionId + "-" + context.id;
String leaderSeqPath = null;
boolean cont = true;
int tries = 0;
while (cont) {
try {
if (joinAtHead) {
log.debug("Node {} trying to join election at the head", id);
List<String> nodes = OverseerTaskProcessor.getSortedElectionNodes(zkClient, shardsElectZkPath);
if (nodes.size() < 2) {
leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null, CreateMode.EPHEMERAL_SEQUENTIAL, false);
} else {
String firstInLine = nodes.get(1);
log.debug("The current head: {}", firstInLine);
Matcher m = LEADER_SEQ.matcher(firstInLine);
if (!m.matches()) {
throw new IllegalStateException("Could not find regex match in:" + firstInLine);
}
leaderSeqPath = shardsElectZkPath + "/" + id + "-n_" + m.group(1);
zkClient.create(leaderSeqPath, null, CreateMode.EPHEMERAL, false);
}
} else {
leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null, CreateMode.EPHEMERAL_SEQUENTIAL, false);
}
log.debug("Joined leadership election with path: {}", leaderSeqPath);
context.leaderSeqPath = leaderSeqPath;
cont = false;
} catch (ConnectionLossException e) {
// we don't know if we made our node or not...
List<String> entries = zkClient.getChildren(shardsElectZkPath, null, true);
boolean foundId = false;
for (String entry : entries) {
String nodeId = getNodeId(entry);
if (id.equals(nodeId)) {
// we did create our node...
foundId = true;
break;
}
}
if (!foundId) {
cont = true;
if (tries++ > 20) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
try {
Thread.sleep(50);
} catch (InterruptedException e2) {
Thread.currentThread().interrupt();
}
}
} catch (KeeperException.NoNodeException e) {
// be working on it, lets try again
if (tries++ > 20) {
context = null;
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
cont = true;
try {
Thread.sleep(50);
} catch (InterruptedException e2) {
Thread.currentThread().interrupt();
}
}
}
checkIfIamLeader(context, replacement);
return getSeq(context.leaderSeqPath);
}
use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.
the class CreateCollectionCmd method createCollectionZkNode.
public static void createCollectionZkNode(SolrZkClient zkClient, String collection, Map<String, String> params) {
log.debug("Check for collection zkNode:" + collection);
String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
try {
if (!zkClient.exists(collectionPath, true)) {
log.debug("Creating collection in ZooKeeper:" + collection);
try {
Map<String, Object> collectionProps = new HashMap<>();
// TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that?
String defaultConfigName = System.getProperty(ZkController.COLLECTION_PARAM_PREFIX + ZkController.CONFIGNAME_PROP, collection);
if (params.size() > 0) {
collectionProps.putAll(params);
// if the config name wasn't passed in, use the default
if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP)) {
// users can create the collection node and conf link ahead of time, or this may return another option
getConfName(zkClient, collection, collectionPath, collectionProps);
}
} else if (System.getProperty("bootstrap_confdir") != null) {
// if we are bootstrapping a collection, default the config for
// a new collection to the collection we are bootstrapping
log.info("Setting config for collection:" + collection + " to " + defaultConfigName);
Properties sysProps = System.getProperties();
for (String sprop : System.getProperties().stringPropertyNames()) {
if (sprop.startsWith(ZkController.COLLECTION_PARAM_PREFIX)) {
collectionProps.put(sprop.substring(ZkController.COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop));
}
}
// if the config name wasn't passed in, use the default
if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP))
collectionProps.put(ZkController.CONFIGNAME_PROP, defaultConfigName);
} else if (Boolean.getBoolean("bootstrap_conf")) {
// the conf name should should be the collection name of this core
collectionProps.put(ZkController.CONFIGNAME_PROP, collection);
} else {
getConfName(zkClient, collection, collectionPath, collectionProps);
}
// we don't put numShards in the collections properties
collectionProps.remove(ZkStateReader.NUM_SHARDS_PROP);
ZkNodeProps zkProps = new ZkNodeProps(collectionProps);
zkClient.makePath(collectionPath, Utils.toJSON(zkProps), CreateMode.PERSISTENT, null, true);
} catch (KeeperException e) {
// it's okay if the node already exists
if (e.code() != KeeperException.Code.NODEEXISTS) {
throw e;
}
}
} else {
log.debug("Collection zkNode exists");
}
} catch (KeeperException e) {
// it's okay if another beats us creating the node
if (e.code() == KeeperException.Code.NODEEXISTS) {
return;
}
throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
} catch (InterruptedException e) {
Thread.interrupted();
throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
}
}
use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.
the class CdcrBufferStateManager method createStateNode.
private void createStateNode() {
SolrZkClient zkClient = core.getCoreContainer().getZkController().getZkClient();
try {
if (!zkClient.exists(this.getZnodePath(), true)) {
if (!zkClient.exists(this.getZnodeBase(), true)) {
// Should be a no-op if node exists
zkClient.makePath(this.getZnodeBase(), null, CreateMode.PERSISTENT, null, false, true);
}
zkClient.create(this.getZnodePath(), DEFAULT_STATE.getBytes(), CreateMode.PERSISTENT, true);
log.info("Created znode {}", this.getZnodePath());
}
} catch (KeeperException.NodeExistsException ne) {
// Someone got in first and created the node.
} catch (KeeperException | InterruptedException e) {
log.warn("Failed to create CDCR buffer state node", e);
}
}
use of org.apache.zookeeper.KeeperException in project lucene-solr by apache.
the class CdcrProcessStateManager method synchronize.
/**
* Synchronise the state to Zookeeper. This method must be called only by the handler receiving the
* action.
*/
void synchronize() {
SolrZkClient zkClient = core.getCoreContainer().getZkController().getZkClient();
try {
zkClient.setData(this.getZnodePath(), this.getState().getBytes(), true);
// check if nobody changed it in the meantime, and set a new watcher
this.setState(CdcrParams.ProcessState.get(zkClient.getData(this.getZnodePath(), watcher, null, true)));
} catch (KeeperException | InterruptedException e) {
log.warn("Failed synchronising new state", e);
}
}
Aggregations