use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class RecoveryStrategy method run.
@Override
public final void run() {
// set request info for logging
try (SolrCore core = cc.getCore(coreName)) {
if (core == null) {
SolrException.log(LOG, "SolrCore not found - cannot recover:" + coreName);
return;
}
MDCLoggingContext.setCore(core);
LOG.info("Starting recovery process. recoveringAfterStartup=" + recoveringAfterStartup);
try {
doRecovery(core);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
SolrException.log(LOG, "", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (Exception e) {
LOG.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
} finally {
MDCLoggingContext.clear();
}
}
use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class ZkIndexSchemaReader method createSchemaWatcher.
public void createSchemaWatcher() {
log.info("Creating ZooKeeper watch for the managed schema at " + managedSchemaPath);
try {
zkClient.exists(managedSchemaPath, new Watcher() {
@Override
public void process(WatchedEvent event) {
if (ZkIndexSchemaReader.this.isRemoved) {
// the core for this reader has already been removed, don't process this event
return;
}
// session events are not change events, and do not remove the watcher
if (Event.EventType.None.equals(event.getType())) {
return;
}
log.info("A schema change: {}, has occurred - updating schema from ZooKeeper ...", event);
try {
updateSchema(this, -1);
} catch (KeeperException e) {
if (e.code() == KeeperException.Code.SESSIONEXPIRED || e.code() == KeeperException.Code.CONNECTIONLOSS) {
log.warn("ZooKeeper watch triggered, but Solr cannot talk to ZK");
return;
}
log.error("", e);
throw new ZooKeeperException(ErrorCode.SERVER_ERROR, "", e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.warn("", e);
}
}
}, true);
} catch (KeeperException e) {
final String msg = "Error creating ZooKeeper watch for the managed schema";
log.error(msg, e);
throw new ZooKeeperException(ErrorCode.SERVER_ERROR, msg, e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.warn("", e);
}
}
use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class ZkController method register.
/**
* Register shard with ZooKeeper.
*
* @return the shardId for the SolrCore
*/
public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, boolean afterExpiration, boolean skipRecovery) throws Exception {
try (SolrCore core = cc.getCore(desc.getName())) {
MDCLoggingContext.setCore(core);
}
try {
// pre register has published our down state
final String baseUrl = getBaseUrl();
final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
final String collection = cloudDesc.getCollectionName();
final String coreZkNodeName = desc.getCloudDescriptor().getCoreNodeName();
assert coreZkNodeName != null : "we should have a coreNodeName by now";
String shardId = cloudDesc.getShardId();
Map<String, Object> props = new HashMap<>();
// we only put a subset of props into the leader node
props.put(ZkStateReader.BASE_URL_PROP, baseUrl);
props.put(ZkStateReader.CORE_NAME_PROP, coreName);
props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
log.debug("Register replica - core:{} address:{} collection:{} shard:{}", coreName, baseUrl, cloudDesc.getCollectionName(), shardId);
ZkNodeProps leaderProps = new ZkNodeProps(props);
try {
// If we're a preferred leader, insert ourselves at the head of the queue
boolean joinAtHead = false;
Replica replica = zkStateReader.getClusterState().getReplica(collection, coreZkNodeName);
if (replica != null) {
joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
}
//TODO WHy would replica be null?
if (replica == null || replica.getType() != Type.PULL) {
joinElection(desc, afterExpiration, joinAtHead);
} else if (replica.getType() == Type.PULL) {
if (joinAtHead) {
log.warn("Replica {} was designated as preferred leader but it's type is {}, It won't join election", coreZkNodeName, Type.PULL);
}
log.debug("Replica {} skipping election because it's type is {}", coreZkNodeName, Type.PULL);
startReplicationFromLeader(coreName, false);
}
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
} catch (KeeperException | IOException e) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
}
// in this case, we want to wait for the leader as long as the leader might
// wait for a vote, at least - but also long enough that a large cluster has
// time to get its act together
String leaderUrl = getLeader(cloudDesc, leaderVoteWait + 600000);
String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
log.debug("We are " + ourUrl + " and leader is " + leaderUrl);
boolean isLeader = leaderUrl.equals(ourUrl);
Replica.Type replicaType = zkStateReader.getClusterState().getCollection(collection).getReplica(coreZkNodeName).getType();
assert !(isLeader && replicaType == Type.PULL) : "Pull replica became leader!";
try (SolrCore core = cc.getCore(desc.getName())) {
// recover from local transaction log and wait for it to complete before
// going active
// TODO: should this be moved to another thread? To recoveryStrat?
// TODO: should this actually be done earlier, before (or as part of)
// leader election perhaps?
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
boolean isTlogReplicaAndNotLeader = replicaType == Replica.Type.TLOG && !isLeader;
if (isTlogReplicaAndNotLeader) {
String commitVersion = ReplicateFromLeader.getCommitVersion(core);
if (commitVersion != null) {
ulog.copyOverOldUpdates(Long.parseLong(commitVersion));
}
}
// we will call register again after zk expiration and on reload
if (!afterExpiration && !core.isReloaded() && ulog != null && !isTlogReplicaAndNotLeader) {
// disable recovery in case shard is in construction state (for shard splits)
Slice slice = getClusterState().getSlice(collection, shardId);
if (slice.getState() != Slice.State.CONSTRUCTION || !isLeader) {
Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler().getUpdateLog().recoverFromLog();
if (recoveryFuture != null) {
log.info("Replaying tlog for " + ourUrl + " during startup... NOTE: This can take a while.");
// NOTE: this could potentially block for
recoveryFuture.get();
// minutes or more!
// TODO: public as recovering in the mean time?
// TODO: in the future we could do peersync in parallel with recoverFromLog
} else {
log.debug("No LogReplay needed for core={} baseURL={}", core.getName(), baseUrl);
}
}
}
boolean didRecovery = checkRecovery(recoverReloadedCores, isLeader, skipRecovery, collection, coreZkNodeName, core, cc, afterExpiration);
if (!didRecovery) {
if (isTlogReplicaAndNotLeader) {
startReplicationFromLeader(coreName, true);
}
publish(desc, Replica.State.ACTIVE);
}
core.getCoreDescriptor().getCloudDescriptor().setHasRegistered(true);
}
// make sure we have an update cluster state right away
zkStateReader.forceUpdateCollection(collection);
return shardId;
} finally {
MDCLoggingContext.clear();
}
}
use of org.apache.solr.common.cloud.ZooKeeperException in project lucene-solr by apache.
the class TestZkChroot method testNoBootstrapConf.
@Test
public void testNoBootstrapConf() throws Exception {
String chroot = "/foo/bar2";
System.setProperty("bootstrap_conf", "false");
System.setProperty("zkHost", zkServer.getZkHost() + chroot);
SolrZkClient zkClient = null;
try {
zkClient = new SolrZkClient(zkServer.getZkHost(), AbstractZkTestCase.TIMEOUT);
assertFalse("Path '" + chroot + "' should not exist before the test", zkClient.exists(chroot, true));
cores = CoreContainer.createAndLoad(home);
fail("There should be a zk exception, as the initial path doesn't exist");
} catch (ZooKeeperException e) {
// expected
assertFalse("Path shouldn't have been created", // check the path was not created
zkClient.exists(chroot, true));
} finally {
if (cores != null)
cores.shutdown();
if (zkClient != null)
zkClient.close();
}
}
Aggregations