use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project hbase by apache.
the class ServerManager method checkForRSznode.
/**
 * Check for an odd state, where we think an RS is up but it is not. Do it on OPEN.
 * This is the only case where the check makes sense.
 *
 * <p>We are checking for an instance of HBASE-9593 where a RS registered but died before it put
 * up its znode in zk. In this case, the RS made it into the list of online servers but it
 * is not actually UP. We do the check here where there is an evident problem rather
 * than do some crazy footwork where we'd have master check zk after a RS had reported
 * for duty with provisional state followed by a confirmed state; that'd be a mess.
 * Real fix is HBASE-17733.
 *
 * <p>The check only proceeds when the cause of {@code se} is a {@link ConnectException}, or an
 * {@link IOException} whose own cause is a {@link FailedServerException}, and
 * {@code serverName} is currently considered online. If the regionserver list cannot be read
 * from zk, the method logs the failure and returns without expiring anything, since the
 * absence of the znode could not be established.
 *
 * @param serverName server whose znode presence is verified
 * @param se the exception whose cause chain decides whether the check is run
 */
private void checkForRSznode(final ServerName serverName, final ServiceException se) {
  final Throwable cause = se.getCause();
  if (cause == null) {
    return;
  }
  // A ConnectException goes straight to the cleanup check. Anything else must be an
  // IOException caused by a FailedServerException (a null nested cause fails instanceof).
  if (!(cause instanceof ConnectException)) {
    if (!(cause instanceof IOException)
        || !(cause.getCause() instanceof FailedServerException)) {
      return;
    }
  }
  if (!isServerOnline(serverName)) {
    return;
  }
  // We think this server is online. Check it has a znode up. Currently, a RS
  // registers an ephemeral znode in zk. If not present, something is up. Maybe
  // HBASE-9593 where RS crashed AFTER reportForDuty but BEFORE it put up an ephemeral
  // znode.
  final List<String> servers;
  try {
    servers = getRegionServersInZK(this.master.getZooKeeper());
  } catch (KeeperException ke) {
    // ZK is malfunctioning, don't hang here. We also must not expire the server: failing to
    // read the list tells us nothing about whether its znode exists.
    LOG.warn("Failed to list regionservers", ke);
    return;
  }
  boolean found = false;
  if (servers != null) {
    for (String serverNameAsStr : servers) {
      if (ServerName.valueOf(serverNameAsStr).equals(serverName)) {
        // Found a server up in zk.
        found = true;
        break;
      }
    }
  }
  if (!found) {
    LOG.warn("Online server " + serverName.toString() + " has no corresponding "
        + "ephemeral znode (Did it die before registering in zk?); "
        + "calling expire to clean it up!");
    expireServer(serverName);
  }
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project hbase by apache.
the class ReplicationZKNodeCleaner method getUnDeletedQueues.
/**
 * Collects the replication queues that belong to peers which are no longer present.
 *
 * <p>Each queue id of each replicator reported by the queues client is parsed into a
 * {@link ReplicationQueueInfo}; a queue is recorded when its peer id is not among the ids
 * currently returned by {@code replicationPeers.getAllPeerIds()}.
 *
 * @return undeletedQueues replicator with its queueIds for removed peers; never {@code null},
 *         empty when nothing qualifies
 * @throws IOException if the queues client raises a {@link KeeperException} (kept as the cause)
 */
public Map<String, List<String>> getUnDeletedQueues() throws IOException {
  Map<String, List<String>> undeletedQueues = new HashMap<>();
  Set<String> peerIds = new HashSet<>(this.replicationPeers.getAllPeerIds());
  try {
    List<String> replicators = this.queuesClient.getListOfReplicators();
    // NOTE(review): the listing calls presumably return null when the backing znode is
    // absent -- confirm against the queues client. Guarding keeps this scan from failing
    // with a NullPointerException in that case.
    if (replicators == null) {
      return undeletedQueues;
    }
    for (String replicator : replicators) {
      List<String> queueIds = this.queuesClient.getAllQueues(replicator);
      if (queueIds == null) {
        continue;
      }
      for (String queueId : queueIds) {
        ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
        if (peerIds.contains(queueInfo.getPeerId())) {
          continue;
        }
        undeletedQueues.computeIfAbsent(replicator, (key) -> new ArrayList<>()).add(queueId);
        if (LOG.isDebugEnabled()) {
          LOG.debug("Undeleted replication queue for removed peer found: "
              + String.format("[removedPeerId=%s, replicator=%s, queueId=%s]",
                  queueInfo.getPeerId(), replicator, queueId));
        }
      }
    }
  } catch (KeeperException ke) {
    throw new IOException("Failed to get the replication queues of all replicators", ke);
  }
  return undeletedQueues;
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project hbase by apache.
the class ReplicationZKNodeCleaner method getUnDeletedHFileRefsQueues.
/**
 * Finds the peer ids that still appear in the hfile-refs queue although they are not among
 * the ids currently returned by {@code replicationPeers.getAllPeerIds()}.
 *
 * @return undeletedHFileRefsQueue the leftover peer ids found in the hfile-refs queue
 *         (possibly empty), or {@code null} when {@code ZKUtil.checkExists} yields {@code -1}
 *         for the hfile-refs znode
 * @throws IOException if a {@link KeeperException} is raised while checking or listing the
 *         hfile-refs znode (kept as the cause)
 */
public Set<String> getUnDeletedHFileRefsQueues() throws IOException {
  final Set<String> leftoverPeers = new HashSet<>();
  final Set<String> knownPeerIds = new HashSet<>(this.replicationPeers.getAllPeerIds());
  final String hfileRefsZNode = queueDeletor.getHfileRefsZNode();
  try {
    if (ZKUtil.checkExists(zkw, hfileRefsZNode) == -1) {
      return null;
    }
    // Start from everything listed under hfile-refs, then drop the peers that still exist.
    leftoverPeers.addAll(this.queuesClient.getAllPeersFromHFileRefsQueue());
    leftoverPeers.removeAll(knownPeerIds);
  } catch (KeeperException e) {
    throw new IOException("Failed to get list of all peers from hfile-refs znode " + hfileRefsZNode, e);
  }
  return leftoverPeers;
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project hbase by apache.
the class TestZooKeeper method testCreateSilentIsReallySilent.
/**
 * A test for HBASE-3238.
 *
 * <p>Sets the ACL of the zk root to {@code CREATOR_ALL_ACL} through an authenticated client,
 * creates {@code /aclRoot} with the same ACL, and then calls
 * {@code ZKUtil.createAndFailSilent} on that znode through a separate watcher. Once the
 * previous root ACL has been read it is always written back before the test returns, even
 * when one of the steps fails, so a failure here does not leave the root locked down.
 *
 * @throws IOException A connection attempt to zk failed
 * @throws InterruptedException One of the non ZKUtil actions was interrupted
 * @throws KeeperException Any of the zookeeper connections had a
 * KeeperException
 */
@Test
public void testCreateSilentIsReallySilent() throws InterruptedException, KeeperException, IOException {
  Configuration c = TEST_UTIL.getConfiguration();
  final String aclZnode = "/aclRoot";
  final String quorumServers = ZKConfig.getZKQuorumServersString(c);
  // 5 seconds
  final int sessionTimeout = 5 * 1000;
  // Explicit charset so the digest credentials do not depend on the platform default.
  final byte[] digestAuth = "hbase:rox".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  List<ACL> oldACL = null;
  ZooKeeperWatcher zk2 = null;
  try {
    ZooKeeper zk = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance);
    try {
      zk.addAuthInfo("digest", digestAuth);
      // Assumes the root of the ZooKeeper space is writable as it creates a node
      // wherever the cluster home is defined.
      zk2 = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), "testCreateSilentIsReallySilent", null);
      // Save the previous ACL
      while (true) {
        try {
          oldACL = zk.getACL("/", new Stat());
          break;
        } catch (KeeperException e) {
          pauseIfTransient(e);
        }
      }
      // Add retries in case of retryable zk exceptions.
      while (true) {
        try {
          zk.setACL("/", ZooDefs.Ids.CREATOR_ALL_ACL, -1);
          break;
        } catch (KeeperException e) {
          pauseIfTransient(e);
        }
      }
      while (true) {
        try {
          zk.create(aclZnode, null, ZooDefs.Ids.CREATOR_ALL_ACL, CreateMode.PERSISTENT);
          break;
        } catch (KeeperException e) {
          pauseIfTransient(e);
        }
      }
    } finally {
      // Close the authenticated client before the silent create, and on every failure path.
      zk.close();
    }
    ZKUtil.createAndFailSilent(zk2, aclZnode);
  } finally {
    try {
      if (zk2 != null) {
        zk2.close();
      }
    } finally {
      // Restore the ACL. Done whenever the old ACL was captured: a setACL that failed with
      // a connection loss may still have been applied on the server.
      if (oldACL != null) {
        ZooKeeper zk3 = new ZooKeeper(quorumServers, sessionTimeout, EmptyWatcher.instance);
        try {
          zk3.addAuthInfo("digest", digestAuth);
          zk3.setACL("/", oldACL, -1);
        } finally {
          zk3.close();
        }
      }
    }
  }
}

/**
 * Decides whether a failed zk call in this test should be retried: logs and sleeps 100 ms for
 * CONNECTIONLOSS, SESSIONEXPIRED and OPERATIONTIMEOUT, and rethrows {@code e} for any other code.
 *
 * <p>NOTE(review): SESSIONEXPIRED is retried on the same client handle, as before; an expired
 * session presumably never recovers, so that retry may spin until the test times out -- confirm.
 */
private void pauseIfTransient(KeeperException e) throws KeeperException, InterruptedException {
  switch (e.code()) {
    case CONNECTIONLOSS:
    case SESSIONEXPIRED:
    case OPERATIONTIMEOUT:
      LOG.warn("Possibly transient ZooKeeper exception", e);
      Threads.sleep(100);
      return;
    default:
      throw e;
  }
}
use of org.apache.flink.shaded.zookeeper3.org.apache.zookeeper.KeeperException in project hbase by apache.
the class HBaseAdmin method getMasterInfoPort.
/**
 * Looks up the master info port through {@link MasterAddressTracker}, using the keep-alive
 * ZooKeeper watcher obtained from the underlying {@link ConnectionImplementation}.
 *
 * @return the value returned by {@code MasterAddressTracker.getMasterInfoPort}
 * @throws IOException if the lookup raises a {@link KeeperException} (kept as the cause)
 */
@Override
public int getMasterInfoPort() throws IOException {
  // TODO: Fix! Reaching into internal implementation!!!!
  ConnectionImplementation connection = (ConnectionImplementation) this.connection;
  // Close the keep-alive handle once the lookup is done instead of holding it open.
  // NOTE(review): assumes close() on this handle only gives back this caller's reference to
  // the shared watcher -- confirm against ZooKeeperKeepAliveConnection.
  try (ZooKeeperKeepAliveConnection zkw = connection.getKeepAliveZooKeeperWatcher()) {
    return MasterAddressTracker.getMasterInfoPort(zkw);
  } catch (KeeperException e) {
    throw new IOException("Failed to get master info port from MasterAddressTracker", e);
  }
}
Aggregations