use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.
the class ReplicationZKNodeCleaner method getUnDeletedQueues.
/**
 * @return a map from each replicator to the queue ids it still holds for removed peers
 * @throws IOException if the replication queues cannot be read from ZooKeeper
 */
public Map<String, List<String>> getUnDeletedQueues() throws IOException {
  Map<String, List<String>> undeletedQueues = new HashMap<>();
  Set<String> peerIds = new HashSet<>(this.replicationPeers.getAllPeerIds());
  try {
    List<String> replicators = this.queuesClient.getListOfReplicators();
    for (String replicator : replicators) {
      List<String> queueIds = this.queuesClient.getAllQueues(replicator);
      for (String queueId : queueIds) {
        ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
        if (!peerIds.contains(queueInfo.getPeerId())) {
          undeletedQueues.computeIfAbsent(replicator, (key) -> new ArrayList<>()).add(queueId);
          if (LOG.isDebugEnabled()) {
            LOG.debug("Undeleted replication queue for removed peer found: "
              + String.format("[removedPeerId=%s, replicator=%s, queueId=%s]",
                queueInfo.getPeerId(), replicator, queueId));
          }
        }
      }
    }
  } catch (KeeperException ke) {
    throw new IOException("Failed to get the replication queues of all replicators", ke);
  }
  return undeletedQueues;
}
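The removed-peer check above hinges on ReplicationQueueInfo parsing the peer id out of a queue id string. A minimal sketch of that parsing contract, with a made-up server name for illustration: a queue id is either a bare peer id or, for a recovered queue, a peer id followed by the dead servers it was claimed from.

// Normal queue: the queue id is just the peer id.
ReplicationQueueInfo normal = new ReplicationQueueInfo("2");
assert normal.getPeerId().equals("2");
assert !normal.isQueueRecovered();
// Recovered queue: the claiming server appends the dead server's name to the queue id.
ReplicationQueueInfo recovered =
  new ReplicationQueueInfo("2-host1.example.com,16020,1500000000000");
assert recovered.getPeerId().equals("2");
assert recovered.isQueueRecovered();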
use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.
the class DumpReplicationQueues method dumpQueues.
public String dumpQueues(ZKWatcher zkw, Set<String> peerIds, boolean hdfs) throws Exception {
  ReplicationQueueStorage queueStorage;
  StringBuilder sb = new StringBuilder();
  queueStorage = ReplicationStorageFactory.getReplicationQueueStorage(zkw, getConf());
  Set<ServerName> liveRegionServers = ZKUtil.listChildrenNoWatch(zkw, zkw.getZNodePaths().rsZNode)
    .stream().map(ServerName::parseServerName).collect(Collectors.toSet());
  // Loops each peer on each RS and dumps the queues
  List<ServerName> regionservers = queueStorage.getListOfReplicators();
  if (regionservers == null || regionservers.isEmpty()) {
    return sb.toString();
  }
  for (ServerName regionserver : regionservers) {
    List<String> queueIds = queueStorage.getAllQueues(regionserver);
    if (!liveRegionServers.contains(regionserver)) {
      deadRegionServers.add(regionserver.getServerName());
    }
    for (String queueId : queueIds) {
      ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
      List<String> wals = queueStorage.getWALsInQueue(regionserver, queueId);
      Collections.sort(wals);
      if (!peerIds.contains(queueInfo.getPeerId())) {
        deletedQueues.add(regionserver + "/" + queueId);
        sb.append(formatQueue(regionserver, queueStorage, queueInfo, queueId, wals, true, hdfs));
      } else {
        sb.append(formatQueue(regionserver, queueStorage, queueInfo, queueId, wals, false, hdfs));
      }
    }
  }
  return sb.toString();
}
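formatQueue is not included in this excerpt. The hypothetical sketch below only illustrates the kind of per-queue summary it could build from the ReplicationQueueInfo fields used above; the method name and output layout are assumptions, not the actual implementation.

// Hypothetical helper, not the real DumpReplicationQueues.formatQueue.
private String formatQueueSketch(ServerName regionserver, ReplicationQueueInfo queueInfo,
    String queueId, List<String> wals, boolean isDeleted) {
  StringBuilder sb = new StringBuilder();
  sb.append("Dumping replication queue info for RegionServer: [").append(regionserver).append("]\n");
  sb.append("    Queue id: ").append(queueId).append('\n');
  sb.append("    Peer id: ").append(queueInfo.getPeerId()).append('\n');
  sb.append("    Recovered: ").append(queueInfo.isQueueRecovered()).append('\n');
  sb.append("    Dead RegionServers: ").append(queueInfo.getDeadRegionServers()).append('\n');
  sb.append("    Peer was deleted: ").append(isDeleted).append('\n');
  sb.append("    Number of WALs in queue: ").append(wals.size()).append('\n');
  return sb.toString();
}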
use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.
the class ReplicationSourceManager method claimQueue.
void claimQueue(ServerName deadRS, String queue) {
  // This sleep may not be enough in some cases.
  try {
    Thread.sleep(sleepBeforeFailover
      + (long) (ThreadLocalRandom.current().nextFloat() * sleepBeforeFailover));
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while waiting before transferring a queue.");
    Thread.currentThread().interrupt();
  }
  // We try to lock that rs' queue directory
  if (server.isStopped()) {
    LOG.info("Not transferring queue since we are shutting down");
    return;
  }
  // After claiming the queues from a dead region server, we will skip starting the
  // RecoveredReplicationSource if the peer has been removed. But it is possible that a peer
  // with peerId = 2 is removed and a peer with peerId = 2 is added again during the failover,
  // so we need to get a copy of the replication peer first to decide whether we should start
  // the RecoveredReplicationSource. If the latest peer is not the old peer, we should also
  // skip starting the RecoveredReplicationSource; otherwise the rs will abort (see HBASE-20475).
  String peerId = new ReplicationQueueInfo(queue).getPeerId();
  ReplicationPeerImpl oldPeer = replicationPeers.getPeer(peerId);
  if (oldPeer == null) {
    LOG.info("Not transferring queue since the replication peer {} for queue {} does not exist",
      peerId, queue);
    return;
  }
  Pair<String, SortedSet<String>> claimedQueue;
  try {
    claimedQueue = queueStorage.claimQueue(deadRS, queue, server.getServerName());
  } catch (ReplicationException e) {
    LOG.error("ReplicationException: cannot claim dead region ({})'s replication queue. "
      + "Znode: ({}). Possible solution: check if znode size exceeds jute.maxBuffer value. "
      + "If so, increase it for both client and server side.",
      deadRS, queueStorage.getRsNode(deadRS), e);
    server.abort("Failed to claim queue from dead regionserver.", e);
    return;
  }
  if (claimedQueue.getSecond().isEmpty()) {
    return;
  }
  String queueId = claimedQueue.getFirst();
  Set<String> walsSet = claimedQueue.getSecond();
  ReplicationPeerImpl peer = replicationPeers.getPeer(peerId);
  if (peer == null || peer != oldPeer) {
    LOG.warn("Skipping failover for peer {} of node {}, peer is null or changed", peerId, deadRS);
    abortWhenFail(() -> queueStorage.removeQueue(server.getServerName(), queueId));
    return;
  }
  if (server instanceof ReplicationSyncUp.DummyServer
    && peer.getPeerState().equals(PeerState.DISABLED)) {
    LOG.warn("Peer {} is disabled. ReplicationSyncUp tool will skip "
      + "replicating data to this peer.", peerId);
    return;
  }
  ReplicationSourceInterface src;
  try {
    src = createSource(queueId, peer);
  } catch (IOException e) {
    LOG.error("Can not create replication source for peer {} and queue {}", peerId, queueId, e);
    server.abort("Failed to create replication source after claiming queue.", e);
    return;
  }
  // synchronized on oldsources to avoid adding recovered source for the to-be-removed peer
  synchronized (oldsources) {
    peer = replicationPeers.getPeer(src.getPeerId());
    if (peer == null || peer != oldPeer) {
      src.terminate("Recovered queue doesn't belong to any current peer");
      deleteQueue(queueId);
      return;
    }
    // Do not start a recovered source for a sync replication peer that is in STANDBY state or
    // is transiting to STANDBY state, since the data would otherwise just be replicated back.
    if (peer.getPeerConfig().isSyncReplication()) {
      Pair<SyncReplicationState, SyncReplicationState> stateAndNewState =
        peer.getSyncReplicationStateAndNewState();
      if ((stateAndNewState.getFirst().equals(SyncReplicationState.STANDBY)
        && stateAndNewState.getSecond().equals(SyncReplicationState.NONE))
        || stateAndNewState.getSecond().equals(SyncReplicationState.STANDBY)) {
        src.terminate("Sync replication peer is in STANDBY state");
        deleteQueue(queueId);
        return;
      }
    }
    // track sources in walsByIdRecoveredQueues
    Map<String, NavigableSet<String>> walsByGroup = new HashMap<>();
    walsByIdRecoveredQueues.put(queueId, walsByGroup);
    for (String wal : walsSet) {
      String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
      walsByGroup.computeIfAbsent(walPrefix, key -> new TreeSet<>()).add(wal);
    }
    oldsources.add(src);
    LOG.info("Added source for recovered queue {}", src.getQueueId());
    for (String wal : walsSet) {
      LOG.trace("Enqueueing log from recovered queue for source: " + src.getQueueId());
      src.enqueueLog(new Path(oldLogDir, wal));
    }
    src.startup();
  }
}
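The walsByIdRecoveredQueues bookkeeping above groups the claimed WALs by prefix, so each WAL group from the dead server is handed off and replicated in order. A minimal sketch of what the grouping key looks like, assuming the usual <prefix>.<sequence> WAL naming; the sample names are made up.

// getWALPrefixFromWALName strips the trailing sequence suffix, so consecutive
// WALs rolled by the same provider land in the same group.
String wal1 = "host1.example.com%2C16020%2C1500000000000.1500000000001";
String wal2 = "host1.example.com%2C16020%2C1500000000000.1500000000002";
String prefix1 = AbstractFSWALProvider.getWALPrefixFromWALName(wal1);
String prefix2 = AbstractFSWALProvider.getWALPrefixFromWALName(wal2);
assert prefix1.equals(prefix2); // both are "host1.example.com%2C16020%2C1500000000000"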
use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.
the class ReplicationChecker method getUnDeletedQueues.
private Map<ServerName, List<String>> getUnDeletedQueues() throws ReplicationException {
  Map<ServerName, List<String>> undeletedQueues = new HashMap<>();
  Set<String> peerIds = new HashSet<>(peerStorage.listPeerIds());
  for (ServerName replicator : queueStorage.getListOfReplicators()) {
    for (String queueId : queueStorage.getAllQueues(replicator)) {
      ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
      if (!peerIds.contains(queueInfo.getPeerId())) {
        undeletedQueues.computeIfAbsent(replicator, key -> new ArrayList<>()).add(queueId);
        LOG.debug("Undeleted replication queue for removed peer found: "
          + "[removedPeerId={}, replicator={}, queueId={}]",
          queueInfo.getPeerId(), replicator, queueId);
      }
    }
  }
  return undeletedQueues;
}
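The map returned here feeds the repair side of the checker: fixing an orphan queue amounts to removing it from the queue storage for its replicator, the same ReplicationQueueStorage.removeQueue call used in claimQueue's cleanup path above. A minimal fix-pass sketch; the method name is hypothetical, not copied from the actual fix code.

private void removeUnDeletedQueues(Map<ServerName, List<String>> undeletedQueues)
    throws ReplicationException {
  for (Map.Entry<ServerName, List<String>> entry : undeletedQueues.entrySet()) {
    for (String queueId : entry.getValue()) {
      queueStorage.removeQueue(entry.getKey(), queueId);
    }
  }
}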
use of org.apache.hadoop.hbase.replication.ReplicationQueueInfo in project hbase by apache.
the class ReplicationChecker method checkUnDeletedQueues.
public void checkUnDeletedQueues() throws ReplicationException {
  undeletedQueueIds = getUnDeletedQueues();
  undeletedQueueIds.forEach((replicator, queueIds) -> {
    queueIds.forEach(queueId -> {
      ReplicationQueueInfo queueInfo = new ReplicationQueueInfo(queueId);
      String msg = "Undeleted replication queue for removed peer found: "
        + String.format("[removedPeerId=%s, replicator=%s, queueId=%s]",
          queueInfo.getPeerId(), replicator, queueId);
      errorReporter.reportError(HbckErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE, msg);
    });
  });
  undeletedHFileRefsPeerIds = getUndeletedHFileRefsPeers();
  undeletedHFileRefsPeerIds.stream()
    .map(peerId -> "Undeleted replication hfile-refs queue for removed peer " + peerId + " found")
    .forEach(msg -> errorReporter
      .reportError(HbckErrorReporter.ERROR_CODE.UNDELETED_REPLICATION_QUEUE, msg));
}
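getUndeletedHFileRefsPeers is referenced above but not included in this excerpt. A plausible sketch under the same storage interfaces, assuming queueStorage exposes the peers that still own an hfile-refs queue; this is a reconstruction, not the verified implementation.

private Set<String> getUndeletedHFileRefsPeers() throws ReplicationException {
  // Peers that still have an hfile-refs queue, minus the peers that still exist.
  Set<String> undeletedHFileRefsPeerIds =
    new HashSet<>(queueStorage.getAllPeersFromHFileRefsQueue());
  undeletedHFileRefsPeerIds.removeAll(peerStorage.listPeerIds());
  return undeletedHFileRefsPeerIds;
}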