use of org.apache.hadoop.hbase.replication.SyncReplicationState in project hbase by apache.
the class ReplicationSourceManager method claimQueue.
/**
 * Claims one replication queue of a dead region server and, when the peer the queue belongs to
 * is still the very same peer object that was cached before the claim, starts a recovered
 * replication source for it.
 * <p>
 * The method first sleeps for a randomized time between {@code sleepBeforeFailover} and twice
 * that value. It gives up without claiming when the server is stopped or the peer does not
 * exist. It aborts the server when the claim or the creation of the source fails.
 * @param deadRS the dead region server that owns the queue
 * @param queue the id of the queue to claim
 */
void claimQueue(ServerName deadRS, String queue) {
  // This sleep may not be enough in some cases.
  try {
    Thread.sleep(sleepBeforeFailover + (long) (ThreadLocalRandom.current().nextFloat() * sleepBeforeFailover));
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while waiting before transferring a queue.");
    // Keep the interrupt flag set for callers; the claim itself still proceeds.
    Thread.currentThread().interrupt();
  }
  // We try to lock that rs' queue directory
  if (server.isStopped()) {
    LOG.info("Not transferring queue since we are shutting down");
    return;
  }
  // After claiming the queues from the dead region server, we will skip starting the
  // RecoveredReplicationSource if the peer has been removed. But it is possible that a peer with
  // peerId = 2 is removed and a peer with peerId = 2 is added again during failover. So we need
  // to get a copy of the replication peer first to decide whether we should start the
  // RecoveredReplicationSource. If the latest peer is not the old peer, we should also skip
  // starting the RecoveredReplicationSource, otherwise the rs will abort (See HBASE-20475).
  String peerId = new ReplicationQueueInfo(queue).getPeerId();
  ReplicationPeerImpl oldPeer = replicationPeers.getPeer(peerId);
  if (oldPeer == null) {
    LOG.info("Not transferring queue since the replication peer {} for queue {} does not exist", peerId, queue);
    return;
  }
  Pair<String, SortedSet<String>> claimedQueue;
  try {
    claimedQueue = queueStorage.claimQueue(deadRS, queue, server.getServerName());
  } catch (ReplicationException e) {
    LOG.error("ReplicationException: cannot claim dead region ({})'s " + "replication queue. Znode : ({})" + " Possible solution: check if znode size exceeds jute.maxBuffer value. " + " If so, increase it for both client and server side.", deadRS, queueStorage.getRsNode(deadRS), e);
    server.abort("Failed to claim queue from dead regionserver.", e);
    return;
  }
  if (claimedQueue.getSecond().isEmpty()) {
    // Nothing was claimed, so there is nothing to replicate.
    return;
  }
  String queueId = claimedQueue.getFirst();
  Set<String> walsSet = claimedQueue.getSecond();
  ReplicationPeerImpl peer = replicationPeers.getPeer(peerId);
  // Identity comparison on purpose: a re-added peer with the same id is a different object.
  if (peer == null || peer != oldPeer) {
    LOG.warn("Skipping failover for peer {} of node {}, peer is null or has been replaced", peerId, deadRS);
    abortWhenFail(() -> queueStorage.removeQueue(server.getServerName(), queueId));
    return;
  }
  if (server instanceof ReplicationSyncUp.DummyServer && peer.getPeerState().equals(PeerState.DISABLED)) {
    LOG.warn("Peer {} is disabled. ReplicationSyncUp tool will skip " + "replicating data to this peer.", peerId);
    return;
  }
  ReplicationSourceInterface src;
  try {
    src = createSource(queueId, peer);
  } catch (IOException e) {
    LOG.error("Can not create replication source for peer {} and queue {}", peerId, queueId, e);
    server.abort("Failed to create replication source after claiming queue.", e);
    return;
  }
  // synchronized on oldsources to avoid adding recovered source for the to-be-removed peer
  synchronized (oldsources) {
    // Re-check the peer under the lock; it may have been removed or replaced in the meantime.
    peer = replicationPeers.getPeer(src.getPeerId());
    if (peer == null || peer != oldPeer) {
      src.terminate("Recovered queue doesn't belong to any current peer");
      deleteQueue(queueId);
      return;
    }
    // Do not set up the recovered source when a sync replication peer is in STANDBY state with
    // no pending transition, or when it is transiting to STANDBY state.
    if (peer.getPeerConfig().isSyncReplication()) {
      Pair<SyncReplicationState, SyncReplicationState> stateAndNewState = peer.getSyncReplicationStateAndNewState();
      if ((stateAndNewState.getFirst().equals(SyncReplicationState.STANDBY) && stateAndNewState.getSecond().equals(SyncReplicationState.NONE)) || stateAndNewState.getSecond().equals(SyncReplicationState.STANDBY)) {
        src.terminate("Sync replication peer is in STANDBY state");
        deleteQueue(queueId);
        return;
      }
    }
    // track sources in walsByIdRecoveredQueues, grouping the claimed WALs by their WAL prefix
    Map<String, NavigableSet<String>> walsByGroup = new HashMap<>();
    walsByIdRecoveredQueues.put(queueId, walsByGroup);
    for (String wal : walsSet) {
      String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
      walsByGroup.computeIfAbsent(walPrefix, prefix -> new TreeSet<>()).add(wal);
    }
    oldsources.add(src);
    LOG.info("Added source for recovered queue {}", src.getQueueId());
    for (String wal : walsSet) {
      LOG.trace("Enqueueing log from recovered queue for source: {}", src.getQueueId());
      src.enqueueLog(new Path(oldLogDir, wal));
    }
    src.startup();
  }
}
use of org.apache.hadoop.hbase.replication.SyncReplicationState in project hbase by apache.
the class SplitLogWorker method processSyncReplicationWAL.
/**
 * Decides whether a WAL file still has to be split, with special handling for WAL files that
 * belong to a sync replication peer.
 * <p>
 * Files whose name carries no sync replication peer id, or whose peer is not cached or is not a
 * sync replication peer, are always split. For a peer in ACTIVE state with no pending transition
 * the file is first copied into the peer's remote WAL directory. For a peer in STANDBY state, or
 * transiting from ACTIVE to STANDBY, the split is abandoned when a file with the same name
 * already exists in the peer's remote snapshot WAL directory.
 * @return {@code true} if the split work should continue, {@code false} if it should be given up
 */
private static boolean processSyncReplicationWAL(String name, Configuration conf, RegionServerServices server, FileSystem fs, Path walDir) throws IOException {
  Path walFile = new Path(walDir, name);
  String filename = walFile.getName();
  Optional<String> syncPeerId = SyncReplicationWALProvider.getSyncReplicationPeerIdFromWALName(filename);
  if (!syncPeerId.isPresent()) {
    return true;
  }
  String peerId = syncPeerId.get();
  ReplicationPeerImpl peer = server.getReplicationSourceService().getReplicationPeers().getPeer(peerId);
  if (peer == null) {
    return true;
  }
  if (!peer.getPeerConfig().isSyncReplication()) {
    return true;
  }
  Pair<SyncReplicationState, SyncReplicationState> states = peer.getSyncReplicationStateAndNewState();
  SyncReplicationState current = states.getFirst();
  SyncReplicationState pending = states.getSecond();

  if (current.equals(SyncReplicationState.ACTIVE) && pending.equals(SyncReplicationState.NONE)) {
    // copy the file to remote and overwrite the previous one
    String remoteWALDir = peer.getPeerConfig().getRemoteWALDir();
    Path peerRemoteDir = ReplicationUtils.getPeerRemoteWALDir(remoteWALDir, peerId);
    Path tmpTarget = new Path(peerRemoteDir, filename + ".tmp");
    FileSystem remoteFs = ReplicationUtils.getRemoteWALFileSystem(conf, remoteWALDir);
    try (FSDataInputStream in = fs.open(walFile);
        FSDataOutputStream out = remoteFs.createNonRecursive(
          tmpTarget,
          true,
          CommonFSUtils.getDefaultBufferSize(remoteFs),
          remoteFs.getDefaultReplication(tmpTarget),
          remoteFs.getDefaultBlockSize(tmpTarget),
          null)) {
      IOUtils.copy(in, out);
    }
    // Some FileSystem implementations may not support atomic rename so we need to do it in two
    // phases
    Path renameTarget = new Path(peerRemoteDir, filename + ReplicationUtils.RENAME_WAL_SUFFIX);
    FSUtils.renameFile(remoteFs, tmpTarget, renameTarget);
    FSUtils.renameFile(remoteFs, renameTarget, new Path(peerRemoteDir, filename));
    return true;
  }

  boolean transitingToStandby = current.equals(SyncReplicationState.ACTIVE) && pending.equals(SyncReplicationState.STANDBY);
  if (!transitingToStandby && !current.equals(SyncReplicationState.STANDBY)) {
    return true;
  }

  // check whether we still need to process this file
  // actually we only write wal file which name is ended with .syncrep in A state, and after
  // transiting to a state other than A, we will reopen all the regions so the data in the wal
  // will be flushed so the wal file will be archived soon. But it is still possible that there
  // is a server crash when we are transiting from A to S, to simplify the logic of the transit
  // procedure, here we will also check the remote snapshot directory in state S, so that we do
  // not need wait until all the wal files with .syncrep suffix to be archived before finishing
  // the procedure.
  String remoteWALDir = peer.getPeerConfig().getRemoteWALDir();
  Path peerSnapshotDir = ReplicationUtils.getPeerSnapshotWALDir(remoteWALDir, peerId);
  FileSystem remoteFs = ReplicationUtils.getRemoteWALFileSystem(conf, remoteWALDir);
  if (!remoteFs.exists(new Path(peerSnapshotDir, filename))) {
    return true;
  }
  // the file has been replayed when the remote cluster was transited from S to DA, the
  // content will be replicated back to us so give up split it.
  LOG.warn("Giveup splitting {} since it has been replayed in the remote cluster and " + "the content will be replicated back", filename);
  return false;
}
use of org.apache.hadoop.hbase.replication.SyncReplicationState in project hbase by apache.
the class PeerProcedureHandlerImpl method transitSyncReplicationPeerState.
/**
 * Performs one stage of a sync replication state transition for the given peer while holding
 * the per-peer lock.
 * <p>
 * In stage 0 the peer state and the new sync replication state are refreshed and the listener is
 * notified. In any other stage the listener is notified and the new state is committed on the
 * peer; when the new state is STANDBY the sources are drained and all WALs are rolled first.
 * A call that finds the work of its stage already done is treated as a retry and returns
 * without doing anything.
 * @param peerId id of the peer being transited
 * @param stage 0 for the first stage, any other value for the stage that commits the new state
 * @param rs the region server whose WAL roller is used when transiting to STANDBY
 * @throws ReplicationException if the peer is not cached or is not a sync replication peer
 * @throws IOException if the thread is interrupted while waiting for the WAL roll to finish
 */
@Override
public void transitSyncReplicationPeerState(String peerId, int stage, HRegionServer rs) throws ReplicationException, IOException {
  ReplicationPeers replicationPeers = replicationSourceManager.getReplicationPeers();
  Lock peerLock = peersLock.acquireLock(peerId);
  try {
    ReplicationPeerImpl peer = replicationPeers.getPeer(peerId);
    if (peer == null) {
      throw new ReplicationException("Peer with id=" + peerId + " is not cached.");
    }
    if (!peer.getPeerConfig().isSyncReplication()) {
      throw new ReplicationException("Peer with id=" + peerId + " is not synchronous.");
    }
    SyncReplicationState newSyncReplicationState = peer.getNewSyncReplicationState();
    if (stage == 0) {
      if (newSyncReplicationState != SyncReplicationState.NONE) {
        LOG.warn("The new sync replication state for peer {} has already been set to {}, " + "this should be a retry, give up", peerId, newSyncReplicationState);
        return;
      }
      // refresh the peer state first, as when we transit to STANDBY, we may need to disable the
      // peer before processing the sync replication state.
      PeerState oldState = peer.getPeerState();
      boolean success = false;
      try {
        PeerState newState = replicationPeers.refreshPeerState(peerId);
        if (oldState.equals(PeerState.ENABLED) && newState.equals(PeerState.DISABLED)) {
          replicationSourceManager.refreshSources(peerId);
        }
        success = true;
      } finally {
        if (!success) {
          // The refresh did not complete; put back the peer state that was read before it.
          peer.setPeerState(oldState.equals(PeerState.ENABLED));
        }
      }
      newSyncReplicationState = replicationPeers.refreshPeerNewSyncReplicationState(peerId);
      SyncReplicationState oldSyncReplicationState = peer.getSyncReplicationState();
      peerActionListener.peerSyncReplicationStateChange(peerId, oldSyncReplicationState, newSyncReplicationState, stage);
    } else {
      if (newSyncReplicationState == SyncReplicationState.NONE) {
        // Report the peer's current state as the message says; the new state is always NONE
        // in this branch and would carry no information.
        LOG.warn("The new sync replication state for peer {} has already been clear, and the " + "current state is {}, this should be a retry, give up", peerId, peer.getSyncReplicationState());
        return;
      }
      if (newSyncReplicationState == SyncReplicationState.STANDBY) {
        replicationSourceManager.drainSources(peerId);
        // Need to roll the wals and make the ReplicationSource for this peer track the new file.
        // If we do not do this, there will be two problems that can not be addressed at the same
        // time. First, if we just throw away the current wal file, and later when we transit the
        // peer to DA, and the wal has not been rolled yet, then the new data written to the wal
        // file will not be replicated and cause data inconsistency. But if we just track the
        // current wal file without rolling, it may contains some data before we transit the peer
        // to S, later if we transit the peer to DA, the data will also be replicated and cause
        // data inconsistency. So here we need to roll the wal, and let the ReplicationSource
        // track the new wal file, and throw the old wal files away.
        LogRoller roller = rs.getWalRoller();
        roller.requestRollAll();
        try {
          roller.waitUntilWalRollFinished();
        } catch (InterruptedException e) {
          // reset the interrupted flag
          Thread.currentThread().interrupt();
          throw (IOException) new InterruptedIOException("Interrupted while waiting for wal roll finish").initCause(e);
        }
      }
      SyncReplicationState oldState = peer.getSyncReplicationState();
      peerActionListener.peerSyncReplicationStateChange(peerId, oldState, newSyncReplicationState, stage);
      peer.transitSyncReplicationState();
    }
  } finally {
    peerLock.unlock();
  }
}
use of org.apache.hadoop.hbase.replication.SyncReplicationState in project hbase by apache.
the class ReplicationPeerManager method preTransitPeerSyncReplicationState.
/**
 * Verifies that the given peer exists and that moving from its current sync replication state
 * to {@code state} is an allowed transition.
 * @param peerId id of the peer to transit
 * @param state the sync replication state to transit to
 * @return the old description of the peer
 * @throws DoNotRetryIOException if the transition from the current state to {@code state} is not
 *           allowed
 */
ReplicationPeerDescription preTransitPeerSyncReplicationState(String peerId, SyncReplicationState state) throws DoNotRetryIOException {
  ReplicationPeerDescription description = checkPeerExists(peerId);
  SyncReplicationState currentState = description.getSyncReplicationState();
  EnumSet<SyncReplicationState> reachableStates = allowedTransition.get(currentState);
  boolean transitionAllowed = reachableStates != null && reachableStates.contains(state);
  if (transitionAllowed) {
    return description;
  }
  throw new DoNotRetryIOException("Can not transit current cluster state from " + currentState + " to " + state + " for peer id=" + peerId);
}
use of org.apache.hadoop.hbase.replication.SyncReplicationState in project hbase by apache.
the class ReplicationPeerManager method create.
/**
 * Builds a {@link ReplicationPeerManager} from the peers currently held in the peer storage.
 * <p>
 * Peers that use the legacy region replication endpoint are removed from the storage and left
 * out. For every other peer the config is passed through
 * {@code updateReplicationBasePeerConfigs}, written back to the storage, and cached together
 * with the peer's enabled flag and sync replication state.
 * @param zk the ZooKeeper watcher used to obtain the peer and queue storages
 * @param conf the configuration used to obtain the storages and to update the peer configs
 * @param clusterId the cluster id handed to the new manager
 * @return the newly created manager
 * @throws ReplicationException declared by the peer storage operations used here
 */
public static ReplicationPeerManager create(ZKWatcher zk, Configuration conf, String clusterId) throws ReplicationException {
  ReplicationPeerStorage storage = ReplicationStorageFactory.getReplicationPeerStorage(zk, conf);
  ConcurrentMap<String, ReplicationPeerDescription> descriptions = new ConcurrentHashMap<>();
  for (String id : storage.listPeerIds()) {
    ReplicationPeerConfig storedConfig = storage.getPeerConfig(id);
    boolean legacyRegionReplication = ReplicationUtils.LEGACY_REGION_REPLICATION_ENDPOINT_NAME.equals(storedConfig.getReplicationEndpointImpl());
    if (legacyRegionReplication) {
      // we do not use this endpoint for region replication any more, see HBASE-26233
      LOG.info("Legacy region replication peer found, removing: {}", storedConfig);
      storage.removePeer(id);
      continue;
    }
    ReplicationPeerConfig effectiveConfig = ReplicationPeerConfigUtil.updateReplicationBasePeerConfigs(conf, storedConfig);
    storage.updatePeerConfig(id, effectiveConfig);
    boolean enabled = storage.isPeerEnabled(id);
    SyncReplicationState syncState = storage.getPeerSyncReplicationState(id);
    ReplicationPeerDescription description = new ReplicationPeerDescription(id, enabled, effectiveConfig, syncState);
    descriptions.put(id, description);
  }
  return new ReplicationPeerManager(
    storage,
    ReplicationStorageFactory.getReplicationQueueStorage(zk, conf),
    descriptions,
    conf,
    clusterId);
}
Aggregations