use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.
the class TestReplicationSourceManager method testPeerRemovalCleanup.
/**
 * Test whether calling removePeer() on a ReplicationSourceManager that failed on initializing the
 * corresponding ReplicationSourceInterface correctly cleans up the corresponding
 * replication queue and ReplicationPeer.
 * See HBASE-16096.
 * <p>
 * The test temporarily swaps the configured replication source implementation for one that fails
 * to initialize, and restores the previous setting (including "not set") when it finishes.
 */
@Test
public void testPeerRemovalCleanup() throws Exception {
  final String sourceImplKey = "replication.replicationsource.implementation";
  // May be null if the property was never configured; remembered so it can be restored.
  final String replicationSourceImplName = conf.get(sourceImplKey);
  final String peerId = "FakePeer";
  final ReplicationPeerConfig peerConfig = ReplicationPeerConfig.newBuilder()
    .setClusterKey(utility.getZkCluster().getAddress().toString() + ":/hbase").build();
  try {
    DummyServer server = new DummyServer();
    ReplicationQueueStorage rq = ReplicationStorageFactory
      .getReplicationQueueStorage(server.getZooKeeper(), server.getConfiguration());
    // Purposely fail ReplicationSourceManager.addSource() by causing ReplicationSourceInterface
    // initialization to throw an exception.
    conf.set(sourceImplKey, FailInitializeDummyReplicationSource.class.getName());
    manager.getReplicationPeers();
    // Set up the znode and ReplicationPeer for the fake peer
    // Don't wait for replication source to initialize, we know it won't.
    addPeerAndWait(peerId, peerConfig, false);
    // Sanity check
    assertNull(manager.getSource(peerId));
    // Create a replication queue for the fake peer
    rq.addWAL(server.getServerName(), peerId, "FakeFile");
    // Unregister peer, this should remove the peer and clear all queues associated with it
    // Need to wait for the ReplicationTracker to pick up the changes and notify listeners.
    removePeerAndWait(peerId);
    assertFalse(rq.getAllQueues(server.getServerName()).contains(peerId));
  } finally {
    // Assumes conf is a Hadoop Configuration, whose set() rejects null values. Restoring a
    // previously-unset property therefore has to go through unset(); otherwise the exception
    // thrown here would mask the real test failure and skip the peer cleanup below.
    if (replicationSourceImplName != null) {
      conf.set(sourceImplKey, replicationSourceImplName);
    } else {
      conf.unset(sourceImplKey);
    }
    removePeerAndWait(peerId);
  }
}
use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.
the class TestLogsCleaner method testLogCleaning.
/**
 * Verifies that LogCleaner copes with WALs and Procedure WALs that live under the same
 * oldWALs directory.
 * <p>
 * Files created by the test:
 * <ul>
 * <li>2 invalid files</li>
 * <li>5 old Procedure WALs</li>
 * <li>30 old WALs, 3 of which are registered in a replication queue</li>
 * <li>5 recent Procedure WALs</li>
 * <li>1 recent WAL</li>
 * <li>1 very new WAL (timestamp in the future)</li>
 * <li>the masterProcedureWALs subdirectory</li>
 * </ul>
 * Entries expected to survive the cleaner run:
 * <ul>
 * <li>the 3 replication WALs</li>
 * <li>the 2 new WALs</li>
 * <li>the 5 latest Procedure WALs</li>
 * <li>the masterProcedureWALs subdirectory</li>
 * </ul>
 */
@Test
public void testLogCleaning() throws Exception {
  // Configure the two TTLs the cleaner chain is driven by.
  long walTtlMs = 2000;
  long procedureWalTtlMs = 4000;
  conf.setLong("hbase.master.logcleaner.ttl", walTtlMs);
  conf.setLong("hbase.master.procedurewalcleaner.ttl", procedureWalTtlMs);
  HMaster.decorateMasterConfiguration(conf);

  Server server = new DummyServer();
  ReplicationQueueStorage queueStorage =
    ReplicationStorageFactory.getReplicationQueueStorage(server.getZooKeeper(), conf);
  String fakeMachineName =
    URLEncoder.encode(server.getServerName().toString(), StandardCharsets.UTF_8.name());

  final FileSystem fs = FileSystem.get(conf);
  fs.mkdirs(OLD_PROCEDURE_WALS_DIR);
  final long now = EnvironmentEdgeManager.currentTime();

  // Two files with invalid names; the test expects these to be removed.
  fs.createNewFile(new Path(OLD_WALS_DIR, "a"));
  fs.createNewFile(new Path(OLD_WALS_DIR, fakeMachineName + ".a"));

  // Five Procedure WALs created first, so that they are the old ones by the time we clean.
  for (int seq = 1; seq <= 5; seq++) {
    fs.createNewFile(new Path(OLD_PROCEDURE_WALS_DIR, String.format("pv2-%020d.log", seq)));
  }

  // Let the Procedure WALs above age before anything else is created.
  Thread.sleep(procedureWalTtlMs - walTtlMs);

  // Thirty old WALs; every tenth one is also added to a replication queue, so it passes the
  // TTL cleaner but is expected to be held back by the replication cleaner.
  final int oldWalCount = 30;
  final int replicatedWalCount = 3;
  final int replicationStride = oldWalCount / replicatedWalCount;
  for (int age = 1; age <= oldWalCount; age++) {
    Path fileName = new Path(OLD_WALS_DIR, fakeMachineName + "." + (now - age));
    fs.createNewFile(fileName);
    if (age % replicationStride == 0) {
      queueStorage.addWAL(server.getServerName(), fakeMachineName, fileName.getName());
      LOG.info("Replication log file: " + fileName);
    }
  }

  // Five more Procedure WALs, created late enough that they should be kept.
  for (int seq = 6; seq <= 10; seq++) {
    fs.createNewFile(new Path(OLD_PROCEDURE_WALS_DIR, String.format("pv2-%020d.log", seq)));
  }

  // Wait again so the WALs created below get a newer modification time.
  Thread.sleep(walTtlMs);
  fs.createNewFile(new Path(OLD_WALS_DIR, fakeMachineName + "." + now));

  // One WAL stamped in the future: not deletable even by the TTL cleaner, so the rest of
  // the cleaner chain is never consulted for it.
  fs.createNewFile(new Path(OLD_WALS_DIR, fakeMachineName + "." + (now + walTtlMs)));

  FileStatus[] beforeCleaning = fs.listStatus(OLD_WALS_DIR);
  LOG.info("File status: {}", Arrays.toString(beforeCleaning));
  // 34 files plus the masterProcedureWALs directory.
  assertEquals(35, fs.listStatus(OLD_WALS_DIR).length);
  // All ten Procedure WALs are still present.
  assertEquals(10, fs.listStatus(OLD_PROCEDURE_WALS_DIR).length);

  LogCleaner cleaner = new LogCleaner(1000, server, conf, fs, OLD_WALS_DIR, POOL, null);
  cleaner.chore();

  // oldWALs should end up with the current WAL, the future WAL, the 3 replication WALs and
  // the masterProcedureWALs directory.
  Waiter.Predicate<Exception> oldWalsCleaned = () -> 6 == fs.listStatus(OLD_WALS_DIR).length;
  TEST_UTIL.waitFor(1000, oldWalsCleaned);

  // masterProcedureWALs should end up with the 5 newer Procedure WALs.
  Waiter.Predicate<Exception> procedureWalsCleaned =
    () -> 5 == fs.listStatus(OLD_PROCEDURE_WALS_DIR).length;
  TEST_UTIL.waitFor(1000, procedureWalsCleaned);

  if (LOG.isDebugEnabled()) {
    FileStatus[] keptOldWals = fs.listStatus(OLD_WALS_DIR);
    FileStatus[] keptProcedureWals = fs.listStatus(OLD_PROCEDURE_WALS_DIR);
    LOG.debug("Kept log file for oldWALs: {}", Arrays.toString(keptOldWals));
    LOG.debug("Kept log file for masterProcedureWALs: {}", Arrays.toString(keptProcedureWals));
  }
}
use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.
the class TestReplicationBarrierCleaner method testDeleteBarriers.
/**
 * Registers six replication barriers for one region and runs the barrier cleaner five times,
 * checking after every run which barriers are still stored for the region.
 */
@Test
public void testDeleteBarriers() throws IOException, ReplicationException {
  TableName tableName = TableName.valueOf(name.getMethodName());
  RegionInfo region = RegionInfoBuilder.newBuilder(tableName).build();
  addBarrier(region, 10, 20, 30, 40, 50, 60);
  // two peers
  // NOTE(review): presumably each cleaner run consumes the next pair of values (one per peer)
  // from this list -- confirm against the create(...) helper.
  ReplicationQueueStorage queueStorage = create(-1L, 2L, 15L, 25L, 20L, 25L, 65L, 55L, 70L, 70L);
  List<String> peerIds = Lists.newArrayList("1", "2");
  @SuppressWarnings("unchecked")
  ReplicationPeerManager peerManager =
    create(queueStorage, peerIds, peerIds, peerIds, peerIds, peerIds);
  ReplicationBarrierCleaner cleaner = create(peerManager);

  // Barriers expected to remain after each successive cleaner run.
  long[][] expectedAfterRun = {
    // beyond the first barrier, no deletion
    { 10, 20, 30, 40, 50, 60 },
    // in the first range, still no deletion
    { 10, 20, 30, 40, 50, 60 },
    // in the second range, 10 is deleted
    { 20, 30, 40, 50, 60 },
    // between 50 and 60, so the barriers before 50 will be deleted
    { 50, 60 },
    // in the last open range, 50 is deleted
    { 60 },
  };
  for (long[] expectedBarriers : expectedAfterRun) {
    cleaner.chore();
    assertArrayEquals(expectedBarriers, ReplicationBarrierFamilyFormat
      .getReplicationBarriers(UTIL.getConnection(), region.getRegionName()));
  }
}
use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.
the class ClaimReplicationQueuesProcedure method execute.
/**
 * Schedules the claiming of the replication queues recorded for {@code crashedServer}.
 * <p>
 * Queues belonging to the legacy region replica replication peer are removed from storage and
 * never claimed. If no queue remains, the replicator entry for the crashed server is removed
 * (if its queue is empty) and the procedure finishes. Otherwise one
 * {@link ClaimReplicationQueueRemoteProcedure} is created per queue, each paired with a distinct
 * randomly chosen online server; at most {@code min(#queues, #onlineServers)} are created in
 * one pass.
 * @param env the master procedure environment
 * @return the child procedures to run, or {@code null} when there is nothing left to claim
 * @throws ProcedureSuspendedException when the queue storage fails or no region server is
 *           online; the procedure is put into WAITING_TIMEOUT with a backoff before the throw
 */
@Override
protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
  ReplicationQueueStorage storage = env.getReplicationPeerManager().getQueueStorage();
  try {
    List<String> queues = storage.getAllQueues(crashedServer);
    // Queues of the legacy region replication peer are not claimed: they are dropped from the
    // candidate list and deleted from storage right here.
    // NOTE(review): the original comment only said such queues "may still be used by region
    // servers which have not been upgraded yet" -- confirm the full rationale.
    for (Iterator<String> iter = queues.iterator(); iter.hasNext(); ) {
      ReplicationQueueInfo queue = new ReplicationQueueInfo(iter.next());
      if (queue.getPeerId().equals(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_PEER)) {
        LOG.info("Found replication queue {} for legacy region replication peer, " + "skipping claiming and removing...", queue.getQueueId());
        iter.remove();
        storage.removeQueue(crashedServer, queue.getQueueId());
      }
    }
    if (queues.isEmpty()) {
      LOG.debug("Finish claiming replication queues for {}", crashedServer);
      storage.removeReplicatorIfQueueIsEmpty(crashedServer);
      // we are done
      return null;
    }
    LOG.debug("There are {} replication queues need to be claimed for {}", queues.size(), crashedServer);
    List<ServerName> targetServers = env.getMasterServices().getServerManager().getOnlineServersList();
    if (targetServers.isEmpty()) {
      // Handled by the catch block below, i.e. retried after a backoff.
      throw new ReplicationException("no region server available");
    }
    // Shuffle so the queues are spread over randomly chosen servers.
    Collections.shuffle(targetServers);
    // One queue per target server in this pass; any queues beyond the number of online servers
    // are not handed out here.
    ClaimReplicationQueueRemoteProcedure[] procs = new ClaimReplicationQueueRemoteProcedure[Math.min(queues.size(), targetServers.size())];
    for (int i = 0; i < procs.length; i++) {
      procs[i] = new ClaimReplicationQueueRemoteProcedure(crashedServer, queues.get(i), targetServers.get(i));
    }
    return procs;
  } catch (ReplicationException e) {
    // Lazily create the retry counter on the first failure.
    if (retryCounter == null) {
      retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
    }
    long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
    // Exactly two placeholders for the two arguments; the trailing exception is logged by
    // SLF4J as the throwable (with stack trace) and must not have a placeholder of its own.
    LOG.warn("Failed to claim replication queues for {}, suspend {}secs", crashedServer, backoff / 1000, e);
    setTimeout(Math.toIntExact(backoff));
    setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
    skipPersistence();
    throw new ProcedureSuspendedException();
  }
}
use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.
the class TestReplicationSourceManager method testCleanupUnknownPeerZNode.
/**
 * Adds two WALs to the queue of a peer id that is used here as an "unknown" peer, asks the
 * manager to claim that queue, and then checks that every queue the manager still reports
 * starts with the peer id "1".
 */
@Test
public void testCleanupUnknownPeerZNode() throws Exception {
  Server server = new DummyServer("hostname2.example.org");
  ReplicationQueueStorage queueStorage = ReplicationStorageFactory
    .getReplicationQueueStorage(server.getZooKeeper(), server.getConfiguration());

  // Populate the queue storage with two logs that belong to the unknown peer.
  String unknownPeerId = "2";
  String walGroup = "testgroup";
  String firstWal = walGroup + ".log1";
  String secondWal = walGroup + ".log2";
  queueStorage.addWAL(server.getServerName(), unknownPeerId, firstWal);
  queueStorage.addWAL(server.getServerName(), unknownPeerId, secondWal);

  manager.claimQueue(server.getServerName(), unknownPeerId);

  // The logs of the unknown peer should be gone from zk, leaving only queues of peer "1".
  String knownPeerPrefix = "1";
  for (String queueId : manager.getAllQueues()) {
    assertTrue(queueId.startsWith(knownPeerPrefix));
  }
}
Aggregations