Search in sources :

Example 6 with ReplicationQueueStorage

use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.

the class TestReplicationSourceManager method testPeerRemovalCleanup.

/**
 * Verifies that removing a peer still cleans up its replication queue when the
 * ReplicationSourceManager could not initialize the peer's ReplicationSourceInterface.
 * The source implementation is temporarily swapped for one that fails on initialization,
 * a queue entry is registered for the peer, and the peer is then removed.
 * See HBASE-16096.
 */
@Test
public void testPeerRemovalCleanup() throws Exception {
    final String sourceImplKey = "replication.replicationsource.implementation";
    // Remember the configured implementation so the finally block can restore it.
    final String originalSourceImpl = conf.get(sourceImplKey);
    final String peerId = "FakePeer";
    final String clusterKey = utility.getZkCluster().getAddress().toString() + ":/hbase";
    final ReplicationPeerConfig peerConfig = ReplicationPeerConfig.newBuilder().setClusterKey(clusterKey).build();
    try {
        DummyServer dummyServer = new DummyServer();
        ReplicationQueueStorage queueStorage = ReplicationStorageFactory.getReplicationQueueStorage(dummyServer.getZooKeeper(), dummyServer.getConfiguration());
        // Make ReplicationSourceManager.addSource() fail on purpose: the configured
        // ReplicationSourceInterface implementation throws during initialization.
        conf.set(sourceImplKey, FailInitializeDummyReplicationSource.class.getName());
        manager.getReplicationPeers();
        // Register the fake peer. Do not wait for its replication source to come up,
        // since initialization is expected to fail.
        addPeerAndWait(peerId, peerConfig, false);
        // Sanity check: no source may exist for the peer.
        assertNull(manager.getSource(peerId));
        // Give the fake peer a replication queue containing one WAL.
        queueStorage.addWAL(dummyServer.getServerName(), peerId, "FakeFile");
        // Removing the peer should drop the peer and every queue associated with it.
        // The helper waits until the removal has been picked up.
        removePeerAndWait(peerId);
        assertFalse(queueStorage.getAllQueues(dummyServer.getServerName()).contains(peerId));
    } finally {
        conf.set(sourceImplKey, originalSourceImpl);
        removePeerAndWait(peerId);
    }
}
Also used : ReplicationPeerConfig(org.apache.hadoop.hbase.replication.ReplicationPeerConfig) ReplicationQueueStorage(org.apache.hadoop.hbase.replication.ReplicationQueueStorage) Test(org.junit.Test)

Example 7 with ReplicationQueueStorage

use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.

the class TestLogsCleaner method testLogCleaning.

/**
 * This test verifies that LogCleaner works correctly with WALs and Procedure WALs located
 * in the same oldWALs directory.
 * Created files:
 * - 2 invalid files
 * - 5 old Procedure WALs
 * - 30 old WALs from which 3 are in replication
 * - 5 recent Procedure WALs
 * - 1 recent WAL
 * - 1 very new WAL (timestamp in future)
 * - masterProcedureWALs subdirectory
 * Files which should stay:
 * - 3 replication WALs
 * - 2 new WALs
 * - 5 latest Procedure WALs
 * - masterProcedureWALs subdirectory
 *
 * NOTE: the test relies on wall-clock timing (two sleeps totalling ttlProcedureWAL
 * milliseconds) and on the order in which the files are created.
 */
@Test
public void testLogCleaning() throws Exception {
    // set TTLs
    long ttlWAL = 2000;
    long ttlProcedureWAL = 4000;
    conf.setLong("hbase.master.logcleaner.ttl", ttlWAL);
    conf.setLong("hbase.master.procedurewalcleaner.ttl", ttlProcedureWAL);
    HMaster.decorateMasterConfiguration(conf);
    Server server = new DummyServer();
    ReplicationQueueStorage queueStorage = ReplicationStorageFactory.getReplicationQueueStorage(server.getZooKeeper(), conf);
    // URL-encoded server name, used as the prefix of every WAL file name created below
    String fakeMachineName = URLEncoder.encode(server.getServerName().toString(), StandardCharsets.UTF_8.name());
    final FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(OLD_PROCEDURE_WALS_DIR);
    // Reference timestamp; the WAL file names below carry a suffix relative to it
    final long now = EnvironmentEdgeManager.currentTime();
    // Case 1: 2 invalid files, which would be deleted directly
    fs.createNewFile(new Path(OLD_WALS_DIR, "a"));
    fs.createNewFile(new Path(OLD_WALS_DIR, fakeMachineName + "." + "a"));
    // Case 2: 5 Procedure WALs that are old which would be deleted
    for (int i = 1; i <= 5; i++) {
        final Path fileName = new Path(OLD_PROCEDURE_WALS_DIR, String.format("pv2-%020d.log", i));
        fs.createNewFile(fileName);
    }
    // Sleep for some time so the Procedure WALs above become old: together with the
    // second sleep (ttlWAL) further down, they end up at least ttlProcedureWAL old,
    // while the Procedure WALs created after this sleep are only about ttlWAL old.
    Thread.sleep(ttlProcedureWAL - ttlWAL);
    // Case 3: old WALs which would be deletable
    for (int i = 1; i <= 30; i++) {
        Path fileName = new Path(OLD_WALS_DIR, fakeMachineName + "." + (now - i));
        fs.createNewFile(fileName);
        // files would pass TimeToLiveLogCleaner but would be rejected by ReplicationLogCleaner
        // (every 10th file, i.e. i = 10, 20, 30, is registered in a replication queue)
        if (i % (30 / 3) == 0) {
            queueStorage.addWAL(server.getServerName(), fakeMachineName, fileName.getName());
            LOG.info("Replication log file: " + fileName);
        }
    }
    // Case 4: 5 Procedure WALs that are new, will stay
    for (int i = 6; i <= 10; i++) {
        Path fileName = new Path(OLD_PROCEDURE_WALS_DIR, String.format("pv2-%020d.log", i));
        fs.createNewFile(fileName);
    }
    // Sleep for sometime to get newer modification time
    Thread.sleep(ttlWAL);
    // Case 5: 1 recent WAL, created only after the sleep above
    fs.createNewFile(new Path(OLD_WALS_DIR, fakeMachineName + "." + now));
    // Case 6: 1 newer WAL, not even deletable for TimeToLiveLogCleaner,
    // so we are not going down the chain
    fs.createNewFile(new Path(OLD_WALS_DIR, fakeMachineName + "." + (now + ttlWAL)));
    FileStatus[] status = fs.listStatus(OLD_WALS_DIR);
    LOG.info("File status: {}", Arrays.toString(status));
    // There should be 34 files and 1 masterProcedureWALs directory
    // (2 invalid + 30 old WALs + 1 recent WAL + 1 future WAL = 34)
    assertEquals(35, fs.listStatus(OLD_WALS_DIR).length);
    // 10 procedure WALs
    assertEquals(10, fs.listStatus(OLD_PROCEDURE_WALS_DIR).length);
    LogCleaner cleaner = new LogCleaner(1000, server, conf, fs, OLD_WALS_DIR, POOL, null);
    cleaner.chore();
    // In oldWALs we end up with the current WAL, a newer WAL, the 3 old WALs which
    // are scheduled for replication and masterProcedureWALs directory
    TEST_UTIL.waitFor(1000, (Waiter.Predicate<Exception>) () -> 6 == fs.listStatus(OLD_WALS_DIR).length);
    // In masterProcedureWALs we end up with 5 newer Procedure WALs
    TEST_UTIL.waitFor(1000, (Waiter.Predicate<Exception>) () -> 5 == fs.listStatus(OLD_PROCEDURE_WALS_DIR).length);
    // Listing the directories again is only worth it when the output is actually logged
    if (LOG.isDebugEnabled()) {
        FileStatus[] statusOldWALs = fs.listStatus(OLD_WALS_DIR);
        FileStatus[] statusProcedureWALs = fs.listStatus(OLD_PROCEDURE_WALS_DIR);
        LOG.debug("Kept log file for oldWALs: {}", Arrays.toString(statusOldWALs));
        LOG.debug("Kept log file for masterProcedureWALs: {}", Arrays.toString(statusProcedureWALs));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) MockServer(org.apache.hadoop.hbase.util.MockServer) Server(org.apache.hadoop.hbase.Server) ReplicationLogCleaner(org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner) ReplicationException(org.apache.hadoop.hbase.replication.ReplicationException) ZooKeeperConnectionException(org.apache.hadoop.hbase.ZooKeeperConnectionException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) ReplicationQueueStorage(org.apache.hadoop.hbase.replication.ReplicationQueueStorage) Waiter(org.apache.hadoop.hbase.Waiter) Test(org.junit.Test)

Example 8 with ReplicationQueueStorage

use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.

the class TestReplicationBarrierCleaner method testDeleteBarriers.

/**
 * Runs the barrier cleaner five times against a region with barriers 10..60 and checks
 * after each run which barriers are still stored for the region.
 */
@Test
public void testDeleteBarriers() throws IOException, ReplicationException {
    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(TableName.valueOf(name.getMethodName())).build();
    addBarrier(regionInfo, 10, 20, 30, 40, 50, 60);
    // two peers
    ReplicationQueueStorage storage = create(-1L, 2L, 15L, 25L, 20L, 25L, 65L, 55L, 70L, 70L);
    List<String> peers = Lists.newArrayList("1", "2");
    @SuppressWarnings("unchecked") ReplicationPeerManager replicationPeerManager = create(storage, peers, peers, peers, peers, peers);
    ReplicationBarrierCleaner barrierCleaner = create(replicationPeerManager);
    // Barriers expected to remain after each successive run of the cleaner.
    long[][] expectedAfterEachRun = {
        // beyond the first barrier, no deletion
        { 10, 20, 30, 40, 50, 60 },
        // in the first range, still no deletion
        { 10, 20, 30, 40, 50, 60 },
        // in the second range, 10 is deleted
        { 20, 30, 40, 50, 60 },
        // between 50 and 60, so the barriers before 50 will be deleted
        { 50, 60 },
        // in the last open range, 50 is deleted
        { 60 }
    };
    for (long[] expectedBarriers : expectedAfterEachRun) {
        barrierCleaner.chore();
        long[] remainingBarriers = ReplicationBarrierFamilyFormat.getReplicationBarriers(UTIL.getConnection(), regionInfo.getRegionName());
        assertArrayEquals(expectedBarriers, remainingBarriers);
    }
}
Also used : TableName(org.apache.hadoop.hbase.TableName) ReplicationPeerManager(org.apache.hadoop.hbase.master.replication.ReplicationPeerManager) RegionInfo(org.apache.hadoop.hbase.client.RegionInfo) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ReplicationQueueStorage(org.apache.hadoop.hbase.replication.ReplicationQueueStorage) Test(org.junit.Test)

Example 9 with ReplicationQueueStorage

use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.

the class ClaimReplicationQueuesProcedure method execute.

/**
 * Claims the replication queues of {@code crashedServer}.
 * <p>
 * Queues belonging to the legacy region replication peer are removed instead of claimed.
 * If no queue remains, the replicator entry is removed (if its queue list is empty) and
 * the procedure finishes. Otherwise one {@link ClaimReplicationQueueRemoteProcedure} is
 * created per queue, each targeting a different randomly chosen online server; at most
 * {@code min(queues, onlineServers)} sub-procedures are returned per invocation.
 *
 * @param env the master procedure environment
 * @return the sub-procedures to run, or {@code null} when there is nothing left to claim
 * @throws ProcedureSuspendedException when a {@link ReplicationException} occurred and the
 *           procedure has been put into WAITING_TIMEOUT with a backoff
 */
@Override
protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
    ReplicationQueueStorage storage = env.getReplicationPeerManager().getQueueStorage();
    try {
        List<String> queues = storage.getAllQueues(crashedServer);
        // Queues of the legacy region replication peer are not claimed; they are dropped here.
        // NOTE(review): presumably such a queue is left behind when the legacy peer is deleted,
        // as it may still be used by region servers which have not been upgraded yet.
        for (Iterator<String> iter = queues.iterator(); iter.hasNext(); ) {
            ReplicationQueueInfo queue = new ReplicationQueueInfo(iter.next());
            if (queue.getPeerId().equals(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_PEER)) {
                LOG.info("Found replication queue {} for legacy region replication peer, " + "skipping claiming and removing...", queue.getQueueId());
                iter.remove();
                storage.removeQueue(crashedServer, queue.getQueueId());
            }
        }
        if (queues.isEmpty()) {
            LOG.debug("Finish claiming replication queues for {}", crashedServer);
            storage.removeReplicatorIfQueueIsEmpty(crashedServer);
            // we are done
            return null;
        }
        LOG.debug("There are {} replication queues need to be claimed for {}", queues.size(), crashedServer);
        List<ServerName> targetServers = env.getMasterServices().getServerManager().getOnlineServersList();
        if (targetServers.isEmpty()) {
            throw new ReplicationException("no region server available");
        }
        // Randomize the assignment so the queues are spread over different servers.
        Collections.shuffle(targetServers);
        // One queue per target server; queues beyond the number of online servers are not
        // handled in this invocation.
        ClaimReplicationQueueRemoteProcedure[] procs = new ClaimReplicationQueueRemoteProcedure[Math.min(queues.size(), targetServers.size())];
        for (int i = 0; i < procs.length; i++) {
            procs[i] = new ClaimReplicationQueueRemoteProcedure(crashedServer, queues.get(i), targetServers.get(i));
        }
        return procs;
    } catch (ReplicationException e) {
        if (retryCounter == null) {
            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
        }
        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
        // Exactly two placeholders for the two values; the trailing exception is picked up by
        // the logger as the throwable, so it needs no placeholder of its own.
        LOG.warn("Failed to claim replication queues for {}, suspend {}secs", crashedServer, backoff / 1000, e);
        setTimeout(Math.toIntExact(backoff));
        setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
        skipPersistence();
        throw new ProcedureSuspendedException();
    }
}
Also used : ReplicationQueueInfo(org.apache.hadoop.hbase.replication.ReplicationQueueInfo) ServerName(org.apache.hadoop.hbase.ServerName) ReplicationException(org.apache.hadoop.hbase.replication.ReplicationException) ReplicationQueueStorage(org.apache.hadoop.hbase.replication.ReplicationQueueStorage) ProcedureSuspendedException(org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException)

Example 10 with ReplicationQueueStorage

use of org.apache.hadoop.hbase.replication.ReplicationQueueStorage in project hbase by apache.

the class TestReplicationSourceManager method testCleanupUnknownPeerZNode.

/**
 * Adds two WALs to the queue of a peer id the manager does not know about, claims that
 * queue, and then checks that every queue the manager reports starts with "1".
 */
@Test
public void testCleanupUnknownPeerZNode() throws Exception {
    final String unknownPeerId = "2";
    final String walGroup = "testgroup";
    Server otherServer = new DummyServer("hostname2.example.org");
    ReplicationQueueStorage queueStorage = ReplicationStorageFactory.getReplicationQueueStorage(otherServer.getZooKeeper(), otherServer.getConfiguration());
    // Populate the queue of the unknown peer with two logs.
    queueStorage.addWAL(otherServer.getServerName(), unknownPeerId, walGroup + ".log1");
    queueStorage.addWAL(otherServer.getServerName(), unknownPeerId, walGroup + ".log2");
    manager.claimQueue(otherServer.getServerName(), unknownPeerId);
    // The logs of the unknown peer should be gone: only queues of peer "1" may remain.
    for (String queueId : manager.getAllQueues()) {
        assertTrue(queueId.startsWith("1"));
    }
}
Also used : MockServer(org.apache.hadoop.hbase.util.MockServer) Server(org.apache.hadoop.hbase.Server) ReplicationQueueStorage(org.apache.hadoop.hbase.replication.ReplicationQueueStorage) Test(org.junit.Test)

Aggregations

ReplicationQueueStorage (org.apache.hadoop.hbase.replication.ReplicationQueueStorage)17 Test (org.junit.Test)11 ServerName (org.apache.hadoop.hbase.ServerName)6 Server (org.apache.hadoop.hbase.Server)5 TableName (org.apache.hadoop.hbase.TableName)5 ReplicationException (org.apache.hadoop.hbase.replication.ReplicationException)5 Path (org.apache.hadoop.fs.Path)4 IOException (java.io.IOException)3 FileStatus (org.apache.hadoop.fs.FileStatus)3 Connection (org.apache.hadoop.hbase.client.Connection)3 Get (org.apache.hadoop.hbase.client.Get)3 RegionInfo (org.apache.hadoop.hbase.client.RegionInfo)3 Table (org.apache.hadoop.hbase.client.Table)3 MockServer (org.apache.hadoop.hbase.util.MockServer)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Cell (org.apache.hadoop.hbase.Cell)2 ZooKeeperConnectionException (org.apache.hadoop.hbase.ZooKeeperConnectionException)2 Delete (org.apache.hadoop.hbase.client.Delete)2