Search in sources :

Example 1 with AbstractFSWAL

use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in project hbase by apache.

From class TestBasicWALEntryStream, method testEOFExceptionInOldWALsDirectory.

/**
 * Tests that we handle EOFException properly if the wal has moved to oldWALs directory.
 * @throws Exception on any test failure
 */
@Test
public void testEOFExceptionInOldWALsDirectory() throws Exception {
    // Exactly one wal is enqueued before we start.
    assertEquals(1, logQueue.getQueueSize(fakeWalGroupId));
    // Use the parameterized wildcard type instead of the raw AbstractFSWAL type
    // (consistent with the other call sites that cast to AbstractFSWAL<?>).
    AbstractFSWAL<?> abstractWAL = (AbstractFSWAL<?>) log;
    Path emptyLogFile = abstractWAL.getCurrentFileName();
    log.rollWriter(true);
    // AsyncFSWAL and FSHLog both move the log from WALs to oldWALs directory asynchronously.
    // Wait for in flight wal close count to become 0. This makes sure that the empty wal is
    // moved to the oldWALs directory.
    Waiter.waitFor(CONF, 5000, (Waiter.Predicate<Exception>) () -> abstractWAL.getInflightWALCloseCount() == 0);
    // There will be 2 logs in the queue.
    assertEquals(2, logQueue.getQueueSize(fakeWalGroupId));
    // Get the archived dir path for the first wal.
    Path archivePath = AbstractFSWALProvider.findArchivedLog(emptyLogFile, CONF);
    // Make sure that the wal path is not the same as archived Dir path.
    assertNotNull(archivePath);
    assertTrue(fs.exists(archivePath));
    fs.truncate(archivePath, 0);
    // Make sure the size of the wal file is 0.
    assertEquals(0, fs.getFileStatus(archivePath).getLen());
    ReplicationSourceManager mockSourceManager = Mockito.mock(ReplicationSourceManager.class);
    ReplicationSource source = Mockito.mock(ReplicationSource.class);
    when(source.isPeerEnabled()).thenReturn(true);
    when(mockSourceManager.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
    // Use a local copy of the configuration so the retry/recovery tweaks below
    // do not leak into other tests sharing CONF.
    Configuration localConf = new Configuration(CONF);
    localConf.setInt("replication.source.maxretriesmultiplier", 1);
    localConf.setBoolean("replication.source.eof.autorecovery", true);
    // Start the reader thread.
    createReader(false, localConf);
    // Wait for the replication queue size to be 1. This means that we have handled
    // the 0 length wal from the oldWALs directory.
    Waiter.waitFor(localConf, 10000, (Waiter.Predicate<Exception>) () -> logQueue.getQueueSize(fakeWalGroupId) == 1);
}
Also used : Path(org.apache.hadoop.fs.Path) AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL) AtomicLong(java.util.concurrent.atomic.AtomicLong) Configuration(org.apache.hadoop.conf.Configuration) Waiter(org.apache.hadoop.hbase.Waiter) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 2 with AbstractFSWAL

use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in project hbase by apache.

From class TestDrainReplicationQueuesForStandBy, method test.

/**
 * Exercises draining of replication queues when two clusters swap ACTIVE/STANDBY sync
 * replication roles: data written to cluster1 while its peer is disabled must not be
 * replicated to cluster2 once the roles are swapped back.
 * @throws Exception on any test failure
 */
@Test
public void test() throws Exception {
    // Start with cluster2 as STANDBY and cluster1 as ACTIVE, then disable the peer so
    // the writes below queue up on cluster1 without being shipped.
    UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.STANDBY);
    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.ACTIVE);
    UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
    write(UTIL1, 0, 100);
    HRegionServer rs = UTIL1.getRSForFirstRegionInTable(TABLE_NAME);
    // Remember the wal group that holds the queued (stale) entries for later assertions.
    String walGroupId = AbstractFSWALProvider.getWALPrefixFromWALName(((AbstractFSWAL<?>) rs.getWAL(RegionInfoBuilder.newBuilder(TABLE_NAME).build())).getCurrentFileName().getName());
    // transit cluster2 to DA and cluster 1 to S
    UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.DOWNGRADE_ACTIVE);
    verify(UTIL2, 0, 100);
    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.STANDBY);
    // delete the original value, and then major compact
    try (Table table = UTIL2.getConnection().getTable(TABLE_NAME)) {
        for (int i = 0; i < 100; i++) {
            table.delete(new Delete(Bytes.toBytes(i)));
        }
    }
    UTIL2.flush(TABLE_NAME);
    UTIL2.compact(TABLE_NAME, true);
    // wait until the new values are replicated back to cluster1
    HRegion region = rs.getRegions(TABLE_NAME).get(0);
    UTIL1.waitFor(30000, new ExplainingPredicate<Exception>() {

        @Override
        public boolean evaluate() throws Exception {
            // Row 99 was deleted on cluster2; once the delete arrives here the get is empty.
            return region.get(new Get(Bytes.toBytes(99))).isEmpty();
        }

        @Override
        public String explainFailure() throws Exception {
            return "Replication has not been catched up yet";
        }
    });
    // transit cluster1 to DA and cluster2 to S, then we will start replicating from cluster1 to
    // cluster2
    UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.DOWNGRADE_ACTIVE);
    UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.STANDBY);
    UTIL1.getAdmin().enableReplicationPeer(PEER_ID);
    // confirm that we will not replicate the old data which causes inconsistency
    ReplicationSource source = (ReplicationSource) ((Replication) rs.getReplicationSourceService()).getReplicationManager().getSource(PEER_ID);
    UTIL1.waitFor(30000, new ExplainingPredicate<Exception>() {

        @Override
        public boolean evaluate() throws Exception {
            // The worker for the stale wal group disappears once its queue is drained.
            return !source.workerThreads.containsKey(walGroupId);
        }

        @Override
        public String explainFailure() throws Exception {
            return "Replication has not been catched up yet";
        }
    });
    // The deletes must still hold on cluster2: none of the stale rows were re-replicated.
    HRegion region2 = UTIL2.getMiniHBaseCluster().getRegions(TABLE_NAME).get(0);
    for (int i = 0; i < 100; i++) {
        assertTrue(region2.get(new Get(Bytes.toBytes(i))).isEmpty());
    }
}
Also used : Delete(org.apache.hadoop.hbase.client.Delete) AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL) Table(org.apache.hadoop.hbase.client.Table) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) Get(org.apache.hadoop.hbase.client.Get) Test(org.junit.Test)

Example 3 with AbstractFSWAL

use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in project hbase by apache.

From class TestWALOpenAfterDNRollingStart, method test.

/**
 * See HBASE-18132. This is a test case of failing to open a wal (for replication, for example)
 * after all datanodes restarted (rolling upgrade, for example). Before this patch, low
 * replication detection was only used when syncing the wal. But if the wal hasn't had any entry
 * written, it would never know all the replicas of the wal are broken (because of the dns
 * restarting), and this wal could never be opened.
 * @throws Exception on any test failure
 */
@Test
public void test() throws Exception {
    HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
    AbstractFSWAL<?> wal = (AbstractFSWAL<?>) server.getWAL(null);
    Path currentFile = wal.getCurrentFileName();
    // restart every dn to simulate a dn rolling upgrade
    for (int i = 0, n = TEST_UTIL.getDFSCluster().getDataNodes().size(); i < n; i++) {
        // This is NOT a bug, when restart dn in miniDFSCluster, it will remove the stopped dn from
        // the dn list and then add to the tail of this list, we need to always restart the first one
        // to simulate rolling upgrade of every dn.
        TEST_UTIL.getDFSCluster().restartDataNode(0);
        // sleep enough time so log roller can detect the pipeline break and roll log
        Thread.sleep(DN_RESTART_INTERVAL);
    }
    // If the wal was rolled (the fix working), the old file has been archived; look for it
    // under the oldWALs directory instead.
    if (!server.getFileSystem().exists(currentFile)) {
        Path walRootDir = CommonFSUtils.getWALRootDir(TEST_UTIL.getConfiguration());
        final Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
        currentFile = new Path(oldLogDir, currentFile.getName());
    }
    // if the log is not rolled, then we can never open this wal forever.
    try (WAL.Reader reader = WALFactory.createReader(TEST_UTIL.getTestFileSystem(), currentFile, TEST_UTIL.getConfiguration())) {
        reader.next();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL) AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL) HRegionServer(org.apache.hadoop.hbase.regionserver.HRegionServer) Test(org.junit.Test)

Example 4 with AbstractFSWAL

use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in project hbase by apache.

From class AbstractFSWALProvider, method extractFileNumFromWAL.

/**
 * Returns the file create timestamp (the 'FileNum') embedded in the wal file name. For the name
 * format see {@link #validateWALFilename(String)}. Public until remaining tests move to
 * o.a.h.h.wal.
 * @param wal must not be null
 * @return the file number that is part of the WAL file name
 * @throws IllegalArgumentException if the wal has no current file or the name is not a valid
 *         wal file name
 */
public static long extractFileNumFromWAL(final WAL wal) {
    final Path currentPath = ((AbstractFSWAL<?>) wal).getCurrentFileName();
    if (currentPath == null) {
        throw new IllegalArgumentException("The WAL path couldn't be null");
    }
    final String fileName = currentPath.getName();
    final long fileNum = getTimestamp(fileName);
    if (fileNum == NO_TIMESTAMP) {
        throw new IllegalArgumentException(fileName + " is not a valid wal file name");
    }
    return fileNum;
}
Also used : Path(org.apache.hadoop.fs.Path) AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL)

Example 5 with AbstractFSWAL

use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in project hbase by apache.

From class AbstractWALRoller, method checkLowReplication.

/**
 * We need to check low replication periodically, see HBASE-18132.
 * Walks every registered wal; ones that are already due for a roll, or that are not
 * filesystem-backed, are skipped.
 */
private void checkLowReplication(long now) {
    try {
        for (Entry<WAL, RollController> entry : wals.entrySet()) {
            final WAL candidate = entry.getKey();
            // A wal that already needs rolling will be handled by the roller anyway, and
            // only AbstractFSWAL implementations expose a replication check.
            if (entry.getValue().needsRoll(now) || !(candidate instanceof AbstractFSWAL)) {
                continue;
            }
            ((AbstractFSWAL<?>) candidate).checkLogLowReplication(checkLowReplicationInterval);
        }
    } catch (Throwable e) {
        LOG.warn("Failed checking low replication", e);
    }
}
Also used : AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL) AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL)

Aggregations

AbstractFSWAL (org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL)9 Path (org.apache.hadoop.fs.Path)7 Test (org.junit.Test)5 IOException (java.io.IOException)2 HRegionServer (org.apache.hadoop.hbase.regionserver.HRegionServer)2 WAL (org.apache.hadoop.hbase.wal.WAL)2 FileNotFoundException (java.io.FileNotFoundException)1 ArrayList (java.util.ArrayList)1 ExecutionException (java.util.concurrent.ExecutionException)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileStatus (org.apache.hadoop.fs.FileStatus)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Waiter (org.apache.hadoop.hbase.Waiter)1 Delete (org.apache.hadoop.hbase.client.Delete)1 Get (org.apache.hadoop.hbase.client.Get)1 Put (org.apache.hadoop.hbase.client.Put)1 Table (org.apache.hadoop.hbase.client.Table)1 HRegion (org.apache.hadoop.hbase.regionserver.HRegion)1 ReplicationSourceManager (org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager)1