Example use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in the Apache HBase project, from the class TestBasicWALEntryStream, method testEOFExceptionInOldWALsDirectory.
/**
 * Tests that we handle EOFException properly if the wal has moved to oldWALs directory.
 * The first (empty) wal is rolled, archived, then truncated to zero length; with
 * "replication.source.eof.autorecovery" enabled the reader should skip it and shrink the queue.
 * @throws Exception exception
 */
@Test
public void testEOFExceptionInOldWALsDirectory() throws Exception {
// The queue starts with exactly one (the current, still-empty) wal.
assertEquals(1, logQueue.getQueueSize(fakeWalGroupId));
AbstractFSWAL abstractWAL = (AbstractFSWAL) log;
Path emptyLogFile = abstractWAL.getCurrentFileName();
// Force a roll so the empty wal becomes eligible for archiving.
log.rollWriter(true);
// AsyncFSWAL and FSHLog both move the log from WALs to oldWALs directory asynchronously.
// Wait for in flight wal close count to become 0. This makes sure that empty wal is moved to
// oldWALs directory.
Waiter.waitFor(CONF, 5000, (Waiter.Predicate<Exception>) () -> abstractWAL.getInflightWALCloseCount() == 0);
// There will be 2 logs in the queue.
assertEquals(2, logQueue.getQueueSize(fakeWalGroupId));
// Get the archived dir path for the first wal.
Path archivePath = AbstractFSWALProvider.findArchivedLog(emptyLogFile, CONF);
// Make sure that the wal path is not the same as archived Dir path.
assertNotNull(archivePath);
assertTrue(fs.exists(archivePath));
// Truncate to simulate the 0-length archived wal that triggers EOFException on read.
fs.truncate(archivePath, 0);
// make sure the size of the wal file is 0.
assertEquals(0, fs.getFileStatus(archivePath).getLen());
ReplicationSourceManager mockSourceManager = Mockito.mock(ReplicationSourceManager.class);
ReplicationSource source = Mockito.mock(ReplicationSource.class);
when(source.isPeerEnabled()).thenReturn(true);
when(mockSourceManager.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
Configuration localConf = new Configuration(CONF);
// Keep retries low so the test fails fast if auto-recovery does not kick in.
localConf.setInt("replication.source.maxretriesmultiplier", 1);
// Enable automatic recovery from EOFException on 0-length wals in oldWALs.
localConf.setBoolean("replication.source.eof.autorecovery", true);
// Start the reader thread.
createReader(false, localConf);
// Wait for the replication queue size to be 1. This means that we have handled
// 0 length wal from oldWALs directory.
Waiter.waitFor(localConf, 10000, (Waiter.Predicate<Exception>) () -> logQueue.getQueueSize(fakeWalGroupId) == 1);
}
Example use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in the Apache HBase project, from the class TestDrainReplicationQueuesForStandBy, method test.
/**
 * Verifies that after transitioning cluster1 back to DOWNGRADE_ACTIVE, the stale wal data
 * written while cluster1 was ACTIVE is drained (not replicated) to the standby cluster2,
 * avoiding data inconsistency.
 * @throws Exception on any test failure
 */
@Test
public void test() throws Exception {
// Start with cluster2 as STANDBY and cluster1 as ACTIVE, replication disabled.
UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.STANDBY);
UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.ACTIVE);
UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
// Write data to cluster1 while the peer is disabled; these edits pile up in the wal.
write(UTIL1, 0, 100);
HRegionServer rs = UTIL1.getRSForFirstRegionInTable(TABLE_NAME);
// Remember the wal group that holds the stale edits so we can assert it is drained later.
String walGroupId = AbstractFSWALProvider.getWALPrefixFromWALName(((AbstractFSWAL<?>) rs.getWAL(RegionInfoBuilder.newBuilder(TABLE_NAME).build())).getCurrentFileName().getName());
UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.DOWNGRADE_ACTIVE);
// transit cluster2 to DA and cluster 1 to S
verify(UTIL2, 0, 100);
UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.STANDBY);
// delete the original value, and then major compact
try (Table table = UTIL2.getConnection().getTable(TABLE_NAME)) {
for (int i = 0; i < 100; i++) {
table.delete(new Delete(Bytes.toBytes(i)));
}
}
UTIL2.flush(TABLE_NAME);
UTIL2.compact(TABLE_NAME, true);
// wait until the new values are replicated back to cluster1
HRegion region = rs.getRegions(TABLE_NAME).get(0);
UTIL1.waitFor(30000, new ExplainingPredicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return region.get(new Get(Bytes.toBytes(99))).isEmpty();
}
@Override
public String explainFailure() throws Exception {
return "Replication has not been catched up yet";
}
});
// transit cluster1 to DA and cluster2 to S, then we will start replicating from cluster1 to
// cluster2
UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.DOWNGRADE_ACTIVE);
UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID, SyncReplicationState.STANDBY);
UTIL1.getAdmin().enableReplicationPeer(PEER_ID);
// confirm that we will not replicate the old data which causes inconsistency
ReplicationSource source = (ReplicationSource) ((Replication) rs.getReplicationSourceService()).getReplicationManager().getSource(PEER_ID);
// The worker for the stale wal group should terminate once its queue is drained.
UTIL1.waitFor(30000, new ExplainingPredicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return !source.workerThreads.containsKey(walGroupId);
}
@Override
public String explainFailure() throws Exception {
return "Replication has not been catched up yet";
}
});
// The stale rows must never have reached cluster2 — every cell should be absent.
HRegion region2 = UTIL2.getMiniHBaseCluster().getRegions(TABLE_NAME).get(0);
for (int i = 0; i < 100; i++) {
assertTrue(region2.get(new Get(Bytes.toBytes(i))).isEmpty());
}
}
Example use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in the Apache HBase project, from the class TestWALOpenAfterDNRollingStart, method test.
/**
 * see HBASE-18132 This is a test case of failing open a wal(for replication for example) after
 * all datanode restarted (rolling upgrade, for example). Before this patch, low replication
 * detection is only used when syncing wal. But if the wal hasn't had any entry written, it will
 * never know all the replicas of the wal are broken (because of dn restarting). And this wal can
 * never be opened.
 * @throws Exception on any test failure
 */
@Test
public void test() throws Exception {
HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
AbstractFSWAL<?> wal = (AbstractFSWAL<?>) server.getWAL(null);
// Record the current wal path before any datanode restarts.
Path currentFile = wal.getCurrentFileName();
// restart every dn to simulate a dn rolling upgrade
for (int i = 0, n = TEST_UTIL.getDFSCluster().getDataNodes().size(); i < n; i++) {
// This is NOT a bug, when restart dn in miniDFSCluster, it will remove the stopped dn from
// the dn list and then add to the tail of this list, we need to always restart the first one
// to simulate rolling upgrade of every dn.
TEST_UTIL.getDFSCluster().restartDataNode(0);
// sleep enough time so log roller can detect the pipeline break and roll log
Thread.sleep(DN_RESTART_INTERVAL);
}
// The roller may have archived the wal already; if so, look for it in oldWALs instead.
if (!server.getFileSystem().exists(currentFile)) {
Path walRootDir = CommonFSUtils.getWALRootDir(TEST_UTIL.getConfiguration());
final Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
currentFile = new Path(oldLogDir, currentFile.getName());
}
// if the log is not rolled, then we can never open this wal forever.
try (WAL.Reader reader = WALFactory.createReader(TEST_UTIL.getTestFileSystem(), currentFile, TEST_UTIL.getConfiguration())) {
reader.next();
}
}
Example use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in the Apache HBase project, from the class AbstractFSWALProvider, method extractFileNumFromWAL.
/**
 * Extracts the file creation timestamp (the 'FileNum') embedded in the given WAL's current file
 * name. See {@link #validateWALFilename(String)} for the expected name format. Public until the
 * remaining tests move to o.a.h.h.wal.
 * @param wal the WAL to inspect; must not be null
 * @return the file number (creation timestamp) parsed from the WAL file name
 * @throws IllegalArgumentException if the WAL has no current path or the name is not valid
 */
public static long extractFileNumFromWAL(final WAL wal) {
final Path currentPath = ((AbstractFSWAL<?>) wal).getCurrentFileName();
if (currentPath == null) {
throw new IllegalArgumentException("The WAL path couldn't be null");
}
final String fileName = currentPath.getName();
final long fileNum = getTimestamp(fileName);
if (fileNum != NO_TIMESTAMP) {
return fileNum;
}
throw new IllegalArgumentException(fileName + " is not a valid wal file name");
}
Example use of org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL in the Apache HBase project, from the class AbstractWALRoller, method checkLowReplication.
/**
 * Periodically checks every registered WAL for low replication, see HBASE-18132.
 * @param now the current time, used to decide whether a roll is already pending
 */
private void checkLowReplication(long now) {
try {
for (Entry<WAL, RollController> entry : wals.entrySet()) {
WAL candidate = entry.getKey();
// Only filesystem-backed wals that are not already scheduled for a roll need checking.
if (!entry.getValue().needsRoll(now) && candidate instanceof AbstractFSWAL) {
((AbstractFSWAL<?>) candidate).checkLogLowReplication(checkLowReplicationInterval);
}
}
} catch (Throwable e) {
LOG.warn("Failed checking low replication", e);
}
}
Aggregations