use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.
the class TestWALLockup method testLockupWhenSyncInMiddleOfZigZagSetup.
/**
* Reproduce locking up that happens when we get an inopportune sync during setup for
* zigzaglatch wait. See HBASE-14317. If below is broken, we will see this test timeout because
* it is locked up.
* <p>First I need to set up some mocks for Server and RegionServerServices. I also need to
* set up a dodgy WAL that will throw an exception when we go to append to it.
*/
@Test(timeout = 20000)
public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
// A WAL that we can have throw exceptions when a flag is set.
class DodgyFSLog extends FSHLog {
// Set this when want the WAL to start throwing exceptions.
volatile boolean throwException = false;
// Latch to hold up processing until after another operation has had time to run.
CountDownLatch latch = new CountDownLatch(1);
public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
super(fs, root, logDir, conf);
}
@Override
protected void afterCreatingZigZagLatch() {
// the lock up we've seen in production.
if (throwException) {
try {
LOG.info("LATCHED");
// because all WALs have rolled. In this case, just give up on test.
if (!this.latch.await(5, TimeUnit.SECONDS)) {
LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
}
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
@Override
protected void beforeWaitOnSafePoint() {
if (throwException) {
LOG.info("COUNTDOWN");
// be stuck; test won't go down
while (this.latch.getCount() <= 0) Threads.sleep(1);
this.latch.countDown();
}
}
@Override
protected Writer createWriterInstance(Path path) throws IOException {
final Writer w = super.createWriterInstance(path);
return new Writer() {
@Override
public void close() throws IOException {
w.close();
}
@Override
public void sync() throws IOException {
if (throwException) {
throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
}
w.sync();
}
@Override
public void append(Entry entry) throws IOException {
if (throwException) {
throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
}
w.append(entry);
}
@Override
public long getLength() {
return w.getLength();
}
};
}
}
// Mocked up server and regionserver services. Needed below.
Server server = Mockito.mock(Server.class);
Mockito.when(server.getConfiguration()).thenReturn(CONF);
Mockito.when(server.isStopped()).thenReturn(false);
Mockito.when(server.isAborted()).thenReturn(false);
RegionServerServices services = Mockito.mock(RegionServerServices.class);
// OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + getName());
DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
Path originalWAL = dodgyWAL.getCurrentFileName();
// I need a log roller running.
LogRoller logRoller = new LogRoller(server, services);
logRoller.addWAL(dodgyWAL);
// There is no 'stop' once a logRoller is running.. it just dies.
logRoller.start();
// Now get a region and start adding in edits.
HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
byte[] bytes = Bytes.toBytes(getName());
NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
scopes.put(COLUMN_FAMILY_BYTES, 0);
MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
try {
// First get something into memstore. Make a Put and then pull the Cell out of it. Will
// manage append and sync carefully in below to manufacture hang. We keep adding same
// edit. WAL subsystem doesn't care.
Put put = new Put(bytes);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(), System.currentTimeMillis(), mvcc, scopes);
WALEdit edit = new WALEdit();
CellScanner CellScanner = put.cellScanner();
assertTrue(CellScanner.advance());
edit.add(CellScanner.current());
// out other side of the ringbuffer. If small numbers, stuff doesn't make it to WAL
for (int i = 0; i < 1000; i++) {
region.put(put);
}
// Set it so we start throwing exceptions.
LOG.info("SET throwing of exception on append");
dodgyWAL.throwException = true;
// This append provokes a WAL roll request
dodgyWAL.append(region.getRegionInfo(), key, edit, true);
boolean exception = false;
try {
dodgyWAL.sync();
} catch (Exception e) {
exception = true;
}
assertTrue("Did not get sync exception", exception);
// Get a memstore flush going too so we have same hung profile as up in the issue over
// in HBASE-14317. Flush hangs trying to get sequenceid because the ringbuffer is held up
// by the zigzaglatch waiting on syncs to come home.
Thread t = new Thread("Flusher") {
public void run() {
try {
if (region.getMemstoreSize() <= 0) {
throw new IOException("memstore size=" + region.getMemstoreSize());
}
region.flush(false);
} catch (IOException e) {
// Can fail trying to flush in middle of a roll. Not a failure. Will succeed later
// when roll completes.
LOG.info("In flush", e);
}
LOG.info("Exiting");
}
;
};
t.setDaemon(true);
t.start();
// Wait until
while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
// Now assert I got a new WAL file put in place even though loads of errors above.
assertTrue(originalWAL != dodgyWAL.getCurrentFileName());
// Can I append to it?
dodgyWAL.throwException = false;
try {
region.put(put);
} catch (Exception e) {
LOG.info("In the put", e);
}
} finally {
// To stop logRoller, its server has to say it is stopped.
Mockito.when(server.isStopped()).thenReturn(true);
if (logRoller != null)
logRoller.close();
try {
if (region != null)
region.close();
if (dodgyWAL != null)
dodgyWAL.close();
} catch (Exception e) {
LOG.info("On way out", e);
}
}
}
use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.
the class TestCompactionPolicy method initialize.
/**
* Setting up a Store
* @throws IOException with error
*/
protected void initialize() throws IOException {
Path basedir = new Path(DIR);
String logName = "logs";
Path logdir = new Path(DIR, logName);
HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes("family"));
FileSystem fs = FileSystem.get(conf);
fs.delete(logdir, true);
HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(Bytes.toBytes("table")));
htd.addFamily(hcd);
HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
hlog = new FSHLog(fs, basedir, logName, conf);
region = HRegion.createHRegion(info, basedir, conf, htd, hlog);
region.close();
Path tableDir = FSUtils.getTableDir(basedir, htd.getTableName());
region = new HRegion(tableDir, hlog, fs, conf, info, htd, null);
store = new HStore(region, hcd, conf);
TEST_FILE = region.getRegionFileSystem().createTempName();
fs.createNewFile(TEST_FILE);
}
use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.
the class TestFailedAppendAndSync method testLockupAroundBadAssignSync.
/**
* Reproduce locking up that happens when we get an exceptions appending and syncing.
* See HBASE-14317.
* First I need to set up some mocks for Server and RegionServerServices. I also need to
* set up a dodgy WAL that will throw an exception when we go to append to it.
*/
@Test(timeout = 300000)
public void testLockupAroundBadAssignSync() throws IOException {
final AtomicLong rolls = new AtomicLong(0);
// Dodgy WAL. Will throw exceptions when flags set.
class DodgyFSLog extends FSHLog {
volatile boolean throwSyncException = false;
volatile boolean throwAppendException = false;
public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
super(fs, root, logDir, conf);
}
@Override
public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
byte[][] regions = super.rollWriter(force);
rolls.getAndIncrement();
return regions;
}
@Override
protected Writer createWriterInstance(Path path) throws IOException {
final Writer w = super.createWriterInstance(path);
return new Writer() {
@Override
public void close() throws IOException {
w.close();
}
@Override
public void sync() throws IOException {
if (throwSyncException) {
throw new IOException("FAKE! Failed to replace a bad datanode...");
}
w.sync();
}
@Override
public void append(Entry entry) throws IOException {
if (throwAppendException) {
throw new IOException("FAKE! Failed to replace a bad datanode...");
}
w.append(entry);
}
@Override
public long getLength() {
return w.getLength();
}
};
}
}
// Make up mocked server and services.
Server server = mock(Server.class);
when(server.getConfiguration()).thenReturn(CONF);
when(server.isStopped()).thenReturn(false);
when(server.isAborted()).thenReturn(false);
RegionServerServices services = mock(RegionServerServices.class);
// OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
// the test.
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + getName());
DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
LogRoller logRoller = new LogRoller(server, services);
logRoller.addWAL(dodgyWAL);
logRoller.start();
boolean threwOnSync = false;
boolean threwOnAppend = false;
boolean threwOnBoth = false;
HRegion region = initHRegion(tableName, null, null, dodgyWAL);
try {
// Get some random bytes.
byte[] value = Bytes.toBytes(getName());
try {
// First get something into memstore
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
region.put(put);
} catch (IOException ioe) {
fail();
}
long rollsCount = rolls.get();
try {
dodgyWAL.throwAppendException = true;
dodgyWAL.throwSyncException = false;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
region.put(put);
} catch (IOException ioe) {
threwOnAppend = true;
}
while (rollsCount == rolls.get()) Threads.sleep(100);
rollsCount = rolls.get();
try {
dodgyWAL.throwAppendException = true;
dodgyWAL.throwSyncException = true;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
region.put(put);
} catch (IOException ioe) {
threwOnBoth = true;
}
while (rollsCount == rolls.get()) Threads.sleep(100);
// Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
// to just continue.
// So, should be no abort at this stage. Verify.
Mockito.verify(server, Mockito.atLeast(0)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
try {
dodgyWAL.throwAppendException = false;
dodgyWAL.throwSyncException = true;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
region.put(put);
} catch (IOException ioe) {
threwOnSync = true;
}
// happens. If it don't we'll timeout the whole test. That is fine.
while (true) {
try {
Mockito.verify(server, Mockito.atLeast(1)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
break;
} catch (WantedButNotInvoked t) {
Threads.sleep(1);
}
}
} finally {
// To stop logRoller, its server has to say it is stopped.
Mockito.when(server.isStopped()).thenReturn(true);
if (logRoller != null)
logRoller.close();
if (region != null) {
try {
region.close(true);
} catch (DroppedSnapshotException e) {
LOG.info("On way out; expected!", e);
}
}
if (dodgyWAL != null)
dodgyWAL.close();
assertTrue("The regionserver should have thrown an exception", threwOnBoth);
assertTrue("The regionserver should have thrown an exception", threwOnAppend);
assertTrue("The regionserver should have thrown an exception", threwOnSync);
}
}
use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.
the class TestHRegion method testMemstoreSizeWithFlushCanceling.
/**
* Test for HBASE-14229: Flushing canceled by coprocessor still leads to memstoreSize set down
*/
@Test
public void testMemstoreSizeWithFlushCanceling() throws IOException {
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + "testMemstoreSizeWithFlushCanceling");
FSHLog hLog = new FSHLog(fs, rootDir, "testMemstoreSizeWithFlushCanceling", CONF);
HRegion region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, hLog, COLUMN_FAMILY_BYTES);
Store store = region.getStore(COLUMN_FAMILY_BYTES);
assertEquals(0, region.getMemstoreSize());
// Put some value and make sure flush could be completed normally
byte[] value = Bytes.toBytes(method);
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("abc"), value);
region.put(put);
long onePutSize = region.getMemstoreSize();
assertTrue(onePutSize > 0);
region.flush(true);
assertEquals("memstoreSize should be zero", 0, region.getMemstoreSize());
assertEquals("flushable size should be zero", 0, store.getSizeToFlush().getDataSize());
// save normalCPHost and replaced by mockedCPHost, which will cancel flush requests
RegionCoprocessorHost normalCPHost = region.getCoprocessorHost();
RegionCoprocessorHost mockedCPHost = Mockito.mock(RegionCoprocessorHost.class);
when(mockedCPHost.preFlush(Mockito.isA(HStore.class), Mockito.isA(InternalScanner.class))).thenReturn(null);
region.setCoprocessorHost(mockedCPHost);
region.put(put);
region.flush(true);
assertEquals("memstoreSize should NOT be zero", onePutSize, region.getMemstoreSize());
assertEquals("flushable size should NOT be zero", onePutSize, store.getSizeToFlush().getDataSize());
// set normalCPHost and flush again, the snapshot will be flushed
region.setCoprocessorHost(normalCPHost);
region.flush(true);
assertEquals("memstoreSize should be zero", 0, region.getMemstoreSize());
assertEquals("flushable size should be zero", 0, store.getSizeToFlush().getDataSize());
HBaseTestingUtility.closeRegionAndWAL(region);
}
use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.
the class TestHRegion method testMemstoreSizeAccountingWithFailedPostBatchMutate.
@Test
public void testMemstoreSizeAccountingWithFailedPostBatchMutate() throws IOException {
String testName = "testMemstoreSizeAccountingWithFailedPostBatchMutate";
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + testName);
FSHLog hLog = new FSHLog(fs, rootDir, testName, CONF);
HRegion region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, hLog, COLUMN_FAMILY_BYTES);
Store store = region.getStore(COLUMN_FAMILY_BYTES);
assertEquals(0, region.getMemstoreSize());
// Put one value
byte[] value = Bytes.toBytes(method);
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("abc"), value);
region.put(put);
long onePutSize = region.getMemstoreSize();
assertTrue(onePutSize > 0);
RegionCoprocessorHost mockedCPHost = Mockito.mock(RegionCoprocessorHost.class);
doThrow(new IOException()).when(mockedCPHost).postBatchMutate(Mockito.<MiniBatchOperationInProgress<Mutation>>any());
region.setCoprocessorHost(mockedCPHost);
put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("dfg"), value);
try {
region.put(put);
fail("Should have failed with IOException");
} catch (IOException expected) {
}
long expectedSize = onePutSize * 2;
assertEquals("memstoreSize should be incremented", expectedSize, region.getMemstoreSize());
assertEquals("flushable size should be incremented", expectedSize, store.getSizeToFlush().getDataSize());
region.setCoprocessorHost(null);
HBaseTestingUtility.closeRegionAndWAL(region);
}
Aggregations