Use of org.apache.hadoop.hbase.wal.WALProvider.Writer in project hbase by apache.
From the class TestWALLockup, method testLockupWhenSyncInMiddleOfZigZagSetup.
/**
 * Reproduce the lockup that happens when we get an inopportune sync during setup for a
 * zigzaglatch wait. See HBASE-14317. If the below is broken, we will see this test time out
 * because it is locked up.
 * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to
 * set up a dodgy WAL that will throw an exception when we go to append to it.
 */
@Test(timeout = 20000)
public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
  // A WAL that we can have throw exceptions when a flag is set.
  class DodgyFSLog extends FSHLog {
    // Set this when we want the WAL to start throwing exceptions.
    volatile boolean throwException = false;

    // Latch to hold up processing until after another operation has had time to run.
    CountDownLatch latch = new CountDownLatch(1);

    public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
        throws IOException {
      super(fs, root, logDir, conf);
    }

    @Override
    protected void afterCreatingZigZagLatch() {
      // Hold up processing here to manufacture the lockup we've seen in production.
      if (throwException) {
        try {
          LOG.info("LATCHED");
          // If we time out waiting here, it may be because all WALs have rolled and no one
          // will ever free the latch. In that case, just give up on the test.
          if (!this.latch.await(5, TimeUnit.SECONDS)) {
            LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
          }
        } catch (InterruptedException e) {
          LOG.warn("Interrupted while awaiting latch", e);
        }
      }
    }

    @Override
    protected void beforeWaitOnSafePoint() {
      if (throwException) {
        LOG.info("COUNTDOWN");
        // Don't count the latch down until someone is waiting on it; otherwise
        // afterCreatingZigZagLatch above will get to the latch, no one will ever free it,
        // and we'll be stuck; the test won't go down.
        while (this.latch.getCount() <= 0) Threads.sleep(1);
        this.latch.countDown();
      }
    }

    @Override
    protected Writer createWriterInstance(Path path) throws IOException {
      final Writer w = super.createWriterInstance(path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          w.close();
        }

        @Override
        public void sync() throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
          }
          w.sync();
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
          }
          w.append(entry);
        }

        @Override
        public long getLength() {
          return w.getLength();
        }
      };
    }
  }

  // Mocked up server and regionserver services. Needed below.
  Server server = Mockito.mock(Server.class);
  Mockito.when(server.getConfiguration()).thenReturn(CONF);
  Mockito.when(server.isStopped()).thenReturn(false);
  Mockito.when(server.isAborted()).thenReturn(false);
  RegionServerServices services = Mockito.mock(RegionServerServices.class);

  // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
  FileSystem fs = FileSystem.get(CONF);
  Path rootDir = new Path(dir + getName());
  DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
  Path originalWAL = dodgyWAL.getCurrentFileName();
  // I need a log roller running.
  LogRoller logRoller = new LogRoller(server, services);
  logRoller.addWAL(dodgyWAL);
  // There is no 'stop' once a logRoller is running; it just dies.
  logRoller.start();
  // Now get a region and start adding in edits.
  HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
  final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
  byte[] bytes = Bytes.toBytes(getName());
  NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
  scopes.put(COLUMN_FAMILY_BYTES, 0);
  MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
  try {
    // First get something into memstore. Make a Put and then pull the Cell out of it. We will
    // manage append and sync carefully below to manufacture the hang. We keep adding the same
    // edit; the WAL subsystem doesn't care.
    Put put = new Put(bytes);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
    WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(),
        System.currentTimeMillis(), mvcc, scopes);
    WALEdit edit = new WALEdit();
    CellScanner cellScanner = put.cellScanner();
    assertTrue(cellScanner.advance());
    edit.add(cellScanner.current());
    // Do a big number of puts so edits push out the other side of the ringbuffer; with small
    // numbers, stuff doesn't make it to the WAL.
    for (int i = 0; i < 1000; i++) {
      region.put(put);
    }
    // Set it so we start throwing exceptions.
    LOG.info("SET throwing of exception on append");
    dodgyWAL.throwException = true;
    // This append provokes a WAL roll request.
    dodgyWAL.append(region.getRegionInfo(), key, edit, true);
    boolean exception = false;
    try {
      dodgyWAL.sync();
    } catch (Exception e) {
      exception = true;
    }
    assertTrue("Did not get sync exception", exception);

    // Get a memstore flush going too so we have the same hung profile as up in the issue over
    // in HBASE-14317. Flush hangs trying to get a sequenceid because the ringbuffer is held up
    // by the zigzaglatch waiting on syncs to come home.
    Thread t = new Thread("Flusher") {
      @Override
      public void run() {
        try {
          if (region.getMemstoreSize() <= 0) {
            throw new IOException("memstore size=" + region.getMemstoreSize());
          }
          region.flush(false);
        } catch (IOException e) {
          // Can fail trying to flush in the middle of a roll. Not a failure. Will succeed later
          // when the roll completes.
          LOG.info("In flush", e);
        }
        LOG.info("Exiting");
      }
    };
    t.setDaemon(true);
    t.start();
    // Wait until the dodgy WAL's latch has been counted down.
    while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
    // Now assert I got a new WAL file put in place even though loads of errors above.
    assertNotEquals(originalWAL, dodgyWAL.getCurrentFileName());
    // Can I append to it?
    dodgyWAL.throwException = false;
    try {
      region.put(put);
    } catch (Exception e) {
      LOG.info("In the put", e);
    }
  } finally {
    // To stop logRoller, its server has to say it is stopped.
    Mockito.when(server.isStopped()).thenReturn(true);
    if (logRoller != null) logRoller.close();
    try {
      if (region != null) region.close();
      if (dodgyWAL != null) dodgyWAL.close();
    } catch (Exception e) {
      LOG.info("On way out", e);
    }
  }
}
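
A pattern shared by the lockup tests in this listing is a fault-injecting decorator around the real WALProvider.Writer: every call delegates to the wrapped writer unless a volatile flag asks for a failure. Below is a minimal, self-contained sketch of that decorator; the SketchWriter interface and all names are simplified stand-ins for illustration, not HBase's actual API.

  import java.io.IOException;
  import java.util.concurrent.atomic.AtomicBoolean;

  // Simplified stand-in for WALProvider.Writer; the real interface also has getLength().
  interface SketchWriter {
    void append(String entry) throws IOException;
    void sync() throws IOException;
    void close() throws IOException;
  }

  // Decorator that delegates to a real writer but throws when a flag is flipped, letting a
  // test inject failures at an exact point without touching the wrapped implementation.
  class FaultInjectingWriter implements SketchWriter {
    private final SketchWriter delegate;
    final AtomicBoolean throwOnAppend = new AtomicBoolean(false);
    final AtomicBoolean throwOnSync = new AtomicBoolean(false);

    FaultInjectingWriter(SketchWriter delegate) {
      this.delegate = delegate;
    }

    @Override
    public void append(String entry) throws IOException {
      if (throwOnAppend.get()) {
        throw new IOException("FAKE! injected append failure");
      }
      delegate.append(entry);
    }

    @Override
    public void sync() throws IOException {
      if (throwOnSync.get()) {
        throw new IOException("FAKE! injected sync failure");
      }
      delegate.sync();
    }

    @Override
    public void close() throws IOException {
      delegate.close();
    }
  }

Flipping the flags from the test thread is what lets these tests provoke an append or sync failure at a chosen point in the WAL pipeline while everything else runs unmodified.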
Use of org.apache.hadoop.hbase.wal.WALProvider.Writer in project hbase by apache.
From the class TestHRegion, method testFlushMarkers.
@Test
public void testFlushMarkers() throws Exception {
  // Tests that flush markers are written to the WAL and handled at recovered-edits replay.
  byte[] family = Bytes.toBytes("family");
  Path logDir = TEST_UTIL.getDataTestDirOnTestFS(method + ".log");
  final Configuration walConf = new Configuration(TEST_UTIL.getConfiguration());
  FSUtils.setRootDir(walConf, logDir);
  final WALFactory wals = new WALFactory(walConf, null, method);
  final WAL wal = wals.getWAL(tableName.getName(), tableName.getNamespace());
  this.region = initHRegion(tableName, HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW,
      false, Durability.USE_DEFAULT, wal, family);
  try {
    Path regiondir = region.getRegionFileSystem().getRegionDir();
    FileSystem fs = region.getRegionFileSystem().getFileSystem();
    byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes();
    long maxSeqId = 3;
    long minSeqId = 0;
    for (long i = minSeqId; i < maxSeqId; i++) {
      Put put = new Put(Bytes.toBytes(i));
      put.addColumn(family, Bytes.toBytes(i), Bytes.toBytes(i));
      region.put(put);
      region.flush(true);
    }
    // This will create a region with 3 files from the flushes.
    assertEquals(3, region.getStore(family).getStorefilesCount());
    List<String> storeFiles = new ArrayList<>(3);
    for (StoreFile sf : region.getStore(family).getStorefiles()) {
      storeFiles.add(sf.getPath().getName());
    }
    // Now verify that the flush markers are written.
    wal.shutdown();
    WAL.Reader reader = WALFactory.createReader(fs,
        AbstractFSWALProvider.getCurrentFileName(wal), TEST_UTIL.getConfiguration());
    try {
      List<WAL.Entry> flushDescriptors = new ArrayList<>();
      long lastFlushSeqId = -1;
      while (true) {
        WAL.Entry entry = reader.next();
        if (entry == null) {
          break;
        }
        Cell cell = entry.getEdit().getCells().get(0);
        if (WALEdit.isMetaEditFamily(cell)) {
          FlushDescriptor flushDesc = WALEdit.getFlushDescriptor(cell);
          assertNotNull(flushDesc);
          assertArrayEquals(tableName.getName(), flushDesc.getTableName().toByteArray());
          if (flushDesc.getAction() == FlushAction.START_FLUSH) {
            assertTrue(flushDesc.getFlushSequenceNumber() > lastFlushSeqId);
          } else if (flushDesc.getAction() == FlushAction.COMMIT_FLUSH) {
            assertTrue(flushDesc.getFlushSequenceNumber() == lastFlushSeqId);
          }
          lastFlushSeqId = flushDesc.getFlushSequenceNumber();
          assertArrayEquals(regionName, flushDesc.getEncodedRegionName().toByteArray());
          // Only one store.
          assertEquals(1, flushDesc.getStoreFlushesCount());
          StoreFlushDescriptor storeFlushDesc = flushDesc.getStoreFlushes(0);
          assertArrayEquals(family, storeFlushDesc.getFamilyName().toByteArray());
          assertEquals("family", storeFlushDesc.getStoreHomeDir());
          if (flushDesc.getAction() == FlushAction.START_FLUSH) {
            assertEquals(0, storeFlushDesc.getFlushOutputCount());
          } else {
            // Only one file from the flush.
            assertEquals(1, storeFlushDesc.getFlushOutputCount());
            assertTrue(storeFiles.contains(storeFlushDesc.getFlushOutput(0)));
          }
          flushDescriptors.add(entry);
        }
      }
      // One START_FLUSH and one COMMIT_FLUSH marker per flush, for 3 flushes.
      assertEquals(3 * 2, flushDescriptors.size());

      // Now write those markers to the recovered edits again.
      Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
      Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", 1000));
      fs.create(recoveredEdits);
      WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits);
      for (WAL.Entry entry : flushDescriptors) {
        writer.append(entry);
      }
      writer.close();
    } finally {
      if (null != reader) {
        try {
          reader.close();
        } catch (IOException exception) {
          LOG.warn("Problem closing wal: " + exception.getMessage());
          LOG.debug("exception details", exception);
        }
      }
    }

    // Close the region now, and reopen again.
    region.close();
    region = HRegion.openHRegion(region, null);
    // Now check whether we can read back the data from the region.
    for (long i = minSeqId; i < maxSeqId; i++) {
      Get get = new Get(Bytes.toBytes(i));
      Result result = region.get(get);
      byte[] value = result.getValue(family, Bytes.toBytes(i));
      assertArrayEquals(Bytes.toBytes(i), value);
    }
  } finally {
    HBaseTestingUtility.closeRegionAndWAL(this.region);
    this.region = null;
    wals.close();
  }
}
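
testFlushMarkers asserts that each flush produces a START_FLUSH marker followed by a COMMIT_FLUSH marker carrying the same flush sequence number. The following self-contained sketch models just that pairing rule; the Action enum and Marker class are illustrative stand-ins, not HBase's protobuf FlushDescriptor.

  import java.util.List;

  // Minimal model of the marker pairing the test asserts: markers must strictly alternate
  // START_FLUSH/COMMIT_FLUSH, COMMIT must match the preceding START's sequence number, and
  // each new flush must use a higher sequence number than the last.
  class FlushMarkerChecker {
    enum Action { START_FLUSH, COMMIT_FLUSH }

    static final class Marker {
      final Action action;
      final long flushSeqId;
      Marker(Action action, long flushSeqId) {
        this.action = action;
        this.flushSeqId = flushSeqId;
      }
    }

    static boolean wellFormed(List<Marker> markers) {
      long lastSeqId = -1;
      boolean expectStart = true;
      for (Marker m : markers) {
        if (expectStart) {
          if (m.action != Action.START_FLUSH || m.flushSeqId <= lastSeqId) return false;
        } else {
          if (m.action != Action.COMMIT_FLUSH || m.flushSeqId != lastSeqId) return false;
        }
        lastSeqId = m.flushSeqId;
        expectStart = !expectStart;
      }
      return expectStart; // every START must have been matched by a COMMIT
    }
  }

For example, wellFormed(Arrays.asList(new Marker(Action.START_FLUSH, 2), new Marker(Action.COMMIT_FLUSH, 2))) is true, while a COMMIT with a different sequence number than its START is not, mirroring the test's greater-than/equals assertions.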
Use of org.apache.hadoop.hbase.wal.WALProvider.Writer in project hbase by apache.
From the class TestHRegion, method testRecoveredEditsReplayCompaction.
public void testRecoveredEditsReplayCompaction(boolean mismatchedRegionName) throws Exception {
  byte[] family = Bytes.toBytes("family");
  this.region = initHRegion(tableName, method, CONF, family);
  final WALFactory wals = new WALFactory(CONF, null, method);
  try {
    Path regiondir = region.getRegionFileSystem().getRegionDir();
    FileSystem fs = region.getRegionFileSystem().getFileSystem();
    byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes();
    long maxSeqId = 3;
    long minSeqId = 0;
    for (long i = minSeqId; i < maxSeqId; i++) {
      Put put = new Put(Bytes.toBytes(i));
      put.addColumn(family, Bytes.toBytes(i), Bytes.toBytes(i));
      region.put(put);
      region.flush(true);
    }
    // This will create a region with 3 files.
    assertEquals(3, region.getStore(family).getStorefilesCount());
    List<Path> storeFiles = new ArrayList<>(3);
    for (StoreFile sf : region.getStore(family).getStorefiles()) {
      storeFiles.add(sf.getPath());
    }
    // Disable compaction completion.
    CONF.setBoolean("hbase.hstore.compaction.complete", false);
    region.compactStores();
    // Ensure that nothing changed.
    assertEquals(3, region.getStore(family).getStorefilesCount());
    // Now find the compacted file, and manually add it to the recovered edits.
    Path tmpDir = new Path(region.getRegionFileSystem().getTempDir(), Bytes.toString(family));
    FileStatus[] files = FSUtils.listStatus(fs, tmpDir);
    String errorMsg = "Expected to find 1 file in the region temp directory "
        + "from the compaction, could not find any";
    assertNotNull(errorMsg, files);
    assertEquals(errorMsg, 1, files.length);
    // Move the file inside the region dir.
    Path newFile = region.getRegionFileSystem().commitStoreFile(Bytes.toString(family),
        files[0].getPath());
    byte[] encodedNameAsBytes = this.region.getRegionInfo().getEncodedNameAsBytes();
    byte[] fakeEncodedNameAsBytes = new byte[encodedNameAsBytes.length];
    for (int i = 0; i < encodedNameAsBytes.length; i++) {
      // Mix the byte array to have a new encoded name.
      fakeEncodedNameAsBytes[i] = (byte) (encodedNameAsBytes[i] + 1);
    }
    CompactionDescriptor compactionDescriptor = ProtobufUtil.toCompactionDescriptor(
        this.region.getRegionInfo(), mismatchedRegionName ? fakeEncodedNameAsBytes : null,
        family, storeFiles, Lists.newArrayList(newFile),
        region.getRegionFileSystem().getStoreDir(Bytes.toString(family)));
    WALUtil.writeCompactionMarker(region.getWAL(), this.region.getReplicationScope(),
        this.region.getRegionInfo(), compactionDescriptor, region.getMVCC());
    Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
    Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", 1000));
    fs.create(recoveredEdits);
    WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits);
    long time = System.nanoTime();
    writer.append(new WAL.Entry(
        new WALKey(regionName, tableName, 10, time, HConstants.DEFAULT_CLUSTER_ID),
        WALEdit.createCompaction(region.getRegionInfo(), compactionDescriptor)));
    writer.close();
    // Close the region now, and reopen again.
    region.getTableDesc();
    region.getRegionInfo();
    region.close();
    try {
      region = HRegion.openHRegion(region, null);
    } catch (WrongRegionException wre) {
      fail("Matching encoded region name should not have produced WrongRegionException");
    }
    // Now check whether we have only one store file, the compacted one.
    Collection<StoreFile> sfs = region.getStore(family).getStorefiles();
    for (StoreFile sf : sfs) {
      LOG.info(sf.getPath());
    }
    if (!mismatchedRegionName) {
      assertEquals(1, region.getStore(family).getStorefilesCount());
    }
    files = FSUtils.listStatus(fs, tmpDir);
    assertTrue("Expected to find 0 files inside " + tmpDir, files == null || files.length == 0);
    for (long i = minSeqId; i < maxSeqId; i++) {
      Get get = new Get(Bytes.toBytes(i));
      Result result = region.get(get);
      byte[] value = result.getValue(family, Bytes.toBytes(i));
      assertArrayEquals(Bytes.toBytes(i), value);
    }
  } finally {
    HBaseTestingUtility.closeRegionAndWAL(this.region);
    this.region = null;
    wals.close();
  }
}
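
The recovered-edits tests in this listing name their files with String.format("%019d", seqId). Zero-padding the sequence id to a fixed width makes lexicographic ordering of file names agree with numeric ordering of sequence ids, so a directory listing yields edits files in replay order. A small runnable sketch of that property (the class name is illustrative):

  // Demonstrates that zero-padded names sort lexicographically in numeric order.
  // Run with assertions enabled (java -ea) for the check to fire.
  public class RecoveredEditsNaming {
    public static void main(String[] args) {
      long[] seqIds = { 9, 1000, 1050 };
      String prev = null;
      for (long seqId : seqIds) {
        String name = String.format("%019d", seqId); // e.g. 0000000000000001000
        assert prev == null || name.compareTo(prev) > 0 : "ordering broken";
        System.out.println(name);
        prev = name;
      }
    }
  }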
Use of org.apache.hadoop.hbase.wal.WALProvider.Writer in project hbase by apache.
From the class TestFailedAppendAndSync, method testLockupAroundBadAssignSync.
/**
 * Reproduce the lockup that happens when we get exceptions appending and syncing.
 * See HBASE-14317.
 * First I need to set up some mocks for Server and RegionServerServices. I also need to
 * set up a dodgy WAL that will throw an exception when we go to append to it.
 */
@Test(timeout = 300000)
public void testLockupAroundBadAssignSync() throws IOException {
  final AtomicLong rolls = new AtomicLong(0);
  // Dodgy WAL. Will throw exceptions when flags are set.
  class DodgyFSLog extends FSHLog {
    volatile boolean throwSyncException = false;
    volatile boolean throwAppendException = false;

    public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf)
        throws IOException {
      super(fs, root, logDir, conf);
    }

    @Override
    public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
      byte[][] regions = super.rollWriter(force);
      rolls.getAndIncrement();
      return regions;
    }

    @Override
    protected Writer createWriterInstance(Path path) throws IOException {
      final Writer w = super.createWriterInstance(path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          w.close();
        }

        @Override
        public void sync() throws IOException {
          if (throwSyncException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...");
          }
          w.sync();
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwAppendException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...");
          }
          w.append(entry);
        }

        @Override
        public long getLength() {
          return w.getLength();
        }
      };
    }
  }

  // Make up mocked server and services.
  Server server = mock(Server.class);
  when(server.getConfiguration()).thenReturn(CONF);
  when(server.isStopped()).thenReturn(false);
  when(server.isAborted()).thenReturn(false);
  RegionServerServices services = mock(RegionServerServices.class);

  // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead
  // with the test.
  FileSystem fs = FileSystem.get(CONF);
  Path rootDir = new Path(dir + getName());
  DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
  LogRoller logRoller = new LogRoller(server, services);
  logRoller.addWAL(dodgyWAL);
  logRoller.start();

  boolean threwOnSync = false;
  boolean threwOnAppend = false;
  boolean threwOnBoth = false;
  HRegion region = initHRegion(tableName, null, null, dodgyWAL);
  try {
    // Get some random bytes.
    byte[] value = Bytes.toBytes(getName());
    try {
      // First get something into memstore.
      Put put = new Put(value);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
      region.put(put);
    } catch (IOException ioe) {
      fail();
    }
    long rollsCount = rolls.get();
    try {
      dodgyWAL.throwAppendException = true;
      dodgyWAL.throwSyncException = false;
      Put put = new Put(value);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
      region.put(put);
    } catch (IOException ioe) {
      threwOnAppend = true;
    }
    while (rollsCount == rolls.get()) Threads.sleep(100);

    rollsCount = rolls.get();
    try {
      dodgyWAL.throwAppendException = true;
      dodgyWAL.throwSyncException = true;
      Put put = new Put(value);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
      region.put(put);
    } catch (IOException ioe) {
      threwOnBoth = true;
    }
    while (rollsCount == rolls.get()) Threads.sleep(100);

    // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be
    // able to just continue. So, there should be no abort at this stage. Verify.
    Mockito.verify(server, Mockito.atLeast(0)).abort(Mockito.anyString(),
        (Throwable) Mockito.anyObject());
    try {
      dodgyWAL.throwAppendException = false;
      dodgyWAL.throwSyncException = true;
      Put put = new Put(value);
      put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
      region.put(put);
    } catch (IOException ioe) {
      threwOnSync = true;
    }
    // Abort is called asynchronously, so hang here until it happens. If it doesn't, we'll
    // time out the whole test. That is fine.
    while (true) {
      try {
        Mockito.verify(server, Mockito.atLeast(1)).abort(Mockito.anyString(),
            (Throwable) Mockito.anyObject());
        break;
      } catch (WantedButNotInvoked t) {
        Threads.sleep(1);
      }
    }
  } finally {
    // To stop logRoller, its server has to say it is stopped.
    Mockito.when(server.isStopped()).thenReturn(true);
    if (logRoller != null) logRoller.close();
    if (region != null) {
      try {
        region.close(true);
      } catch (DroppedSnapshotException e) {
        LOG.info("On way out; expected!", e);
      }
    }
    if (dodgyWAL != null) dodgyWAL.close();
    assertTrue("The regionserver should have thrown an exception", threwOnBoth);
    assertTrue("The regionserver should have thrown an exception", threwOnAppend);
    assertTrue("The regionserver should have thrown an exception", threwOnSync);
  }
}
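
The abort check at the end of testLockupAroundBadAssignSync polls Mockito.verify in a loop because the abort happens on another thread. A generic form of that poll-until-true idiom, with a deadline so a hang becomes a clear failure, might look like the following; the class and method names are assumptions for illustration, not an HBase utility.

  import java.util.concurrent.TimeUnit;
  import java.util.function.BooleanSupplier;

  // Spin on a condition that becomes true asynchronously; fail loudly past the deadline
  // instead of hanging the whole test until its timeout fires.
  final class Await {
    static void until(BooleanSupplier condition, long timeout, TimeUnit unit)
        throws InterruptedException {
      long deadline = System.nanoTime() + unit.toNanos(timeout);
      while (!condition.getAsBoolean()) {
        if (System.nanoTime() > deadline) {
          throw new AssertionError("Condition not met within " + timeout + " " + unit);
        }
        Thread.sleep(1);
      }
    }
  }

Usage would be something like Await.until(() -> aborted.get(), 30, TimeUnit.SECONDS); the test above instead relies on its @Test(timeout = 300000) to bound the wait, which works but reports a generic timeout rather than the unmet condition.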
Use of org.apache.hadoop.hbase.wal.WALProvider.Writer in project hbase by apache.
From the class TestHRegion, method testSkipRecoveredEditsReplay.
@Test
public void testSkipRecoveredEditsReplay() throws Exception {
  byte[] family = Bytes.toBytes("family");
  this.region = initHRegion(tableName, method, CONF, family);
  final WALFactory wals = new WALFactory(CONF, null, method);
  try {
    Path regiondir = region.getRegionFileSystem().getRegionDir();
    FileSystem fs = region.getRegionFileSystem().getFileSystem();
    byte[] regionName = region.getRegionInfo().getEncodedNameAsBytes();
    Path recoveredEditsDir = WALSplitter.getRegionDirRecoveredEditsDir(regiondir);
    long maxSeqId = 1050;
    long minSeqId = 1000;
    for (long i = minSeqId; i <= maxSeqId; i += 10) {
      Path recoveredEdits = new Path(recoveredEditsDir, String.format("%019d", i));
      fs.create(recoveredEdits);
      WALProvider.Writer writer = wals.createRecoveredEditsWriter(fs, recoveredEdits);
      long time = System.nanoTime();
      WALEdit edit = new WALEdit();
      edit.add(new KeyValue(row, family, Bytes.toBytes(i), time, KeyValue.Type.Put,
          Bytes.toBytes(i)));
      writer.append(new WAL.Entry(
          new WALKey(regionName, tableName, i, time, HConstants.DEFAULT_CLUSTER_ID), edit));
      writer.close();
    }
    MonitoredTask status = TaskMonitor.get().createStatus(method);
    Map<byte[], Long> maxSeqIdInStores = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    for (Store store : region.getStores()) {
      maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(), minSeqId - 1);
    }
    long seqId = region.replayRecoveredEditsIfAny(regiondir, maxSeqIdInStores, null, status);
    assertEquals(maxSeqId, seqId);
    region.getMVCC().advanceTo(seqId);
    Get get = new Get(row);
    Result result = region.get(get);
    for (long i = minSeqId; i <= maxSeqId; i += 10) {
      List<Cell> kvs = result.getColumnCells(family, Bytes.toBytes(i));
      assertEquals(1, kvs.size());
      assertArrayEquals(Bytes.toBytes(i), CellUtil.cloneValue(kvs.get(0)));
    }
  } finally {
    HBaseTestingUtility.closeRegionAndWAL(this.region);
    this.region = null;
    wals.close();
  }
}
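
The replay machinery exercised by testSkipRecoveredEditsReplay only applies an edit when its sequence id is greater than the store's persisted maximum; here the test seeds maxSeqIdInStores with minSeqId - 1, so every edit replays. A minimal model of that skip decision (String keys instead of byte[] for simplicity; all names are illustrative, not HBase internals):

  import java.util.Map;
  import java.util.TreeMap;

  // An edit replays only if its sequence id is newer than what the store already persisted.
  class ReplayFilter {
    static boolean shouldReplay(long editSeqId, String family,
        Map<String, Long> maxSeqIdInStores) {
      Long maxSeqId = maxSeqIdInStores.get(family);
      return maxSeqId == null || editSeqId > maxSeqId;
    }

    public static void main(String[] args) {
      Map<String, Long> maxSeqIdInStores = new TreeMap<>();
      maxSeqIdInStores.put("family", 999L); // minSeqId - 1, as in the test
      System.out.println(shouldReplay(1000, "family", maxSeqIdInStores)); // true: replay
      System.out.println(shouldReplay(900, "family", maxSeqIdInStores));  // false: skip
    }
  }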