Use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.
The class RSRpcServices, method flushRegion.
/**
* Flush a region on the region server.
*
* @param controller the RPC controller
* @param request the request
* @throws ServiceException
*/
@Override
@QosPriority(priority = HConstants.ADMIN_QOS)
public FlushRegionResponse flushRegion(final RpcController controller, final FlushRegionRequest request) throws ServiceException {
try {
checkOpen();
requestCount.increment();
Region region = getRegion(request.getRegion());
LOG.info("Flushing " + region.getRegionInfo().getRegionNameAsString());
boolean shouldFlush = true;
if (request.hasIfOlderThanTs()) {
shouldFlush = region.getEarliestFlushTimeForAllStores() < request.getIfOlderThanTs();
}
FlushRegionResponse.Builder builder = FlushRegionResponse.newBuilder();
if (shouldFlush) {
boolean writeFlushWalMarker = request.hasWriteFlushWalMarker() ? request.getWriteFlushWalMarker() : false;
// Go behind the curtain so we can manage writing of the flush WAL marker
HRegion.FlushResultImpl flushResult = (HRegion.FlushResultImpl) ((HRegion) region).flushcache(true, writeFlushWalMarker);
boolean compactionNeeded = flushResult.isCompactionNeeded();
if (compactionNeeded) {
regionServer.compactSplitThread.requestSystemCompaction(region, "Compaction through user triggered flush");
}
builder.setFlushed(flushResult.isFlushSucceeded());
builder.setWroteFlushWalMarker(flushResult.wroteFlushWalMarker);
}
builder.setLastFlushTime(region.getEarliestFlushTimeForAllStores());
return builder.build();
} catch (DroppedSnapshotException ex) {
// Cache flush can fail in a few places. If it fails in a critical
// section, we get a DroppedSnapshotException and a replay of wal
// is required. Currently the only way to do this is a restart of
// the server.
regionServer.abort("Replay of WAL required. Forcing server shutdown", ex);
throw new ServiceException(ex);
} catch (IOException ie) {
throw new ServiceException(ie);
}
}
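For orientation, here is a minimal client-side sketch of how a flush request typically reaches this RPC, assuming the standard HBase client Admin API; the class name and table name are made up for illustration. A flush issued through Admin is routed to RSRpcServices.flushRegion on each region server hosting a region of the table.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
public class FlushFromClientSketch {
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
try (Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin()) {
// Flush every region of the (hypothetical) table. If the flush fails in a
// critical section, the hosting region server aborts itself with a
// DroppedSnapshotException, as shown in flushRegion above.
admin.flush(TableName.valueOf("example_table"));
}
}
}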
Use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.
The class MemStoreFlusher, method flushRegion.
/**
* Flush a region.
* @param region Region to flush.
* @param emergencyFlush Set if we are being force flushed. If true, the region
* needs to be removed from the flush queue; if false, we were called from the
* main flusher run loop and got the entry to flush by polling the flush queue
* (which already removed it).
* @param forceFlushAllStores whether we want to flush all stores.
* @return true if the region was successfully flushed, false otherwise. If
* false, there will be accompanying log messages explaining why the region was
* not flushed.
*/
private boolean flushRegion(final Region region, final boolean emergencyFlush, boolean forceFlushAllStores) {
synchronized (this.regionsInQueue) {
FlushRegionEntry fqe = this.regionsInQueue.remove(region);
if (fqe != null && emergencyFlush) {
// Need to remove the region from the delay queue. When NOT an
// emergencyFlush, the entry was already removed via flushQueue.poll.
flushQueue.remove(fqe);
}
}
lock.readLock().lock();
try {
notifyFlushRequest(region, emergencyFlush);
FlushResult flushResult = region.flush(forceFlushAllStores);
boolean shouldCompact = flushResult.isCompactionNeeded();
// We just want to check the size
boolean shouldSplit = ((HRegion) region).checkSplit() != null;
if (shouldSplit) {
this.server.compactSplitThread.requestSplit(region);
} else if (shouldCompact) {
server.compactSplitThread.requestSystemCompaction(region, Thread.currentThread().getName());
}
} catch (DroppedSnapshotException ex) {
// Cache flush can fail in a few places. If it fails in a critical
// section, we get a DroppedSnapshotException and a replay of wal
// is required. Currently the only way to do this is a restart of
// the server. Abort because hdfs is probably bad (HBASE-644 is a case
// where hdfs was bad but passed the hdfs check).
server.abort("Replay of WAL required. Forcing server shutdown", ex);
return false;
} catch (IOException ex) {
ex = ex instanceof RemoteException ? ((RemoteException) ex).unwrapRemoteException() : ex;
LOG.error("Cache flush failed" + (region != null ? (" for region " + Bytes.toStringBinary(region.getRegionInfo().getRegionName())) : ""), ex);
if (!server.checkFileSystem()) {
return false;
}
} finally {
lock.readLock().unlock();
wakeUpIfBlocking();
}
return true;
}
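The IOException branch above unwraps RemoteException, a common Hadoop RPC idiom; here is a self-contained sketch of just that idiom (the class name is illustrative).
import java.io.IOException;
import org.apache.hadoop.ipc.RemoteException;
public final class UnwrapSketch {
// RPC layers wrap server-side IOExceptions in RemoteException; unwrapping
// recovers the original exception type when it is a known, loadable class.
static IOException unwrap(IOException ex) {
return ex instanceof RemoteException ? ((RemoteException) ex).unwrapRemoteException() : ex;
}
}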
Use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.
The class TestSplitWalDataLoss, method test.
@Test
public void test() throws IOException, InterruptedException {
final HRegionServer rs = testUtil.getRSForFirstRegionInTable(tableName);
final HRegion region = (HRegion) rs.getOnlineRegions(tableName).get(0);
HRegion spiedRegion = spy(region);
final MutableBoolean flushed = new MutableBoolean(false);
final MutableBoolean reported = new MutableBoolean(false);
doAnswer(new Answer<FlushResult>() {
@Override
public FlushResult answer(InvocationOnMock invocation) throws Throwable {
synchronized (flushed) {
flushed.setValue(true);
flushed.notifyAll();
}
synchronized (reported) {
while (!reported.booleanValue()) {
reported.wait();
}
}
rs.getWAL(region.getRegionInfo()).abortCacheFlush(region.getRegionInfo().getEncodedNameAsBytes());
throw new DroppedSnapshotException("testcase");
}
}).when(spiedRegion).internalFlushCacheAndCommit(Matchers.<WAL>any(), Matchers.<MonitoredTask>any(), Matchers.<PrepareFlushResult>any(), Matchers.<Collection<Store>>any());
// Find region key; don't pick up key for hbase:meta by mistake.
String key = null;
for (Map.Entry<String, Region> entry : rs.onlineRegions.entrySet()) {
if (entry.getValue().getRegionInfo().getTable().equals(this.tableName)) {
key = entry.getKey();
break;
}
}
rs.onlineRegions.put(key, spiedRegion);
Connection conn = testUtil.getConnection();
try (Table table = conn.getTable(tableName)) {
table.put(new Put(Bytes.toBytes("row0")).addColumn(family, qualifier, Bytes.toBytes("val0")));
}
long oldestSeqIdOfStore = region.getOldestSeqIdOfStore(family);
LOG.info("CHANGE OLDEST " + oldestSeqIdOfStore);
assertTrue(oldestSeqIdOfStore > HConstants.NO_SEQNUM);
rs.cacheFlusher.requestFlush(spiedRegion, false);
synchronized (flushed) {
while (!flushed.booleanValue()) {
flushed.wait();
}
}
try (Table table = conn.getTable(tableName)) {
table.put(new Put(Bytes.toBytes("row1")).addColumn(family, qualifier, Bytes.toBytes("val1")));
}
long now = EnvironmentEdgeManager.currentTime();
rs.tryRegionServerReport(now - 500, now);
synchronized (reported) {
reported.setValue(true);
reported.notifyAll();
}
while (testUtil.getRSForFirstRegionInTable(tableName) == rs) {
Thread.sleep(100);
}
try (Table table = conn.getTable(tableName)) {
Result result = table.get(new Get(Bytes.toBytes("row0")));
assertArrayEquals(Bytes.toBytes("val0"), result.getValue(family, qualifier));
}
}
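The test above hinges on Mockito's spy plus doAnswer: the spy keeps the real HRegion behavior except for internalFlushCacheAndCommit, which is replaced with an Answer that aborts the cache flush and throws DroppedSnapshotException. A self-contained sketch of that spy/doAnswer pattern on a plain list follows; the names and the injected exception are illustrative, not from HBase.
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;
import java.util.ArrayList;
import java.util.List;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
public class SpyDoAnswerSketch {
public static void main(String[] args) {
List<String> spied = spy(new ArrayList<String>());
// Replace one method with custom logic; every other method keeps the real behavior.
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocation) throws Throwable {
throw new IllegalStateException("injected failure");
}
}).when(spied).clear();
spied.add("kept"); // real ArrayList behavior
try {
spied.clear(); // stubbed behavior: throws
} catch (IllegalStateException expected) {
System.out.println("caught: " + expected.getMessage());
}
}
}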
Use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.
The class TestHRegion, method testCloseWithFailingFlush.
@Test
public void testCloseWithFailingFlush() throws Exception {
final Configuration conf = HBaseConfiguration.create(CONF);
final WAL wal = createWALCompatibleWithFaultyFileSystem(method, conf, tableName);
// Only retry once.
conf.setInt("hbase.hstore.flush.retries.number", 1);
final User user = User.createUserForTesting(conf, this.method, new String[] { "foo" });
// Inject our faulty LocalFileSystem
conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class);
user.runAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
// Make sure it worked (above is sensitive to caching details in hadoop core)
FileSystem fs = FileSystem.get(conf);
Assert.assertEquals(FaultyFileSystem.class, fs.getClass());
FaultyFileSystem ffs = (FaultyFileSystem) fs;
HRegion region = null;
try {
// Initialize region
region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
long size = region.getMemstoreSize();
Assert.assertEquals(0, size);
// Put one item into memstore. Measure the size of one item in memstore.
Put p1 = new Put(row);
p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[]) null));
region.put(p1);
// Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only.
Store store = region.getStore(COLUMN_FAMILY_BYTES);
StoreFlushContext storeFlushCtx = store.createFlushContext(12345);
storeFlushCtx.prepare();
// Now add two entries to the foreground memstore.
Put p2 = new Put(row);
p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[]) null));
p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[]) null));
region.put(p2);
// Now try close on top of a failing flush.
region.close();
fail();
} catch (DroppedSnapshotException dse) {
// Expected
LOG.info("Expected DroppedSnapshotException");
} finally {
// Make it so all writes succeed from here on out so we can close cleanly
ffs.fault.set(false);
HBaseTestingUtility.closeRegionAndWAL(region);
}
return null;
}
});
FileSystem.closeAllForUGI(user.getUGI());
}
Use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.
The class TestFailedAppendAndSync, method testLockupAroundBadAssignSync.
/**
* Reproduce the lockup that happens when we get exceptions appending and syncing.
* See HBASE-14317.
* First I need to set up some mocks for Server and RegionServerServices. I also need to
* set up a dodgy WAL that will throw an exception when we go to append to it.
*/
@Test(timeout = 300000)
public void testLockupAroundBadAssignSync() throws IOException {
final AtomicLong rolls = new AtomicLong(0);
// Dodgy WAL. Will throw exceptions when flags are set.
class DodgyFSLog extends FSHLog {
volatile boolean throwSyncException = false;
volatile boolean throwAppendException = false;
public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
super(fs, root, logDir, conf);
}
@Override
public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
byte[][] regions = super.rollWriter(force);
rolls.getAndIncrement();
return regions;
}
@Override
protected Writer createWriterInstance(Path path) throws IOException {
final Writer w = super.createWriterInstance(path);
return new Writer() {
@Override
public void close() throws IOException {
w.close();
}
@Override
public void sync() throws IOException {
if (throwSyncException) {
throw new IOException("FAKE! Failed to replace a bad datanode...");
}
w.sync();
}
@Override
public void append(Entry entry) throws IOException {
if (throwAppendException) {
throw new IOException("FAKE! Failed to replace a bad datanode...");
}
w.append(entry);
}
@Override
public long getLength() {
return w.getLength();
}
};
}
}
// Make up mocked server and services.
Server server = mock(Server.class);
when(server.getConfiguration()).thenReturn(CONF);
when(server.isStopped()).thenReturn(false);
when(server.isAborted()).thenReturn(false);
RegionServerServices services = mock(RegionServerServices.class);
// OK. Now I have my mocked-up Server and RegionServerServices and my dodgy WAL;
// go ahead with the test.
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + getName());
DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
LogRoller logRoller = new LogRoller(server, services);
logRoller.addWAL(dodgyWAL);
logRoller.start();
boolean threwOnSync = false;
boolean threwOnAppend = false;
boolean threwOnBoth = false;
HRegion region = initHRegion(tableName, null, null, dodgyWAL);
try {
// Get some random bytes.
byte[] value = Bytes.toBytes(getName());
try {
// First get something into memstore
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
region.put(put);
} catch (IOException ioe) {
fail();
}
long rollsCount = rolls.get();
try {
dodgyWAL.throwAppendException = true;
dodgyWAL.throwSyncException = false;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
region.put(put);
} catch (IOException ioe) {
threwOnAppend = true;
}
while (rollsCount == rolls.get()) Threads.sleep(100);
rollsCount = rolls.get();
try {
dodgyWAL.throwAppendException = true;
dodgyWAL.throwSyncException = true;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
region.put(put);
} catch (IOException ioe) {
threwOnBoth = true;
}
while (rollsCount == rolls.get()) Threads.sleep(100);
// Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
// to just continue.
// So, there should be no abort at this stage. Verify.
Mockito.verify(server, Mockito.atLeast(0)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
try {
dodgyWAL.throwAppendException = false;
dodgyWAL.throwSyncException = true;
Put put = new Put(value);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
region.put(put);
} catch (IOException ioe) {
threwOnSync = true;
}
// An append that made it into the WAL but then failed to sync is a server abort
// condition. It takes a while for abort to be called, so hang here until it
// happens. If it doesn't, we'll time out the whole test. That is fine.
while (true) {
try {
Mockito.verify(server, Mockito.atLeast(1)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
break;
} catch (WantedButNotInvoked t) {
Threads.sleep(1);
}
}
} finally {
// To stop logRoller, its server has to say it is stopped.
Mockito.when(server.isStopped()).thenReturn(true);
if (logRoller != null)
logRoller.close();
if (region != null) {
try {
region.close(true);
} catch (DroppedSnapshotException e) {
LOG.info("On way out; expected!", e);
}
}
if (dodgyWAL != null)
dodgyWAL.close();
assertTrue("The regionserver should have thrown an exception", threwOnBoth);
assertTrue("The regionserver should have thrown an exception", threwOnAppend);
assertTrue("The regionserver should have thrown an exception", threwOnSync);
}
}