use of org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException in project hbase by apache.
the class LogRoller method run.
@Override
public void run() {
while (running) {
long now = System.currentTimeMillis();
boolean periodic = false;
if (!rollLog.get()) {
periodic = (now - this.lastrolltime) > this.rollperiod;
if (!periodic) {
synchronized (rollLog) {
try {
if (!rollLog.get()) {
rollLog.wait(this.threadWakeFrequency);
}
} catch (InterruptedException e) {
// Fall through
}
}
continue;
}
// Time for periodic roll
if (LOG.isDebugEnabled()) {
LOG.debug("Wal roll period " + this.rollperiod + "ms elapsed");
}
} else if (LOG.isDebugEnabled()) {
LOG.debug("WAL roll requested");
}
// FindBugs UL_UNRELEASED_LOCK_EXCEPTION_PATH
rollLock.lock();
try {
this.lastrolltime = now;
for (Entry<WAL, Boolean> entry : walNeedsRoll.entrySet()) {
final WAL wal = entry.getKey();
// Force the roll if the logroll.period is elapsed or if a roll was requested.
// The returned value is an array of actual region names.
final byte[][] regionsToFlush = wal.rollWriter(periodic || entry.getValue().booleanValue());
walNeedsRoll.put(wal, Boolean.FALSE);
if (regionsToFlush != null) {
for (byte[] r : regionsToFlush) scheduleFlush(r);
}
}
} catch (FailedLogCloseException e) {
server.abort("Failed log close in log roller", e);
} catch (java.net.ConnectException e) {
server.abort("Failed log close in log roller", e);
} catch (IOException ex) {
// Abort if we get here. We probably won't recover an IOE. HBASE-1132
server.abort("IOE in log roller", ex instanceof RemoteException ? ((RemoteException) ex).unwrapRemoteException() : ex);
} catch (Exception ex) {
LOG.error("Log rolling failed", ex);
server.abort("Log rolling failed", ex);
} finally {
try {
rollLog.set(false);
} finally {
rollLock.unlock();
}
}
}
LOG.info("LogRoller exiting.");
}
use of org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException in project hbase by apache.
the class TestWALLockup method testLockup16960.
/**
* Reproduce locking up that happens when there's no further syncs after
* append fails, and causing an isolated sync then infinite wait. See
* HBASE-16960. If below is broken, we will see this test timeout because it
* is locked up.
* <p/>
* Steps for reproduce:<br/>
* 1. Trigger server abort through dodgyWAL1<br/>
* 2. Add a {@link DummyWALActionsListener} to dodgyWAL2 to cause ringbuffer
* event handler thread sleep for a while thus keeping {@code endOfBatch}
* false<br/>
* 3. Publish a sync then an append which will throw exception, check whether
* the sync could return
*/
@Test(timeout = 20000)
public void testLockup16960() throws IOException {
// A WAL that we can have throw exceptions when a flag is set.
class DodgyFSLog extends FSHLog {
// Set this when want the WAL to start throwing exceptions.
volatile boolean throwException = false;
public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
super(fs, root, logDir, conf);
}
@Override
protected Writer createWriterInstance(Path path) throws IOException {
final Writer w = super.createWriterInstance(path);
return new Writer() {
@Override
public void close() throws IOException {
w.close();
}
@Override
public void sync() throws IOException {
if (throwException) {
throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
}
w.sync();
}
@Override
public void append(Entry entry) throws IOException {
if (throwException) {
throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
}
w.append(entry);
}
@Override
public long getLength() {
return w.getLength();
}
};
}
@Override
protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter) throws IOException {
if (throwException) {
throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
}
long oldFileLen = 0L;
oldFileLen = super.doReplaceWriter(oldPath, newPath, nextWriter);
return oldFileLen;
}
}
// Mocked up server and regionserver services. Needed below.
Server server = new DummyServer(CONF, ServerName.valueOf("hostname1.example.org", 1234, 1L).toString());
RegionServerServices services = Mockito.mock(RegionServerServices.class);
CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);
// OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
// go ahead with test.
FileSystem fs = FileSystem.get(CONF);
Path rootDir = new Path(dir + getName());
DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);
Path rootDir2 = new Path(dir + getName() + "2");
final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
// Add a listener to force ringbuffer event handler sleep for a while
dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());
// I need a log roller running.
LogRoller logRoller = new LogRoller(server, services);
logRoller.addWAL(dodgyWAL1);
logRoller.addWAL(dodgyWAL2);
// There is no 'stop' once a logRoller is running.. it just dies.
logRoller.start();
// Now get a region and start adding in edits.
HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
byte[] bytes = Bytes.toBytes(getName());
NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
scopes.put(COLUMN_FAMILY_BYTES, 0);
MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
try {
Put put = new Put(bytes);
put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(), System.currentTimeMillis(), mvcc, scopes);
WALEdit edit = new WALEdit();
CellScanner CellScanner = put.cellScanner();
assertTrue(CellScanner.advance());
edit.add(CellScanner.current());
LOG.info("SET throwing of exception on append");
dodgyWAL1.throwException = true;
// This append provokes a WAL roll request
dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
boolean exception = false;
try {
dodgyWAL1.sync();
} catch (Exception e) {
exception = true;
}
assertTrue("Did not get sync exception", exception);
// cause server abort.
try {
// wait LogRoller exit.
Thread.sleep(50);
} catch (InterruptedException e) {
e.printStackTrace();
}
final CountDownLatch latch = new CountDownLatch(1);
// make RingBufferEventHandler sleep 1s, so the following sync
// endOfBatch=false
key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), TableName.valueOf("sleep"), System.currentTimeMillis(), mvcc, scopes);
dodgyWAL2.append(region.getRegionInfo(), key, edit, true);
Thread t = new Thread("Sync") {
public void run() {
try {
dodgyWAL2.sync();
} catch (IOException e) {
LOG.info("In sync", e);
}
latch.countDown();
LOG.info("Sync exiting");
}
;
};
t.setDaemon(true);
t.start();
try {
// make sure sync have published.
Thread.sleep(100);
} catch (InterruptedException e1) {
e1.printStackTrace();
}
// make append throw DamagedWALException
key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), TableName.valueOf("DamagedWALException"), System.currentTimeMillis(), mvcc, scopes);
dodgyWAL2.append(region.getRegionInfo(), key, edit, true);
while (latch.getCount() > 0) {
Threads.sleep(100);
}
assertTrue(server.isAborted());
} finally {
if (logRoller != null) {
logRoller.close();
}
try {
if (region != null) {
region.close();
}
if (dodgyWAL1 != null) {
dodgyWAL1.close();
}
if (dodgyWAL2 != null) {
dodgyWAL2.close();
}
} catch (Exception e) {
LOG.info("On way out", e);
}
}
}
use of org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException in project hbase by apache.
the class AbstractWALRoller method run.
@Override
public void run() {
while (running) {
long now = EnvironmentEdgeManager.currentTime();
checkLowReplication(now);
synchronized (this) {
if (wals.values().stream().noneMatch(rc -> rc.needsRoll(now))) {
try {
wait(this.threadWakeFrequency);
} catch (InterruptedException e) {
// restore the interrupt state
Thread.currentThread().interrupt();
}
// several WALs, and also check whether we should quit.
continue;
}
}
try {
for (Iterator<Entry<WAL, RollController>> iter = wals.entrySet().iterator(); iter.hasNext(); ) {
Entry<WAL, RollController> entry = iter.next();
WAL wal = entry.getKey();
RollController controller = entry.getValue();
if (controller.isRollRequested()) {
// WAL roll requested, fall through
LOG.debug("WAL {} roll requested", wal);
} else if (controller.needsPeriodicRoll(now)) {
// Time for periodic roll, fall through
LOG.debug("WAL {} roll period {} ms elapsed", wal, this.rollPeriod);
} else {
continue;
}
try {
// Force the roll if the logroll.period is elapsed or if a roll was requested.
// The returned value is an collection of actual region and family names.
Map<byte[], List<byte[]>> regionsToFlush = controller.rollWal(now);
if (regionsToFlush != null) {
for (Map.Entry<byte[], List<byte[]>> r : regionsToFlush.entrySet()) {
scheduleFlush(Bytes.toString(r.getKey()), r.getValue());
}
}
} catch (WALClosedException e) {
LOG.warn("WAL has been closed. Skipping rolling of writer and just remove it", e);
iter.remove();
}
}
} catch (FailedLogCloseException | ConnectException e) {
abort("Failed log close in log roller", e);
} catch (IOException ex) {
// Abort if we get here. We probably won't recover an IOE. HBASE-1132
abort("IOE in log roller", ex instanceof RemoteException ? ((RemoteException) ex).unwrapRemoteException() : ex);
} catch (Exception ex) {
LOG.error("Log rolling failed", ex);
abort("Log rolling failed", ex);
}
}
LOG.info("LogRoller exiting.");
}
Aggregations