Search in sources :

Example 1 with DamagedWALException

use of org.apache.hadoop.hbase.regionserver.wal.DamagedWALException in project hbase by apache.

the class TestWALLockup method testLockup16960.

  /**
   * Reproduces the lockup that happens when no further syncs arrive after an
   * append fails, which leaves an isolated sync waiting forever. See
   * HBASE-16960. If the code below is broken, this test times out because it
   * is locked up.
   * <p/>
   * Steps to reproduce:<br/>
   * 1. Trigger a server abort through dodgyWAL1<br/>
   * 2. Add a {@link DummyWALActionsListener} to dodgyWAL2 to make the ring buffer
   * event handler thread sleep for a while, thus keeping {@code endOfBatch}
   * false<br/>
   * 3. Publish a sync and then an append that throws an exception, then check
   * whether the sync returns
   */
// Test (JUnit timeout value 20000) that builds two DodgyFSLog instances: dodgyWAL1 is
// switched into throwing mode before an append, and dodgyWAL2 gets a
// DummyWALActionsListener, two appends and a thread named "Sync".
// NOTE(review): this listing appears to have lost closing braces, logging calls and
// several statement bodies (empty try blocks, dangling string literals). The comments
// below describe only what is still visible; confirm against the original file.
@Test(timeout = 20000)
public void testLockup16960() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {

        // Set this when want the WAL to start throwing exceptions.
        volatile boolean throwException = false;

        // Passes all arguments straight through to the FSHLog constructor.
        public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
            super(fs, root, logDir, conf);

        // Wraps the writer created by the superclass in an anonymous Writer whose
        // sync() and append() throw a fake IOException while throwException is set.
        protected Writer createWriterInstance(Path path) throws IOException {
            final Writer w = super.createWriterInstance(path);
            return new Writer() {

                // NOTE(review): body not visible in this listing.
                public void close() throws IOException {

                // Only the throwing path is visible here; the non-throwing path is missing.
                public void sync() throws IOException {
                    if (throwException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");

                // Only the throwing path is visible here; the non-throwing path is missing.
                public void append(Entry entry) throws IOException {
                    if (throwException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");

                // Reports the length of the wrapped writer.
                public long getLength() {
                    return w.getLength();

        // Fails the writer replacement with FailedLogCloseException while throwException
        // is set; otherwise delegates to the superclass and returns its result.
        protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter) throws IOException {
            if (throwException) {
                throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
            // The initial 0L is overwritten by the very next statement.
            long oldFileLen = 0L;
            oldFileLen = super.doReplaceWriter(oldPath, newPath, nextWriter);
            return oldFileLen;
    // Mocked up server and regionserver services. Needed below.
    Server server = new DummyServer(CONF, ServerName.valueOf("", 1234, 1L).toString());
    RegionServerServices services = Mockito.mock(RegionServerServices.class);
    // Sets the sync timeout key to 10000 (units not shown here; presumably milliseconds).
    CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);
    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
    // go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    // Each WAL gets its own root directory and log dir name derived from getName();
    // the second one appends "2" to both.
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);
    Path rootDir2 = new Path(dir + getName() + "2");
    final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
    // Add a listener to force ringbuffer event handler sleep for a while
    dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());
    // I need a log roller running.
    // NOTE(review): no call that starts logRoller or hands it a WAL is visible in this listing.
    LogRoller logRoller = new LogRoller(server, services);
    // There is no 'stop' once a logRoller is running.. it just dies.
    // Now get a region and start adding in edits.
    // NOTE(review): tableName is not declared in the visible code; htd (built from
    // META_TABLE_NAME) is used below for htd.getTableName() when building the first WALKey.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
    byte[] bytes = Bytes.toBytes(getName());
    // Scope map with a single entry: COLUMN_FAMILY_BYTES -> 0.
    NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
        Put put = new Put(bytes);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
        WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(), System.currentTimeMillis(), mvcc, scopes);
        WALEdit edit = new WALEdit();
        // NOTE(review): the local variable shares its name with the CellScanner type.
        CellScanner CellScanner = put.cellScanner();
        // NOTE(review): the trailing string literal on the next line looks like the remnant
        // of a stripped logging call; other statements may also be missing here.
        edit.add(CellScanner.current());"SET throwing of exception on append");
        dodgyWAL1.throwException = true;
        // This append provokes a WAL roll request
        dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
        boolean exception = false;
        // NOTE(review): the try body is empty in this listing, so nothing visible can set
        // exception to true; presumably a sync call on dodgyWAL1 was lost. Confirm.
        try {
        } catch (Exception e) {
            exception = true;
        assertTrue("Did not get sync exception", exception);
        // cause server abort.
        // NOTE(review): the statements that abort the server and wait for the roller are not visible.
        try {
            // wait LogRoller exit.
        } catch (InterruptedException e) {
        // NOTE(review): no countDown() on this latch is visible; the loop near the end
        // polls latch.getCount().
        final CountDownLatch latch = new CountDownLatch(1);
        // make RingBufferEventHandler sleep 1s, so the following sync
        // endOfBatch=false
        key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), TableName.valueOf("sleep"), System.currentTimeMillis(), mvcc, scopes);
        dodgyWAL2.append(region.getRegionInfo(), key, edit, true);
        // NOTE(review): the thread's run() body and any t.start() call are not visible;
        // the dangling string literals below look like remnants of stripped logging calls.
        Thread t = new Thread("Sync") {

            public void run() {
                try {
                } catch (IOException e) {
          "In sync", e);
      "Sync exiting");

        try {
            // make sure sync have published.
        } catch (InterruptedException e1) {
        // make append throw DamagedWALException
        key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), TableName.valueOf("DamagedWALException"), System.currentTimeMillis(), mvcc, scopes);
        dodgyWAL2.append(region.getRegionInfo(), key, edit, true);
        // Loops while the latch count is still above zero; the loop body is not visible.
        while (latch.getCount() > 0) {
    } finally {
        // Cleanup section: null-checks for logRoller, region and both WALs are visible,
        // but the statements they guard are missing from this listing.
        if (logRoller != null) {
        try {
            if (region != null) {
            if (dodgyWAL1 != null) {
            if (dodgyWAL2 != null) {
        } catch (Exception e) {
  "On way out", e);
Also used : Configuration(org.apache.hadoop.conf.Configuration) Server(org.apache.hadoop.hbase.Server) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) CellScanner(org.apache.hadoop.hbase.CellScanner) FSHLog(org.apache.hadoop.hbase.regionserver.wal.FSHLog) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) FileSystem(org.apache.hadoop.fs.FileSystem) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) TreeMap(java.util.TreeMap) CountDownLatch(java.util.concurrent.CountDownLatch) Put(org.apache.hadoop.hbase.client.Put) DamagedWALException(org.apache.hadoop.hbase.regionserver.wal.DamagedWALException) IOException(java.io.IOException) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Writer(org.apache.hadoop.hbase.wal.WALProvider.Writer) Test(org.junit.Test)


IOException (java.io.IOException)1 TreeMap (java.util.TreeMap)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 CellScanner (org.apache.hadoop.hbase.CellScanner)1 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)1 Server (org.apache.hadoop.hbase.Server)1 Put (org.apache.hadoop.hbase.client.Put)1 DamagedWALException (org.apache.hadoop.hbase.regionserver.wal.DamagedWALException)1 FSHLog (org.apache.hadoop.hbase.regionserver.wal.FSHLog)1 FailedLogCloseException (org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException)1 WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit)1 WALKey (org.apache.hadoop.hbase.wal.WALKey)1 Writer (org.apache.hadoop.hbase.wal.WALProvider.Writer)1 Test (org.junit.Test)1