
Example 1 with FSHLog

Use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.

The class TestWALLockup, method testLockupWhenSyncInMiddleOfZigZagSetup.

/**
   * Reproduce the lockup that happens when an inopportune sync arrives during setup of the
   * zigzag latch wait. See HBASE-14317. If the fix is broken, this test will time out because
   * it locks up.
   * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to
   * set up a dodgy WAL that will throw an exception when we go to append to it.
   */
@Test(timeout = 20000)
public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {

        // Set this when want the WAL to start throwing exceptions.
        volatile boolean throwException = false;

        // Latch to hold up processing until after another operation has had time to run.
        CountDownLatch latch = new CountDownLatch(1);

        public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
            super(fs, root, logDir, conf);
        }

        @Override
        protected void afterCreatingZigZagLatch() {
            // If throwException is set, the append will have thrown, forcing a WAL roll, and we
            // end up here. Hold up processing so a sync can interleave with us; the goal is to
            // catch the lock up we've seen in production.
            if (throwException) {
                try {
                    LOG.info("LATCHED");
                    // The latch may never be counted down, e.g. because all WALs have rolled;
                    // in that case, just give up on the test.
                    if (!this.latch.await(5, TimeUnit.SECONDS)) {
                        LOG.warn("GIVE UP! Failed waiting on latch...Test is ABORTED!");
                    }
                } catch (InterruptedException e) {
                    // Not expected during the test; note it and fall through.
                    e.printStackTrace();
                }
            }
        }

        @Override
        protected void beforeWaitOnSafePoint() {
            if (throwException) {
                LOG.info("COUNTDOWN");
                // Don't count down the latch until someone is waiting on it; otherwise the
                // waiter stays stuck and the test won't go down.
                while (this.latch.getCount() <= 0) Threads.sleep(1);
                this.latch.countDown();
            }
        }

        @Override
        protected Writer createWriterInstance(Path path) throws IOException {
            final Writer w = super.createWriterInstance(path);
            return new Writer() {

                @Override
                public void close() throws IOException {
                    w.close();
                }

                @Override
                public void sync() throws IOException {
                    if (throwException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
                    }
                    w.sync();
                }

                @Override
                public void append(Entry entry) throws IOException {
                    if (throwException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
                    }
                    w.append(entry);
                }

                @Override
                public long getLength() {
                    return w.getLength();
                }
            };
        }
    }
    // Mocked up server and regionserver services. Needed below.
    Server server = Mockito.mock(Server.class);
    Mockito.when(server.getConfiguration()).thenReturn(CONF);
    Mockito.when(server.isStopped()).thenReturn(false);
    Mockito.when(server.isAborted()).thenReturn(false);
    RegionServerServices services = Mockito.mock(RegionServerServices.class);
    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
    Path originalWAL = dodgyWAL.getCurrentFileName();
    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL);
    // There is no 'stop' once a logRoller is running; it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
        // First get something into the memstore. Make a Put and then pull the Cell out of it.
        // Below we manage append and sync carefully to manufacture the hang. We keep adding the
        // same edit; the WAL subsystem doesn't care.
        Put put = new Put(bytes);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
        WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(), System.currentTimeMillis(), mvcc, scopes);
        WALEdit edit = new WALEdit();
        CellScanner cellScanner = put.cellScanner();
        assertTrue(cellScanner.advance());
        edit.add(cellScanner.current());
        // Do a good number of puts so edits push out the other side of the ring buffer; with
        // small numbers, nothing makes it to the WAL.
        for (int i = 0; i < 1000; i++) {
            region.put(put);
        }
        // Set it so we start throwing exceptions.
        LOG.info("SET throwing of exception on append");
        dodgyWAL.throwException = true;
        // This append provokes a WAL roll request
        dodgyWAL.append(region.getRegionInfo(), key, edit, true);
        boolean exception = false;
        try {
            dodgyWAL.sync();
        } catch (Exception e) {
            exception = true;
        }
        assertTrue("Did not get sync exception", exception);
        // Get a memstore flush going too so we have the same hung profile as in HBASE-14317.
        // The flush hangs trying to get a sequence id because the ring buffer is held up by the
        // zigzag latch waiting on syncs to come home.
        Thread t = new Thread("Flusher") {

            @Override
            public void run() {
                try {
                    if (region.getMemstoreSize() <= 0) {
                        throw new IOException("memstore size=" + region.getMemstoreSize());
                    }
                    region.flush(false);
                } catch (IOException e) {
                    // Can fail trying to flush in middle of a roll. Not a failure. Will succeed later
                    // when roll completes.
                    LOG.info("In flush", e);
                }
                LOG.info("Exiting");
            }

        };
        t.setDaemon(true);
        t.start();
        // Wait until the latch in the dodgy WAL has been counted down, which means the roll has
        // reached its wait on the safe point.
        while (dodgyWAL.latch.getCount() > 0) Threads.sleep(1);
        // Now assert a new WAL file was put in place even though there were loads of errors above.
        assertTrue(!originalWAL.equals(dodgyWAL.getCurrentFileName()));
        // Can I append to it?
        dodgyWAL.throwException = false;
        try {
            region.put(put);
        } catch (Exception e) {
            LOG.info("In the put", e);
        }
    } finally {
        // To stop logRoller, its server has to say it is stopped.
        Mockito.when(server.isStopped()).thenReturn(true);
        if (logRoller != null)
            logRoller.close();
        try {
            if (region != null)
                region.close();
            if (dodgyWAL != null)
                dodgyWAL.close();
        } catch (Exception e) {
            LOG.info("On way out", e);
        }
    }
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), Server (org.apache.hadoop.hbase.Server), CellScanner (org.apache.hadoop.hbase.CellScanner), FSHLog (org.apache.hadoop.hbase.regionserver.wal.FSHLog), WALKey (org.apache.hadoop.hbase.wal.WALKey), WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit), FileSystem (org.apache.hadoop.fs.FileSystem), Path (org.apache.hadoop.fs.Path), IOException (java.io.IOException), CountDownLatch (java.util.concurrent.CountDownLatch), TreeMap (java.util.TreeMap), Put (org.apache.hadoop.hbase.client.Put), DamagedWALException (org.apache.hadoop.hbase.regionserver.wal.DamagedWALException), FailedLogCloseException (org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException), HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor), Writer (org.apache.hadoop.hbase.wal.WALProvider.Writer), Test (org.junit.Test)

Example 2 with FSHLog

Use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.

The class TestCompactionPolicy, method initialize.

/**
   * Sets up a Store backed by an FSHLog.
   * @throws IOException if setup fails
   */
protected void initialize() throws IOException {
    Path basedir = new Path(DIR);
    String logName = "logs";
    Path logdir = new Path(DIR, logName);
    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes("family"));
    FileSystem fs = FileSystem.get(conf);
    fs.delete(logdir, true);
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(Bytes.toBytes("table")));
    htd.addFamily(hcd);
    HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false);
    hlog = new FSHLog(fs, basedir, logName, conf);
    region = HRegion.createHRegion(info, basedir, conf, htd, hlog);
    region.close();
    Path tableDir = FSUtils.getTableDir(basedir, htd.getTableName());
    region = new HRegion(tableDir, hlog, fs, conf, info, htd, null);
    store = new HStore(region, hcd, conf);
    TEST_FILE = region.getRegionFileSystem().createTempName();
    fs.createNewFile(TEST_FILE);
}
Also used: Path (org.apache.hadoop.fs.Path), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor), FileSystem (org.apache.hadoop.fs.FileSystem), HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor), FSHLog (org.apache.hadoop.hbase.regionserver.wal.FSHLog)
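
The examples construct FSHLog directly against a FileSystem and a root directory. Below is a minimal, self-contained sketch of that lifecycle (create, inspect the current file, force a roll, close). It is an orientation aid, not taken from the tests above: the class name, the /tmp root directory, and the "logs" directory name are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;

public class FSHLogLifecycleSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        FileSystem fs = FileSystem.get(conf);
        // Illustrative root directory; the tests above use a per-test directory instead.
        Path rootDir = new Path("/tmp/fshlog-sketch");
        // Same constructor the tests use: (fs, root, logDir, conf).
        FSHLog wal = new FSHLog(fs, rootDir, "logs", conf);
        try {
            Path before = wal.getCurrentFileName();
            // Force a roll; returns the encoded names of regions with unflushed edits (may be null).
            wal.rollWriter(true);
            Path after = wal.getCurrentFileName();
            System.out.println("Rolled from " + before + " to " + after);
        } finally {
            wal.close();
        }
    }
}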

Example 3 with FSHLog

Use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.

The class TestFailedAppendAndSync, method testLockupAroundBadAssignSync.

/**
   * Reproduce the lockup that happens when we get exceptions appending and syncing.
   * See HBASE-14317.
   * First I need to set up some mocks for Server and RegionServerServices. I also need to
   * set up a dodgy WAL that will throw an exception when we go to append to it.
   */
@Test(timeout = 300000)
public void testLockupAroundBadAssignSync() throws IOException {
    final AtomicLong rolls = new AtomicLong(0);
    // Dodgy WAL. Will throw exceptions when flags set.
    class DodgyFSLog extends FSHLog {

        volatile boolean throwSyncException = false;

        volatile boolean throwAppendException = false;

        public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
            super(fs, root, logDir, conf);
        }

        @Override
        public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
            byte[][] regions = super.rollWriter(force);
            rolls.getAndIncrement();
            return regions;
        }

        @Override
        protected Writer createWriterInstance(Path path) throws IOException {
            final Writer w = super.createWriterInstance(path);
            return new Writer() {

                @Override
                public void close() throws IOException {
                    w.close();
                }

                @Override
                public void sync() throws IOException {
                    if (throwSyncException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...");
                    }
                    w.sync();
                }

                @Override
                public void append(Entry entry) throws IOException {
                    if (throwAppendException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...");
                    }
                    w.append(entry);
                }

                @Override
                public long getLength() {
                    return w.getLength();
                }
            };
        }
    }
    // Make up mocked server and services.
    Server server = mock(Server.class);
    when(server.getConfiguration()).thenReturn(CONF);
    when(server.isStopped()).thenReturn(false);
    when(server.isAborted()).thenReturn(false);
    RegionServerServices services = mock(RegionServerServices.class);
    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
    // the test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL);
    logRoller.start();
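    // The roller services roll requests triggered by the dodgy WAL's append/sync failures;
    // each completed roll increments 'rolls' via the rollWriter override above.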
    boolean threwOnSync = false;
    boolean threwOnAppend = false;
    boolean threwOnBoth = false;
    HRegion region = initHRegion(tableName, null, null, dodgyWAL);
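    // initHRegion wires the region to the dodgy WAL, so every region.put() below goes through it.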
    try {
        // Get some random bytes.
        byte[] value = Bytes.toBytes(getName());
        try {
            // First get something into memstore
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
            region.put(put);
        } catch (IOException ioe) {
            fail();
        }
        long rollsCount = rolls.get();
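        // Case 1: append fails, sync succeeds. The put should throw and a WAL roll should follow.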
        try {
            dodgyWAL.throwAppendException = true;
            dodgyWAL.throwSyncException = false;
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
            region.put(put);
        } catch (IOException ioe) {
            threwOnAppend = true;
        }
        while (rollsCount == rolls.get()) Threads.sleep(100);
        rollsCount = rolls.get();
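        // Case 2: both append and sync fail. Again the put should throw and a roll should follow.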
        try {
            dodgyWAL.throwAppendException = true;
            dodgyWAL.throwSyncException = true;
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
            region.put(put);
        } catch (IOException ioe) {
            threwOnBoth = true;
        }
        while (rollsCount == rolls.get()) Threads.sleep(100);
        // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
        // to just continue.
        // So, should be no abort at this stage. Verify.
        Mockito.verify(server, Mockito.atLeast(0)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
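        // Case 3: append succeeds but sync fails. This should provoke a server abort, verified below.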
        try {
            dodgyWAL.throwAppendException = false;
            dodgyWAL.throwSyncException = true;
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
            region.put(put);
        } catch (IOException ioe) {
            threwOnSync = true;
        }
        // A failed sync after a successful append is an abort condition. It takes a while for
        // abort to be called, so hang here until it happens; if it doesn't, the whole test
        // times out, and that is fine.
        while (true) {
            try {
                Mockito.verify(server, Mockito.atLeast(1)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
                break;
            } catch (WantedButNotInvoked t) {
                Threads.sleep(1);
            }
        }
    } finally {
        // To stop logRoller, its server has to say it is stopped.
        Mockito.when(server.isStopped()).thenReturn(true);
        if (logRoller != null)
            logRoller.close();
        if (region != null) {
            try {
                region.close(true);
            } catch (DroppedSnapshotException e) {
                LOG.info("On way out; expected!", e);
            }
        }
        if (dodgyWAL != null)
            dodgyWAL.close();
        assertTrue("The regionserver should have thrown an exception", threwOnBoth);
        assertTrue("The regionserver should have thrown an exception", threwOnAppend);
        assertTrue("The regionserver should have thrown an exception", threwOnSync);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Server (org.apache.hadoop.hbase.Server), DroppedSnapshotException (org.apache.hadoop.hbase.DroppedSnapshotException), IOException (java.io.IOException), Put (org.apache.hadoop.hbase.client.Put), FSHLog (org.apache.hadoop.hbase.regionserver.wal.FSHLog), AtomicLong (java.util.concurrent.atomic.AtomicLong), FileSystem (org.apache.hadoop.fs.FileSystem), Writer (org.apache.hadoop.hbase.wal.WALProvider.Writer), WantedButNotInvoked (org.mockito.exceptions.verification.WantedButNotInvoked), Test (org.junit.Test)

Example 4 with FSHLog

Use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.

The class TestHRegion, method testMemstoreSizeWithFlushCanceling.

/**
   * Test for HBASE-14229: a flush canceled by a coprocessor must not still lead to memstoreSize
   * being set down.
   */
@Test
public void testMemstoreSizeWithFlushCanceling() throws IOException {
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + "testMemstoreSizeWithFlushCanceling");
    FSHLog hLog = new FSHLog(fs, rootDir, "testMemstoreSizeWithFlushCanceling", CONF);
    HRegion region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, hLog, COLUMN_FAMILY_BYTES);
    Store store = region.getStore(COLUMN_FAMILY_BYTES);
    assertEquals(0, region.getMemstoreSize());
    // Put some value and make sure flush could be completed normally
    byte[] value = Bytes.toBytes(method);
    Put put = new Put(value);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("abc"), value);
    region.put(put);
    long onePutSize = region.getMemstoreSize();
    assertTrue(onePutSize > 0);
    region.flush(true);
    assertEquals("memstoreSize should be zero", 0, region.getMemstoreSize());
    assertEquals("flushable size should be zero", 0, store.getSizeToFlush().getDataSize());
    // save normalCPHost and replaced by mockedCPHost, which will cancel flush requests
    RegionCoprocessorHost normalCPHost = region.getCoprocessorHost();
    RegionCoprocessorHost mockedCPHost = Mockito.mock(RegionCoprocessorHost.class);
    when(mockedCPHost.preFlush(Mockito.isA(HStore.class), Mockito.isA(InternalScanner.class))).thenReturn(null);
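    // Returning null from preFlush simulates the coprocessor canceling the flush (HBASE-14229).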
    region.setCoprocessorHost(mockedCPHost);
    region.put(put);
    region.flush(true);
    assertEquals("memstoreSize should NOT be zero", onePutSize, region.getMemstoreSize());
    assertEquals("flushable size should NOT be zero", onePutSize, store.getSizeToFlush().getDataSize());
    // set normalCPHost and flush again, the snapshot will be flushed
    region.setCoprocessorHost(normalCPHost);
    region.flush(true);
    assertEquals("memstoreSize should be zero", 0, region.getMemstoreSize());
    assertEquals("flushable size should be zero", 0, store.getSizeToFlush().getDataSize());
    HBaseTestingUtility.closeRegionAndWAL(region);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), FaultyFileSystem (org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem), Put (org.apache.hadoop.hbase.client.Put), FSHLog (org.apache.hadoop.hbase.regionserver.wal.FSHLog), Test (org.junit.Test)

Example 5 with FSHLog

Use of org.apache.hadoop.hbase.regionserver.wal.FSHLog in project hbase by apache.

The class TestHRegion, method testMemstoreSizeAccountingWithFailedPostBatchMutate.

@Test
public void testMemstoreSizeAccountingWithFailedPostBatchMutate() throws IOException {
    String testName = "testMemstoreSizeAccountingWithFailedPostBatchMutate";
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + testName);
    FSHLog hLog = new FSHLog(fs, rootDir, testName, CONF);
    HRegion region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, hLog, COLUMN_FAMILY_BYTES);
    Store store = region.getStore(COLUMN_FAMILY_BYTES);
    assertEquals(0, region.getMemstoreSize());
    // Put one value
    byte[] value = Bytes.toBytes(method);
    Put put = new Put(value);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("abc"), value);
    region.put(put);
    long onePutSize = region.getMemstoreSize();
    assertTrue(onePutSize > 0);
    RegionCoprocessorHost mockedCPHost = Mockito.mock(RegionCoprocessorHost.class);
    doThrow(new IOException()).when(mockedCPHost).postBatchMutate(Mockito.<MiniBatchOperationInProgress<Mutation>>any());
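    // Make postBatchMutate throw: the put below fails, but its cells are already in the memstore,
    // so the size accounting should still include them.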
    region.setCoprocessorHost(mockedCPHost);
    put = new Put(value);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("dfg"), value);
    try {
        region.put(put);
        fail("Should have failed with IOException");
    } catch (IOException expected) {
    }
    long expectedSize = onePutSize * 2;
    assertEquals("memstoreSize should be incremented", expectedSize, region.getMemstoreSize());
    assertEquals("flushable size should be incremented", expectedSize, store.getSizeToFlush().getDataSize());
    region.setCoprocessorHost(null);
    HBaseTestingUtility.closeRegionAndWAL(region);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), FaultyFileSystem (org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem), ByteString (org.apache.hadoop.hbase.shaded.com.google.protobuf.ByteString), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), Mutation (org.apache.hadoop.hbase.client.Mutation), Put (org.apache.hadoop.hbase.client.Put), FSHLog (org.apache.hadoop.hbase.regionserver.wal.FSHLog), Test (org.junit.Test)

Aggregations

Path (org.apache.hadoop.fs.Path): 9
FSHLog (org.apache.hadoop.hbase.regionserver.wal.FSHLog): 9
FileSystem (org.apache.hadoop.fs.FileSystem): 8
Test (org.junit.Test): 8
Put (org.apache.hadoop.hbase.client.Put): 7
IOException (java.io.IOException): 5
Configuration (org.apache.hadoop.conf.Configuration): 4
FaultyFileSystem (org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem): 4
Writer (org.apache.hadoop.hbase.wal.WALProvider.Writer): 4
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 3
Server (org.apache.hadoop.hbase.Server): 3
InterruptedIOException (java.io.InterruptedIOException): 2
TreeMap (java.util.TreeMap): 2
CountDownLatch (java.util.concurrent.CountDownLatch): 2
Cell (org.apache.hadoop.hbase.Cell): 2
CellScanner (org.apache.hadoop.hbase.CellScanner): 2
DroppedSnapshotException (org.apache.hadoop.hbase.DroppedSnapshotException): 2
DamagedWALException (org.apache.hadoop.hbase.regionserver.wal.DamagedWALException): 2
FailedLogCloseException (org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException): 2
WALEdit (org.apache.hadoop.hbase.regionserver.wal.WALEdit): 2