Search in sources :

Example 1 with FailedLogCloseException

use of org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException in project hbase by apache.

the class LogRoller method run.

/**
 * Main loop of the roller thread. While {@code running} is set, it waits until
 * either a roll has been requested through {@code rollLog} or more than
 * {@code rollperiod} milliseconds have passed since {@code lastrolltime}, then
 * rolls every WAL registered in {@code walNeedsRoll} while holding
 * {@code rollLock}. Any exception raised while rolling aborts the server.
 */
@Override
public void run() {
    while (running) {
        final long now = System.currentTimeMillis();
        boolean periodic = false;
        if (rollLog.get()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("WAL roll requested");
            }
        } else {
            periodic = (now - this.lastrolltime) > this.rollperiod;
            if (!periodic) {
                // Neither requested nor due: sleep on the rollLog monitor, then
                // start the loop over so that `running` is re-checked.
                synchronized (rollLog) {
                    try {
                        // Re-check under the monitor before going to sleep.
                        if (!rollLog.get()) {
                            rollLog.wait(this.threadWakeFrequency);
                        }
                    } catch (InterruptedException ignored) {
                        // Fall through
                    }
                }
                continue;
            }
            // Time for periodic roll
            if (LOG.isDebugEnabled()) {
                LOG.debug("Wal roll period " + this.rollperiod + "ms elapsed");
            }
        }
        // Lock is taken outside the try so the finally below only ever releases
        // a lock that is held (FindBugs UL_UNRELEASED_LOCK_EXCEPTION_PATH).
        rollLock.lock();
        try {
            this.lastrolltime = now;
            for (Entry<WAL, Boolean> walEntry : walNeedsRoll.entrySet()) {
                final WAL wal = walEntry.getKey();
                // Force the roll if the logroll.period is elapsed or if a roll was requested.
                final boolean forceRoll = periodic || walEntry.getValue().booleanValue();
                // The returned value is an array of actual region names.
                final byte[][] regionsToFlush = wal.rollWriter(forceRoll);
                walNeedsRoll.put(wal, Boolean.FALSE);
                if (regionsToFlush == null) {
                    continue;
                }
                for (byte[] regionName : regionsToFlush) {
                    scheduleFlush(regionName);
                }
            }
        } catch (FailedLogCloseException | java.net.ConnectException e) {
            server.abort("Failed log close in log roller", e);
        } catch (IOException ex) {
            // Abort if we get here.  We probably won't recover an IOE. HBASE-1132
            IOException cause = ex;
            if (ex instanceof RemoteException) {
                cause = ((RemoteException) ex).unwrapRemoteException();
            }
            server.abort("IOE in log roller", cause);
        } catch (Exception ex) {
            LOG.error("Log rolling failed", ex);
            server.abort("Log rolling failed", ex);
        } finally {
            // Clear the request flag, and release the lock even if that fails.
            try {
                rollLog.set(false);
            } finally {
                rollLock.unlock();
            }
        }
    }
    LOG.info("LogRoller exiting.");
}
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) IOException(java.io.IOException) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 2 with FailedLogCloseException

use of org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException in project hbase by apache.

the class TestWALLockup method testLockup16960.

/**
 * Reproduces the lock-up that happens when no further sync follows a failed
 * append, which leaves an isolated sync waiting forever. See HBASE-16960. If
 * the behaviour under test is broken, this test times out because it is
 * locked up.
 * <p>
 * Steps to reproduce:
 * <ol>
 * <li>Trigger server abort through dodgyWAL1.</li>
 * <li>Add a {@link DummyWALActionsListener} to dodgyWAL2 to make the ringbuffer
 * event handler thread sleep for a while, thus keeping {@code endOfBatch}
 * false.</li>
 * <li>Publish a sync and then an append which will throw an exception, and
 * check whether the sync returns.</li>
 * </ol>
 *
 * @throws IOException if setting up the WALs or the region fails
 */
@Test(timeout = 20000)
public void testLockup16960() throws IOException {
    // A WAL that we can have throw exceptions when a flag is set.
    class DodgyFSLog extends FSHLog {

        // Set this when want the WAL to start throwing exceptions.
        volatile boolean throwException = false;

        public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
            super(fs, root, logDir, conf);
        }

        @Override
        protected Writer createWriterInstance(Path path) throws IOException {
            final Writer w = super.createWriterInstance(path);
            // Delegating writer whose sync/append fail once throwException is set.
            return new Writer() {

                @Override
                public void close() throws IOException {
                    w.close();
                }

                @Override
                public void sync() throws IOException {
                    if (throwException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...SYNC");
                    }
                    w.sync();
                }

                @Override
                public void append(Entry entry) throws IOException {
                    if (throwException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...APPEND");
                    }
                    w.append(entry);
                }

                @Override
                public long getLength() {
                    return w.getLength();
                }
            };
        }

        @Override
        protected long doReplaceWriter(Path oldPath, Path newPath, Writer nextWriter) throws IOException {
            if (throwException) {
                throw new FailedLogCloseException("oldPath=" + oldPath + ", newPath=" + newPath);
            }
            return super.doReplaceWriter(oldPath, newPath, nextWriter);
        }
    }
    // Mocked up server and regionserver services. Needed below.
    Server server = new DummyServer(CONF, ServerName.valueOf("hostname1.example.org", 1234, 1L).toString());
    RegionServerServices services = Mockito.mock(RegionServerServices.class);
    CONF.setLong("hbase.regionserver.hlog.sync.timeout", 10000);
    // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL,
    // go ahead with test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL1 = new DodgyFSLog(fs, rootDir, getName(), CONF);
    Path rootDir2 = new Path(dir + getName() + "2");
    final DodgyFSLog dodgyWAL2 = new DodgyFSLog(fs, rootDir2, getName() + "2", CONF);
    // Add a listener to force ringbuffer event handler sleep for a while
    dodgyWAL2.registerWALActionsListener(new DummyWALActionsListener());
    // I need a log roller running.
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL1);
    logRoller.addWAL(dodgyWAL2);
    // There is no 'stop' once a logRoller is running.. it just dies.
    logRoller.start();
    // Now get a region and start adding in edits.
    HTableDescriptor htd = new HTableDescriptor(TableName.META_TABLE_NAME);
    final HRegion region = initHRegion(tableName, null, null, dodgyWAL1);
    byte[] bytes = Bytes.toBytes(getName());
    NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    scopes.put(COLUMN_FAMILY_BYTES, 0);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
    try {
        Put put = new Put(bytes);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
        WALKey key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), htd.getTableName(), System.currentTimeMillis(), mvcc, scopes);
        WALEdit edit = new WALEdit();
        CellScanner cellScanner = put.cellScanner();
        assertTrue(cellScanner.advance());
        edit.add(cellScanner.current());
        LOG.info("SET throwing of exception on append");
        dodgyWAL1.throwException = true;
        // This append provokes a WAL roll request
        dodgyWAL1.append(region.getRegionInfo(), key, edit, true);
        boolean exception = false;
        try {
            dodgyWAL1.sync();
        } catch (Exception e) {
            exception = true;
        }
        assertTrue("Did not get sync exception", exception);
        // cause server abort.
        try {
            // wait LogRoller exit.
            Thread.sleep(50);
        } catch (InterruptedException e) {
            // The interrupt status is left cleared, as before, so the later
            // sleeps in this test still pace it instead of returning at once.
            LOG.warn("Interrupted while waiting for LogRoller to exit", e);
        }
        final CountDownLatch latch = new CountDownLatch(1);
        // make RingBufferEventHandler sleep 1s, so the following sync
        // endOfBatch=false
        key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), TableName.valueOf("sleep"), System.currentTimeMillis(), mvcc, scopes);
        dodgyWAL2.append(region.getRegionInfo(), key, edit, true);
        // Run the sync on its own thread; the latch is released once sync returns
        // or fails with an IOException.
        Thread t = new Thread("Sync") {

            @Override
            public void run() {
                try {
                    dodgyWAL2.sync();
                } catch (IOException e) {
                    LOG.info("In sync", e);
                }
                latch.countDown();
                LOG.info("Sync exiting");
            }
        };
        t.setDaemon(true);
        t.start();
        try {
            // make sure sync have published.
            Thread.sleep(100);
        } catch (InterruptedException e1) {
            LOG.warn("Interrupted while waiting for the sync to be published", e1);
        }
        // make append throw DamagedWALException
        key = new WALKey(region.getRegionInfo().getEncodedNameAsBytes(), TableName.valueOf("DamagedWALException"), System.currentTimeMillis(), mvcc, scopes);
        dodgyWAL2.append(region.getRegionInfo(), key, edit, true);
        // Poll until the sync thread has finished; if it is locked up, the test
        // timeout fires instead.
        while (latch.getCount() > 0) {
            Threads.sleep(100);
        }
        assertTrue(server.isAborted());
    } finally {
        if (logRoller != null) {
            logRoller.close();
        }
        try {
            if (region != null) {
                region.close();
            }
            if (dodgyWAL1 != null) {
                dodgyWAL1.close();
            }
            if (dodgyWAL2 != null) {
                dodgyWAL2.close();
            }
        } catch (Exception e) {
            LOG.info("On way out", e);
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Server(org.apache.hadoop.hbase.Server) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) CellScanner(org.apache.hadoop.hbase.CellScanner) FSHLog(org.apache.hadoop.hbase.regionserver.wal.FSHLog) WALKey(org.apache.hadoop.hbase.wal.WALKey) WALEdit(org.apache.hadoop.hbase.regionserver.wal.WALEdit) FileSystem(org.apache.hadoop.fs.FileSystem) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) TreeMap(java.util.TreeMap) CountDownLatch(java.util.concurrent.CountDownLatch) Put(org.apache.hadoop.hbase.client.Put) DamagedWALException(org.apache.hadoop.hbase.regionserver.wal.DamagedWALException) IOException(java.io.IOException) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Writer(org.apache.hadoop.hbase.wal.WALProvider.Writer) Test(org.junit.Test)

Example 3 with FailedLogCloseException

use of org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException in project hbase by apache.

the class AbstractWALRoller method run.

/**
 * Main loop of the roller thread. While {@code running} is set, it checks for
 * low replication, waits on this object's monitor until at least one
 * registered WAL reports that it needs a roll, and then rolls every WAL whose
 * controller has a pending request or an elapsed roll period. A WAL that turns
 * out to be closed is removed from {@code wals}; any other failure calls
 * {@code abort}.
 */
@Override
public void run() {
    while (running) {
        final long now = EnvironmentEdgeManager.currentTime();
        checkLowReplication(now);
        synchronized (this) {
            final boolean anyWalNeedsRoll = wals.values().stream().anyMatch(rc -> rc.needsRoll(now));
            if (!anyWalNeedsRoll) {
                try {
                    wait(this.threadWakeFrequency);
                } catch (InterruptedException e) {
                    // restore the interrupt state
                    Thread.currentThread().interrupt();
                }
                // Go back to the top of the loop to check again whether any WAL
                // needs rolling, and also to check whether we should quit.
                continue;
            }
        }
        try {
            final Iterator<Entry<WAL, RollController>> walIterator = wals.entrySet().iterator();
            while (walIterator.hasNext()) {
                final Entry<WAL, RollController> walEntry = walIterator.next();
                final WAL wal = walEntry.getKey();
                final RollController controller = walEntry.getValue();
                if (controller.isRollRequested()) {
                    // WAL roll requested, fall through
                    LOG.debug("WAL {} roll requested", wal);
                } else if (controller.needsPeriodicRoll(now)) {
                    // Time for periodic roll, fall through
                    LOG.debug("WAL {} roll period {} ms elapsed", wal, this.rollPeriod);
                } else {
                    // Neither requested nor due: leave this WAL alone.
                    continue;
                }
                try {
                    // Force the roll if the logroll.period is elapsed or if a roll was requested.
                    // The returned value is a collection of actual region and family names.
                    final Map<byte[], List<byte[]>> regionsToFlush = controller.rollWal(now);
                    if (regionsToFlush == null) {
                        continue;
                    }
                    for (Map.Entry<byte[], List<byte[]>> regionAndFamilies : regionsToFlush.entrySet()) {
                        final String regionName = Bytes.toString(regionAndFamilies.getKey());
                        scheduleFlush(regionName, regionAndFamilies.getValue());
                    }
                } catch (WALClosedException e) {
                    LOG.warn("WAL has been closed. Skipping rolling of writer and just remove it", e);
                    walIterator.remove();
                }
            }
        } catch (FailedLogCloseException | ConnectException e) {
            abort("Failed log close in log roller", e);
        } catch (IOException ex) {
            // Abort if we get here. We probably won't recover an IOE. HBASE-1132
            IOException cause = ex;
            if (ex instanceof RemoteException) {
                cause = ((RemoteException) ex).unwrapRemoteException();
            }
            abort("IOE in log roller", cause);
        } catch (Exception ex) {
            LOG.error("Log rolling failed", ex);
            abort("Log rolling failed", ex);
        }
    }
    LOG.info("LogRoller exiting.");
}
Also used : AbstractFSWAL(org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) WALClosedException(org.apache.hadoop.hbase.regionserver.wal.WALClosedException) IOException(java.io.IOException) IOException(java.io.IOException) RemoteException(org.apache.hadoop.ipc.RemoteException) WALClosedException(org.apache.hadoop.hbase.regionserver.wal.WALClosedException) ConnectException(java.net.ConnectException) FailedLogCloseException(org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException) Entry(java.util.Map.Entry) List(java.util.List) RemoteException(org.apache.hadoop.ipc.RemoteException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Map(java.util.Map) ConnectException(java.net.ConnectException)

Aggregations

IOException (java.io.IOException)3 FailedLogCloseException (org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException)3 RemoteException (org.apache.hadoop.ipc.RemoteException)2 ConnectException (java.net.ConnectException)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 TreeMap (java.util.TreeMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 Configuration (org.apache.hadoop.conf.Configuration)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 CellScanner (org.apache.hadoop.hbase.CellScanner)1 HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor)1 Server (org.apache.hadoop.hbase.Server)1 Put (org.apache.hadoop.hbase.client.Put)1 AbstractFSWAL (org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL)1