Search in sources :

Example 1 with DroppedSnapshotException

use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.

the class RSRpcServices method flushRegion.

/**
   * Flush a region on the region server.
   *
   * @param controller the RPC controller
   * @param request the request
   * @throws ServiceException
   */
@Override
@QosPriority(priority = HConstants.ADMIN_QOS)
public FlushRegionResponse flushRegion(final RpcController controller, final FlushRegionRequest request) throws ServiceException {
    try {
        checkOpen();
        requestCount.increment();
        Region region = getRegion(request.getRegion());
        LOG.info("Flushing " + region.getRegionInfo().getRegionNameAsString());
        boolean shouldFlush = true;
        if (request.hasIfOlderThanTs()) {
            shouldFlush = region.getEarliestFlushTimeForAllStores() < request.getIfOlderThanTs();
        }
        FlushRegionResponse.Builder builder = FlushRegionResponse.newBuilder();
        if (shouldFlush) {
            boolean writeFlushWalMarker = request.hasWriteFlushWalMarker() ? request.getWriteFlushWalMarker() : false;
            // Go behind the curtain so we can manage writing of the flush WAL marker
            HRegion.FlushResultImpl flushResult = (HRegion.FlushResultImpl) ((HRegion) region).flushcache(true, writeFlushWalMarker);
            boolean compactionNeeded = flushResult.isCompactionNeeded();
            if (compactionNeeded) {
                regionServer.compactSplitThread.requestSystemCompaction(region, "Compaction through user triggered flush");
            }
            builder.setFlushed(flushResult.isFlushSucceeded());
            builder.setWroteFlushWalMarker(flushResult.wroteFlushWalMarker);
        }
        builder.setLastFlushTime(region.getEarliestFlushTimeForAllStores());
        return builder.build();
    } catch (DroppedSnapshotException ex) {
        // Cache flush can fail in a few places. If it fails in a critical
        // section, we get a DroppedSnapshotException and a replay of wal
        // is required. Currently the only way to do this is a restart of
        // the server.
        regionServer.abort("Replay of WAL required. Forcing server shutdown", ex);
        throw new ServiceException(ex);
    } catch (IOException ie) {
        throw new ServiceException(ie);
    }
}
Also used : DroppedSnapshotException(org.apache.hadoop.hbase.DroppedSnapshotException) ServiceException(org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException) FlushRegionResponse(org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionResponse) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) HBaseIOException(org.apache.hadoop.hbase.HBaseIOException) QosPriority(org.apache.hadoop.hbase.ipc.QosPriority)

Example 2 with DroppedSnapshotException

use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.

the class MemStoreFlusher method flushRegion.

/**
   * Flush a region.
   * @param region Region to flush.
   * @param emergencyFlush Set if we are being force flushed. If true the region
   * needs to be removed from the flush queue. If false, when we were called
   * from the main flusher run loop and we got the entry to flush by calling
   * poll on the flush queue (which removed it).
   * @param forceFlushAllStores whether we want to flush all store.
   * @return true if the region was successfully flushed, false otherwise. If
   * false, there will be accompanying log messages explaining why the region was
   * not flushed.
   */
private boolean flushRegion(final Region region, final boolean emergencyFlush, boolean forceFlushAllStores) {
    synchronized (this.regionsInQueue) {
        FlushRegionEntry fqe = this.regionsInQueue.remove(region);
        // Use the start time of the FlushRegionEntry if available
        if (fqe != null && emergencyFlush) {
            // Need to remove from region from delay queue.  When NOT an
            // emergencyFlush, then item was removed via a flushQueue.poll.
            flushQueue.remove(fqe);
        }
    }
    lock.readLock().lock();
    try {
        notifyFlushRequest(region, emergencyFlush);
        FlushResult flushResult = region.flush(forceFlushAllStores);
        boolean shouldCompact = flushResult.isCompactionNeeded();
        // We just want to check the size
        boolean shouldSplit = ((HRegion) region).checkSplit() != null;
        if (shouldSplit) {
            this.server.compactSplitThread.requestSplit(region);
        } else if (shouldCompact) {
            server.compactSplitThread.requestSystemCompaction(region, Thread.currentThread().getName());
        }
    } catch (DroppedSnapshotException ex) {
        // Cache flush can fail in a few places. If it fails in a critical
        // section, we get a DroppedSnapshotException and a replay of wal
        // is required. Currently the only way to do this is a restart of
        // the server. Abort because hdfs is probably bad (HBASE-644 is a case
        // where hdfs was bad but passed the hdfs check).
        server.abort("Replay of WAL required. Forcing server shutdown", ex);
        return false;
    } catch (IOException ex) {
        ex = ex instanceof RemoteException ? ((RemoteException) ex).unwrapRemoteException() : ex;
        LOG.error("Cache flush failed" + (region != null ? (" for region " + Bytes.toStringBinary(region.getRegionInfo().getRegionName())) : ""), ex);
        if (!server.checkFileSystem()) {
            return false;
        }
    } finally {
        lock.readLock().unlock();
        wakeUpIfBlocking();
    }
    return true;
}
Also used : DroppedSnapshotException(org.apache.hadoop.hbase.DroppedSnapshotException) IOException(java.io.IOException) FlushResult(org.apache.hadoop.hbase.regionserver.Region.FlushResult) RemoteException(org.apache.hadoop.ipc.RemoteException)

Example 3 with DroppedSnapshotException

use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.

the class TestSplitWalDataLoss method test.

@Test
public void test() throws IOException, InterruptedException {
    final HRegionServer rs = testUtil.getRSForFirstRegionInTable(tableName);
    final HRegion region = (HRegion) rs.getOnlineRegions(tableName).get(0);
    HRegion spiedRegion = spy(region);
    final MutableBoolean flushed = new MutableBoolean(false);
    final MutableBoolean reported = new MutableBoolean(false);
    doAnswer(new Answer<FlushResult>() {

        @Override
        public FlushResult answer(InvocationOnMock invocation) throws Throwable {
            synchronized (flushed) {
                flushed.setValue(true);
                flushed.notifyAll();
            }
            synchronized (reported) {
                while (!reported.booleanValue()) {
                    reported.wait();
                }
            }
            rs.getWAL(region.getRegionInfo()).abortCacheFlush(region.getRegionInfo().getEncodedNameAsBytes());
            throw new DroppedSnapshotException("testcase");
        }
    }).when(spiedRegion).internalFlushCacheAndCommit(Matchers.<WAL>any(), Matchers.<MonitoredTask>any(), Matchers.<PrepareFlushResult>any(), Matchers.<Collection<Store>>any());
    // Find region key; don't pick up key for hbase:meta by mistake.
    String key = null;
    for (Map.Entry<String, Region> entry : rs.onlineRegions.entrySet()) {
        if (entry.getValue().getRegionInfo().getTable().equals(this.tableName)) {
            key = entry.getKey();
            break;
        }
    }
    rs.onlineRegions.put(key, spiedRegion);
    Connection conn = testUtil.getConnection();
    try (Table table = conn.getTable(tableName)) {
        table.put(new Put(Bytes.toBytes("row0")).addColumn(family, qualifier, Bytes.toBytes("val0")));
    }
    long oldestSeqIdOfStore = region.getOldestSeqIdOfStore(family);
    LOG.info("CHANGE OLDEST " + oldestSeqIdOfStore);
    assertTrue(oldestSeqIdOfStore > HConstants.NO_SEQNUM);
    rs.cacheFlusher.requestFlush(spiedRegion, false);
    synchronized (flushed) {
        while (!flushed.booleanValue()) {
            flushed.wait();
        }
    }
    try (Table table = conn.getTable(tableName)) {
        table.put(new Put(Bytes.toBytes("row1")).addColumn(family, qualifier, Bytes.toBytes("val1")));
    }
    long now = EnvironmentEdgeManager.currentTime();
    rs.tryRegionServerReport(now - 500, now);
    synchronized (reported) {
        reported.setValue(true);
        reported.notifyAll();
    }
    while (testUtil.getRSForFirstRegionInTable(tableName) == rs) {
        Thread.sleep(100);
    }
    try (Table table = conn.getTable(tableName)) {
        Result result = table.get(new Get(Bytes.toBytes("row0")));
        assertArrayEquals(Bytes.toBytes("val0"), result.getValue(family, qualifier));
    }
}
Also used : Table(org.apache.hadoop.hbase.client.Table) DroppedSnapshotException(org.apache.hadoop.hbase.DroppedSnapshotException) MutableBoolean(org.apache.commons.lang.mutable.MutableBoolean) Connection(org.apache.hadoop.hbase.client.Connection) PrepareFlushResult(org.apache.hadoop.hbase.regionserver.HRegion.PrepareFlushResult) FlushResult(org.apache.hadoop.hbase.regionserver.Region.FlushResult) Put(org.apache.hadoop.hbase.client.Put) PrepareFlushResult(org.apache.hadoop.hbase.regionserver.HRegion.PrepareFlushResult) Result(org.apache.hadoop.hbase.client.Result) FlushResult(org.apache.hadoop.hbase.regionserver.Region.FlushResult) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Get(org.apache.hadoop.hbase.client.Get) Map(java.util.Map) Test(org.junit.Test)

Example 4 with DroppedSnapshotException

use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.

the class TestHRegion method testCloseWithFailingFlush.

@Test
public void testCloseWithFailingFlush() throws Exception {
    final Configuration conf = HBaseConfiguration.create(CONF);
    final WAL wal = createWALCompatibleWithFaultyFileSystem(method, conf, tableName);
    // Only retry once.
    conf.setInt("hbase.hstore.flush.retries.number", 1);
    final User user = User.createUserForTesting(conf, this.method, new String[] { "foo" });
    // Inject our faulty LocalFileSystem
    conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class);
    user.runAs(new PrivilegedExceptionAction<Object>() {

        @Override
        public Object run() throws Exception {
            // Make sure it worked (above is sensitive to caching details in hadoop core)
            FileSystem fs = FileSystem.get(conf);
            Assert.assertEquals(FaultyFileSystem.class, fs.getClass());
            FaultyFileSystem ffs = (FaultyFileSystem) fs;
            HRegion region = null;
            try {
                // Initialize region
                region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
                long size = region.getMemstoreSize();
                Assert.assertEquals(0, size);
                // Put one item into memstore.  Measure the size of one item in memstore.
                Put p1 = new Put(row);
                p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[]) null));
                region.put(p1);
                // Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only.
                Store store = region.getStore(COLUMN_FAMILY_BYTES);
                StoreFlushContext storeFlushCtx = store.createFlushContext(12345);
                storeFlushCtx.prepare();
                // Now add two entries to the foreground memstore.
                Put p2 = new Put(row);
                p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[]) null));
                p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[]) null));
                region.put(p2);
                // Now try close on top of a failing flush.
                region.close();
                fail();
            } catch (DroppedSnapshotException dse) {
                // Expected
                LOG.info("Expected DroppedSnapshotException");
            } finally {
                // Make it so all writes succeed from here on out so can close clean
                ffs.fault.set(false);
                HBaseTestingUtility.closeRegionAndWAL(region);
            }
            return null;
        }
    });
    FileSystem.closeAllForUGI(user.getUGI());
}
Also used : WAL(org.apache.hadoop.hbase.wal.WAL) MetricsWAL(org.apache.hadoop.hbase.regionserver.wal.MetricsWAL) User(org.apache.hadoop.hbase.security.User) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) DroppedSnapshotException(org.apache.hadoop.hbase.DroppedSnapshotException) FailedSanityCheckException(org.apache.hadoop.hbase.exceptions.FailedSanityCheckException) RegionTooBusyException(org.apache.hadoop.hbase.RegionTooBusyException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) NotServingRegionException(org.apache.hadoop.hbase.NotServingRegionException) DroppedSnapshotException(org.apache.hadoop.hbase.DroppedSnapshotException) Put(org.apache.hadoop.hbase.client.Put) FileSystem(org.apache.hadoop.fs.FileSystem) FaultyFileSystem(org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem) FaultyFileSystem(org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem) Test(org.junit.Test)

Example 5 with DroppedSnapshotException

use of org.apache.hadoop.hbase.DroppedSnapshotException in project hbase by apache.

the class TestFailedAppendAndSync method testLockupAroundBadAssignSync.

/**
   * Reproduce locking up that happens when we get an exceptions appending and syncing.
   * See HBASE-14317.
   * First I need to set up some mocks for Server and RegionServerServices. I also need to
   * set up a dodgy WAL that will throw an exception when we go to append to it.
   */
@Test(timeout = 300000)
public void testLockupAroundBadAssignSync() throws IOException {
    final AtomicLong rolls = new AtomicLong(0);
    // Dodgy WAL. Will throw exceptions when flags set.
    class DodgyFSLog extends FSHLog {

        volatile boolean throwSyncException = false;

        volatile boolean throwAppendException = false;

        public DodgyFSLog(FileSystem fs, Path root, String logDir, Configuration conf) throws IOException {
            super(fs, root, logDir, conf);
        }

        @Override
        public byte[][] rollWriter(boolean force) throws FailedLogCloseException, IOException {
            byte[][] regions = super.rollWriter(force);
            rolls.getAndIncrement();
            return regions;
        }

        @Override
        protected Writer createWriterInstance(Path path) throws IOException {
            final Writer w = super.createWriterInstance(path);
            return new Writer() {

                @Override
                public void close() throws IOException {
                    w.close();
                }

                @Override
                public void sync() throws IOException {
                    if (throwSyncException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...");
                    }
                    w.sync();
                }

                @Override
                public void append(Entry entry) throws IOException {
                    if (throwAppendException) {
                        throw new IOException("FAKE! Failed to replace a bad datanode...");
                    }
                    w.append(entry);
                }

                @Override
                public long getLength() {
                    return w.getLength();
                }
            };
        }
    }
    // Make up mocked server and services.
    Server server = mock(Server.class);
    when(server.getConfiguration()).thenReturn(CONF);
    when(server.isStopped()).thenReturn(false);
    when(server.isAborted()).thenReturn(false);
    RegionServerServices services = mock(RegionServerServices.class);
    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
    // the test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
    LogRoller logRoller = new LogRoller(server, services);
    logRoller.addWAL(dodgyWAL);
    logRoller.start();
    boolean threwOnSync = false;
    boolean threwOnAppend = false;
    boolean threwOnBoth = false;
    HRegion region = initHRegion(tableName, null, null, dodgyWAL);
    try {
        // Get some random bytes.
        byte[] value = Bytes.toBytes(getName());
        try {
            // First get something into memstore
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
            region.put(put);
        } catch (IOException ioe) {
            fail();
        }
        long rollsCount = rolls.get();
        try {
            dodgyWAL.throwAppendException = true;
            dodgyWAL.throwSyncException = false;
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
            region.put(put);
        } catch (IOException ioe) {
            threwOnAppend = true;
        }
        while (rollsCount == rolls.get()) Threads.sleep(100);
        rollsCount = rolls.get();
        try {
            dodgyWAL.throwAppendException = true;
            dodgyWAL.throwSyncException = true;
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
            region.put(put);
        } catch (IOException ioe) {
            threwOnBoth = true;
        }
        while (rollsCount == rolls.get()) Threads.sleep(100);
        // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
        // to just continue.
        // So, should be no abort at this stage. Verify.
        Mockito.verify(server, Mockito.atLeast(0)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
        try {
            dodgyWAL.throwAppendException = false;
            dodgyWAL.throwSyncException = true;
            Put put = new Put(value);
            put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
            region.put(put);
        } catch (IOException ioe) {
            threwOnSync = true;
        }
        // happens. If it don't we'll timeout the whole test. That is fine.
        while (true) {
            try {
                Mockito.verify(server, Mockito.atLeast(1)).abort(Mockito.anyString(), (Throwable) Mockito.anyObject());
                break;
            } catch (WantedButNotInvoked t) {
                Threads.sleep(1);
            }
        }
    } finally {
        // To stop logRoller, its server has to say it is stopped.
        Mockito.when(server.isStopped()).thenReturn(true);
        if (logRoller != null)
            logRoller.close();
        if (region != null) {
            try {
                region.close(true);
            } catch (DroppedSnapshotException e) {
                LOG.info("On way out; expected!", e);
            }
        }
        if (dodgyWAL != null)
            dodgyWAL.close();
        assertTrue("The regionserver should have thrown an exception", threwOnBoth);
        assertTrue("The regionserver should have thrown an exception", threwOnAppend);
        assertTrue("The regionserver should have thrown an exception", threwOnSync);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Server(org.apache.hadoop.hbase.Server) DroppedSnapshotException(org.apache.hadoop.hbase.DroppedSnapshotException) IOException(java.io.IOException) Put(org.apache.hadoop.hbase.client.Put) FSHLog(org.apache.hadoop.hbase.regionserver.wal.FSHLog) AtomicLong(java.util.concurrent.atomic.AtomicLong) FileSystem(org.apache.hadoop.fs.FileSystem) Writer(org.apache.hadoop.hbase.wal.WALProvider.Writer) WantedButNotInvoked(org.mockito.exceptions.verification.WantedButNotInvoked) Test(org.junit.Test)

Aggregations

DroppedSnapshotException (org.apache.hadoop.hbase.DroppedSnapshotException)12 IOException (java.io.IOException)9 InterruptedIOException (java.io.InterruptedIOException)7 Test (org.junit.Test)7 Put (org.apache.hadoop.hbase.client.Put)6 ArrayList (java.util.ArrayList)4 Configuration (org.apache.hadoop.conf.Configuration)4 FileSystem (org.apache.hadoop.fs.FileSystem)4 Path (org.apache.hadoop.fs.Path)3 DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException)3 HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration)3 FaultyFileSystem (org.apache.hadoop.hbase.regionserver.TestStore.FaultyFileSystem)3 AbstractList (java.util.AbstractList)2 List (java.util.List)2 Cell (org.apache.hadoop.hbase.Cell)2 HBaseIOException (org.apache.hadoop.hbase.HBaseIOException)2 KeyValue (org.apache.hadoop.hbase.KeyValue)2 NotServingRegionException (org.apache.hadoop.hbase.NotServingRegionException)2 RegionTooBusyException (org.apache.hadoop.hbase.RegionTooBusyException)2 FailedSanityCheckException (org.apache.hadoop.hbase.exceptions.FailedSanityCheckException)2