use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
the class TestWALLockup method testLockupWhenSyncInMiddleOfZigZagSetup.
/**
 * Reproduce locking up that happens when we get an inopportune sync during setup for
 * zigzaglatch wait. See HBASE-14317. If below is broken, we will see this test timeout because
 * it is locked up.
 * <p>First I need to set up some mocks for Server and RegionServerServices. I also need to
 * set up a dodgy WAL that will throw an exception when we go to append to it.
 * <p>Outline of the test:
 * <ol>
 * <li>Start a {@code LogRoller} over the dodgy WAL and open a region on top of it.</li>
 * <li>Write a batch of puts, then switch the WAL into its exception-throwing mode.</li>
 * <li>Append once more and sync; the sync is expected to fail.</li>
 * <li>Kick off a flush on a daemon thread and wait for the dodgy WAL's latch to reach zero.</li>
 * <li>Assert that the current WAL file changed, then try one more put with exceptions off.</li>
 * </ol>
 * @throws IOException if setting up the filesystem, WAL or region fails, or if the test body
 *   lets a WAL/region I/O error escape.
 */
@Test
public void testLockupWhenSyncInMiddleOfZigZagSetup() throws IOException {
  // Mocked up server and regionserver services. Needed below.
  RegionServerServices services = Mockito.mock(RegionServerServices.class);
  Mockito.when(services.getConfiguration()).thenReturn(CONF);
  Mockito.when(services.isStopped()).thenReturn(false);
  Mockito.when(services.isAborted()).thenReturn(false);
  // OK. Now I have my mocked up Server & RegionServerServices and dodgy WAL, go ahead with test.
  FileSystem fs = FileSystem.get(CONF);
  Path rootDir = new Path(dir + getName());
  DodgyFSLog dodgyWAL = new DodgyFSLog(fs, rootDir, getName(), CONF);
  dodgyWAL.init();
  // Remember which file the WAL started on so we can tell later whether it was rolled.
  Path originalWAL = dodgyWAL.getCurrentFileName();
  // I need a log roller running.
  LogRoller logRoller = new LogRoller(services);
  logRoller.addWAL(dodgyWAL);
  // There is no 'stop' once a logRoller is running.. it just dies.
  logRoller.start();
  // Now get a region and start adding in edits.
  final HRegion region = initHRegion(tableName, null, null, CONF, dodgyWAL);
  byte[] bytes = Bytes.toBytes(getName());
  NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
  scopes.put(COLUMN_FAMILY_BYTES, 0);
  MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
  try {
    // First get something into memstore. Make a Put and then pull the Cell out of it. Will
    // manage append and sync carefully in below to manufacture hang. We keep adding same
    // edit. WAL subsystem doesn't care.
    Put put = new Put(bytes);
    put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), bytes);
    WALKeyImpl key = new WALKeyImpl(region.getRegionInfo().getEncodedNameAsBytes(),
      TableName.META_TABLE_NAME, EnvironmentEdgeManager.currentTime(), mvcc, scopes);
    WALEdit edit = new WALEdit();
    // Named in lowerCamelCase so the local no longer shadows the CellScanner type.
    CellScanner cellScanner = put.cellScanner();
    assertTrue(cellScanner.advance());
    edit.add(cellScanner.current());
    // Put something in memstore and out in the WAL. Do a big number of appends so we push
    // out other side of the ringbuffer. If small numbers, stuff doesn't make it to WAL
    for (int i = 0; i < 1000; i++) {
      region.put(put);
    }
    // Set it so we start throwing exceptions.
    LOG.info("SET throwing of exception on append");
    dodgyWAL.throwException = true;
    // This append provokes a WAL roll request
    dodgyWAL.appendData(region.getRegionInfo(), key, edit);
    boolean exception = false;
    try {
      dodgyWAL.sync(false);
    } catch (Exception e) {
      // Expected: the dodgy WAL is in exception-throwing mode, so the sync should fail.
      exception = true;
    }
    assertTrue("Did not get sync exception", exception);
    // Get a memstore flush going too so we have same hung profile as up in the issue over
    // in HBASE-14317. Flush hangs trying to get sequenceid because the ringbuffer is held up
    // by the zigzaglatch waiting on syncs to come home.
    Thread t = new Thread("Flusher") {
      @Override
      public void run() {
        try {
          if (region.getMemStoreDataSize() <= 0) {
            throw new IOException("memstore size=" + region.getMemStoreDataSize());
          }
          region.flush(false);
        } catch (IOException e) {
          // Can fail trying to flush in middle of a roll. Not a failure. Will succeed later
          // when roll completes.
          LOG.info("In flush", e);
        }
        LOG.info("Exiting");
      }
    };
    // Daemon so a hung flusher cannot keep the JVM alive after the test.
    t.setDaemon(true);
    t.start();
    // Wait until the dodgy WAL has counted its latch down to zero. There is deliberately no
    // timeout here: if the lockup is back, this loop spins until the test itself times out.
    // NOTE(review): the latch is presumably released from inside the WAL roll -- confirm
    // against DodgyFSLog.
    while (dodgyWAL.latch.getCount() > 0) {
      Threads.sleep(1);
    }
    // Now assert I got a new WAL file put in place even though loads of errors above.
    // Compare by value, not identity: two distinct Path objects may name the same file, in
    // which case an identity check would pass without any roll having happened.
    Path currentWAL = dodgyWAL.getCurrentFileName();
    assertTrue("WAL was not rolled; still at " + originalWAL, !originalWAL.equals(currentWAL));
    // Can I append to it?
    dodgyWAL.throwException = false;
    try {
      region.put(put);
    } catch (Exception e) {
      LOG.info("In the put", e);
    }
  } finally {
    // To stop logRoller, its server has to say it is stopped.
    Mockito.when(services.isStopped()).thenReturn(true);
    Closeables.close(logRoller, true);
    // Close the region and the WAL independently so that a failure closing the region does
    // not skip closing the WAL.
    try {
      if (region != null) {
        region.close();
      }
    } catch (Exception e) {
      LOG.info("On way out", e);
    }
    try {
      dodgyWAL.close();
    } catch (Exception e) {
      LOG.info("On way out", e);
    }
  }
}
use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
the class ClusterMarkingEntryFilter method filter.
/**
 * Stamps the entry with this cluster's id and passes it through, or drops it.
 * <p>An entry is dropped (returns {@code null}) when the endpoint cannot replicate to the same
 * cluster and the entry's key already lists the peer cluster id, or when the entry carries no
 * edit or an empty one.
 * @param entry the WAL entry under consideration
 * @return the same {@code entry} after its key has been marked with {@code clusterId}, or
 *   {@code null} if it must not be replicated
 */
@Override
public Entry filter(Entry entry) {
  // don't replicate if the log entries have already been consumed by the cluster
  boolean alreadyConsumedByPeer = !replicationEndpoint.canReplicateToSameCluster()
    && entry.getKey().getClusterIds().contains(peerClusterId);
  if (alreadyConsumedByPeer) {
    return null;
  }
  WALEdit walEdit = entry.getEdit();
  WALKeyImpl walKey = (WALKeyImpl) entry.getKey();
  if (walEdit == null || walEdit.isEmpty()) {
    // Nothing to ship.
    return null;
  }
  // Mark that the current cluster has the change
  walKey.addClusterId(clusterId);
  return entry;
}
use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
the class TestLogRollAbort method testLogRollAfterSplitStart.
/**
 * Simulates a RegionServer that stalls in a GC pause and wakes up only after the master has
 * declared it dead and begun splitting its logs. A WAL roll attempted after the split has
 * started must fail. See HBASE-2312.
 * <p>The test writes a handful of entries, syncs, renames the WAL directory the way the
 * splitting code does, runs the split, and then expects {@code rollWriter()} to throw an
 * {@link IOException} caused by a {@link FileNotFoundException}.
 */
@Test
public void testLogRollAfterSplitStart() throws IOException {
  LOG.info("Verify wal roll after split starts will fail.");
  ServerName serverName =
    ServerName.valueOf("testLogRollAfterSplitStart", 16010, EnvironmentEdgeManager.currentTime());
  String logName = serverName.toString();
  Path thisTestsDir = new Path(HBASELOGDIR, AbstractFSWALProvider.getWALDirectoryName(logName));
  final WALFactory wals = new WALFactory(conf, logName);
  try {
    // put some entries in an WAL
    TableName tableName = TableName.valueOf(this.getClass().getName());
    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).build();
    WAL wal = wals.getWAL(regionInfo);
    MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1);
    int entryCount = 20;
    for (int row = 0; row < entryCount; row++) {
      WALEdit walEdit = new WALEdit();
      KeyValue cell = new KeyValue(Bytes.toBytes(row), tableName.getName(), tableName.getName());
      walEdit.add(cell);
      NavigableMap<byte[], Integer> replicationScopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
      replicationScopes.put(Bytes.toBytes("column"), 0);
      WALKeyImpl walKey = new WALKeyImpl(regionInfo.getEncodedNameAsBytes(), tableName,
        EnvironmentEdgeManager.currentTime(), mvcc, replicationScopes);
      wal.appendData(regionInfo, walKey, walEdit);
    }
    // Send the data to HDFS datanodes and close the HDFS writer
    wal.sync();
    Path oldPath = ((FSHLog) wal).getOldPath();
    ((AbstractFSWAL<?>) wal).replaceWriter(oldPath, null, null);
    // code taken from MasterFileSystem.getLogDirs(), which is called from
    // MasterFileSystem.splitLog() handles RS shutdowns (as observed by the splitting process)
    // rename the directory so a rogue RS doesn't create more WALs
    Path rsSplitDir = thisTestsDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
    boolean renamed = fs.rename(thisTestsDir, rsSplitDir);
    if (!renamed) {
      throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
    }
    LOG.debug("Renamed region directory: " + rsSplitDir);
    LOG.debug("Processing the old log files.");
    WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);
    LOG.debug("Trying to roll the WAL.");
    try {
      wal.rollWriter();
      Assert.fail("rollWriter() did not throw any exception.");
    } catch (IOException ioe) {
      // Only an IOException wrapping a FileNotFoundException counts as the expected outcome.
      Throwable cause = ioe.getCause();
      if (!(cause instanceof FileNotFoundException)) {
        Assert.fail("Unexpected exception: " + ioe);
      }
      LOG.info("Got the expected exception: ", cause);
    }
  } finally {
    wals.close();
    // Clean up the original directory if the rename never happened.
    if (fs.exists(thisTestsDir)) {
      fs.delete(thisTestsDir, true);
    }
  }
}
use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
the class TestRegionReplicationSink method testSizeCapacity.
/**
 * Adds three entries of growing size (600K, 1200K and 1800K as reported by the mocked edit)
 * and checks that each one ends up going out in its own batch: every time the outstanding
 * pair of replicate futures is completed, exactly two more replicate calls are observed, and
 * nothing is left pending at the end.
 */
@Test
public void testSizeCapacity() {
  MutableInt nextFuture = new MutableInt(0);
  List<CompletableFuture<Void>> replicateFutures = Stream
    .<CompletableFuture<Void>>generate(CompletableFuture::new)
    .limit(6)
    .collect(Collectors.toList());
  // Hand out the prepared futures in order, one per replicate call.
  when(conn.replicate(any(), anyList(), anyInt(), anyLong(), anyLong()))
    .then(invocation -> replicateFutures.get(nextFuture.getAndIncrement()));
  for (int idx = 0; idx < 3; idx++) {
    ServerCall<?> call = mock(ServerCall.class);
    WALKeyImpl walKey = mock(WALKeyImpl.class);
    WALEdit walEdit = mock(WALEdit.class);
    long editSize = (idx + 1) * 600L * 1024;
    when(walKey.estimatedSerializedSizeOf()).thenReturn(100L);
    when(walKey.getSequenceId()).thenReturn(idx + 1L);
    when(walEdit.estimatedSerializedSizeOf()).thenReturn(editSize);
    when(manager.increase(anyLong())).thenReturn(true);
    sink.add(walKey, walEdit, call);
  }
  // the first entry will be sent out immediately; the other two wait behind it
  verify(conn, times(2)).replicate(any(), anyList(), anyInt(), anyLong(), anyLong());
  // complete the first send
  replicateFutures.get(0).complete(null);
  replicateFutures.get(1).complete(null);
  // we should have another batch
  verify(conn, times(4)).replicate(any(), anyList(), anyInt(), anyLong(), anyLong());
  // complete the second send
  replicateFutures.get(2).complete(null);
  replicateFutures.get(3).complete(null);
  // the size of the second entry is greater than 1024 * 1024, so the third entry could not
  // share its batch and goes out in yet another one
  verify(conn, times(6)).replicate(any(), anyList(), anyInt(), anyLong(), anyLong());
  // complete the third send
  replicateFutures.get(4).complete(null);
  replicateFutures.get(5).complete(null);
  // everything should have been sent, so no pending entries remain
  assertEquals(0, sink.pendingSize());
}
use of org.apache.hadoop.hbase.wal.WALEdit in project hbase by apache.
the class TestRegionReplicationSink method testAddToFailedReplica.
/**
 * Checks how the sink treats a replica whose replicate call failed.
 * <p>Two edits are added; the second replicate call of the first edit is completed
 * exceptionally. The follow-up edit must then trigger only one further replicate call. After
 * a START_FLUSH marker edit is added, two more replicate calls are expected again, and the
 * sink must report no pending entries once they complete.
 */
@Test
public void testAddToFailedReplica() {
  MutableInt nextFuture = new MutableInt(0);
  List<CompletableFuture<Void>> replicateFutures = Stream
    .<CompletableFuture<Void>>generate(CompletableFuture::new)
    .limit(5)
    .collect(Collectors.toList());
  // Hand out the prepared futures in order, one per replicate call.
  when(conn.replicate(any(), anyList(), anyInt(), anyLong(), anyLong()))
    .then(invocation -> replicateFutures.get(nextFuture.getAndIncrement()));

  // First edit.
  ServerCall<?> firstCall = mock(ServerCall.class);
  WALKeyImpl firstKey = mock(WALKeyImpl.class);
  WALEdit firstEdit = mock(WALEdit.class);
  when(firstKey.estimatedSerializedSizeOf()).thenReturn(100L);
  when(firstKey.getSequenceId()).thenReturn(1L);
  when(firstEdit.estimatedSerializedSizeOf()).thenReturn(1000L);
  when(manager.increase(anyLong())).thenReturn(true);
  sink.add(firstKey, firstEdit, firstCall);

  // Second edit, queued behind the first.
  ServerCall<?> secondCall = mock(ServerCall.class);
  WALKeyImpl secondKey = mock(WALKeyImpl.class);
  WALEdit secondEdit = mock(WALEdit.class);
  when(secondKey.estimatedSerializedSizeOf()).thenReturn(200L);
  when(secondKey.getSequenceId()).thenReturn(1L);
  when(secondEdit.estimatedSerializedSizeOf()).thenReturn(2000L);
  when(manager.increase(anyLong())).thenReturn(true);
  sink.add(secondKey, secondEdit, secondCall);

  // fail the call to replica 2
  replicateFutures.get(0).complete(null);
  replicateFutures.get(1).completeExceptionally(new IOException("inject error"));
  // we should only call replicate once for the second edit, since replica 2 is marked as failed
  verify(conn, times(3)).replicate(any(), anyList(), anyInt(), anyLong(), anyLong());
  replicateFutures.get(2).complete(null);
  // everything should have been sent, so no pending entries remain
  assertEquals(0, sink.pendingSize());

  // Third edit: a real flush-start marker rather than a mock.
  ServerCall<?> flushCall = mock(ServerCall.class);
  WALKeyImpl flushKey = mock(WALKeyImpl.class);
  when(flushKey.estimatedSerializedSizeOf()).thenReturn(200L);
  when(flushKey.getSequenceId()).thenReturn(3L);
  // One empty file list per column family, keyed with the byte[] comparator.
  Map<byte[], List<Path>> committedFiles = td.getColumnFamilyNames().stream()
    .collect(Collectors.toMap(
      Function.identity(),
      family -> Collections.emptyList(),
      (left, right) -> {
        throw new IllegalStateException();
      },
      () -> new TreeMap<>(Bytes.BYTES_COMPARATOR)));
  FlushDescriptor flushDescriptor =
    ProtobufUtil.toFlushDescriptor(FlushAction.START_FLUSH, primary, 2L, committedFiles);
  WALEdit flushEdit = WALEdit.createFlushWALEdit(primary, flushDescriptor);
  sink.add(flushKey, flushEdit, flushCall);
  // the flush marker should have cleared the failedReplicas, so we will send the edit to 2
  // replicas again
  verify(conn, times(5)).replicate(any(), anyList(), anyInt(), anyLong(), anyLong());
  replicateFutures.get(3).complete(null);
  replicateFutures.get(4).complete(null);
  // everything should have been sent, so no pending entries remain
  assertEquals(0, sink.pendingSize());
}
Aggregations