use of org.apache.bookkeeper.meta.LedgerUnderreplicationManager in project bookkeeper by apache.
the class AuditorPeriodicCheckTest method testEntryLogCorruption.
/**
 * Tests that the periodic check detects corruption in the bookie entry logs.
 *
 * <p>A ledger is written and flushed, every {@code .log} file in the first
 * bookie's current ledger directory is overwritten with junk, the bookies are
 * restarted, and the test then polls until the ledger is reported as
 * under replicated.
 */
@Test
public void testEntryLogCorruption() throws Exception {
    LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
    LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager();
    try {
        underReplicationManager.disableLedgerReplication();

        LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes());
        long ledgerId = lh.getId();
        for (int i = 0; i < 100; i++) {
            lh.addEntry("testdata".getBytes());
        }
        lh.close();

        BookieAccessor.forceFlush(bs.get(0).getBookie());

        File ledgerDir = bsConfs.get(0).getLedgerDirs()[0];
        ledgerDir = Bookie.getCurrentDirectory(ledgerDir);

        // corrupt the entry logs
        File[] entryLogs = ledgerDir.listFiles(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.endsWith(".log");
            }
        });
        // listFiles returns null when the directory cannot be listed; fail with a
        // clear message instead of a NullPointerException in the loop below.
        assertTrue("Could not list entry logs in " + ledgerDir, entryLogs != null);

        ByteBuffer junk = ByteBuffer.allocate(1024 * 1024);
        for (File f : entryLogs) {
            FileOutputStream out = new FileOutputStream(f);
            try {
                // Write a duplicate so each file gets the full junk payload; writing
                // the shared buffer directly would drain it on the first file and
                // leave every later file merely truncated.
                out.getChannel().write(junk.duplicate());
            } finally {
                out.close();
            }
        }

        // restart to clear read buffers
        restartBookies();

        underReplicationManager.enableLedgerReplication();
        long underReplicatedLedger = -1;
        for (int i = 0; i < 10; i++) {
            underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate();
            if (underReplicatedLedger != -1) {
                break;
            }
            Thread.sleep(CHECK_INTERVAL * 1000);
        }
        assertEquals("Ledger should be under replicated", ledgerId, underReplicatedLedger);
    } finally {
        // release the manager even when an assertion above fails
        underReplicationManager.close();
    }
}
use of org.apache.bookkeeper.meta.LedgerUnderreplicationManager in project bookkeeper by apache.
the class AuditorPeriodicCheckTest method testPeriodicCheckWhenDisabled.
/**
 * Tests that the periodic checker does not run while auto replication is disabled.
 *
 * <p>Ledgers are written, replication is disabled, and bookie 0 is replaced by a
 * bookie whose {@code readEntry} always fails and counts its invocations. The test
 * asserts that no reads happen while disabled, then enables replication briefly,
 * drains whatever was marked, disables again and verifies nothing new is marked.
 */
@Test
public void testPeriodicCheckWhenDisabled() throws Exception {
    LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
    final LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager();
    try {
        final int numLedgers = 10;
        final int numMsgs = 2;
        final CountDownLatch completeLatch = new CountDownLatch(numMsgs * numLedgers);
        final AtomicInteger rc = new AtomicInteger(BKException.Code.OK);

        List<LedgerHandle> lhs = new ArrayList<LedgerHandle>();
        for (int i = 0; i < numLedgers; i++) {
            LedgerHandle lh = bkc.createLedger(3, 3, DigestType.CRC32, "passwd".getBytes());
            lhs.add(lh);
            // must match the latch size above, so use numMsgs rather than a literal
            for (int j = 0; j < numMsgs; j++) {
                lh.asyncAddEntry("testdata".getBytes(), new AddCallback() {
                    public void addComplete(int rc2, LedgerHandle lh, long entryId, Object ctx) {
                        // Record (and log) only the first real failure; a successful
                        // add must not be reported as a failure.
                        if (rc2 != BKException.Code.OK && rc.compareAndSet(BKException.Code.OK, rc2)) {
                            LOG.info("Failed to add entry : {}", BKException.getMessage(rc2));
                        }
                        completeLatch.countDown();
                    }
                }, null);
            }
        }
        completeLatch.await();
        if (rc.get() != BKException.Code.OK) {
            throw BKException.create(rc.get());
        }
        for (LedgerHandle lh : lhs) {
            lh.close();
        }

        underReplicationManager.disableLedgerReplication();

        final AtomicInteger numReads = new AtomicInteger(0);
        ServerConfiguration conf = killBookie(0);
        Bookie deadBookie = new Bookie(conf) {
            @Override
            public ByteBuf readEntry(long ledgerId, long entryId) throws IOException, NoLedgerException {
                // we want to disable during checking
                numReads.incrementAndGet();
                throw new IOException("Fake I/O exception");
            }
        };
        bsConfs.add(conf);
        bs.add(startBookie(conf, deadBookie));

        Thread.sleep(CHECK_INTERVAL * 2000);
        assertEquals("Nothing should have tried to read", 0, numReads.get());

        underReplicationManager.enableLedgerReplication();
        // give it time to run
        Thread.sleep(CHECK_INTERVAL * 2000);

        underReplicationManager.disableLedgerReplication();
        // give it time to stop, from this point nothing new should be marked
        Thread.sleep(CHECK_INTERVAL * 2000);

        // drain everything that was marked while replication was enabled
        int numUnderreplicated = 0;
        long underReplicatedLedger;
        while ((underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate()) != -1) {
            numUnderreplicated++;
            underReplicationManager.markLedgerReplicated(underReplicatedLedger);
        }

        // give a chance to run again (it shouldn't, it's disabled)
        Thread.sleep(CHECK_INTERVAL * 2000);

        // ensure that nothing is marked as underreplicated
        underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate();
        assertEquals("There should be no underreplicated ledgers", -1, underReplicatedLedger);

        LOG.info("{} of {} ledgers underreplicated", numUnderreplicated, numLedgers);
        // at least one, and at most all, of the ledgers must have been flagged
        assertTrue("All should be underreplicated", numUnderreplicated <= numLedgers && numUnderreplicated > 0);
    } finally {
        // release the manager even when an assertion above fails
        underReplicationManager.close();
    }
}
use of org.apache.bookkeeper.meta.LedgerUnderreplicationManager in project bookkeeper by apache.
the class AuditorPeriodicCheckTest method testFailedWriteRecovery.
/**
 * Validates that the periodic ledger check will fix entries with a failed write.
 *
 * <p>One bookie of a 2/2/1 ledger is replaced by a bookie that rejects writes,
 * entries are added, and the test waits for the ledger to be reported as
 * under replicated. Replication workers are then started, and the test finally
 * checks that the replaced bookie no longer appears in any ensemble.
 */
@Test
public void testFailedWriteRecovery() throws Exception {
    LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
    LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager();
    List<ReplicationWorker> workers = new ArrayList<ReplicationWorker>();
    long ledgerId;
    BookieSocketAddress replacedBookie;
    try {
        try {
            underReplicationManager.disableLedgerReplication();

            LedgerHandle lh = bkc.createLedger(2, 2, 1, DigestType.CRC32, "passwd".getBytes());
            ledgerId = lh.getId();

            // kill one of the bookies and replace it with one that rejects write;
            // This way we get into the under replication state
            replacedBookie = replaceBookieWithWriteFailingBookie(lh);

            // Write a few entries; this should cause under replication
            byte[] data = "foobar".getBytes();
            lh.addEntry(data);
            lh.addEntry(data);
            lh.addEntry(data);
            lh.close();

            // enable under replication detection and wait for it to report
            // under replicated ledger
            underReplicationManager.enableLedgerReplication();
            long underReplicatedLedger = -1;
            for (int i = 0; i < 5; i++) {
                underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate();
                if (underReplicatedLedger != -1) {
                    break;
                }
                Thread.sleep(CHECK_INTERVAL * 1000);
            }
            assertEquals("Ledger should be under replicated", ledgerId, underReplicatedLedger);

            // now start the replication workers
            for (int i = 0; i < numBookies; i++) {
                ReplicationWorker rw = new ReplicationWorker(zkc, bsConfs.get(i), NullStatsLogger.INSTANCE);
                rw.start();
                workers.add(rw);
            }
        } finally {
            // closed after the workers are started, as before, but now also on failure
            underReplicationManager.close();
        }

        // Wait for ensemble to change after replication
        Thread.sleep(3000);
    } finally {
        // never leave replication workers running, even if the test fails early
        for (ReplicationWorker rw : workers) {
            rw.shutdown();
        }
    }

    // check that ensemble has changed and the bookie that rejected writes has
    // been replaced in the ensemble
    LedgerHandle newLh = bkc.openLedger(ledgerId, DigestType.CRC32, "passwd".getBytes());
    try {
        for (Map.Entry<Long, ArrayList<BookieSocketAddress>> e : LedgerHandleAdapter.getLedgerMetadata(newLh).getEnsembles().entrySet()) {
            ArrayList<BookieSocketAddress> ensemble = e.getValue();
            assertFalse("Ensemble hasn't been updated", ensemble.contains(replacedBookie));
        }
    } finally {
        newLh.close();
    }
}
use of org.apache.bookkeeper.meta.LedgerUnderreplicationManager in project bookkeeper by apache.
the class TestLedgerUnderreplicationManager method testLocking.
/**
 * Exercises the locking behaviour of the ledger underreplication manager.
 *
 * <p>With a single ledger marked for rereplication, the client that picks it
 * up first holds it, and a second client must not be able to obtain it. Once
 * the first client's ZooKeeper session goes away, the second client should
 * receive the ledger.
 */
@Test
public void testLocking() throws Exception {
    final Long markedLedger = 0xfeadeefdacL;
    final String replica = "localhost:3181";

    LedgerUnderreplicationManager firstManager = lmf1.newLedgerUnderreplicationManager();
    LedgerUnderreplicationManager secondManager = lmf2.newLedgerUnderreplicationManager();
    firstManager.markLedgerUnderreplicated(markedLedger, replica);

    // the first client obtains the only marked ledger
    Future<Long> firstClaim = getLedgerToReplicate(firstManager);
    Long claimedByFirst = firstClaim.get(5, TimeUnit.SECONDS);
    assertEquals("Should be the ledger I just marked", markedLedger, claimedByFirst);

    // the second client must time out while the first still holds the ledger
    Future<Long> secondClaim = getLedgerToReplicate(secondManager);
    try {
        secondClaim.get(1, TimeUnit.SECONDS);
        fail("Shouldn't be able to find a ledger to replicate");
    } catch (TimeoutException expected) {
        // correct behaviour
    }

    // closing the first client's ZooKeeper handle should kill the lock
    zkc1.close();
    zkc1 = null;

    // the pending request of the second client should now complete
    Long claimedBySecond = secondClaim.get(5, TimeUnit.SECONDS);
    assertEquals("Should be the ledger I marked", markedLedger, claimedBySecond);
}
use of org.apache.bookkeeper.meta.LedgerUnderreplicationManager in project bookkeeper by apache.
the class TestLedgerUnderreplicationManager method testEnableLedgerReplication.
/**
 * Tests enabling the ledger re-replication. After enableLedgerReplication,
 * a pending getLedgerToRereplicate() call should continue.
 *
 * <p>The latch is counted down twice: once by a watcher when the lock znode for
 * the ledger is created, and once by the worker thread after
 * getLedgerToRereplicate() returns. Neither should happen while replication
 * is disabled; both should happen after it is enabled.
 */
@Test
public void testEnableLedgerReplication() throws Exception {
    isLedgerReplicationDisabled = true;
    final LedgerUnderreplicationManager replicaMgr = lmf1.newLedgerUnderreplicationManager();

    // simulate few urLedgers before disabling
    final Long ledgerA = 0xfeadeefdacL;
    final String missingReplica = "localhost:3181";
    try {
        replicaMgr.markLedgerUnderreplicated(ledgerA, missingReplica);
    } catch (UnavailableException e) {
        LOG.debug("Unexpected exception while marking urLedger", e);
        fail("Unexpected exception while marking urLedger" + e.getMessage());
    }

    // disabling replication
    replicaMgr.disableLedgerReplication();
    LOG.debug("Disabled Ledeger Replication");

    String znodeA = getUrLedgerZnode(ledgerA);
    final CountDownLatch znodeLatch = new CountDownLatch(2);
    String urledgerA = StringUtils.substringAfterLast(znodeA, "/");
    String urLockLedgerA = basePath + "/locks/" + urledgerA;
    zkc1.exists(urLockLedgerA, new Watcher() {
        @Override
        public void process(WatchedEvent event) {
            if (event.getType() == EventType.NodeCreated) {
                znodeLatch.countDown();
                LOG.debug("Recieved node creation event for the zNodePath:" + event.getPath());
            }
        }
    });

    // getLedgerToRereplicate is waiting until enable rereplication
    Thread thread1 = new Thread() {
        @Override
        public void run() {
            try {
                Long lA = replicaMgr.getLedgerToRereplicate();
                // expected value first, so a failure message reads correctly
                assertEquals("Should be the ledger I just marked", ledgerA, lA);
                isLedgerReplicationDisabled = false;
                znodeLatch.countDown();
            } catch (UnavailableException e) {
                LOG.debug("Unexpected exception while getting urLedger", e);
                isLedgerReplicationDisabled = false;
            }
        }
    };
    thread1.start();

    try {
        // while disabled, neither the lock znode nor the worker thread may progress
        assertFalse("shouldn't complete", znodeLatch.await(1, TimeUnit.SECONDS));
        assertTrue("Ledger replication is not disabled!", isLedgerReplicationDisabled);
        assertEquals("Failed to disable ledger replication!", 2, znodeLatch.getCount());

        replicaMgr.enableLedgerReplication();
        // result is checked through the latch count assertion below
        znodeLatch.await(5, TimeUnit.SECONDS);
        LOG.debug("Enabled Ledeger Replication");
        assertTrue("Ledger replication is not enabled!", !isLedgerReplicationDisabled);
        assertEquals("Failed to enable ledger replication!", 0, znodeLatch.getCount());
    } finally {
        thread1.interrupt();
    }
}
Aggregations