use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class TestBookieHealthCheck method testNoQuarantineOnExpectedBkErrors.
/**
 * Verifies that a read which fails with an expected BookKeeper error does not
 * put either bookie of the ensemble into the quarantine set.
 */
@Test
public void testNoQuarantineOnExpectedBkErrors() throws Exception {
    final LedgerHandle lh = bkc.createLedger(2, 2, 2, BookKeeper.DigestType.CRC32, new byte[] {});

    // Populate the ledger with entries 0..numEntries-1.
    final int numEntries = 10;
    int written = 0;
    while (written < numEntries) {
        lh.addEntry(("msg-" + written).getBytes());
        written++;
    }

    BookieSocketAddress bookie1 = lh.getLedgerMetadata().getEnsemble(0).get(0);
    BookieSocketAddress bookie2 = lh.getLedgerMetadata().getEnsemble(0).get(1);

    try {
        // Entry 10 was never written, so this read targets a missing entry.
        lh.readEntries(10, 10);
    } catch (BKException expected) {
        // A BookKeeper error is tolerated here.
    }

    // Sleep for twice the health-check interval so the check has run.
    final long healthCheckWaitMillis = baseClientConf.getBookieHealthCheckIntervalSeconds() * 2 * 1000;
    Thread.sleep(healthCheckWaitMillis);

    // Neither bookie of the ensemble may appear in the quarantine set.
    for (BookieSocketAddress bookie : new BookieSocketAddress[] { bookie1, bookie2 }) {
        Assert.assertFalse(bkc.bookieWatcher.quarantinedBookies.asMap().containsKey(bookie));
    }
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class TestSpeculativeRead method testSpeculativeRead.
/**
 * Basic speculative read scenario.
 * <ul>
 * <li>Two clients are created: one with speculative reads disabled (timeout 0),
 * one with a speculative read timeout of 2000.</li>
 * <li>Both open the same ledger, then the second bookie of the ensemble is put to sleep.</li>
 * <li>Reading the first entry must succeed for both clients.</li>
 * <li>Reading the second entry must succeed for the speculative client only;
 * the non-speculative client is expected to time out.</li>
 * </ul>
 */
@Test
public void testSpeculativeRead() throws Exception {
    final long ledgerId = getLedgerToRead(3, 2);

    // A speculative timeout of 0 disables speculative reads.
    final BookKeeperTestClient bknospec = createClient(0);
    final BookKeeperTestClient bkspec = createClient(2000);

    final LedgerHandle lnospec = bknospec.openLedger(ledgerId, digestType, passwd);
    final LedgerHandle lspec = bkspec.openLedger(ledgerId, digestType, passwd);

    // Put the second bookie of the first ensemble to sleep until the latch is released.
    final CountDownLatch sleepLatch = new CountDownLatch(1);
    final BookieSocketAddress sleepingBookie = lnospec.getLedgerMetadata().getEnsembles().get(0L).get(1);
    sleepBookie(sleepingBookie, sleepLatch);

    try {
        // Entry 0: both reads are expected to complete successfully.
        final LatchCallback firstNoSpec = new LatchCallback();
        final LatchCallback firstSpec = new LatchCallback();
        lnospec.asyncReadEntries(0, 0, firstNoSpec, null);
        lspec.asyncReadEntries(0, 0, firstSpec, null);
        firstNoSpec.expectSuccess(2000);
        firstSpec.expectSuccess(2000);

        // Entry 1: the speculative client is expected to succeed,
        // while the non-speculative client is expected to time out.
        final LatchCallback secondNoSpec = new LatchCallback();
        final LatchCallback secondSpec = new LatchCallback();
        lnospec.asyncReadEntries(1, 1, secondNoSpec, null);
        lspec.asyncReadEntries(1, 1, secondSpec, null);
        secondSpec.expectSuccess(4000);
        secondNoSpec.expectTimeout(4000);

        // Exactly one bookie must now be tracked as slow by the placement policy.
        final RackawareEnsemblePlacementPolicy policy =
                (RackawareEnsemblePlacementPolicy) lspec.bk.placementPolicy;
        assertTrue(policy.slowBookies.asMap().size() == 1);

        final String specReadCounter = CLIENT_SCOPE + "." + SPECULATIVE_READ_COUNT;
        assertTrue("Stats should not reflect speculative reads if disabled",
                bknospec.getTestStatsProvider().getCounter(specReadCounter).get() == 0);
        assertTrue("Stats should reflect speculative reads",
                bkspec.getTestStatsProvider().getCounter(specReadCounter).get() > 0);
    } finally {
        // Wake the sleeping bookie before tearing down handles and clients.
        sleepLatch.countDown();
        lspec.close();
        lnospec.close();
        bkspec.close();
        bknospec.close();
    }
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class TestSpeculativeRead method testSpeculativeReadScheduling.
/**
 * Unit test for the speculative read scheduling method.
 *
 * <p>Builds three {@code SequenceReadRequest}s (entries 0, 2 and 4) against the
 * first ensemble of the ledger and checks which bookie, if any,
 * {@code maybeSendSpeculativeRead} picks for a given "heard from" bit set.
 * During cleanup every created request is given time to complete and is then
 * individually asserted to be complete.
 */
@Test
public void testSpeculativeReadScheduling() throws Exception {
    long id = getLedgerToRead(3, 2);
    int timeout = 1000;
    BookKeeper bkspec = createClient(timeout);
    LedgerHandle l = bkspec.openLedger(id, digestType, passwd);
    ArrayList<BookieSocketAddress> ensemble = l.getLedgerMetadata().getEnsembles().get(0L);

    // Bit i set means "a response has been heard from ensemble.get(i)".
    BitSet allHosts = new BitSet(ensemble.size());
    allHosts.set(0, ensemble.size());
    BitSet noHost = new BitSet(ensemble.size());
    BitSet secondHostOnly = new BitSet(ensemble.size());
    secondHostOnly.set(1, true);

    PendingReadOp.LedgerEntryRequest req0 = null, req2 = null, req4 = null;
    try {
        PendingReadOp op = new PendingReadOp(l, bkspec.scheduler, 0, 5);

        // if we've already heard from all hosts,
        // we only send the initial read
        req0 = op.new SequenceReadRequest(ensemble, l.getId(), 0);
        assertTrue("Should have sent to first", req0.maybeSendSpeculativeRead(allHosts).equals(ensemble.get(0)));
        assertNull("Should not have sent another", req0.maybeSendSpeculativeRead(allHosts));

        // if we have heard from some hosts, but not one we have sent to
        // send again
        req2 = op.new SequenceReadRequest(ensemble, l.getId(), 2);
        assertTrue("Should have sent to third", req2.maybeSendSpeculativeRead(noHost).equals(ensemble.get(2)));
        assertTrue("Should have sent to first", req2.maybeSendSpeculativeRead(secondHostOnly).equals(ensemble.get(0)));

        // if we have heard from some hosts, which includes one we sent to
        // do not read again
        req4 = op.new SequenceReadRequest(ensemble, l.getId(), 4);
        assertTrue("Should have sent to second", req4.maybeSendSpeculativeRead(noHost).equals(ensemble.get(1)));
        assertNull("Should not have sent another", req4.maybeSendSpeculativeRead(secondHostOnly));
    } finally {
        for (PendingReadOp.LedgerEntryRequest req : new PendingReadOp.LedgerEntryRequest[] { req0, req2, req4 }) {
            if (req != null) {
                int i = 0;
                while (!req.isComplete()) {
                    if (i++ > 10) {
                        // give up after roughly ten seconds of polling
                        break;
                    }
                    Thread.sleep(1000);
                }
                // Check the request being iterated. The original asserted on req0
                // every time, so req2 and req4 were never actually verified.
                assertTrue("Request should be done", req.isComplete());
            }
        }
        l.close();
        bkspec.close();
    }
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class TestSpeculativeRead method testSpeculativeReadMultipleReplicasDown.
/**
 * Test that if more than one replica is down, we can still read, as long as the quorum
 * size is larger than the number of down replicas.
 *
 * <p>Bookies at ensemble positions 1, 2 and 4 are put to sleep; entries 0..4 are then
 * read one by one and the completion time of each read is checked against the
 * speculative read timeout.
 */
@Test
public void testSpeculativeReadMultipleReplicasDown() throws Exception {
    long id = getLedgerToRead(5, 5);
    int timeout = 5000;
    BookKeeper bkspec = createClient(timeout);
    LedgerHandle l = bkspec.openLedger(id, digestType, passwd);

    // Look the first ensemble up once instead of repeating the lookup for every bookie.
    final ArrayList<BookieSocketAddress> ensemble = l.getLedgerMetadata().getEnsembles().get(0L);

    // sleep bookie 1, 2 & 4
    CountDownLatch sleepLatch = new CountDownLatch(1);
    sleepBookie(ensemble.get(1), sleepLatch);
    sleepBookie(ensemble.get(2), sleepLatch);
    sleepBookie(ensemble.get(4), sleepLatch);
    try {
        // read first entry, should complete faster than timeout
        // as bookie 0 has the entry
        LatchCallback latch0 = new LatchCallback();
        l.asyncReadEntries(0, 0, latch0, null);
        latch0.expectSuccess(timeout / 2);

        // second should have to hit two timeouts (bookie 1 & 2)
        // bookie 3 has the entry
        LatchCallback latch1 = new LatchCallback();
        l.asyncReadEntries(1, 1, latch1, null);
        latch1.expectTimeout(timeout);
        latch1.expectSuccess(timeout * 2);
        LOG.info("Timeout {} latch1 duration {}", timeout, latch1.getDuration());
        assertTrue("should have taken longer than two timeouts, but less than 3",
                latch1.getDuration() >= timeout * 2 && latch1.getDuration() < timeout * 3);

        // bookies 1 & 2 should be registered as slow bookies because of speculative reads
        Set<BookieSocketAddress> expectedSlowBookies = new HashSet<>();
        expectedSlowBookies.add(ensemble.get(1));
        expectedSlowBookies.add(ensemble.get(2));
        // JUnit convention is (expected, actual); the reversed order produced a
        // misleading "expected ... but was ..." message on failure.
        assertEquals(expectedSlowBookies,
                ((RackawareEnsemblePlacementPolicy) l.bk.placementPolicy).slowBookies.asMap().keySet());

        // third should not hit timeouts since bookies 1 & 2 are registered as slow
        // bookie 3 has the entry
        LatchCallback latch2 = new LatchCallback();
        l.asyncReadEntries(2, 2, latch2, null);
        latch2.expectSuccess(timeout);

        // fourth should have no timeout
        // bookie 3 has the entry
        LatchCallback latch3 = new LatchCallback();
        l.asyncReadEntries(3, 3, latch3, null);
        latch3.expectSuccess(timeout / 2);

        // fifth should hit one timeout, (bookie 4)
        // bookie 0 has the entry
        LatchCallback latch4 = new LatchCallback();
        l.asyncReadEntries(4, 4, latch4, null);
        latch4.expectTimeout(timeout / 2);
        latch4.expectSuccess(timeout);
        LOG.info("Timeout {} latch4 duration {}", timeout, latch4.getDuration());
        assertTrue("should have taken longer than one timeout, but less than 2",
                latch4.getDuration() >= timeout && latch4.getDuration() < timeout * 2);
    } finally {
        // wake the sleeping bookies before closing the handle and the client
        sleepLatch.countDown();
        l.close();
        bkspec.close();
    }
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class ParallelLedgerRecoveryTest method testRecoveryWhenClosingLedgerHandle.
/**
 * Exercises ledger recovery by a second client racing with the writer closing
 * its own ledger handle, and checks that the last-add-confirmed (LAC) value
 * observed by readers never moves past 1.
 *
 * <p>Three clients share one configuration (parallel recovery read enabled,
 * recovery read batch size 1, very large add/read timeouts):
 * the writer ({@code newBk0}/{@code lh0}), the recovering client
 * ({@code newBk1}/{@code lh1}) and a plain reader ({@code readBk}/{@code readLh}).
 * The numbered comments in the body mark the steps of the scenario; their
 * relative order is what the test is about.
 */
@Test
public void testRecoveryWhenClosingLedgerHandle() throws Exception {
byte[] passwd = "recovery-when-closing-ledger-handle".getBytes(UTF_8);
// start from the base client configuration and override recovery/timeout settings
ClientConfiguration newConf = new ClientConfiguration();
newConf.addConfiguration(baseClientConf);
newConf.setEnableParallelRecoveryRead(true);
newConf.setRecoveryReadBatchSize(1);
newConf.setAddEntryTimeout(9999999);
newConf.setReadEntryTimeout(9999999);
// writer: ledger with ensemble size 1, write quorum 1, ack quorum 1
final BookKeeper newBk0 = new BookKeeper(newConf);
final LedgerHandle lh0 = newBk0.createLedger(1, 1, 1, digestType, passwd);
// recovering client: opens the same ledger without triggering recovery
final BookKeeper newBk1 = new BookKeeper(newConf);
final LedgerHandle lh1 = newBk1.openLedgerNoRecovery(lh0.getId(), digestType, passwd);
// NOTE(review): the cast assumes the test harness installs a TestLedgerManager - confirm
final TestLedgerManager tlm1 = (TestLedgerManager) newBk1.getUnderlyingLedgerManager();
// independent reader used to observe the LAC from the outside
final BookKeeper readBk = new BookKeeper(newConf);
final LedgerHandle readLh = readBk.openLedgerNoRecovery(lh0.getId(), digestType, passwd);
LOG.info("Create ledger {}", lh0.getId());
// 0) replace the ledger's only bookie with a fake bookie (DelayResponseBookie)
//    started from the killed bookie's configuration
BookieSocketAddress address = lh0.getLedgerMetadata().currentEnsemble.get(0);
ServerConfiguration conf = killBookie(address);
conf.setLedgerStorageClass(InterleavedLedgerStorage.class.getName());
DelayResponseBookie fakeBookie = new DelayResponseBookie(conf);
bs.add(startBookie(conf, fakeBookie));
bsConfs.add(conf);
// 1) bk0 writes two entries
lh0.addEntry("entry-0".getBytes(UTF_8));
lh0.addEntry("entry-1".getBytes(UTF_8));
// 2) both readers read the last add confirmed and are expected to see 0
long lac = readLh.readLastConfirmed();
assertEquals(0L, lac);
lac = lh1.readLastConfirmed();
assertEquals(0L, lac);
// NOTE(review): addLatch and numAddFailures are updated by the add callbacks below
// but are never awaited or asserted anywhere in this method
final CountDownLatch addLatch = new CountDownLatch(3);
final AtomicInteger numAddFailures = new AtomicInteger(0);
// 3) bk0 writes three more entries (2..4) asynchronously while adds are delayed on the fake bookie
fakeBookie.delayAdd(true);
for (int i = 2; i < 5; i++) {
lh0.asyncAddEntry(("entry-" + i).getBytes(UTF_8), new AsyncCallback.AddCallback() {
@Override
public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) {
if (BKException.Code.OK != rc) {
numAddFailures.incrementAndGet();
}
addLatch.countDown();
}
}, null);
}
while (fakeBookie.delayQueue.size() < 3) {
// wait until all add requests are queued (polls every 100 ms, with no upper bound)
Thread.sleep(100);
}
// 4) lac moved to 1L
lac = readLh.readLastConfirmed();
assertEquals(1L, lac);
lac = lh1.readLastConfirmed();
assertEquals(1L, lac);
// 5) bk1 is doing recovery, but the metadata update is delayed
final CountDownLatch readLatch = new CountDownLatch(1);
fakeBookie.delayAdd(false);
// presumably holds back reads of entry 3 until readLatch is released - confirm in DelayResponseBookie
fakeBookie.delayRead(true, 3L, readLatch);
final CountDownLatch metadataLatch = new CountDownLatch(1);
tlm1.setLatch(metadataLatch);
final CountDownLatch recoverLatch = new CountDownLatch(1);
final AtomicBoolean recoverSuccess = new AtomicBoolean(false);
lh1.recover(new GenericCallback<Void>() {
@Override
public void operationComplete(int rc, Void result) {
LOG.info("Recovering ledger {} completed : {}", lh1.getId(), rc);
// record the outcome, then unblock the test thread waiting in step 7
recoverSuccess.set(BKException.Code.OK == rc);
recoverLatch.countDown();
}
});
// give the recovery two seconds to get under way before releasing the delayed read
Thread.sleep(2000);
readLatch.countDown();
// we don't expect the lac to be updated before the ledger has been successfully marked as in recovery
lac = readLh.readLastConfirmed();
assertEquals(1L, lac);
// 6) bk0 closes ledger before bk1 marks in recovery
lh0.close();
assertEquals(1L, lh0.getLastAddConfirmed());
// 7) bk1 proceeds with recovery and succeeds
metadataLatch.countDown();
recoverLatch.await();
assertTrue(recoverSuccess.get());
assertEquals(1L, lh1.getLastAddConfirmed());
// 8) make sure the lac was not advanced while the ledger was closed by bk0 and recovered by bk1
// the holders start at sentinel -1234 so an uninvoked callback cannot pass the assertions
final AtomicLong lacHolder = new AtomicLong(-1234L);
final AtomicInteger rcHolder = new AtomicInteger(-1234);
final CountDownLatch doneLatch = new CountDownLatch(1);
new ReadLastConfirmedOp(readLh, new ReadLastConfirmedOp.LastConfirmedDataCallback() {
@Override
public void readLastConfirmedDataComplete(int rc, DigestManager.RecoveryData data) {
rcHolder.set(rc);
lacHolder.set(data.getLastAddConfirmed());
doneLatch.countDown();
}
}).initiate();
doneLatch.await();
assertEquals(BKException.Code.OK, rcHolder.get());
assertEquals(1L, lacHolder.get());
// the clients are closed only when every assertion above has passed
newBk0.close();
newBk1.close();
readBk.close();
}
Aggregations