use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class BookieRecoveryTest method testBookieRecoveryOnOpenedLedgers.
/**
 * Recovers the data of a killed bookie while the affected ledgers are still
 * held open by their writers, checks that every ledger is reported as fully
 * replicated, and finally checks that writing through the original handles
 * no longer succeeds.
 */
@Test
public void testBookieRecoveryOnOpenedLedgers() throws Exception {
    final int ledgerCount = 3;
    final int entriesPerLedger = 10;

    // Create the ledgers and fill each of them with dummy entries.
    List<LedgerHandle> handles = createLedgers(ledgerCount, numBookies, 2);
    writeEntriestoLedgers(entriesPerLedger, 0, handles);

    // Take the first ensemble recorded for the first ledger and shut down
    // the bookie that sits in its last slot.
    ArrayList<BookieSocketAddress> ensemble = handles.get(0)
            .getLedgerMetadata()
            .getEnsembles()
            .entrySet()
            .iterator()
            .next()
            .getValue();
    int lastSlot = ensemble.size() - 1;
    BookieSocketAddress victim = ensemble.get(lastSlot);
    killBookie(victim);

    // Bring up a fresh bookie so there is a target for the replicated data.
    startNewBookie();

    LOG.info("Now recover the data on the killed bookie (" + victim + ") and replicate it to a random available one");
    bkAdmin.recoverBookieData(victim);

    for (LedgerHandle handle : handles) {
        boolean replicated = verifyFullyReplicated(handle, entriesPerLedger);
        assertTrue("Not fully replicated", replicated);
    }

    try {
        // Writing through the old handles must not succeed any more.
        writeEntriestoLedgers(entriesPerLedger, 0, handles);
        fail("should not reach here");
    } catch (Exception expected) {
        // Expected: the write is rejected. fail() raises an AssertionError,
        // which is not an Exception and therefore still propagates.
    }
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class LedgerRecoveryTest method testLedgerRecoveryWithRollingRestart.
/**
 * A recovery attempted during a rolling restart must not affect the ability
 * to recover the ledger later.
 *
 * <p>We have a ledger on ensemble B1,B2,B3. The sequence of events is:
 * <ol>
 *   <li>B1 is brought down for maintenance.</li>
 *   <li>Ledger recovery is started.</li>
 *   <li>B2 answers read last confirmed.</li>
 *   <li>B1 is replaced in the ensemble by B4.</li>
 *   <li>The write to B4 fails for some reason.</li>
 *   <li>B1 comes back up.</li>
 *   <li>B2 goes down for maintenance.</li>
 *   <li>Ledger recovery starts (the ledger is now unavailable).</li>
 * </ol>
 *
 * @see <a href="https://issues.apache.org/jira/browse/BOOKKEEPER-355">BOOKKEEPER-355</a>
 */
@Test
public void testLedgerRecoveryWithRollingRestart() throws Exception {
    final byte[] emptyPasswd = "".getBytes();
    final int entryCount = (numBookies * 3) + 1;

    LedgerHandle lhbefore = bkc.createLedger(numBookies, 2, digestType, emptyPasswd);
    for (int written = 0; written < entryCount; written++) {
        lhbefore.addEntry("data".getBytes());
    }

    // Add a bookie to the cluster whose recovery writes always fail.
    ServerConfiguration deadConf = newServerConfiguration();
    Bookie failingBookie = new Bookie(deadConf) {
        @Override
        public void recoveryAddEntry(ByteBuf entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException {
            // Reject the request to simulate a slow and failed bookie.
            throw new IOException("Couldn't write for some reason");
        }
    };
    bsConfs.add(deadConf);
    bs.add(startBookie(deadConf, failingBookie));

    // Kill the bookie in the first slot of the current ensemble.
    BookieSocketAddress firstBookie = lhbefore.getLedgerMetadata().currentEnsemble.get(0);
    ServerConfiguration firstConf = killBookie(firstBookie);

    // One ensemble member is down and the extra bookie started above rejects
    // recovery writes, so opening (and thereby recovering) the ledger is
    // expected to fail.
    try {
        bkc.openLedger(lhbefore.getId(), digestType, emptyPasswd);
        fail("Shouldn't be able to open ledger, there should be entries missing");
    } catch (BKException.BKLedgerRecoveryException expected) {
        // expected
    }

    // Restart the first bookie, then kill the one in the second slot.
    bsConfs.add(firstConf);
    bs.add(startBookie(firstConf));
    BookieSocketAddress secondBookie = lhbefore.getLedgerMetadata().currentEnsemble.get(1);
    ServerConfiguration secondConf = killBookie(secondBookie);

    // Use the async API, because this could trigger an assertion.
    final AtomicInteger openResult = new AtomicInteger(0);
    final CountDownLatch openDone = new CountDownLatch(1);
    AsyncCallback.OpenCallback onOpen = new AsyncCallback.OpenCallback() {
        @Override
        public void openComplete(int rc, LedgerHandle lh, Object ctx) {
            openResult.set(rc);
            openDone.countDown();
            if (rc != BKException.Code.OK) {
                return;
            }
            // The open unexpectedly succeeded; release the handle anyway.
            try {
                lh.close();
            } catch (Exception e) {
                LOG.error("Exception closing ledger handle", e);
            }
        }
    };
    bkc.asyncOpenLedger(lhbefore.getId(), digestType, emptyPasswd, onOpen, null);

    boolean completed = openDone.await(5, TimeUnit.SECONDS);
    assertTrue("Open call should have completed", completed);
    assertTrue("Open should not have succeeded", openResult.get() != BKException.Code.OK);

    // With the second bookie back, the ledger must be recoverable again.
    bsConfs.add(secondConf);
    bs.add(startBookie(secondConf));
    LedgerHandle lhafter = bkc.openLedger(lhbefore.getId(), digestType, emptyPasswd);
    assertEquals("Fenced ledger should have correct lastAddConfirmed", lhbefore.getLastAddConfirmed(), lhafter.getLastAddConfirmed());
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class TestDelayEnsembleChange method verifyEntriesRange.
/**
 * Reads every entry in {@code [startEntry, untilEntry)} directly from each
 * bookie of the ensemble that the ledger metadata reports for that entry and
 * checks the per-entry outcome counters as bounds.
 *
 * @param lh              handle of the ledger whose entries are checked
 * @param startEntry      first entry id to check (inclusive)
 * @param untilEntry      entry id at which checking stops (exclusive)
 * @param expectedSuccess upper bound on the number of successful reads per entry
 * @param expectedMissing lower bound on the number of missing reads per entry
 * @throws Exception if issuing the reads or waiting for them fails
 */
private void verifyEntriesRange(LedgerHandle lh, long startEntry, long untilEntry, long expectedSuccess, long expectedMissing) throws Exception {
    LedgerMetadata metadata = lh.getLedgerMetadata();
    long entryId = startEntry;
    while (entryId < untilEntry) {
        ArrayList<BookieSocketAddress> ensemble = metadata.getEnsemble(entryId);
        VerificationCallback verification = new VerificationCallback(ensemble.size());

        // Issue one direct read per bookie; the bookie address doubles as the callback context.
        for (BookieSocketAddress bookie : ensemble) {
            bkc.getBookieClient().readEntry(bookie, lh.getId(), entryId, verification, bookie, 0, null);
        }

        // Block on the callback's latch before inspecting its counters.
        verification.latch.await();

        assertTrue(expectedSuccess >= verification.numSuccess.get());
        assertTrue(expectedMissing <= verification.numMissing.get());
        assertEquals(0, verification.numFailure.get());

        entryId++;
    }
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class TestDelayEnsembleChange method testChangeEnsembleSecondBookieReadOnly.
/**
 * Checks that, with delayed ensemble change, killing one bookie does not by
 * itself change the ensemble, but a second bookie turning read-only does:
 * afterwards neither the killed nor the read-only bookie may remain in the
 * current ensemble.
 */
@Test
public void testChangeEnsembleSecondBookieReadOnly() throws Exception {
    LedgerHandle lh = bkc.createLedger(3, 3, 2, digestType, testPasswd);
    byte[] data = "foobar".getBytes();
    int numEntries = 10;
    for (int i = 0; i < numEntries; i++) {
        lh.addEntry(data);
    }
    BookieSocketAddress failedBookie = lh.getLedgerMetadata().currentEnsemble.get(0);
    BookieSocketAddress readOnlyBookie = lh.getLedgerMetadata().currentEnsemble.get(1);
    // Kill the first bookie; its configuration is not needed because it is never restarted.
    killBookie(failedBookie);
    for (int i = 0; i < numEntries; i++) {
        lh.addEntry(data);
    }
    // A single failed bookie must not trigger an ensemble change yet.
    assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", 1, lh.getLedgerMetadata().getEnsembles().size());
    // Turn a second bookie of the ensemble read-only and keep writing.
    setBookieToReadOnly(readOnlyBookie);
    for (int i = 0; i < numEntries; i++) {
        lh.addEntry(data);
    }
    // Now the ensemble must have changed, replacing both unusable bookies.
    assertEquals("The ensemble should change when a bookie is readonly even if we delay ensemble change.", 2, lh.getLedgerMetadata().getEnsembles().size());
    assertEquals(3, lh.getLedgerMetadata().currentEnsemble.size());
    assertFalse(lh.getLedgerMetadata().currentEnsemble.contains(failedBookie));
    assertFalse(lh.getLedgerMetadata().currentEnsemble.contains(readOnlyBookie));
}
use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.
the class TestDelayEnsembleChange method testChangeEnsembleIfBrokenAckQuorum.
/**
 * Writes to a ledger created with {@code createLedger(5, 5, 3, ...)} while
 * killing the first three bookies of its ensemble one at a time. The ensemble
 * is expected to stay unchanged after the first and second kill and to change
 * only after the third; the client stats are checked along the way, and the
 * killed bookies are restarted before the final verification of all entries.
 */
@Test
public void testChangeEnsembleIfBrokenAckQuorum() throws Exception {
    startNewBookie();
    startNewBookie();
    startNewBookie();
    bkc.getTestStatsProvider().clear();

    final String newEnsembleStat = CLIENT_SCOPE + "." + WATCHER_SCOPE + "." + NEW_ENSEMBLE_TIME;
    final String replaceBookieStat = CLIENT_SCOPE + "." + WATCHER_SCOPE + "." + REPLACE_BOOKIE_TIME;

    LedgerHandle lh = bkc.createLedger(5, 5, 3, digestType, testPasswd);
    byte[] payload = "foobar".getBytes();
    int batch = 5;

    // First batch: all bookies of the ensemble are alive.
    for (int entry = 0; entry < batch; entry++) {
        lh.addEntry(payload);
    }

    // Every bookie of the initial ensemble must show up in the distribution counters.
    for (BookieSocketAddress bookie : lh.getLedgerMetadata().getEnsembles().get(0L)) {
        String failureMessage = LEDGER_ENSEMBLE_BOOKIE_DISTRIBUTION + " should be > 0 for " + bookie;
        String counterName = CLIENT_SCOPE + "." + LEDGER_ENSEMBLE_BOOKIE_DISTRIBUTION + "-" + bookie;
        assertTrue(failureMessage, bkc.getTestStatsProvider().getCounter(counterName).get() > 0);
    }
    assertTrue("Stats should have captured a new ensemble", bkc.getTestStatsProvider().getOpStatsLogger(newEnsembleStat).getSuccessCount() > 0);
    assertTrue("Stats should not have captured an ensemble change", bkc.getTestStatsProvider().getOpStatsLogger(replaceBookieStat).getSuccessCount() == 0);

    // Second batch: the bookie in slot 0 is down.
    logger.info("Kill bookie 0 and write {} entries.", batch);
    ServerConfiguration conf0 = killBookie(lh.getLedgerMetadata().currentEnsemble.get(0));
    for (int entry = batch; entry < 2 * batch; entry++) {
        lh.addEntry(payload);
    }
    // The ensemble must still be the original one.
    assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", 1, lh.getLedgerMetadata().getEnsembles().size());
    assertTrue("Stats should not have captured an ensemble change", bkc.getTestStatsProvider().getOpStatsLogger(replaceBookieStat).getSuccessCount() == 0);

    // Third batch: the bookies in slots 0 and 1 are down.
    logger.info("Kill bookie 1 and write another {} entries.", batch);
    ServerConfiguration conf1 = killBookie(lh.getLedgerMetadata().currentEnsemble.get(1));
    for (int entry = 2 * batch; entry < 3 * batch; entry++) {
        lh.addEntry(payload);
    }
    // The ensemble must still be the original one.
    assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", 1, lh.getLedgerMetadata().getEnsembles().size());

    // Fourth batch: the bookies in slots 0, 1 and 2 are down.
    logger.info("Kill bookie 2 and write another {} entries.", batch);
    ServerConfiguration conf2 = killBookie(lh.getLedgerMetadata().currentEnsemble.get(2));
    for (int entry = 3 * batch; entry < 4 * batch; entry++) {
        lh.addEntry(payload);
    }
    // This time the ensemble change should kick in.
    assertEquals("There should be ensemble change if ack quorum couldn't be formed.", 2, lh.getLedgerMetadata().getEnsembles().size());
    assertTrue("Stats should have captured an ensemble change", bkc.getTestStatsProvider().getOpStatsLogger(replaceBookieStat).getSuccessCount() > 0);

    // The three killed slots must hold different bookies in the new fragment,
    // while the two surviving slots must be untouched.
    ArrayList<BookieSocketAddress> before = lh.getLedgerMetadata().getEnsemble(0);
    ArrayList<BookieSocketAddress> after = lh.getLedgerMetadata().getEnsemble(3 * batch);
    for (int slot = 0; slot < 3; slot++) {
        assertFalse(before.get(slot).equals(after.get(slot)));
    }
    for (int slot = 3; slot < 5; slot++) {
        assertEquals(before.get(slot), after.get(slot));
    }

    // Bring the killed bookies back, in the order they were killed.
    bsConfs.add(conf0);
    bs.add(startBookie(conf0));
    bsConfs.add(conf1);
    bs.add(startBookie(conf1));
    bsConfs.add(conf2);
    bs.add(startBookie(conf2));

    // Fifth batch: written after the restart.
    for (int entry = 4 * batch; entry < 5 * batch; entry++) {
        lh.addEntry(payload);
    }
    // Restarting the old bookies must not cause yet another ensemble change.
    assertEquals("There should be no ensemble change if delaying ensemble change is enabled.", 2, lh.getLedgerMetadata().getEnsembles().size());

    // Check each batch against the replica counts expected for its phase.
    verifyEntries(lh, 0, batch, 5, 0);
    verifyEntries(lh, batch, 2 * batch, 4, 1);
    verifyEntries(lh, 2 * batch, 3 * batch, 3, 2);
    verifyEntries(lh, 3 * batch, 4 * batch, 5, 0);
    verifyEntries(lh, 4 * batch, 5 * batch, 5, 0);
}
Aggregations