use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class BookKeeperClusterTestCase method startBookie.
/**
 * Starts a bookie server that wraps the given bookie instance, then starts
 * auto recovery for it (per the original documentation, auto recovery only
 * runs when isAutoRecoveryEnabled is true; that check is not in this method).
 *
 * <p>The method waits up to 30 seconds on the future obtained from the test
 * client ({@code waitForReadOnlyBookie} when the configuration forces a
 * read-only bookie, {@code waitForWritableBookie} otherwise) before returning.
 * A failure to start auto recovery is logged and does not fail the call.
 *
 * @param conf configuration used to construct the server
 * @param b bookie instance returned from the server's {@code newBookie} hook
 * @return the started server
 * @throws Exception if starting the server or waiting for the bookie fails
 */
protected BookieServer startBookie(ServerConfiguration conf, final Bookie b) throws Exception {
    final TestStatsProvider statsProvider = new TestStatsProvider();
    final BookieServer bookieServer = new BookieServer(conf, statsProvider.getStatsLogger("")) {
        @Override
        protected Bookie newBookie(ServerConfiguration conf) {
            // Always hand back the caller-supplied instance instead of building one.
            return b;
        }
    };

    final BookieSocketAddress bookieAddress = Bookie.getBookieAddress(conf);

    // Lazily create the shared test client; it is needed to obtain the wait future.
    if (bkc == null) {
        bkc = new BookKeeperTestClient(baseClientConf, new TestStatsProvider());
    }

    // Obtain the future BEFORE starting the server so the wait is already in place.
    final Future<?> bookieVisible;
    if (conf.isForceReadOnlyBookie()) {
        bookieVisible = bkc.waitForReadOnlyBookie(bookieAddress);
    } else {
        bookieVisible = bkc.waitForWritableBookie(bookieAddress);
    }

    bookieServer.start();
    bsLoggers.put(bookieServer.getLocalAddress(), statsProvider);
    bookieVisible.get(30, TimeUnit.SECONDS);
    LOG.info("New bookie '{}' has been created.", bookieAddress);

    try {
        startAutoRecovery(bookieServer, conf);
    } catch (CompatibilityException | UnavailableException e) {
        // Best effort: the bookie itself is up, so only report the failure.
        LOG.error("Exception while starting AutoRecovery!", e);
    }
    return bookieServer;
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class AuditorElector method submitElectionTask.
/**
 * Submits one round of auditor election to the executor.
 *
 * <p>The round creates this bookie's vote, lists and sorts all votes under the
 * vote path, and then either starts an {@link Auditor} (when this bookie's
 * vote sits at {@code AUDITOR_INDEX}) or places a watch on the vote
 * immediately preceding its own. If that preceding vote is already gone, a
 * new election round is submitted. Any checked failure triggers
 * {@code submitShutdownTask()}.
 */
@VisibleForTesting
void submitElectionTask() {
    executor.submit(new Runnable() {
        @Override
        public void run() {
            if (!running.get()) {
                return;
            }
            try {
                // creating my vote in zk. Vote format is 'V_numeric'
                createMyVote();

                final List<String> votes = zkc.getChildren(getVotePath(""), false);
                if (votes.isEmpty()) {
                    throw new IllegalArgumentException("Atleast one bookie server should present to elect the Auditor!");
                }

                // ascending order of the sequential number
                Collections.sort(votes, new ElectionComparator());
                final String myVoteNode = StringUtils.substringAfterLast(myVote, PATH_SEPARATOR);

                if (!votes.get(AUDITOR_INDEX).equals(myVoteNode)) {
                    // Not the auditor: watch the predecessor vote and wait for
                    // its deletion.
                    final Watcher predecessorWatcher = new ElectionWatcher();
                    // NOTE(review): indexOf returns -1 if our vote is absent from the
                    // listing; get(-2) would then throw an unchecked exception that
                    // none of the catch clauses below handle - confirm whether that
                    // can happen in practice.
                    final int predecessorIndex = votes.indexOf(myVoteNode) - 1;
                    final String predecessorPath = getVotePath(PATH_SEPARATOR) + votes.get(predecessorIndex);
                    if (zkc.exists(predecessorPath, predecessorWatcher) == null) {
                        // The predecessor vanished before the watch was set:
                        // run the election again.
                        submitElectionTask();
                    }
                    electionAttempts.inc();
                    return;
                }

                // This bookie is the auditor. Record its id in the election path
                // (done for debugging purposes) and start the auditing service.
                final AuditorVoteFormat.Builder voteBuilder = AuditorVoteFormat.newBuilder().setBookieId(bookieId);
                final byte[] voteData = TextFormat.printToString(voteBuilder.build()).getBytes(UTF_8);
                zkc.setData(getVotePath(""), voteData, -1);
                auditor = new Auditor(bookieId, conf, zkc, statsLogger);
                auditor.start();
            } catch (KeeperException e) {
                LOG.error("Exception while performing auditor election", e);
                submitShutdownTask();
            } catch (InterruptedException e) {
                LOG.error("Interrupted while performing auditor election", e);
                // Restore the interrupt flag for whoever owns this thread.
                Thread.currentThread().interrupt();
                submitShutdownTask();
            } catch (UnavailableException e) {
                LOG.error("Ledger underreplication manager unavailable during election", e);
                submitShutdownTask();
            }
        }
    });
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class Auditor method submitAuditTask.
/**
 * Submits a task that reconciles the known bookies with the currently
 * available ones and audits the bookies that have gone missing.
 *
 * <p>Inside the task:
 * <ul>
 * <li>newly seen bookies are added to the known list;</li>
 * <li>if every bookie pending audit is known again, the delayed audit (if
 * any) is cancelled and the pending set is cleared;</li>
 * <li>bookies that are known but no longer available are added to the
 * pending set and removed from the known list;</li>
 * <li>the audit runs immediately when the lost-bookie recovery delay is 0 or
 * when more than one bookie is pending; otherwise a single delayed audit is
 * scheduled after the recovery delay (in seconds) unless one is already
 * scheduled.</li>
 * </ul>
 *
 * @return the future of the submitted task, or a future already failed with
 *         {@code BKAuditException} when the executor has been shut down
 */
@VisibleForTesting
synchronized Future<?> submitAuditTask() {
    if (executor.isShutdown()) {
        SettableFuture<Void> f = SettableFuture.<Void>create();
        f.setException(new BKAuditException("Auditor shutting down"));
        return f;
    }
    return executor.submit(new Runnable() {
        @SuppressWarnings("unchecked")
        public void run() {
            try {
                waitIfLedgerReplicationDisabled();
                int lostBookieRecoveryDelay = Auditor.this.ledgerUnderreplicationManager.getLostBookieRecoveryDelay();
                List<String> availableBookies = getAvailableBookies();
                // casting to String, as knownBookies and availableBookies
                // contains only String values
                // find new bookies(if any) and update the known bookie list
                Collection<String> newBookies = CollectionUtils.subtract(availableBookies, knownBookies);
                knownBookies.addAll(newBookies);
                if (!bookiesToBeAudited.isEmpty() && knownBookies.containsAll(bookiesToBeAudited)) {
                    // every bookie pending audit is back: drop the delayed audit
                    if (auditTask != null && auditTask.cancel(false)) {
                        auditTask = null;
                        numDelayedBookieAuditsCancelled.inc();
                    }
                    bookiesToBeAudited.clear();
                }
                // find lost bookies(if any). Captured once, BEFORE knownBookies is
                // pruned below, so the set can still be reported accurately later;
                // recomputing it after the removal always yields an empty collection.
                Collection<String> lostBookies = CollectionUtils.subtract(knownBookies, availableBookies);
                bookiesToBeAudited.addAll(lostBookies);
                if (bookiesToBeAudited.size() == 0) {
                    return;
                }
                knownBookies.removeAll(bookiesToBeAudited);
                if (lostBookieRecoveryDelay == 0) {
                    // no delay configured: audit right away
                    startAudit(false);
                    bookiesToBeAudited.clear();
                    return;
                }
                if (bookiesToBeAudited.size() > 1) {
                    // if more than one bookie is down, start the audit immediately;
                    LOG.info("Multiple bookie failure; not delaying bookie audit. " + "Bookies lost now: {}; All lost bookies: {}", lostBookies, bookiesToBeAudited);
                    if (auditTask != null && auditTask.cancel(false)) {
                        auditTask = null;
                        numDelayedBookieAuditsCancelled.inc();
                    }
                    startAudit(false);
                    bookiesToBeAudited.clear();
                    return;
                }
                if (auditTask == null) {
                    // if there is no scheduled audit, schedule one
                    auditTask = executor.schedule(new Runnable() {
                        public void run() {
                            startAudit(false);
                            auditTask = null;
                            bookiesToBeAudited.clear();
                        }
                    }, lostBookieRecoveryDelay, TimeUnit.SECONDS);
                    numBookieAuditsDelayed.inc();
                    LOG.info("Delaying bookie audit by {} secs for {}", lostBookieRecoveryDelay, bookiesToBeAudited);
                }
            } catch (BKException bke) {
                LOG.error("Exception getting bookie list", bke);
            } catch (InterruptedException ie) {
                // restore the interrupt flag before reporting
                Thread.currentThread().interrupt();
                LOG.error("Interrupted while watching available bookies ", ie);
            } catch (UnavailableException ue) {
                LOG.error("Exception while watching available bookies", ue);
            }
        }
    });
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class BookKeeperAdminTest method testTriggerAudit.
/**
 * Checks that {@code triggerAudit} fails with {@code UnavailableException}
 * while ledger replication is disabled, leaves the lost-bookie recovery
 * delay unchanged, and - once replication is enabled and a bookie has been
 * killed - results in the test ledger being listed for re-replication.
 */
@Test
public void testTriggerAudit() throws Exception {
    ZkLedgerUnderreplicationManager urLedgerMgr = new ZkLedgerUnderreplicationManager(baseClientConf, zkc);
    BookKeeperAdmin bkAdmin = new BookKeeperAdmin(zkUtil.getZooKeeperConnectString());
    // Close the admin client even when an assertion fails, so a failing run
    // does not leak it.
    try {
        int lostBookieRecoveryDelayValue = bkAdmin.getLostBookieRecoveryDelay();
        urLedgerMgr.disableLedgerReplication();
        try {
            bkAdmin.triggerAudit();
            fail("Trigger Audit should have failed because LedgerReplication is disabled");
        } catch (UnavailableException une) {
            // expected
        }
        assertEquals("LostBookieRecoveryDelay", lostBookieRecoveryDelayValue, bkAdmin.getLostBookieRecoveryDelay());
        urLedgerMgr.enableLedgerReplication();
        bkAdmin.triggerAudit();
        assertEquals("LostBookieRecoveryDelay", lostBookieRecoveryDelayValue, bkAdmin.getLostBookieRecoveryDelay());
        long ledgerId = 1L;
        LedgerHandle ledgerHandle = bkc.createLedgerAdv(ledgerId, numBookies, numBookies, numBookies, digestType, PASSWORD.getBytes(), null);
        ledgerHandle.addEntry(0, "data".getBytes());
        ledgerHandle.close();
        killBookie(1);
        /*
         * Since lostBookieRecoveryDelay is set, the audit process does not
         * start immediately when a bookie dies. Calling triggerAudit forces
         * the audit process to run.
         */
        bkAdmin.triggerAudit();
        // Give the triggered audit some time to publish its result.
        Thread.sleep(500);
        Iterator<Long> ledgersToRereplicate = urLedgerMgr.listLedgersToRereplicate(null);
        assertTrue("There are supposed to be underreplicatedledgers", ledgersToRereplicate.hasNext());
        assertEquals("Underreplicated ledgerId", ledgerId, ledgersToRereplicate.next().longValue());
    } finally {
        bkAdmin.close();
    }
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class BookKeeperAdmin method triggerAudit.
/**
 * Triggers the audit task by writing the lost-bookie recovery delay back
 * with its current value.
 *
 * <p>The call is rejected when ledger replication is disabled or when no
 * auditor is currently elected.
 *
 * @throws CompatibilityException declared by the calls made here (the origin
 *         is not visible in this method)
 * @throws KeeperException declared by the calls made here (presumably
 *         ZooKeeper access; confirm against the callees)
 * @throws InterruptedException declared by the calls made here
 * @throws UnavailableException if ledger replication is disabled or no
 *         auditor has been elected
 * @throws IOException declared by the calls made here
 */
public void triggerAudit() throws CompatibilityException, KeeperException, InterruptedException, UnavailableException, IOException {
    final LedgerUnderreplicationManager urManager = getUnderreplicationManager();

    if (!urManager.isLedgerReplicationEnabled()) {
        final String disabledMsg = "Autorecovery is disabled. So giving up!";
        LOG.error(disabledMsg);
        throw new UnavailableException(disabledMsg);
    }

    final ServerConfiguration serverConf = new ServerConfiguration(bkc.conf);
    final BookieSocketAddress currentAuditor = AuditorElector.getCurrentAuditor(serverConf, bkc.getZkHandle());
    if (currentAuditor == null) {
        final String noAuditorMsg = "No auditor elected, though Autorecovery is enabled. So giving up.";
        LOG.error(noAuditorMsg);
        throw new UnavailableException(noAuditorMsg);
    }

    // Re-writing the unchanged value is what kicks off the audit task.
    final int currentDelay = urManager.getLostBookieRecoveryDelay();
    LOG.info("Resetting LostBookieRecoveryDelay value: {}, to kickstart audit task", currentDelay);
    urManager.setLostBookieRecoveryDelay(currentDelay);
}
Aggregations