use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class Auditor method publishSuspectedLedgers.
private void publishSuspectedLedgers(String bookieIP, Set<Long> ledgers) throws BKAuditException {
if (null == ledgers || ledgers.size() == 0) {
// there is no ledgers available for this bookie and just
// ignoring the bookie failures
LOG.info("There is no ledgers for the failed bookie: {}", bookieIP);
return;
}
LOG.info("Following ledgers: {} of bookie: {} are identified as underreplicated", ledgers, bookieIP);
numUnderReplicatedLedger.registerSuccessfulValue(ledgers.size());
for (Long ledgerId : ledgers) {
try {
ledgerUnderreplicationManager.markLedgerUnderreplicated(ledgerId, bookieIP);
} catch (UnavailableException ue) {
throw new BKAuditException("Failed to publish underreplicated ledger: " + ledgerId + " of bookie: " + bookieIP, ue);
}
}
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class Auditor method submitLostBookieRecoveryDelayChangedEvent.
synchronized Future<?> submitLostBookieRecoveryDelayChangedEvent() {
if (executor.isShutdown()) {
SettableFuture<Void> f = SettableFuture.<Void>create();
f.setException(new BKAuditException("Auditor shutting down"));
return f;
}
return executor.submit(new Runnable() {
int lostBookieRecoveryDelay = -1;
public void run() {
try {
waitIfLedgerReplicationDisabled();
lostBookieRecoveryDelay = Auditor.this.ledgerUnderreplicationManager.getLostBookieRecoveryDelay();
// after new lostBookieRecoveryDelay period
if (auditTask != null) {
LOG.info("lostBookieRecoveryDelay period has been changed so canceling the pending AuditTask");
auditTask.cancel(false);
numDelayedBookieAuditsCancelled.inc();
}
// signal to trigger the Audit immediately.
if ((lostBookieRecoveryDelay == 0) || (lostBookieRecoveryDelay == lostBookieRecoveryDelayBeforeChange)) {
LOG.info("lostBookieRecoveryDelay has been set to 0 or reset to its previous value, " + "so starting AuditTask. Current lostBookieRecoveryDelay: {}, " + "previous lostBookieRecoveryDelay: {}", lostBookieRecoveryDelay, lostBookieRecoveryDelayBeforeChange);
startAudit(false);
auditTask = null;
bookiesToBeAudited.clear();
} else if (auditTask != null) {
LOG.info("lostBookieRecoveryDelay has been set to {}, so rescheduling AuditTask accordingly", lostBookieRecoveryDelay);
auditTask = executor.schedule(new Runnable() {
public void run() {
startAudit(false);
auditTask = null;
bookiesToBeAudited.clear();
}
}, lostBookieRecoveryDelay, TimeUnit.SECONDS);
numBookieAuditsDelayed.inc();
}
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
LOG.error("Interrupted while for LedgersReplication to be enabled ", ie);
} catch (UnavailableException ue) {
LOG.error("Exception while reading from ZK", ue);
} finally {
if (lostBookieRecoveryDelay != -1) {
lostBookieRecoveryDelayBeforeChange = lostBookieRecoveryDelay;
}
}
}
});
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class Auditor method initialize.
private void initialize(ServerConfiguration conf, ZooKeeper zkc) throws UnavailableException {
try {
ClientConfiguration clientConfiguration = new ClientConfiguration(conf);
clientConfiguration.setClientRole(ClientConfiguration.CLIENT_ROLE_SYSTEM);
LOG.info("AuthProvider used by the Auditor is {}", clientConfiguration.getClientAuthProviderFactoryClass());
this.bkc = new BookKeeper(clientConfiguration, zkc);
LedgerManagerFactory ledgerManagerFactory = AbstractZkLedgerManagerFactory.newLedgerManagerFactory(conf, bkc.getMetadataClientDriver().getLayoutManager());
ledgerManager = ledgerManagerFactory.newLedgerManager();
this.bookieLedgerIndexer = new BookieLedgerIndexer(ledgerManager);
this.ledgerUnderreplicationManager = ledgerManagerFactory.newLedgerUnderreplicationManager();
this.admin = new BookKeeperAdmin(bkc, statsLogger);
if (this.ledgerUnderreplicationManager.initializeLostBookieRecoveryDelay(conf.getLostBookieRecoveryDelay())) {
LOG.info("Initializing lostBookieRecoveryDelay zNode to the conif value: {}", conf.getLostBookieRecoveryDelay());
} else {
LOG.info("Valid lostBookieRecoveryDelay zNode is available, so not creating " + "lostBookieRecoveryDelay zNode as part of Auditor initialization ");
}
lostBookieRecoveryDelayBeforeChange = this.ledgerUnderreplicationManager.getLostBookieRecoveryDelay();
} catch (CompatibilityException ce) {
throw new UnavailableException("CompatibilityException while initializing Auditor", ce);
} catch (IOException | BKException | KeeperException ioe) {
throw new UnavailableException("Exception while initializing Auditor", ioe);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new UnavailableException("Interrupted while initializing Auditor", ie);
}
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class BookKeeperAdmin method decommissionBookie.
/**
* Triggers AuditTask by resetting lostBookieRecoveryDelay and then make
* sure the ledgers stored in the given decommissioning bookie are properly
* replicated and they are not underreplicated because of the given bookie.
* This method waits untill there are no underreplicatedledgers because of this
* bookie. If the given Bookie is not shutdown yet, then it will throw
* BKIllegalOpException.
*
* @param bookieAddress
* address of the decommissioning bookie
* @throws CompatibilityException
* @throws UnavailableException
* @throws KeeperException
* @throws InterruptedException
* @throws IOException
* @throws BKAuditException
* @throws TimeoutException
* @throws BKException
*/
public void decommissionBookie(BookieSocketAddress bookieAddress) throws CompatibilityException, UnavailableException, KeeperException, InterruptedException, IOException, BKAuditException, TimeoutException, BKException {
if (getAvailableBookies().contains(bookieAddress) || getReadOnlyBookies().contains(bookieAddress)) {
LOG.error("Bookie: {} is not shutdown yet", bookieAddress);
throw BKException.create(BKException.Code.IllegalOpException);
}
triggerAudit();
/*
* Sleep for 30 secs, so that Auditor gets chance to trigger its
* force audittask and let the underreplicationmanager process
* to do its replication process
*/
Thread.sleep(30 * 1000);
/*
* get the collection of the ledgers which are stored in this
* bookie, by making a call to
* bookieLedgerIndexer.getBookieToLedgerIndex.
*/
BookieLedgerIndexer bookieLedgerIndexer = new BookieLedgerIndexer(bkc.ledgerManager);
Map<String, Set<Long>> bookieToLedgersMap = bookieLedgerIndexer.getBookieToLedgerIndex();
Set<Long> ledgersStoredInThisBookie = bookieToLedgersMap.get(bookieAddress.toString());
if ((ledgersStoredInThisBookie != null) && (!ledgersStoredInThisBookie.isEmpty())) {
/*
* wait untill all the ledgers are replicated to other
* bookies by making sure that these ledgers metadata don't
* contain this bookie as part of their ensemble.
*/
waitForLedgersToBeReplicated(ledgersStoredInThisBookie, bookieAddress, bkc.ledgerManager);
}
// for double-checking, check if any ledgers are listed as underreplicated because of this bookie
Predicate<List<String>> predicate = replicasList -> replicasList.contains(bookieAddress.toString());
Iterator<Long> urLedgerIterator = underreplicationManager.listLedgersToRereplicate(predicate);
if (urLedgerIterator.hasNext()) {
// if there are any then wait and make sure those ledgers are replicated properly
LOG.info("Still in some underreplicated ledgers metadata, this bookie is part of its ensemble. " + "Have to make sure that those ledger fragments are rereplicated");
List<Long> urLedgers = new ArrayList<>();
urLedgerIterator.forEachRemaining(urLedgers::add);
waitForLedgersToBeReplicated(urLedgers, bookieAddress, bkc.ledgerManager);
}
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class BookKeeperClusterTestCase method startBookie.
/**
* Helper method to startup a bookie server using a configuration object.
* Also, starts the auto recovery process if isAutoRecoveryEnabled is true.
*
* @param conf
* Server Configuration Object
*/
protected BookieServer startBookie(ServerConfiguration conf) throws Exception {
TestStatsProvider provider = new TestStatsProvider();
BookieServer server = new BookieServer(conf, provider.getStatsLogger(""));
BookieSocketAddress address = Bookie.getBookieAddress(conf);
bsLoggers.put(address, provider);
if (bkc == null) {
bkc = new BookKeeperTestClient(baseClientConf, new TestStatsProvider());
}
Future<?> waitForBookie = conf.isForceReadOnlyBookie() ? bkc.waitForReadOnlyBookie(address) : bkc.waitForWritableBookie(address);
server.start();
waitForBookie.get(30, TimeUnit.SECONDS);
LOG.info("New bookie '{}' has been created.", address);
try {
startAutoRecovery(server, conf);
} catch (CompatibilityException ce) {
LOG.error("Exception while starting AutoRecovery!", ce);
} catch (UnavailableException ue) {
LOG.error("Exception while starting AutoRecovery!", ue);
}
return server;
}
Aggregations