use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class ReplicationWorker method deferLedgerLockRelease.
/**
 * Postpones releasing the under-replicated ledger lock held for {@code ledgerId}.
 *
 * <p>A timer task is scheduled on {@code pendingReplicationTimer} to run after
 * {@code openLedgerRereplicationGracePeriod}. When it fires, the ledger is opened
 * without recovery and inspected. If its last segment is open and missing bookies,
 * or if any under-replicated fragment is still open, the handle is closed and the
 * ledger is re-opened through {@code admin.openLedger} (a recovery open, which may
 * cause client writes to fail). Regardless of the outcome, the handle is closed and
 * the under-replicated ledger lock is released; if the release itself reports
 * {@code UnavailableException}, the worker is shut down.
 *
 * @param ledgerId id of the ledger whose lock release is deferred
 */
private void deferLedgerLockRelease(final long ledgerId) {
    final long delay = this.openLedgerRereplicationGracePeriod;
    final TimerTask releaseTask = new TimerTask() {
        @Override
        public void run() {
            LedgerHandle handle = null;
            try {
                handle = admin.openLedgerNoRecovery(ledgerId);
                if (isLastSegmentOpenAndMissingBookies(handle)) {
                    // Drop the non-recovery handle before the recovery open below.
                    handle.close();
                    // Recovery open could result in client write failure.
                    LOG.warn("Missing bookie(s) from last segment. Opening Ledger{} for Recovery.", ledgerId);
                    handle = admin.openLedger(ledgerId);
                } else {
                    Set<LedgerFragment> underreplicated =
                            getUnderreplicatedFragments(handle, conf.getAuditorLedgerVerificationPercentage());
                    for (LedgerFragment candidate : underreplicated) {
                        if (candidate.isClosed()) {
                            continue;
                        }
                        // First open fragment found: swap the handle for a recovery open
                        // and stop scanning, one recovery open is enough.
                        handle.close();
                        // Recovery open could result in client write failure.
                        LOG.warn("Open Fragment{}. Opening Ledger{} for Recovery.", candidate.getEnsemble(), ledgerId);
                        handle = admin.openLedger(ledgerId);
                        break;
                    }
                }
            } catch (InterruptedException ie) {
                // Restore the interrupt status for whoever owns this thread.
                Thread.currentThread().interrupt();
                LOG.info("InterruptedException while fencing the ledger {}" + " for rereplication of postponed ledgers", ledgerId, ie);
            } catch (BKNoSuchLedgerExistsException missing) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Ledger {} was deleted, safe to continue", ledgerId, missing);
                }
            } catch (BKException bke) {
                LOG.error("BKException while fencing the ledger {}" + " for rereplication of postponed ledgers", ledgerId, bke);
            } finally {
                try {
                    if (handle != null) {
                        handle.close();
                    }
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    LOG.info("InterruptedException while closing ledger {}", ledgerId, ie);
                } catch (BKException bke) {
                    // Go ahead and release the lock anyway; the actual failure is
                    // expected to surface, and be acted upon, in the normal
                    // replication flow.
                    LOG.warn("BKException while closing ledger {} ", ledgerId, bke);
                } finally {
                    // The lock is always released, even when closing the handle failed.
                    try {
                        underreplicationManager.releaseUnderreplicatedLedger(ledgerId);
                    } catch (UnavailableException ue) {
                        LOG.error("UnavailableException while replicating fragments of ledger {}", ledgerId, ue);
                        shutdown();
                    }
                }
            }
        }
    };
    pendingReplicationTimer.schedule(releaseTask, delay);
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class ReplicationWorker method rereplicate.
/**
 * Tries to re-replicate every closed under-replicated fragment of the given ledger.
 *
 * <p>Open fragments are skipped. If any open fragment was seen, or the last segment
 * is open and missing bookies, releasing the under-replicated lock is handed over to
 * {@link #deferLedgerLockRelease(long)} and this method returns {@code false}.
 * Otherwise the ledger is re-checked and, when nothing is left under-replicated,
 * marked as replicated.
 *
 * @param ledgerIdToReplicate id of the ledger to work on
 * @return {@code true} only when the ledger was marked replicated by this call;
 *         {@code false} when fragments remain, the release was deferred, the ledger
 *         no longer exists, or a (non not-enough-bookies) {@code BKException} occurred
 * @throws InterruptedException if interrupted while talking to the ledger
 * @throws BKException the {@code BKNotEnoughBookiesException} caught by the outer
 *         handler is rethrown after being logged
 * @throws UnavailableException if marking the ledger replicated fails
 */
private boolean rereplicate(long ledgerIdToReplicate) throws InterruptedException, BKException, UnavailableException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Going to replicate the fragments of the ledger: {}", ledgerIdToReplicate);
    }
    boolean lockReleaseDeferred = false;
    try (LedgerHandle handle = admin.openLedgerNoRecovery(ledgerIdToReplicate)) {
        Set<LedgerFragment> pending =
                getUnderreplicatedFragments(handle, conf.getAuditorLedgerVerificationPercentage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Founds fragments {} for replication from ledger: {}", pending, ledgerIdToReplicate);
        }
        boolean sawOpenFragment = false;
        for (LedgerFragment fragment : pending) {
            if (fragment.isClosed()) {
                // Per-fragment failures are logged and skipped so that the
                // remaining fragments still get their replication attempt.
                try {
                    admin.replicateLedgerFragment(handle, fragment);
                } catch (BKException.BKBookieHandleNotAvailableException e) {
                    LOG.warn("BKBookieHandleNotAvailableException while replicating the fragment", e);
                } catch (BKException.BKLedgerRecoveryException e) {
                    LOG.warn("BKLedgerRecoveryException while replicating the fragment", e);
                } catch (BKException.BKNotEnoughBookiesException e) {
                    LOG.warn("BKNotEnoughBookiesException while replicating the fragment", e);
                }
            } else {
                // Open fragments are not replicated here; remember that one was seen.
                sawOpenFragment = true;
            }
        }
        if (sawOpenFragment || isLastSegmentOpenAndMissingBookies(handle)) {
            // Flag first, so the finally block below leaves the lock to the timer task.
            lockReleaseDeferred = true;
            deferLedgerLockRelease(ledgerIdToReplicate);
            return false;
        }
        Set<LedgerFragment> remaining =
                getUnderreplicatedFragments(handle, conf.getAuditorLedgerVerificationPercentage());
        if (!remaining.isEmpty()) {
            // Fragments are still pending; the ledger has to go through replication again.
            return false;
        }
        LOG.info("Ledger replicated successfully. ledger id is: " + ledgerIdToReplicate);
        underreplicationManager.markLedgerReplicated(ledgerIdToReplicate);
        return true;
    } catch (BKNoSuchLedgerExistsException e) {
        // Ledger might have been deleted by user
        LOG.info("BKNoSuchLedgerExistsException while opening " + "ledger {} for replication. Other clients " + "might have deleted the ledger. " + "So, no harm to continue", ledgerIdToReplicate);
        underreplicationManager.markLedgerReplicated(ledgerIdToReplicate);
        getExceptionCounter("BKNoSuchLedgerExistsException").inc();
        return false;
    } catch (BKNotEnoughBookiesException e) {
        logBKExceptionAndReleaseLedger(e, ledgerIdToReplicate);
        throw e;
    } catch (BKException e) {
        logBKExceptionAndReleaseLedger(e, ledgerIdToReplicate);
        return false;
    } finally {
        // Release the lock here unless that was deferred to the timer task.
        // NOTE(review): on the BKException paths logBKExceptionAndReleaseLedger has
        // presumably released it already, making this call a no-op - confirm.
        if (!lockReleaseDeferred) {
            try {
                underreplicationManager.releaseUnderreplicatedLedger(ledgerIdToReplicate);
            } catch (UnavailableException e) {
                LOG.error("UnavailableException while releasing the underreplicated lock for ledger {}:", ledgerIdToReplicate, e);
                shutdown();
            }
        }
    }
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class ZkLedgerUnderreplicationManager method notifyLostBookieRecoveryDelayChanged.
/**
 * Sets a ZooKeeper watch on {@code lostBookieRecoveryDelayZnode} and invokes
 * {@code cb} with return code {@code 0} whenever that watch reports a
 * {@code NodeDataChanged} event. If the znode does not exist at registration
 * time, {@code cb} is invoked immediately.
 *
 * @param cb callback to notify; always completed with {@code (0, null)}
 * @throws UnavailableException if ZooKeeper reports an error or the calling
 *         thread is interrupted while checking the znode
 */
@Override
public void notifyLostBookieRecoveryDelayChanged(GenericCallback<Void> cb) throws UnavailableException {
    LOG.debug("notifyLostBookieRecoveryDelayChanged()");
    // Only data changes are forwarded; every other event type is ignored.
    Watcher dataChangeWatcher = event -> {
        if (Watcher.Event.EventType.NodeDataChanged == event.getType()) {
            cb.operationComplete(0, null);
        }
    };
    try {
        if (zkc.exists(lostBookieRecoveryDelayZnode, dataChangeWatcher) == null) {
            // The znode is absent: notify the caller right away.
            cb.operationComplete(0, null);
        }
    } catch (InterruptedException ie) {
        // Preserve the interrupt status before translating the exception.
        Thread.currentThread().interrupt();
        throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie);
    } catch (KeeperException ke) {
        LOG.error("Error while checking the state of lostBookieRecoveryDelay", ke);
        throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke);
    }
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class TestLedgerUnderreplicationManager method testEnableLedgerReplication.
/**
 * Test enabling the ledger re-replication. While replication is disabled a
 * getLedgerToRereplicate() call is expected not to complete; after
 * enableLedgerReplication() it should continue and hand back the ledger that
 * was marked under-replicated.
 */
@Test
public void testEnableLedgerReplication() throws Exception {
    isLedgerReplicationDisabled = true;
    final LedgerUnderreplicationManager replicaMgr = lmf1.newLedgerUnderreplicationManager();
    // simulate few urLedgers before disabling
    final Long ledgerA = 0xfeadeefdacL;
    final String missingReplica = "localhost:3181";
    try {
        replicaMgr.markLedgerUnderreplicated(ledgerA, missingReplica);
    } catch (UnavailableException e) {
        LOG.debug("Unexpected exception while marking urLedger", e);
        fail("Unexpected exception while marking urLedger" + e.getMessage());
    }
    // disabling replication
    replicaMgr.disableLedgerReplication();
    LOG.debug("Disabled Ledeger Replication");
    String znodeA = getUrLedgerZnode(ledgerA);
    // Two count-downs are expected: one when the lock znode is created and one
    // when the worker thread below gets its ledger back.
    final CountDownLatch znodeLatch = new CountDownLatch(2);
    String urledgerA = StringUtils.substringAfterLast(znodeA, "/");
    String urLockLedgerA = basePath + "/locks/" + urledgerA;
    zkc1.exists(urLockLedgerA, new Watcher() {
        @Override
        public void process(WatchedEvent event) {
            if (event.getType() == EventType.NodeCreated) {
                znodeLatch.countDown();
                LOG.debug("Recieved node creation event for the zNodePath:" + event.getPath());
            }
        }
    });
    // getLedgerToRereplicate is waiting until enable rereplication
    Thread thread1 = new Thread() {
        @Override
        public void run() {
            try {
                Long lA = replicaMgr.getLedgerToRereplicate();
                // Expected value first, so a failure message reads the right way round.
                assertEquals("Should be the ledger I just marked", ledgerA, lA);
                isLedgerReplicationDisabled = false;
                znodeLatch.countDown();
            } catch (UnavailableException e) {
                LOG.debug("Unexpected exception while getting the ledger to rereplicate", e);
                isLedgerReplicationDisabled = false;
            }
        }
    };
    thread1.start();
    try {
        // While replication is disabled nothing may have counted the latch down.
        assertFalse("shouldn't complete", znodeLatch.await(1, TimeUnit.SECONDS));
        assertTrue("Ledger replication is not disabled!", isLedgerReplicationDisabled);
        assertEquals("Failed to disable ledger replication!", 2, znodeLatch.getCount());
        replicaMgr.enableLedgerReplication();
        // The await result is not checked directly; the count assertion below covers a timeout.
        znodeLatch.await(5, TimeUnit.SECONDS);
        LOG.debug("Enabled Ledeger Replication");
        assertFalse("Ledger replication is not enabled!", isLedgerReplicationDisabled);
        assertEquals("Failed to enable ledger replication!", 0, znodeLatch.getCount());
    } finally {
        thread1.interrupt();
    }
}
use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.
the class Auditor method auditBookies.
/**
 * Runs one audit pass over the bookies.
 *
 * <p>The pass waits while ledger replication is disabled, builds the
 * bookie-to-ledgers index, and then compares the bookies named in that index
 * with the currently available ones. Bookies present in the index but not
 * available are treated as lost and passed to {@code handleLostBookies}.
 *
 * <p>The pass is abandoned (plain return) when the under-replication manager is
 * unavailable. If replication turns out to have been disabled while the index
 * was being generated, this run is discarded and {@code bookieCheck} is
 * resubmitted to the executor.
 *
 * @throws BKAuditException declared by this method; raised by the helpers it calls
 * @throws KeeperException declared by this method; raised by the helpers it calls
 * @throws InterruptedException if the thread is interrupted during the audit
 * @throws BKException declared by this method; raised by the helpers it calls
 */
@SuppressWarnings("unchecked")
private void auditBookies() throws BKAuditException, KeeperException, InterruptedException, BKException {
    try {
        waitIfLedgerReplicationDisabled();
    } catch (UnavailableException ue) {
        // Keep the cause in the log so the reason for the skipped audit is diagnosable.
        LOG.error("Underreplication unavailable, skipping audit. Will retry after a period", ue);
        return;
    }
    Stopwatch stopwatch = Stopwatch.createStarted();
    Map<String, Set<Long>> ledgerDetails = generateBookie2LedgersIndex();
    try {
        if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) {
            // has been disabled while we were generating the index
            // discard this run, and schedule a new one
            executor.submit(bookieCheck);
            return;
        }
    } catch (UnavailableException ue) {
        LOG.error("Underreplication unavailable, skipping audit. Will retry after a period", ue);
        return;
    }
    List<String> availableBookies = getAvailableBookies();
    // find lost bookies: known from the ledger index, but not currently available
    Set<String> knownBookies = ledgerDetails.keySet();
    Collection<String> lostBookies = CollectionUtils.subtract(knownBookies, availableBookies);
    // Time taken up to here: building the index and working out the lost bookies.
    bookieToLedgersMapCreationTime.registerSuccessfulEvent(stopwatch.elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
    if (!lostBookies.isEmpty()) {
        handleLostBookies(lostBookies, ledgerDetails);
        // Total time including the handling of the lost bookies; the stopwatch is stopped here.
        uRLPublishTimeForLostBookies.registerSuccessfulEvent(stopwatch.stop().elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
    }
}
Aggregations