Search in sources :

Example 1 with UnavailableException

use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.

the class ReplicationWorker method deferLedgerLockRelease.

/**
 * Arranges for the under-replicated lock of the given ledger to be released
 * by a timer task that runs once the open-ledger rereplication grace period
 * has elapsed. When the task fires, a ledger that is still open (last
 * segment open with missing bookies, or an open under-replicated fragment)
 * is reopened with recovery, which fences it.
 *
 * @param ledgerId id of the ledger whose lock release is postponed
 */
private void deferLedgerLockRelease(final long ledgerId) {
    final long delay = this.openLedgerRereplicationGracePeriod;
    pendingReplicationTimer.schedule(new TimerTask() {

        @Override
        public void run() {
            LedgerHandle handle = null;
            try {
                handle = admin.openLedgerNoRecovery(ledgerId);
                if (isLastSegmentOpenAndMissingBookies(handle)) {
                    // Swap the no-recovery handle for a recovery one.
                    handle.close();
                    // Note: a recovery open may make client writes fail.
                    LOG.warn("Missing bookie(s) from last segment. Opening Ledger{} for Recovery.", ledgerId);
                    handle = admin.openLedger(ledgerId);
                } else {
                    Set<LedgerFragment> underreplicated = getUnderreplicatedFragments(handle, conf.getAuditorLedgerVerificationPercentage());
                    for (LedgerFragment candidate : underreplicated) {
                        if (candidate.isClosed()) {
                            continue;
                        }
                        // First open fragment found: swap the no-recovery
                        // handle for a recovery one and stop scanning.
                        handle.close();
                        // Note: a recovery open may make client writes fail.
                        LOG.warn("Open Fragment{}. Opening Ledger{} for Recovery.", candidate.getEnsemble(), ledgerId);
                        handle = admin.openLedger(ledgerId);
                        break;
                    }
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                LOG.info("InterruptedException while fencing the ledger {}" + " for rereplication of postponed ledgers", ledgerId, e);
            } catch (BKNoSuchLedgerExistsException bknsle) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Ledger {} was deleted, safe to continue", ledgerId, bknsle);
                }
            } catch (BKException e) {
                LOG.error("BKException while fencing the ledger {}" + " for rereplication of postponed ledgers", ledgerId, e);
            } finally {
                try {
                    if (null != handle) {
                        handle.close();
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    LOG.info("InterruptedException while closing ledger {}", ledgerId, e);
                } catch (BKException e) {
                    // A failed close must not block the lock release; the
                    // regular replication flow is expected to hit and handle
                    // the underlying problem.
                    LOG.warn("BKException while closing ledger {} ", ledgerId, e);
                } finally {
                    // The lock is released no matter how the steps above ended.
                    try {
                        underreplicationManager.releaseUnderreplicatedLedger(ledgerId);
                    } catch (UnavailableException e) {
                        LOG.error("UnavailableException while replicating fragments of ledger {}", ledgerId, e);
                        shutdown();
                    }
                }
            }
        }
    }, delay);
}
Also used : TimerTask(java.util.TimerTask) LedgerHandle(org.apache.bookkeeper.client.LedgerHandle) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) BKException(org.apache.bookkeeper.client.BKException) LedgerFragment(org.apache.bookkeeper.client.LedgerFragment) BKNoSuchLedgerExistsException(org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsException)

Example 2 with UnavailableException

use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.

the class ReplicationWorker method rereplicate.

/**
 * Replicates the closed, under-replicated fragments of a single ledger.
 *
 * <p>Open fragments are skipped. If an open fragment was seen, or the last
 * segment is open and missing bookies, the release of the under-replicated
 * lock is handed over to {@code deferLedgerLockRelease(long)} and this
 * method returns {@code false}. In every other case the lock is released in
 * the {@code finally} block before returning.
 *
 * @param ledgerIdToReplicate id of the ledger to replicate
 * @return {@code true} only when no under-replicated fragments remain after
 *         replication and the ledger has been marked as replicated;
 *         {@code false} otherwise
 * @throws InterruptedException if one of the called operations is interrupted
 * @throws BKException a {@code BKNotEnoughBookiesException} is logged and
 *         rethrown; other {@code BKException}s raised in the main body are
 *         logged and turned into a {@code false} result
 * @throws UnavailableException declared for the under-replication manager
 *         calls made outside the guarded lock release
 */
private boolean rereplicate(long ledgerIdToReplicate) throws InterruptedException, BKException, UnavailableException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Going to replicate the fragments of the ledger: {}", ledgerIdToReplicate);
    }
    boolean deferLedgerLockRelease = false;
    try (LedgerHandle lh = admin.openLedgerNoRecovery(ledgerIdToReplicate)) {
        Set<LedgerFragment> fragments = getUnderreplicatedFragments(lh, conf.getAuditorLedgerVerificationPercentage());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Founds fragments {} for replication from ledger: {}", fragments, ledgerIdToReplicate);
        }
        boolean foundOpenFragments = false;
        for (LedgerFragment ledgerFragment : fragments) {
            if (!ledgerFragment.isClosed()) {
                // Open fragments are not replicated here; remember that one
                // was seen so the lock release can be deferred below.
                foundOpenFragments = true;
                continue;
            }
            try {
                admin.replicateLedgerFragment(lh, ledgerFragment);
            } catch (BKException.BKBookieHandleNotAvailableException e) {
                // Per-fragment failures are logged and the loop continues;
                // the re-check below decides the final outcome.
                LOG.warn("BKBookieHandleNotAvailableException while replicating the fragment", e);
            } catch (BKException.BKLedgerRecoveryException e) {
                LOG.warn("BKLedgerRecoveryException while replicating the fragment", e);
            } catch (BKException.BKNotEnoughBookiesException e) {
                LOG.warn("BKNotEnoughBookiesException while replicating the fragment", e);
            }
        }
        if (foundOpenFragments || isLastSegmentOpenAndMissingBookies(lh)) {
            // Set the flag first so the finally block below does not release
            // the lock that the deferred timer task is now responsible for.
            deferLedgerLockRelease = true;
            deferLedgerLockRelease(ledgerIdToReplicate);
            return false;
        }
        // Re-check: the ledger is only marked replicated when nothing is
        // under-replicated any more.
        fragments = getUnderreplicatedFragments(lh, conf.getAuditorLedgerVerificationPercentage());
        if (fragments.isEmpty()) {
            LOG.info("Ledger replicated successfully. ledger id is: {}", ledgerIdToReplicate);
            underreplicationManager.markLedgerReplicated(ledgerIdToReplicate);
            return true;
        } else {
            // Fragments are still pending: report failure; the lock is
            // released in the finally block so the ledger can be taken up
            // for replication again.
            return false;
        }
    } catch (BKNoSuchLedgerExistsException e) {
        // Ledger might have been deleted by user
        LOG.info("BKNoSuchLedgerExistsException while opening " + "ledger {} for replication. Other clients " + "might have deleted the ledger. " + "So, no harm to continue", ledgerIdToReplicate);
        underreplicationManager.markLedgerReplicated(ledgerIdToReplicate);
        getExceptionCounter("BKNoSuchLedgerExistsException").inc();
        return false;
    } catch (BKNotEnoughBookiesException e) {
        logBKExceptionAndReleaseLedger(e, ledgerIdToReplicate);
        throw e;
    } catch (BKException e) {
        logBKExceptionAndReleaseLedger(e, ledgerIdToReplicate);
        return false;
    } finally {
        // Always release the under-replicated lock unless the release was
        // deferred above. If the lock has already been released, this is a
        // no-op.
        if (!deferLedgerLockRelease) {
            try {
                underreplicationManager.releaseUnderreplicatedLedger(ledgerIdToReplicate);
            } catch (UnavailableException e) {
                LOG.error("UnavailableException while releasing the underreplicated lock for ledger {}:", ledgerIdToReplicate, e);
                shutdown();
            }
        }
    }
}
Also used : LedgerHandle(org.apache.bookkeeper.client.LedgerHandle) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) BKException(org.apache.bookkeeper.client.BKException) LedgerFragment(org.apache.bookkeeper.client.LedgerFragment) BKNotEnoughBookiesException(org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException) BKNoSuchLedgerExistsException(org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsException)

Example 3 with UnavailableException

use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.

the class ZkLedgerUnderreplicationManager method notifyLostBookieRecoveryDelayChanged.

/**
 * Registers a ZooKeeper watcher on the lostBookieRecoveryDelay znode that
 * invokes {@code cb} when the watched node's data changes. If the znode
 * does not exist at registration time, {@code cb} is invoked immediately.
 *
 * @param cb callback completed with return code 0 and a {@code null} result
 * @throws UnavailableException if the ZooKeeper call fails or the calling
 *         thread is interrupted while waiting for it
 */
@Override
public void notifyLostBookieRecoveryDelayChanged(GenericCallback<Void> cb) throws UnavailableException {
    LOG.debug("notifyLostBookieRecoveryDelayChanged()");
    Watcher w = new Watcher() {

        @Override
        public void process(WatchedEvent e) {
            // Only data changes are forwarded; other event types are ignored.
            if (e.getType() == Watcher.Event.EventType.NodeDataChanged) {
                cb.operationComplete(0, null);
            }
        }
    };
    try {
        // exists() both checks for the znode and registers the watcher.
        if (null == zkc.exists(lostBookieRecoveryDelayZnode, w)) {
            cb.operationComplete(0, null);
        }
    } catch (KeeperException ke) {
        LOG.error("Error while checking the state of lostBookieRecoveryDelay", ke);
        throw new ReplicationException.UnavailableException("Error contacting zookeeper", ke);
    } catch (InterruptedException ie) {
        // Restore the interrupt flag before converting the exception.
        Thread.currentThread().interrupt();
        throw new ReplicationException.UnavailableException("Interrupted while contacting zookeeper", ie);
    }
}
Also used : WatchedEvent(org.apache.zookeeper.WatchedEvent) Watcher(org.apache.zookeeper.Watcher) ReplicationException(org.apache.bookkeeper.replication.ReplicationException) KeeperException(org.apache.zookeeper.KeeperException) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException)

Example 4 with UnavailableException

use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.

the class TestLedgerUnderreplicationManager method testEnableLedgerReplication.

/**
 * Test enabling the ledger re-replication. After enableLedgerReplication,
 * the blocked getLedgerToRereplicate() call should continue.
 */
@Test
public void testEnableLedgerReplication() throws Exception {
    isLedgerReplicationDisabled = true;
    final LedgerUnderreplicationManager replicaMgr = lmf1.newLedgerUnderreplicationManager();
    // simulate few urLedgers before disabling
    final Long ledgerA = 0xfeadeefdacL;
    final String missingReplica = "localhost:3181";
    try {
        replicaMgr.markLedgerUnderreplicated(ledgerA, missingReplica);
    } catch (UnavailableException e) {
        LOG.debug("Unexpected exception while marking urLedger", e);
        fail("Unexpected exception while marking urLedger" + e.getMessage());
    }
    // disabling replication
    replicaMgr.disableLedgerReplication();
    LOG.debug("Disabled Ledeger Replication");
    String znodeA = getUrLedgerZnode(ledgerA);
    // Two count-downs are expected: one from the lock-znode creation event
    // and one from the worker thread after it obtained the ledger.
    final CountDownLatch znodeLatch = new CountDownLatch(2);
    String urledgerA = StringUtils.substringAfterLast(znodeA, "/");
    String urLockLedgerA = basePath + "/locks/" + urledgerA;
    zkc1.exists(urLockLedgerA, new Watcher() {

        @Override
        public void process(WatchedEvent event) {
            if (event.getType() == EventType.NodeCreated) {
                znodeLatch.countDown();
                LOG.debug("Recieved node creation event for the zNodePath:" + event.getPath());
            }
        }
    });
    // getLedgerToRereplicate is waiting until enable rereplication
    Thread thread1 = new Thread() {

        @Override
        public void run() {
            try {
                Long lA = replicaMgr.getLedgerToRereplicate();
                // An assertion failure here is not propagated to the test
                // thread, but it skips the flag reset and count-down below,
                // which makes the assertions in the test thread fail.
                assertEquals("Should be the ledger I just marked", ledgerA, lA);
                isLedgerReplicationDisabled = false;
                znodeLatch.countDown();
            } catch (UnavailableException e) {
                LOG.debug("Unexpected exception while getting the ledger to rereplicate", e);
                isLedgerReplicationDisabled = false;
            }
        }
    };
    thread1.start();
    try {
        // While replication is disabled nothing may progress.
        assertFalse("shouldn't complete", znodeLatch.await(1, TimeUnit.SECONDS));
        assertTrue("Ledger replication is not disabled!", isLedgerReplicationDisabled);
        assertEquals("Failed to disable ledger replication!", 2, znodeLatch.getCount());
        replicaMgr.enableLedgerReplication();
        // The await result is not checked here; the count assertion below
        // catches a timeout.
        znodeLatch.await(5, TimeUnit.SECONDS);
        LOG.debug("Enabled Ledeger Replication");
        assertFalse("Ledger replication is not enabled!", isLedgerReplicationDisabled);
        assertEquals("Failed to enable ledger replication!", 0, znodeLatch.getCount());
    } finally {
        thread1.interrupt();
    }
}
Also used : WatchedEvent(org.apache.zookeeper.WatchedEvent) ZkLedgerUnderreplicationManager(org.apache.bookkeeper.meta.ZkLedgerUnderreplicationManager) LedgerUnderreplicationManager(org.apache.bookkeeper.meta.LedgerUnderreplicationManager) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) Watcher(org.apache.zookeeper.Watcher) CountDownLatch(java.util.concurrent.CountDownLatch) Test(org.junit.Test)

Example 5 with UnavailableException

use of org.apache.bookkeeper.replication.ReplicationException.UnavailableException in project bookkeeper by apache.

the class Auditor method auditBookies.

/**
 * Runs one bookie audit pass: builds the bookie-to-ledgers index, computes
 * which indexed bookies are not in the list of available bookies, and passes
 * those to {@code handleLostBookies}.
 *
 * <p>The pass is abandoned early when the under-replication manager is
 * unavailable, or when ledger replication turns out to be disabled after
 * the index was generated (in which case {@code bookieCheck} is resubmitted
 * to the executor).
 *
 * @throws BKAuditException declared for the audit helpers called here
 * @throws KeeperException declared for the audit helpers called here
 * @throws InterruptedException declared for the audit helpers called here
 * @throws BKException declared for the audit helpers called here
 */
@SuppressWarnings("unchecked")
private void auditBookies() throws BKAuditException, KeeperException, InterruptedException, BKException {
    try {
        waitIfLedgerReplicationDisabled();
    } catch (UnavailableException ue) {
        LOG.error("Underreplication unavailable, skipping audit. Will retry after a period", ue);
        return;
    }
    Stopwatch stopwatch = Stopwatch.createStarted();
    // put exit cases here
    Map<String, Set<Long>> ledgerDetails = generateBookie2LedgersIndex();
    try {
        if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) {
            // has been disabled while we were generating the index
            // discard this run, and schedule a new one
            executor.submit(bookieCheck);
            return;
        }
    } catch (UnavailableException ue) {
        LOG.error("Underreplication unavailable, skipping audit. Will retry after a period", ue);
        return;
    }
    List<String> availableBookies = getAvailableBookies();
    // find lost bookies
    Set<String> knownBookies = ledgerDetails.keySet();
    Collection<String> lostBookies = CollectionUtils.subtract(knownBookies, availableBookies);
    // Time from the start of the pass up to the lost-bookie computation.
    bookieToLedgersMapCreationTime.registerSuccessfulEvent(stopwatch.elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
    if (!lostBookies.isEmpty()) {
        handleLostBookies(lostBookies, ledgerDetails);
        // Stops the stopwatch; this records the total time including the
        // handling of lost bookies.
        uRLPublishTimeForLostBookies.registerSuccessfulEvent(stopwatch.stop().elapsed(TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS);
    }
}
Also used : Set(java.util.Set) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) Stopwatch(com.google.common.base.Stopwatch)

Aggregations

UnavailableException (org.apache.bookkeeper.replication.ReplicationException.UnavailableException)16 BKException (org.apache.bookkeeper.client.BKException)4 LedgerUnderreplicationManager (org.apache.bookkeeper.meta.LedgerUnderreplicationManager)4 BKAuditException (org.apache.bookkeeper.replication.ReplicationException.BKAuditException)4 CompatibilityException (org.apache.bookkeeper.replication.ReplicationException.CompatibilityException)4 KeeperException (org.apache.zookeeper.KeeperException)4 BookieSocketAddress (org.apache.bookkeeper.net.BookieSocketAddress)3 Watcher (org.apache.zookeeper.Watcher)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 List (java.util.List)2 Set (java.util.Set)2 BKNoSuchLedgerExistsException (org.apache.bookkeeper.client.BKException.BKNoSuchLedgerExistsException)2 BookKeeperTestClient (org.apache.bookkeeper.client.BookKeeperTestClient)2 LedgerFragment (org.apache.bookkeeper.client.LedgerFragment)2 LedgerHandle (org.apache.bookkeeper.client.LedgerHandle)2 ClientConfiguration (org.apache.bookkeeper.conf.ClientConfiguration)2 ServerConfiguration (org.apache.bookkeeper.conf.ServerConfiguration)2