Search in sources :

Example 1 with BKAuditException

use of org.apache.bookkeeper.replication.ReplicationException.BKAuditException in project bookkeeper by apache.

the class BookieLedgerIndexer method getBookieToLedgerIndex.

/**
 * Builds an index from bookie to the ids of the ledgers whose metadata
 * lists that bookie in at least one ensemble.
 *
 * <p>Every ledger handed to the processor has its metadata read through
 * the ledger manager; the calling thread blocks until the scan reports
 * completion.
 *
 * @return map keyed by the bookie's string form, valued by the ids of the
 *         ledgers that reference that bookie
 * @throws BKAuditException
 *             if the wait for the scan is interrupted, or the scan did
 *             not report an OK result code
 */
public Map<String, Set<Long>> getBookieToLedgerIndex() throws BKAuditException {
    final ConcurrentHashMap<String, Set<Long>> index = new ConcurrentHashMap<String, Set<Long>>();
    // Released by the final callback once the whole scan is over.
    final CountDownLatch scanFinished = new CountDownLatch(1);
    // Receives the overall result code of the scan from the final callback.
    final List<Integer> scanResult = new ArrayList<Integer>(1);

    Processor<Long> perLedger = new Processor<Long>() {

        @Override
        public void process(final Long ledgerId, final AsyncCallback.VoidCallback iterCallback) {
            ledgerManager.readLedgerMetadata(ledgerId, new GenericCallback<LedgerMetadata>() {

                @Override
                public void operationComplete(int rc, LedgerMetadata ledgerMetadata) {
                    int outcome = rc;
                    if (rc == BKException.Code.NoSuchLedgerExistsException) {
                        // A ledger deleted mid-scan is not an error for the index.
                        LOG.info("Ignoring replication of already deleted ledger {}", ledgerId);
                        outcome = BKException.Code.OK;
                    } else if (rc != BKException.Code.OK) {
                        LOG.warn("Unable to read the ledger:" + ledgerId + " information");
                    } else {
                        // Register the ledger under every bookie of every ensemble.
                        for (List<BookieSocketAddress> members : ledgerMetadata.getEnsembles().values()) {
                            for (BookieSocketAddress member : members) {
                                putLedger(index, member.toString(), ledgerId);
                            }
                        }
                    }
                    iterCallback.processResult(outcome, null, null);
                }
            });
        }
    };

    AsyncCallback.VoidCallback onScanDone = new AsyncCallback.VoidCallback() {

        @Override
        public void processResult(int rc, String s, Object obj) {
            scanResult.add(rc);
            scanFinished.countDown();
        }
    };
    ledgerManager.asyncProcessLedgers(perLedger, onScanDone, null, BKException.Code.OK, BKException.Code.ReadException);

    try {
        scanFinished.await();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new BKAuditException("Exception while getting the bookie-ledgers", e);
    }

    if (scanResult.contains(BKException.Code.OK)) {
        return index;
    }
    throw new BKAuditException("Exception while getting the bookie-ledgers", BKException.create(scanResult.get(0)));
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) Processor(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor) AsyncCallback(org.apache.zookeeper.AsyncCallback) ArrayList(java.util.ArrayList) BKAuditException(org.apache.bookkeeper.replication.ReplicationException.BKAuditException) BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) GenericCallback(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback) CountDownLatch(java.util.concurrent.CountDownLatch) LedgerMetadata(org.apache.bookkeeper.client.LedgerMetadata) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map)

Example 2 with BKAuditException

use of org.apache.bookkeeper.replication.ReplicationException.BKAuditException in project bookkeeper by apache.

the class Auditor method checkAllLedgers.

/**
 * List all the ledgers and check them individually. This should not
 * be run very often.
 *
 * <p>A dedicated ZooKeeper handle, BookKeeper client and BookKeeperAdmin are
 * created for the duration of the check. Each of them is closed in its own
 * {@code finally} block, so a failure while constructing or closing one of
 * them never leaves the ones created before it open.
 *
 * @throws BKAuditException
 *             if the thread is interrupted while waiting for the check to finish
 * @throws BKException
 *             if the ledger scan completes with a result code other than OK
 * @throws IOException
 *             declared for the client setup/teardown calls
 * @throws InterruptedException
 *             declared for the client setup/teardown calls
 * @throws KeeperException
 *             declared for the client setup/teardown calls
 */
void checkAllLedgers() throws BKAuditException, BKException, IOException, InterruptedException, KeeperException {
    final ZooKeeper newzk = ZooKeeperClient.newBuilder().connectString(conf.getZkServers()).sessionTimeoutMs(conf.getZkTimeout()).build();
    try {
        final BookKeeper client = new BookKeeper(new ClientConfiguration(conf), newzk);
        try {
            final BookKeeperAdmin admin = new BookKeeperAdmin(client, statsLogger);
            try {
                checkAllLedgersUsing(client, admin);
            } finally {
                admin.close();
            }
        } finally {
            // Runs even when admin.close() throws.
            client.close();
        }
    } finally {
        // Runs even when the client could not be built or closed.
        newzk.close();
    }
}

/**
 * Runs the ledger-by-ledger check with the supplied client and admin, blocking
 * until the scan finishes or is aborted. Does not close either argument.
 */
private void checkAllLedgersUsing(final BookKeeper client, final BookKeeperAdmin admin) throws BKAuditException, BKException, IOException, InterruptedException, KeeperException {
    final LedgerChecker checker = new LedgerChecker(client);
    final AtomicInteger returnCode = new AtomicInteger(BKException.Code.OK);
    final CountDownLatch processDone = new CountDownLatch(1);
    Processor<Long> checkLedgersProcessor = new Processor<Long>() {

        @Override
        public void process(final Long ledgerId, final AsyncCallback.VoidCallback callback) {
            try {
                if (!ledgerUnderreplicationManager.isLedgerReplicationEnabled()) {
                    LOG.info("Ledger rereplication has been disabled, aborting periodic check");
                    // Abort: release the waiting caller directly; the per-ledger
                    // callback is intentionally not invoked.
                    processDone.countDown();
                    return;
                }
            } catch (ReplicationException.UnavailableException ue) {
                LOG.error("Underreplication manager unavailable running periodic check", ue);
                processDone.countDown();
                return;
            }
            LedgerHandle lh = null;
            try {
                lh = admin.openLedgerNoRecovery(ledgerId);
                checker.checkLedger(lh, new ProcessLostFragmentsCb(lh, callback), conf.getAuditorLedgerVerificationPercentage());
                // we collect the following stats to get a measure of the
                // distribution of a single ledger within the bk cluster
                // the higher the number of fragments/bookies, the more distributed it is
                numFragmentsPerLedger.registerSuccessfulValue(lh.getNumFragments());
                numBookiesPerLedger.registerSuccessfulValue(lh.getNumBookies());
                numLedgersChecked.inc();
            } catch (BKException.BKNoSuchLedgerExistsException bknsle) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Ledger was deleted before we could check it", bknsle);
                }
                // A ledger deleted in the meantime is not a failure of the check.
                callback.processResult(BKException.Code.OK, null, null);
                return;
            } catch (BKException bke) {
                LOG.error("Couldn't open ledger " + ledgerId, bke);
                callback.processResult(BKException.Code.BookieHandleNotAvailableException, null, null);
                return;
            } catch (InterruptedException ie) {
                LOG.error("Interrupted opening ledger", ie);
                Thread.currentThread().interrupt();
                callback.processResult(BKException.Code.InterruptedException, null, null);
                return;
            } finally {
                // NOTE(review): checkLedger takes a callback, which suggests it may
                // complete after this point while the handle is closed here -
                // confirm the checker does not need the handle open afterwards.
                if (lh != null) {
                    try {
                        lh.close();
                    } catch (BKException bke) {
                        LOG.warn("Couldn't close ledger " + ledgerId, bke);
                    } catch (InterruptedException ie) {
                        LOG.warn("Interrupted closing ledger " + ledgerId, ie);
                        Thread.currentThread().interrupt();
                    }
                }
            }
        }
    };
    ledgerManager.asyncProcessLedgers(checkLedgersProcessor, new AsyncCallback.VoidCallback() {

        @Override
        public void processResult(int rc, String s, Object obj) {
            returnCode.set(rc);
            processDone.countDown();
        }
    }, null, BKException.Code.OK, BKException.Code.ReadException);
    try {
        processDone.await();
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new BKAuditException("Exception while checking ledgers", e);
    }
    if (returnCode.get() != BKException.Code.OK) {
        throw BKException.create(returnCode.get());
    }
}
Also used : Processor(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor) AsyncCallback(org.apache.zookeeper.AsyncCallback) BKAuditException(org.apache.bookkeeper.replication.ReplicationException.BKAuditException) BookKeeperAdmin(org.apache.bookkeeper.client.BookKeeperAdmin) LedgerHandle(org.apache.bookkeeper.client.LedgerHandle) BookKeeper(org.apache.bookkeeper.client.BookKeeper) CountDownLatch(java.util.concurrent.CountDownLatch) ZooKeeper(org.apache.zookeeper.ZooKeeper) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LedgerChecker(org.apache.bookkeeper.client.LedgerChecker) BKException(org.apache.bookkeeper.client.BKException) ClientConfiguration(org.apache.bookkeeper.conf.ClientConfiguration) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException)

Example 3 with BKAuditException

use of org.apache.bookkeeper.replication.ReplicationException.BKAuditException in project bookkeeper by apache.

the class Auditor method publishSuspectedLedgers.

/**
 * Marks each of the given ledgers as underreplicated on behalf of a bookie.
 *
 * <p>Does nothing beyond logging when {@code ledgers} is null or empty.
 * Otherwise the number of ledgers is recorded in the underreplicated-ledger
 * stat before they are published one by one; publishing stops at the first
 * failure.
 *
 * @param bookieIP
 *            identifier of the bookie the ledgers are attributed to
 * @param ledgers
 *            ids of the ledgers to mark; may be null
 * @throws BKAuditException
 *             if the underreplication manager is unavailable while marking
 *             a ledger
 */
private void publishSuspectedLedgers(String bookieIP, Set<Long> ledgers) throws BKAuditException {
    final boolean nothingToPublish = (ledgers == null) || ledgers.isEmpty();
    if (nothingToPublish) {
        // No ledgers are recorded for this bookie, so its failure is ignored.
        LOG.info("There is no ledgers for the failed bookie: {}", bookieIP);
        return;
    }

    LOG.info("Following ledgers: {} of bookie: {} are identified as underreplicated", ledgers, bookieIP);
    numUnderReplicatedLedger.registerSuccessfulValue(ledgers.size());

    for (Long suspect : ledgers) {
        try {
            ledgerUnderreplicationManager.markLedgerUnderreplicated(suspect, bookieIP);
        } catch (UnavailableException unavailable) {
            final String failure = "Failed to publish underreplicated ledger: " + suspect + " of bookie: " + bookieIP;
            throw new BKAuditException(failure, unavailable);
        }
    }
}
Also used : UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) BKAuditException(org.apache.bookkeeper.replication.ReplicationException.BKAuditException)

Example 4 with BKAuditException

use of org.apache.bookkeeper.replication.ReplicationException.BKAuditException in project bookkeeper by apache.

the class Auditor method submitLostBookieRecoveryDelayChangedEvent.

/**
 * Submits a task to the auditor's executor that reacts to a change of the
 * lostBookieRecoveryDelay value.
 *
 * <p>The task waits while ledger replication is disabled, reads the current
 * delay from the underreplication manager and cancels any pending audit task.
 * It then either starts an audit right away (delay is 0 or equal to the
 * previously observed value) or, if an audit task was pending, schedules a
 * new one after the new delay. Whenever a delay was read successfully it is
 * remembered as the "before change" value for the next event.
 *
 * @return the future of the submitted task, or a future already failed with
 *         a BKAuditException when the executor has been shut down
 */
synchronized Future<?> submitLostBookieRecoveryDelayChangedEvent() {
    if (executor.isShutdown()) {
        SettableFuture<Void> rejected = SettableFuture.<Void>create();
        rejected.setException(new BKAuditException("Auditor shutting down"));
        return rejected;
    }

    final Runnable onDelayChanged = new Runnable() {

        // Stays at -1 until the new delay has been read successfully.
        int freshDelay = -1;

        @Override
        public void run() {
            try {
                waitIfLedgerReplicationDisabled();
                freshDelay = Auditor.this.ledgerUnderreplicationManager.getLostBookieRecoveryDelay();

                // A pending audit task was scheduled with the old delay; cancel it so
                // that it can be rescheduled according to the new one.
                if (auditTask != null) {
                    LOG.info("lostBookieRecoveryDelay period has been changed so canceling the pending AuditTask");
                    auditTask.cancel(false);
                    numDelayedBookieAuditsCancelled.inc();
                }

                // Shared by the immediate and the delayed path.
                final Runnable auditAndReset = () -> {
                    startAudit(false);
                    auditTask = null;
                    bookiesToBeAudited.clear();
                };

                // A delay of 0, or one reset to its previous value, is the signal
                // to trigger the audit immediately.
                final boolean auditImmediately = (freshDelay == 0)
                        || (freshDelay == lostBookieRecoveryDelayBeforeChange);
                if (auditImmediately) {
                    LOG.info("lostBookieRecoveryDelay has been set to 0 or reset to its previous value, "
                            + "so starting AuditTask. Current lostBookieRecoveryDelay: {}, "
                            + "previous lostBookieRecoveryDelay: {}",
                            freshDelay, lostBookieRecoveryDelayBeforeChange);
                    auditAndReset.run();
                } else if (auditTask != null) {
                    LOG.info("lostBookieRecoveryDelay has been set to {}, so rescheduling AuditTask accordingly", freshDelay);
                    auditTask = executor.schedule(auditAndReset, freshDelay, TimeUnit.SECONDS);
                    numBookieAuditsDelayed.inc();
                }
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                LOG.error("Interrupted while for LedgersReplication to be enabled ", ie);
            } catch (UnavailableException ue) {
                LOG.error("Exception while reading from ZK", ue);
            } finally {
                if (freshDelay != -1) {
                    lostBookieRecoveryDelayBeforeChange = freshDelay;
                }
            }
        }
    };
    return executor.submit(onDelayChanged);
}
Also used : UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) BKAuditException(org.apache.bookkeeper.replication.ReplicationException.BKAuditException)

Example 5 with BKAuditException

use of org.apache.bookkeeper.replication.ReplicationException.BKAuditException in project bookkeeper by apache.

the class BookKeeperAdmin method decommissionBookie.

/**
 * Triggers an audit and then blocks until no ledger depends on the given
 * decommissioning bookie any more.
 *
 * <p>Steps, in order: reject the call if the bookie is still listed as
 * available or read-only; trigger the audit; sleep 30 seconds; wait for
 * every ledger that the bookie-to-ledger index attributes to this bookie;
 * finally, as a double check, wait for every ledger still listed as
 * underreplicated with this bookie among its replicas.
 *
 * @param bookieAddress
 *            address of the decommissioning bookie
 * @throws BKException
 *             with code IllegalOpException if the bookie is still listed
 *             as available or read-only
 * @throws InterruptedException
 *             if the thread is interrupted, e.g. during the 30 second sleep
 * @throws BKAuditException
 *             if building the bookie-to-ledger index fails
 * @throws CompatibilityException
 *             propagated from the calls this method makes
 * @throws UnavailableException
 *             propagated from the calls this method makes
 * @throws KeeperException
 *             propagated from the calls this method makes
 * @throws IOException
 *             propagated from the calls this method makes
 * @throws TimeoutException
 *             propagated from the calls this method makes
 */
public void decommissionBookie(BookieSocketAddress bookieAddress) throws CompatibilityException, UnavailableException, KeeperException, InterruptedException, IOException, BKAuditException, TimeoutException, BKException {
    final boolean stillRegistered = getAvailableBookies().contains(bookieAddress)
            || getReadOnlyBookies().contains(bookieAddress);
    if (stillRegistered) {
        LOG.error("Bookie: {} is not shutdown yet", bookieAddress);
        throw BKException.create(BKException.Code.IllegalOpException);
    }

    triggerAudit();

    // Give the Auditor 30 seconds to run its forced audit task and let the
    // underreplication manager start its replication work.
    Thread.sleep(30 * 1000);

    // Key under which this bookie appears in the index and in replica lists.
    final String bookieId = bookieAddress.toString();

    // Collect the ledgers stored on this bookie from the bookie-to-ledger index.
    final BookieLedgerIndexer indexer = new BookieLedgerIndexer(bkc.ledgerManager);
    final Set<Long> ledgersOnBookie = indexer.getBookieToLedgerIndex().get(bookieId);
    if (ledgersOnBookie != null && !ledgersOnBookie.isEmpty()) {
        // Wait until the metadata of these ledgers no longer contains this
        // bookie in any ensemble, i.e. they were replicated elsewhere.
        waitForLedgersToBeReplicated(ledgersOnBookie, bookieAddress, bkc.ledgerManager);
    }

    // Double check: is any ledger still listed as underreplicated because of this bookie?
    final Predicate<List<String>> mentionsBookie = replicas -> replicas.contains(bookieId);
    final Iterator<Long> pending = underreplicationManager.listLedgersToRereplicate(mentionsBookie);
    if (pending.hasNext()) {
        // If so, wait for those ledgers to be replicated properly as well.
        LOG.info("Still in some underreplicated ledgers metadata, this bookie is part of its ensemble. "
                + "Have to make sure that those ledger fragments are rereplicated");
        final List<Long> stillUnderreplicated = new ArrayList<>();
        while (pending.hasNext()) {
            stillUnderreplicated.add(pending.next());
        }
        waitForLedgersToBeReplicated(stillUnderreplicated, bookieAddress, bkc.ledgerManager);
    }
}
Also used : Enumeration(java.util.Enumeration) OpenCallback(org.apache.bookkeeper.client.AsyncCallback.OpenCallback) GenericCallback(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback) LoggerFactory(org.slf4j.LoggerFactory) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RegistrationListener(org.apache.bookkeeper.discover.RegistrationClient.RegistrationListener) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SingleFragmentCallback(org.apache.bookkeeper.client.LedgerFragmentReplicator.SingleFragmentCallback) Map(java.util.Map) BKAuditException(org.apache.bookkeeper.replication.ReplicationException.BKAuditException) BookieException(org.apache.bookkeeper.bookie.BookieException) NullStatsLogger(org.apache.bookkeeper.stats.NullStatsLogger) AuditorElector(org.apache.bookkeeper.replication.AuditorElector) BookieLedgerIndexer(org.apache.bookkeeper.replication.BookieLedgerIndexer) LedgerUnderreplicationManager(org.apache.bookkeeper.meta.LedgerUnderreplicationManager) Predicate(java.util.function.Predicate) Collection(java.util.Collection) Set(java.util.Set) Sets(com.google.common.collect.Sets) List(java.util.List) Entry(java.util.Map.Entry) StatsLogger(org.apache.bookkeeper.stats.StatsLogger) Optional(java.util.Optional) MetadataDrivers.runFunctionWithRegistrationManager(org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithRegistrationManager) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) SortedMap(java.util.SortedMap) MultiCallback(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback) LedgerRangeIterator(org.apache.bookkeeper.meta.LedgerManager.LedgerRangeIterator) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) CompatibilityException(org.apache.bookkeeper.replication.ReplicationException.CompatibilityException) AtomicReference(java.util.concurrent.atomic.AtomicReference) 
ArrayList(java.util.ArrayList) Processor(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor) HashSet(java.util.HashSet) Lists(com.google.common.collect.Lists) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) RecoverCallback(org.apache.bookkeeper.client.AsyncCallback.RecoverCallback) SyncReadCallback(org.apache.bookkeeper.client.SyncCallbackUtils.SyncReadCallback) LinkedList(java.util.LinkedList) NoSuchElementException(java.util.NoSuchElementException) Bookie(org.apache.bookkeeper.bookie.Bookie) MetadataDrivers.runFunctionWithMetadataBookieDriver(org.apache.bookkeeper.meta.MetadataDrivers.runFunctionWithMetadataBookieDriver) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) KeeperException(org.apache.zookeeper.KeeperException) LedgerManager(org.apache.bookkeeper.meta.LedgerManager) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) SyncOpenCallback(org.apache.bookkeeper.client.SyncCallbackUtils.SyncOpenCallback) File(java.io.File) ServerConfiguration(org.apache.bookkeeper.conf.ServerConfiguration) ExecutionException(java.util.concurrent.ExecutionException) LedgerManagerFactory(org.apache.bookkeeper.meta.LedgerManagerFactory) IOUtils(org.apache.bookkeeper.util.IOUtils) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) ClientConfiguration(org.apache.bookkeeper.conf.ClientConfiguration) AsyncCallback(org.apache.zookeeper.AsyncCallback) AbstractFuture(com.google.common.util.concurrent.AbstractFuture) Set(java.util.Set) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) BookieLedgerIndexer(org.apache.bookkeeper.replication.BookieLedgerIndexer) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList)

Aggregations

BKAuditException (org.apache.bookkeeper.replication.ReplicationException.BKAuditException)6 UnavailableException (org.apache.bookkeeper.replication.ReplicationException.UnavailableException)5 ArrayList (java.util.ArrayList)3 Processor (org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.Processor)3 AsyncCallback (org.apache.zookeeper.AsyncCallback)3 Collection (java.util.Collection)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 Set (java.util.Set)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 ClientConfiguration (org.apache.bookkeeper.conf.ClientConfiguration)2 BookieSocketAddress (org.apache.bookkeeper.net.BookieSocketAddress)2 GenericCallback (org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Lists (com.google.common.collect.Lists)1 Maps (com.google.common.collect.Maps)1 Sets (com.google.common.collect.Sets)1 AbstractFuture (com.google.common.util.concurrent.AbstractFuture)1