Search in sources :

Example 1 with MultiCallback

use of org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback in project bookkeeper by apache.

the class BookKeeperAdmin method recoverLedger.

/**
 * This method asynchronously recovers a given ledger if any of the ledger
 * entries were stored on the failed bookie.
 *
 * <p>The ledger is first opened without recovery. Depending on what the
 * metadata shows, one of the following happens:
 * <ul>
 * <li>the open failed: the return code is passed to
 * {@code finalLedgerIterCb};</li>
 * <li>{@code skipOpenLedgers} is set and the ledger is neither closed nor in
 * recovery: the handle is closed and {@code finalLedgerIterCb} is invoked
 * with {@code OK};</li>
 * <li>this is not a dry run, the ledger is not closed and
 * {@code containBookiesInLastEnsemble} reports a source bookie in the last
 * ensemble: the handle is closed, the ledger is re-opened through
 * {@code asyncOpenLedger}, and this method is re-submitted on
 * {@code bkc.mainWorkerPool};</li>
 * <li>otherwise every fragment whose ensemble contains a source bookie is
 * either printed (dry run) or handed to
 * {@code asyncRecoverLedgerFragment}; once all fragments have reported
 * back, the handle is closed and {@code finalLedgerIterCb} is invoked.</li>
 * </ul>
 *
 * @param bookiesSrc
 *            Source bookies that had a failure. We want to replicate the
 *            ledger fragments that were stored there.
 * @param lId
 *            Ledger id we want to recover.
 * @param dryrun
 *            printing the recovery plan without actually recovering bookies
 * @param skipOpenLedgers
 *            Skip recovering open ledgers.
 * @param finalLedgerIterCb
 *            IterationCallback to invoke once we've recovered the current
 *            ledger.
 */
private void recoverLedger(final Set<BookieSocketAddress> bookiesSrc, final long lId, final boolean dryrun, final boolean skipOpenLedgers, final AsyncCallback.VoidCallback finalLedgerIterCb) {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Recovering ledger : {}", lId);
    }
    asyncOpenLedgerNoRecovery(lId, new OpenCallback() {

        @Override
        public void openComplete(int rc, final LedgerHandle lh, Object ctx) {
            if (rc != BKException.Code.OK) {
                LOG.error("BK error opening ledger: " + lId, BKException.create(rc));
                finalLedgerIterCb.processResult(rc, null, null);
                return;
            }
            LedgerMetadata lm = lh.getLedgerMetadata();
            if (skipOpenLedgers && !lm.isClosed() && !lm.isInRecovery()) {
                LOG.info("Skip recovering open ledger {}.", lId);
                closeLedgerHandleQuietly(lh, lId);
                finalLedgerIterCb.processResult(BKException.Code.OK, null, null);
                return;
            }
            final boolean fenceRequired = !lm.isClosed() && containBookiesInLastEnsemble(lm, bookiesSrc);
            // the recovery tool should first close the ledger
            if (!dryrun && fenceRequired) {
                // close opened non recovery ledger handle
                try {
                    lh.close();
                } catch (InterruptedException ie) {
                    // Keep the interrupt visible to the owner of this thread,
                    // as the other close sites in this method do.
                    Thread.currentThread().interrupt();
                    LOG.warn("Error closing non recovery ledger handle for ledger " + lId, ie);
                } catch (Exception e) {
                    LOG.warn("Error closing non recovery ledger handle for ledger " + lId, e);
                }
                asyncOpenLedger(lId, new OpenCallback() {

                    @Override
                    public void openComplete(int newrc, final LedgerHandle newlh, Object newctx) {
                        if (newrc != BKException.Code.OK) {
                            LOG.error("BK error close ledger: " + lId, BKException.create(newrc));
                            finalLedgerIterCb.processResult(newrc, null, null);
                            return;
                        }
                        // NOTE(review): newlh is not closed on this path - confirm
                        // whether the handle needs to be released here.
                        bkc.mainWorkerPool.submit(() -> {
                            // do recovery
                            recoverLedger(bookiesSrc, lId, dryrun, skipOpenLedgers, finalLedgerIterCb);
                        });
                    }
                }, null);
                return;
            }
            // Per-ledger completion: log the outcome, release the handle and
            // forward the result to the caller's callback.
            final AsyncCallback.VoidCallback ledgerIterCb = new AsyncCallback.VoidCallback() {

                @Override
                public void processResult(int rc, String path, Object ctx) {
                    if (BKException.Code.OK != rc) {
                        LOG.error("Failed to recover ledger {} : {}", lId, BKException.codeLogger(rc));
                    } else {
                        LOG.info("Recovered ledger {}.", lId);
                    }
                    closeLedgerHandleQuietly(lh, lId);
                    finalLedgerIterCb.processResult(rc, path, ctx);
                }
            };
            /*
             * This List stores the ledger fragments to recover indexed by
             * the start entry ID for the range. The ensembles map is
             * keyed off this.
             */
            final List<Long> ledgerFragmentsToRecover = new LinkedList<Long>();
            /*
             * This Map stores the start and end entry ID values for each of
             * the ledger fragment ranges. A fragment ends one entry before
             * the next fragment starts; the last fragment is bounded by the
             * handle's last-add-confirmed entry.
             */
            Map<Long, Long> ledgerFragmentsRange = new HashMap<Long, Long>();
            Long curEntryId = null;
            for (Map.Entry<Long, ArrayList<BookieSocketAddress>> entry : lh.getLedgerMetadata().getEnsembles().entrySet()) {
                if (curEntryId != null) {
                    ledgerFragmentsRange.put(curEntryId, entry.getKey() - 1);
                }
                curEntryId = entry.getKey();
                if (containBookies(entry.getValue(), bookiesSrc)) {
                    /*
                     * Current ledger fragment has entries stored on the
                     * dead bookie so we'll need to recover them.
                     */
                    ledgerFragmentsToRecover.add(entry.getKey());
                }
            }
            // The last fragment has no successor; bound it by the LAC.
            if (curEntryId != null) {
                ledgerFragmentsRange.put(curEntryId, lh.getLastAddConfirmed());
            }
            /*
             * See if this current ledger contains any ledger fragment that
             * needs to be re-replicated. If not, then just invoke the
             * per-ledger callback and return.
             */
            if (ledgerFragmentsToRecover.isEmpty()) {
                ledgerIterCb.processResult(BKException.Code.OK, null, null);
                return;
            }
            if (dryrun) {
                VERBOSE.info("Recovered ledger {} : {}", lId, (fenceRequired ? "[fence required]" : ""));
            }
            /*
             * Multicallback for ledger. Once all fragments for the ledger have been recovered
             * trigger the ledgerIterCb
             */
            MultiCallback ledgerFragmentsMcb = new MultiCallback(ledgerFragmentsToRecover.size(), ledgerIterCb, null, BKException.Code.OK, BKException.Code.LedgerRecoveryException);
            /*
             * Now recover all of the necessary ledger fragments
             * asynchronously using a MultiCallback for every fragment.
             */
            boolean interrupted = false;
            for (final Long startEntryId : ledgerFragmentsToRecover) {
                if (interrupted) {
                    /*
                     * A previous dispatch was interrupted. The MultiCallback
                     * was sized for every fragment, so the ones we will not
                     * dispatch still have to be reported; otherwise
                     * ledgerIterCb (and the caller's callback) would never
                     * be invoked.
                     */
                    ledgerFragmentsMcb.processResult(BKException.Code.LedgerRecoveryException, null, null);
                    continue;
                }
                Long endEntryId = ledgerFragmentsRange.get(startEntryId);
                ArrayList<BookieSocketAddress> ensemble = lh.getLedgerMetadata().getEnsembles().get(startEntryId);
                // Get bookies to replace
                Map<Integer, BookieSocketAddress> targetBookieAddresses;
                try {
                    targetBookieAddresses = getReplacementBookies(lh, ensemble, bookiesSrc);
                } catch (BKException.BKNotEnoughBookiesException e) {
                    if (!dryrun) {
                        ledgerFragmentsMcb.processResult(BKException.Code.NotEnoughBookiesException, null, null);
                    } else {
                        VERBOSE.info("  Fragment [{} - {}] : {}", startEntryId, endEntryId, BKException.getMessage(BKException.Code.NotEnoughBookiesException));
                    }
                    continue;
                }
                if (dryrun) {
                    ArrayList<BookieSocketAddress> newEnsemble = replaceBookiesInEnsemble(ensemble, targetBookieAddresses);
                    VERBOSE.info("  Fragment [{} - {}] : ", startEntryId, endEntryId);
                    VERBOSE.info("    old ensemble : {}", formatEnsemble(ensemble, bookiesSrc, '*'));
                    VERBOSE.info("    new ensemble : {}", formatEnsemble(newEnsemble, bookiesSrc, '*'));
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Replicating fragment from [{}, {}] of ledger {} to {}", startEntryId, endEntryId, lh.getId(), targetBookieAddresses);
                    }
                    try {
                        LedgerFragmentReplicator.SingleFragmentCallback cb = new LedgerFragmentReplicator.SingleFragmentCallback(ledgerFragmentsMcb, lh, startEntryId, getReplacementBookiesMap(ensemble, targetBookieAddresses));
                        LedgerFragment ledgerFragment = new LedgerFragment(lh, startEntryId, endEntryId, targetBookieAddresses.keySet());
                        asyncRecoverLedgerFragment(lh, ledgerFragment, cb, Sets.newHashSet(targetBookieAddresses.values()));
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        interrupted = true;
                        // Account for the fragment whose dispatch was interrupted.
                        // NOTE(review): assumes asyncRecoverLedgerFragment does not
                        // also invoke the callback when it throws - confirm.
                        ledgerFragmentsMcb.processResult(BKException.Code.LedgerRecoveryException, null, null);
                    }
                }
            }
            if (dryrun) {
                // Nothing was dispatched in a dry run, so complete the ledger here.
                ledgerIterCb.processResult(BKException.Code.OK, null, null);
            }
        }
    }, null);
}

/**
 * Closes a ledger handle opened by {@code recoverLedger}, logging instead of
 * propagating any failure.
 *
 * @param lh
 *            handle to close
 * @param lId
 *            id of the ledger, used in the log message
 */
private void closeLedgerHandleQuietly(final LedgerHandle lh, final long lId) {
    try {
        lh.close();
    } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
    } catch (BKException bke) {
        // Trailing throwable is logged with its stack trace.
        LOG.warn("Error on closing ledger handle for {}.", lId, bke);
    }
}
Also used : AsyncCallback(org.apache.zookeeper.AsyncCallback) ArrayList(java.util.ArrayList) Entry(java.util.Map.Entry) BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) OpenCallback(org.apache.bookkeeper.client.AsyncCallback.OpenCallback) SyncOpenCallback(org.apache.bookkeeper.client.SyncCallbackUtils.SyncOpenCallback) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) SingleFragmentCallback(org.apache.bookkeeper.client.LedgerFragmentReplicator.SingleFragmentCallback) SingleFragmentCallback(org.apache.bookkeeper.client.LedgerFragmentReplicator.SingleFragmentCallback) TimeoutException(java.util.concurrent.TimeoutException) BKAuditException(org.apache.bookkeeper.replication.ReplicationException.BKAuditException) BookieException(org.apache.bookkeeper.bookie.BookieException) UnavailableException(org.apache.bookkeeper.replication.ReplicationException.UnavailableException) CompatibilityException(org.apache.bookkeeper.replication.ReplicationException.CompatibilityException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) NoSuchElementException(java.util.NoSuchElementException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) MultiCallback(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback) Map(java.util.Map) SortedMap(java.util.SortedMap) HashMap(java.util.HashMap) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap)

Example 2 with MultiCallback

use of org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback in project bookkeeper by apache.

the class LedgerFragmentReplicator method replicateFragmentInternal.

/**
 * Replicates every entry of one ledger fragment, reporting the overall
 * outcome through {@code ledgerFragmentMcb}.
 *
 * <p>The callback is invoked immediately, without replicating anything, when:
 * <ul>
 * <li>the fragment is not closed (result {@code UnclosedFragmentException});</li>
 * <li>the fragment has no last stored entry id (result {@code OK});</li>
 * <li>the entry range is empty or ends at or below {@code INVALID_ENTRY_ID}
 * (result {@code OK}).</li>
 * </ul>
 * Otherwise {@code recoverLedgerFragmentEntry} is called once per entry id in
 * {@code [firstStoredEntryId, lastStoredEntryId]}, sharing a
 * {@link MultiCallback} that forwards to {@code ledgerFragmentMcb} after all
 * entries have reported back.
 *
 * @param lh
 *            handle of the ledger the fragment belongs to
 * @param lf
 *            fragment to replicate
 * @param ledgerFragmentMcb
 *            callback receiving the result for the whole fragment
 * @param newBookies
 *            bookies passed on to {@code recoverLedgerFragmentEntry} as the
 *            replication targets
 * @throws InterruptedException
 *             declared by this method; presumably raised while dispatching
 *             entries - confirm against recoverLedgerFragmentEntry
 * @throws ArithmeticException
 *             if the number of entries in the fragment does not fit in an
 *             {@code int}
 */
private void replicateFragmentInternal(final LedgerHandle lh, final LedgerFragment lf, final AsyncCallback.VoidCallback ledgerFragmentMcb, final Set<BookieSocketAddress> newBookies) throws InterruptedException {
    if (!lf.isClosed()) {
        LOG.error("Trying to replicate an unclosed fragment; This is not safe {}", lf);
        ledgerFragmentMcb.processResult(BKException.Code.UnclosedFragmentException, null, null);
        return;
    }
    final Long startEntryId = lf.getFirstStoredEntryId();
    final Long endEntryId = lf.getLastStoredEntryId();
    if (endEntryId == null) {
        /*
         * Ideally this should never happen if bookie failure is taken care
         * of properly. Nothing we can do though in this case.
         */
        LOG.warn("Dead bookie (" + lf.getAddresses() + ") is still part of the current active ensemble for ledgerId: " + lh.getId());
        ledgerFragmentMcb.processResult(BKException.Code.OK, null, null);
        return;
    }
    // Unbox once; the validated bounds below are the ones used for replication.
    final long firstEntryId = startEntryId;
    final long lastEntryId = endEntryId;
    if (firstEntryId > lastEntryId || lastEntryId <= INVALID_ENTRY_ID) {
        // for open ledger which there is no entry, the start entry id is 0,
        // the end entry id is -1.
        // we can return immediately to trigger forward read
        ledgerFragmentMcb.processResult(BKException.Code.OK, null, null);
        return;
    }
    /*
     * The MultiCallback needs the number of entries up front. Compute it
     * from the bounds instead of building a list of every entry id; fail
     * fast if the count cannot be represented as an int.
     */
    final int numEntries = Math.toIntExact(lastEntryId - firstEntryId + 1);
    /*
     * Now asynchronously replicate all of the entries for the ledger
     * fragment that were on the dead bookie.
     */
    MultiCallback ledgerFragmentEntryMcb = new MultiCallback(numEntries, ledgerFragmentMcb, null, BKException.Code.OK, BKException.Code.LedgerRecoveryException);
    for (long entryId = firstEntryId; entryId <= lastEntryId; entryId++) {
        recoverLedgerFragmentEntry(entryId, lh, ledgerFragmentEntryMcb, newBookies);
    }
}
Also used : MultiCallback(org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback) LinkedList(java.util.LinkedList)

Aggregations

LinkedList (java.util.LinkedList)2 MultiCallback (org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.MultiCallback)2 UncheckedExecutionException (com.google.common.util.concurrent.UncheckedExecutionException)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Entry (java.util.Map.Entry)1 NoSuchElementException (java.util.NoSuchElementException)1 SortedMap (java.util.SortedMap)1 ConcurrentSkipListMap (java.util.concurrent.ConcurrentSkipListMap)1 ExecutionException (java.util.concurrent.ExecutionException)1 TimeoutException (java.util.concurrent.TimeoutException)1 BookieException (org.apache.bookkeeper.bookie.BookieException)1 OpenCallback (org.apache.bookkeeper.client.AsyncCallback.OpenCallback)1 SingleFragmentCallback (org.apache.bookkeeper.client.LedgerFragmentReplicator.SingleFragmentCallback)1 SyncOpenCallback (org.apache.bookkeeper.client.SyncCallbackUtils.SyncOpenCallback)1 BookieSocketAddress (org.apache.bookkeeper.net.BookieSocketAddress)1 BKAuditException (org.apache.bookkeeper.replication.ReplicationException.BKAuditException)1