Search in sources :

Example 11 with BookieSocketAddress

use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.

the class AuditorPeriodicCheckTest method testFailedWriteRecovery.

/**
 * Validates that the periodic ledger check will fix entries with a failed write.
 *
 * <p>Flow: replication is disabled, one bookie of a freshly created ledger is replaced by a
 * write-failing bookie, a few entries are written, replication is re-enabled and the test polls
 * until the ledger is reported as under replicated. Replication workers are then started and,
 * after a fixed wait, the ledger metadata is checked to make sure the replaced bookie no longer
 * appears in any ensemble.
 *
 * @throws Exception if any step of the scenario fails
 */
@Test
public void testFailedWriteRecovery() throws Exception {
    LedgerManagerFactory mFactory = driver.getLedgerManagerFactory();
    LedgerUnderreplicationManager underReplicationManager = mFactory.newLedgerUnderreplicationManager();
    List<ReplicationWorker> workers = new ArrayList<ReplicationWorker>();
    LedgerHandle lh;
    BookieSocketAddress replacedBookie;
    try {
        try {
            underReplicationManager.disableLedgerReplication();
            lh = bkc.createLedger(2, 2, 1, DigestType.CRC32, "passwd".getBytes());
            // kill one of the bookies and replace it with one that rejects write;
            // This way we get into the under replication state
            replacedBookie = replaceBookieWithWriteFailingBookie(lh);
            // Write a few entries; this should cause under replication
            byte[] data = "foobar".getBytes();
            lh.addEntry(data);
            lh.addEntry(data);
            lh.addEntry(data);
            lh.close();
            // enable under replication detection and wait for it to report
            // under replicated ledger
            underReplicationManager.enableLedgerReplication();
            long underReplicatedLedger = -1;
            for (int i = 0; i < 5; i++) {
                underReplicatedLedger = underReplicationManager.pollLedgerToRereplicate();
                if (underReplicatedLedger != -1) {
                    break;
                }
                // presumably CHECK_INTERVAL is expressed in seconds - TODO confirm
                Thread.sleep(CHECK_INTERVAL * 1000);
            }
            assertEquals("Ledger should be under replicated", lh.getId(), underReplicatedLedger);
            // now start the replication workers
            for (int i = 0; i < numBookies; i++) {
                ReplicationWorker rw = new ReplicationWorker(zkc, bsConfs.get(i), NullStatsLogger.INSTANCE);
                rw.start();
                workers.add(rw);
            }
        } finally {
            // always release the manager, even when an assertion above fails
            underReplicationManager.close();
        }
        // Wait for ensemble to change after replication
        Thread.sleep(3000);
    } finally {
        // shut down every worker that was started, including on failure paths
        for (ReplicationWorker rw : workers) {
            rw.shutdown();
        }
    }
    // check that ensemble has changed and the bookie that rejected writes has
    // been replaced in the ensemble
    LedgerHandle newLh = bkc.openLedger(lh.getId(), DigestType.CRC32, "passwd".getBytes());
    for (Map.Entry<Long, ArrayList<BookieSocketAddress>> e : LedgerHandleAdapter.getLedgerMetadata(newLh).getEnsembles().entrySet()) {
        ArrayList<BookieSocketAddress> ensemble = e.getValue();
        assertFalse("Ensemble hasn't been updated", ensemble.contains(replacedBookie));
    }
    newLh.close();
}
Also used : LedgerUnderreplicationManager(org.apache.bookkeeper.meta.LedgerUnderreplicationManager) LedgerHandle(org.apache.bookkeeper.client.LedgerHandle) ArrayList(java.util.ArrayList) LedgerManagerFactory(org.apache.bookkeeper.meta.LedgerManagerFactory) BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Example 12 with BookieSocketAddress

use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.

the class BookieAutoRecoveryTest method testOpenLedgers.

/**
 * Checks the auto-recovery path for an open ledger: the Auditor is expected to publish the
 * under-replicated ledger znode once a bookie is killed, and the replication worker is expected
 * to pick the ledger up and finish re-replicating it onto a newly started bookie.
 *
 * @throws Exception if any step of the scenario fails
 */
@Test
public void testOpenLedgers() throws Exception {
    final List<LedgerHandle> handles = createLedgersAndAddEntries(1, 5);
    final LedgerHandle ledger = handles.get(0);
    // the bookie to kill is the first member of the ledger's first ensemble
    final BookieSocketAddress victim = LedgerHandleAdapter.getLedgerMetadata(ledger).getEnsembles().get(0L).get(0);
    final String urLedgerZNode = getUrLedgerZNode(ledger);
    final int victimIndex = getReplicaIndexInLedger(ledger, victim);

    // first latch: released when the watched urLedger znode changes after the kill
    final CountDownLatch publishedLatch = new CountDownLatch(1);
    assertNull("UrLedger already exists!", watchUrLedgerNode(urLedgerZNode, publishedLatch));
    LOG.info("Killing Bookie :" + victim);
    killBookie(victim);
    // waiting to publish urLedger znode by Auditor
    publishedLatch.await();

    // second latch: released when the watched urLedger znode changes again
    final CountDownLatch replicatedLatch = new CountDownLatch(1);
    LOG.info("Watching on urLedgerPath:" + urLedgerZNode + " to know the status of rereplication process");
    assertNotNull("UrLedger doesn't exists!", watchUrLedgerNode(urLedgerZNode, replicatedLatch));

    // starting the replication service, so that he will be able to act as
    // target bookie
    startNewBookie();
    final BookieServer newBookieServer = bs.get(bs.size() - 1);
    LOG.debug("Waiting to finish the replication of failed bookie : " + victim);
    replicatedLatch.await();

    LOG.info("Waiting to update the urledger metadata in zookeeper");
    verifyLedgerEnsembleMetadataAfterReplication(newBookieServer, ledger, victimIndex);
}
Also used : LedgerHandle(org.apache.bookkeeper.client.LedgerHandle) BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) BookieServer(org.apache.bookkeeper.proto.BookieServer) CountDownLatch(java.util.concurrent.CountDownLatch) Test(org.junit.Test)

Example 13 with BookieSocketAddress

use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.

the class BookieAutoRecoveryTest method testLedgerMetadataContainsHostNameAsBookieID.

/**
 * Verifies bookie recovery when the ledger metadata mixes bookies registered with and without
 * {@code useHostNameAsBookieID}.
 *
 * <p>The cluster is restarted with three bookies (one left at its configured default, two with
 * {@code useHostNameAsBookieID=true}), a ledger is created, one bookie of its first ensemble is
 * killed, and a fourth host-name bookie is started as the replication target. The test then
 * checks the ensemble metadata after replication.
 *
 * @throws Exception if any step of the scenario fails
 */
@Test
public void testLedgerMetadataContainsHostNameAsBookieID() throws Exception {
    // start from a clean cluster and a fresh client
    stopBKCluster();
    bkc = new BookKeeperTestClient(baseClientConf);
    // start bookie with useHostNameAsBookieID=false, as old bookie
    // NOTE(review): the flag is not set explicitly here; this relies on the default
    // produced by newServerConfiguration() - confirm
    ServerConfiguration serverConf1 = newServerConfiguration();
    // start 2 more bookies with useHostNameAsBookieID=true
    ServerConfiguration serverConf2 = newServerConfiguration();
    serverConf2.setUseHostNameAsBookieID(true);
    ServerConfiguration serverConf3 = newServerConfiguration();
    serverConf3.setUseHostNameAsBookieID(true);
    // register the configurations before starting the bookies
    bsConfs.add(serverConf1);
    bsConfs.add(serverConf2);
    bsConfs.add(serverConf3);
    bs.add(startBookie(serverConf1));
    bs.add(startBookie(serverConf2));
    bs.add(startBookie(serverConf3));
    List<LedgerHandle> listOfLedgerHandle = createLedgersAndAddEntries(1, 5);
    LedgerHandle lh = listOfLedgerHandle.get(0);
    int ledgerReplicaIndex = 0;
    final SortedMap<Long, ArrayList<BookieSocketAddress>> ensembles = LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles();
    // only the first ensemble (the one keyed by 0L) is inspected
    final ArrayList<BookieSocketAddress> bkAddresses = ensembles.get(0L);
    // default to the first bookie; prefer one for which isCreatedFromIp(...) is true
    // (presumably a bookie registered by IP address - helper not shown here)
    BookieSocketAddress replicaToKillAddr = bkAddresses.get(0);
    for (BookieSocketAddress bookieSocketAddress : bkAddresses) {
        if (isCreatedFromIp(bookieSocketAddress)) {
            replicaToKillAddr = bookieSocketAddress;
            LOG.info("Kill bookie which has registered using ipaddress");
            break;
        }
    }
    final String urLedgerZNode = getUrLedgerZNode(lh);
    // remember the slot of the killed bookie so it can be checked after replication
    ledgerReplicaIndex = getReplicaIndexInLedger(lh, replicaToKillAddr);
    CountDownLatch latch = new CountDownLatch(1);
    // the under-replicated znode must not exist before the bookie is killed
    assertNull("UrLedger already exists!", watchUrLedgerNode(urLedgerZNode, latch));
    LOG.info("Killing Bookie :" + replicaToKillAddr);
    killBookie(replicaToKillAddr);
    // waiting to publish urLedger znode by Auditor
    latch.await();
    // fresh latch for the second watch on the same znode
    latch = new CountDownLatch(1);
    LOG.info("Watching on urLedgerPath:" + urLedgerZNode + " to know the status of rereplication process");
    assertNotNull("UrLedger doesn't exists!", watchUrLedgerNode(urLedgerZNode, latch));
    // creates new bkclient
    bkc = new BookKeeperTestClient(baseClientConf);
    // starting the replication service, so that he will be able to act as
    // target bookie
    ServerConfiguration serverConf = newServerConfiguration();
    serverConf.setUseHostNameAsBookieID(true);
    bsConfs.add(serverConf);
    bs.add(startBookie(serverConf));
    // the bookie just started is the last element of bs
    int newBookieIndex = bs.size() - 1;
    BookieServer newBookieServer = bs.get(newBookieIndex);
    LOG.debug("Waiting to finish the replication of failed bookie : " + replicaToKillAddr);
    latch.await();
    // grace period to update the urledger metadata in zookeeper
    // NOTE(review): no explicit wait happens here; any grace period would have to be
    // inside verifyLedgerEnsembleMetadataAfterReplication - confirm
    LOG.info("Waiting to update the urledger metadata in zookeeper");
    verifyLedgerEnsembleMetadataAfterReplication(newBookieServer, listOfLedgerHandle.get(0), ledgerReplicaIndex);
}
Also used : LedgerHandle(org.apache.bookkeeper.client.LedgerHandle) ServerConfiguration(org.apache.bookkeeper.conf.ServerConfiguration) ArrayList(java.util.ArrayList) BookieServer(org.apache.bookkeeper.proto.BookieServer) BookKeeperTestClient(org.apache.bookkeeper.client.BookKeeperTestClient) CountDownLatch(java.util.concurrent.CountDownLatch) BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) Test(org.junit.Test)

Example 14 with BookieSocketAddress

use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.

the class BookieAutoRecoveryTest method getReplicaIndexInLedger.

/**
 * Returns the position of the given bookie within the ledger's first ensemble (the ensemble
 * keyed by entry id {@code 0L} in the ledger metadata).
 *
 * <p>The previous hand-rolled loop returned the index of the last ensemble member when the
 * address was not present at all, which made "not found" indistinguishable from "found in the
 * last slot". This version reports a missing address as {@code -1}.
 *
 * @param lh ledger whose metadata is inspected
 * @param replicaToKill bookie address to look for
 * @return zero-based index of {@code replicaToKill} in the first ensemble, or {@code -1} if the
 *         ensemble does not contain it
 */
private int getReplicaIndexInLedger(LedgerHandle lh, BookieSocketAddress replicaToKill) {
    SortedMap<Long, ArrayList<BookieSocketAddress>> ensembles = LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles();
    // indexOf uses equals() and yields -1 when there is no match
    return ensembles.get(0L).indexOf(replicaToKill);
}
Also used : BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) ArrayList(java.util.ArrayList)

Example 15 with BookieSocketAddress

use of org.apache.bookkeeper.net.BookieSocketAddress in project bookkeeper by apache.

the class BookieAutoRecoveryTest method testStopWhileReplicationInProgress.

/**
 * Stops the replication service while re-replication is still under way and then restarts it,
 * expecting the restarted service to complete the outstanding re-replication work.
 *
 * <p>Two ledgers are created and closed, a bookie from the first ledger's first ensemble is
 * killed, and a new bookie is started as the target. As soon as the latch count drops below the
 * number of ledgers the replication service is stopped, then started again, and the ensemble
 * metadata of every ledger is verified once the latch is fully released.
 *
 * @throws Exception if any step of the scenario fails
 */
@Test
public void testStopWhileReplicationInProgress() throws Exception {
    final int numberOfLedgers = 2;
    final List<Integer> replicaIndexes = new ArrayList<Integer>();
    final List<LedgerHandle> handles = createLedgersAndAddEntries(numberOfLedgers, 5);
    closeLedgers(handles);
    final LedgerHandle firstHandle = handles.get(0);
    final BookieSocketAddress replicaToKillAddr = LedgerHandleAdapter.getLedgerMetadata(firstHandle).getEnsembles().get(0L).get(0);
    LOG.info("Killing Bookie:" + replicaToKillAddr);

    // Each ledger, there will be two events : create urLedger and after
    // rereplication delete urLedger
    final CountDownLatch publishedLatch = new CountDownLatch(handles.size());
    for (LedgerHandle current : handles) {
        final String urLedgerZNode = getUrLedgerZNode(current);
        assertNull("UrLedger already exists!", watchUrLedgerNode(urLedgerZNode, publishedLatch));
        // record where the bookie to kill sits in this ledger's first ensemble
        replicaIndexes.add(getReplicaIndexInLedger(current, replicaToKillAddr));
    }
    LOG.info("Killing Bookie :" + replicaToKillAddr);
    killBookie(replicaToKillAddr);
    // waiting to publish urLedger znode by Auditor
    publishedLatch.await();

    // Again watching the urLedger znode to know the replication status
    final CountDownLatch replicatedLatch = new CountDownLatch(handles.size());
    for (LedgerHandle current : handles) {
        final String urLedgerZNode = getUrLedgerZNode(current);
        LOG.info("Watching on urLedgerPath:" + urLedgerZNode + " to know the status of rereplication process");
        assertNotNull("UrLedger doesn't exists!", watchUrLedgerNode(urLedgerZNode, replicatedLatch));
    }

    // starting the replication service, so that he will be able to act as
    // target bookie
    startNewBookie();
    final BookieServer newBookieServer = bs.get(bs.size() - 1);
    LOG.debug("Waiting to finish the replication of failed bookie : " + replicaToKillAddr);

    // poll once per second until at least one watch has fired (or the latch is fully released)
    while (replicatedLatch.getCount() >= numberOfLedgers && replicatedLatch.getCount() > 0) {
        // grace period to take breath
        Thread.sleep(1000);
    }
    stopReplicationService();
    LOG.info("Latch Count is:" + replicatedLatch.getCount());

    startReplicationService();
    LOG.info("Waiting to finish rereplication processes");
    replicatedLatch.await();

    LOG.info("Waiting to update the urledger metadata in zookeeper");
    int position = 0;
    for (LedgerHandle current : handles) {
        verifyLedgerEnsembleMetadataAfterReplication(newBookieServer, current, replicaIndexes.get(position));
        position++;
    }
}
Also used : LedgerHandle(org.apache.bookkeeper.client.LedgerHandle) BookieSocketAddress(org.apache.bookkeeper.net.BookieSocketAddress) ArrayList(java.util.ArrayList) BookieServer(org.apache.bookkeeper.proto.BookieServer) CountDownLatch(java.util.concurrent.CountDownLatch) Test(org.junit.Test)

Aggregations

BookieSocketAddress (org.apache.bookkeeper.net.BookieSocketAddress): 254, Test (org.junit.Test): 140, HashSet (java.util.HashSet): 67, CountDownLatch (java.util.concurrent.CountDownLatch): 42, ArrayList (java.util.ArrayList): 40, ServerConfiguration (org.apache.bookkeeper.conf.ServerConfiguration): 38, ClientConfiguration (org.apache.bookkeeper.conf.ClientConfiguration): 37, BKNotEnoughBookiesException (org.apache.bookkeeper.client.BKException.BKNotEnoughBookiesException): 29, HashMap (java.util.HashMap): 28, Map (java.util.Map): 24, LedgerHandle (org.apache.bookkeeper.client.LedgerHandle): 23, IOException (java.io.IOException): 21, AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 19, BookieServer (org.apache.bookkeeper.proto.BookieServer): 14, WriteCallback (org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback): 13, Set (java.util.Set): 11, ByteBuf (io.netty.buffer.ByteBuf): 10, ByteBuffer (java.nio.ByteBuffer): 10, LedgerMetadata (org.apache.bookkeeper.client.LedgerMetadata): 10, PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 10