Search in sources :

Example 11 with Proposal

use of org.apache.zookeeper.server.quorum.Leader.Proposal in project zookeeper by apache.

the class QuorumPeerMainTest method testInconsistentDueToNewLeaderOrder.

/**
 * Currently, in SNAP sync, the leader will start queuing the
 * proposal/commits and the NEWLEADER packet before sending
 * over the snapshot over wire. So it's possible that the zxid
 * associated with the snapshot might be higher than all the
 * packets queued before NEWLEADER.
 *
 * When the follower received the snapshot, it will apply all
 * the txns queued before NEWLEADER, which may not cover all
 * the txns up to the zxid in the snapshot. After that, it
 * will write the snapshot out to disk with the zxid associated
 * with the snapshot. In case the server crashed after writing
 * this out, when loading the data from disk, it will use zxid
 * of the snapshot file to sync with leader, and it could cause
 * data inconsistent, because we only replayed partial of the
 * historical data during previous syncing.
 *
 * This test case is going to cover and simulate this scenario
 * and make sure there is no data inconsistency issue after fix.
 */
@Test
public void testInconsistentDueToNewLeaderOrder() throws Exception {
    // 1. set up an ensemble with 3 servers
    final int ENSEMBLE_SERVERS = 3;
    final int[] clientPorts = new int[ENSEMBLE_SERVERS];
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < ENSEMBLE_SERVERS; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;127.0.0.1:" + clientPorts[i];
        sb.append(server + "\n");
    }
    String currentQuorumCfgSection = sb.toString();
    // start servers
    MainThread[] mt = new MainThread[ENSEMBLE_SERVERS];
    ZooKeeper[] zk = new ZooKeeper[ENSEMBLE_SERVERS];
    Context[] contexts = new Context[ENSEMBLE_SERVERS];
    for (int i = 0; i < ENSEMBLE_SERVERS; i++) {
        final Context context = new Context();
        contexts[i] = context;
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false) {

            @Override
            public TestQPMain getTestQPMain() {
                return new CustomizedQPMain(context);
            }
        };
        mt[i].start();
        zk[i] = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, this);
    }
    waitForAll(zk, States.CONNECTED);
    LOG.info("all servers started");
    String nodePath = "/testInconsistentDueToNewLeader";
    int leaderId = -1;
    int followerA = -1;
    for (int i = 0; i < ENSEMBLE_SERVERS; i++) {
        if (mt[i].main.quorumPeer.leader != null) {
            leaderId = i;
        } else if (followerA == -1) {
            followerA = i;
        }
    }
    LOG.info("shutdown follower {}", followerA);
    mt[followerA].shutdown();
    waitForOne(zk[followerA], States.CONNECTING);
    try {
        // 2. set force snapshot to be true
        LOG.info("force snapshot sync");
        System.setProperty(LearnerHandler.FORCE_SNAP_SYNC, "true");
        // 3. create a node
        String initialValue = "1";
        final ZooKeeper leaderZk = zk[leaderId];
        leaderZk.create(nodePath, initialValue.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        LOG.info("created node {} with value {}", nodePath, initialValue);
        CustomQuorumPeer leaderQuorumPeer = (CustomQuorumPeer) mt[leaderId].main.quorumPeer;
        // 4. on the customized leader catch the startForwarding call
        // (without synchronized), set the node to value v1, then
        // call the super.startForwarding to generate the ongoing
        // txn proposal and commit for v1 value update
        leaderQuorumPeer.setStartForwardingListener(new StartForwardingListener() {

            @Override
            public void start() {
                if (!Boolean.getBoolean(LearnerHandler.FORCE_SNAP_SYNC)) {
                    return;
                }
                final String value = "2";
                LOG.info("start forwarding, set {} to {}", nodePath, value);
                // ZKDatabase and the setData request will timeout
                try {
                    leaderZk.setData(nodePath, value.getBytes(), -1, (rc, path, ctx, stat) -> {
                    }, null);
                    // wait for the setData txn being populated
                    Thread.sleep(1000);
                } catch (Exception e) {
                    LOG.error("error when set {} to {}", nodePath, value, e);
                }
            }
        });
        // 5. on the customized leader catch the beginSnapshot call in
        // LearnerSyncThrottler to set the node to value v2,
        // wait it hit data tree
        leaderQuorumPeer.setBeginSnapshotListener(new BeginSnapshotListener() {

            @Override
            public void start() {
                String value = "3";
                LOG.info("before sending snapshot, set {} to {}", nodePath, value);
                try {
                    leaderZk.setData(nodePath, value.getBytes(), -1);
                    LOG.info("successfully set {} to {}", nodePath, value);
                } catch (Exception e) {
                    LOG.error("error when set {} to {}, {}", nodePath, value, e);
                }
            }
        });
        // 6. exit follower A after taking snapshot
        CustomQuorumPeer followerAQuorumPeer = ((CustomQuorumPeer) mt[followerA].main.quorumPeer);
        LOG.info("set exit when ack new leader packet on {}", followerA);
        contexts[followerA].exitWhenAckNewLeader = true;
        CountDownLatch latch = new CountDownLatch(1);
        final MainThread followerAMT = mt[followerA];
        contexts[followerA].newLeaderAckCallback = new NewLeaderAckCallback() {

            @Override
            public void start() {
                try {
                    latch.countDown();
                    followerAMT.shutdown();
                } catch (Exception e) {
                }
            }
        };
        // 7. start follower A to do snapshot sync
        LOG.info("starting follower {}", followerA);
        mt[followerA].start();
        assertTrue(latch.await(30, TimeUnit.SECONDS));
        // 8. now we have invalid data on disk, let's load it and verify
        LOG.info("disable exit when ack new leader packet on {}", followerA);
        System.setProperty(LearnerHandler.FORCE_SNAP_SYNC, "false");
        contexts[followerA].exitWhenAckNewLeader = true;
        contexts[followerA].newLeaderAckCallback = null;
        LOG.info("restarting follower {}", followerA);
        mt[followerA].start();
        zk[followerA].close();
        zk[followerA] = new ZooKeeper("127.0.0.1:" + clientPorts[followerA], ClientBase.CONNECTION_TIMEOUT, this);
        // 9. start follower A, after it's in broadcast state, make sure
        // the node value is same as what we have on leader
        waitForOne(zk[followerA], States.CONNECTED);
        assertEquals(new String(zk[followerA].getData(nodePath, null, null)), new String(zk[leaderId].getData(nodePath, null, null)));
    } finally {
        System.clearProperty(LearnerHandler.FORCE_SNAP_SYNC);
        for (int i = 0; i < ENSEMBLE_SERVERS; i++) {
            mt[i].shutdown();
            zk[i].close();
        }
    }
}
Also used : Assertions.fail(org.junit.jupiter.api.Assertions.fail) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) CreateMode(org.apache.zookeeper.CreateMode) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Mockito.doCallRealMethod(org.mockito.Mockito.doCallRealMethod) Time(org.apache.zookeeper.common.Time) NullMetricsProvider(org.apache.zookeeper.metrics.impl.NullMetricsProvider) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Ids(org.apache.zookeeper.ZooDefs.Ids) LoggerTestTool(org.apache.zookeeper.test.LoggerTestTool) Assertions.assertNotEquals(org.junit.jupiter.api.Assertions.assertNotEquals) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) TimeoutException(java.util.concurrent.TimeoutException) SaslException(javax.security.sasl.SaslException) ByteBuffer(java.nio.ByteBuffer) States(org.apache.zookeeper.ZooKeeper.States) Proposal(org.apache.zookeeper.server.quorum.Leader.Proposal) ClientBase.createEmptyTestDir(org.apache.zookeeper.test.ClientBase.createEmptyTestDir) CONNECTION_TIMEOUT(org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT) Assertions.assertFalse(org.junit.jupiter.api.Assertions.assertFalse) SocketChannel(java.nio.channels.SocketChannel) FileTxnSnapLog(org.apache.zookeeper.server.persistence.FileTxnSnapLog) ServerMetrics(org.apache.zookeeper.server.ServerMetrics) Map(java.util.Map) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) PortAssignment(org.apache.zookeeper.PortAssignment) ZooKeeper(org.apache.zookeeper.ZooKeeper) OpCode(org.apache.zookeeper.ZooDefs.OpCode) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) LineNumberReader(java.io.LineNumberReader) FileUtils(org.apache.commons.io.FileUtils) Mockito.when(org.mockito.Mockito.when) InetSocketAddress(java.net.InetSocketAddress) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Test(org.junit.jupiter.api.Test) CountDownLatch(java.util.concurrent.CountDownLatch) X509Exception(org.apache.zookeeper.common.X509Exception) StringReader(java.io.StringReader) Paths(java.nio.file.Paths) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) Pattern(java.util.regex.Pattern) ClientBase(org.apache.zookeeper.test.ClientBase) BaseTestMetricsProvider(org.apache.zookeeper.metrics.BaseTestMetricsProvider) Mockito.mock(org.mockito.Mockito.mock) CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) SaslException(javax.security.sasl.SaslException) KeeperException(org.apache.zookeeper.KeeperException) IOException(java.io.IOException) X509Exception(org.apache.zookeeper.common.X509Exception) ZooKeeper(org.apache.zookeeper.ZooKeeper) Test(org.junit.jupiter.api.Test)

Example 12 with Proposal

use of org.apache.zookeeper.server.quorum.Leader.Proposal in project zookeeper by apache.

the class DIFFSyncConsistencyTest method testInconsistentDueToUncommittedLog.

@Test
@Timeout(value = 120)
public void testInconsistentDueToUncommittedLog() throws Exception {
    final int LEADER_TIMEOUT_MS = 10_000;
    final int[] clientPorts = new int[SERVER_COUNT];
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < SERVER_COUNT; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;127.0.0.1:" + clientPorts[i];
        sb.append(server + "\n");
    }
    String currentQuorumCfgSection = sb.toString();
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false) {

            @Override
            public TestQPMain getTestQPMain() {
                return new MockTestQPMain();
            }
        };
        mt[i].start();
    }
    for (int i = 0; i < SERVER_COUNT; i++) {
        assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
    }
    int leader = findLeader(mt);
    CountdownWatcher watch = new CountdownWatcher();
    ZooKeeper zk = new ZooKeeper("127.0.0.1:" + clientPorts[leader], ClientBase.CONNECTION_TIMEOUT, watch);
    watch.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
    Map<Long, Proposal> outstanding = mt[leader].main.quorumPeer.leader.outstandingProposals;
    // Increase the tick time to delay the leader going to looking to allow us proposal a transaction while other
    // followers are offline.
    int previousTick = mt[leader].main.quorumPeer.tickTime;
    mt[leader].main.quorumPeer.tickTime = LEADER_TIMEOUT_MS;
    // Let the previous tick on the leader exhaust itself so the new tick time takes effect
    Thread.sleep(previousTick);
    LOG.info("LEADER ELECTED {}", leader);
    // In other words, we want to make sure the followers get the proposal later through DIFF sync.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i != leader) {
            mt[i].shutdown();
        }
    }
    // Send a create request to old leader and make sure it's synced to disk.
    try {
        zk.create("/zk" + leader, "zk".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        fail("create /zk" + leader + " should have failed");
    } catch (KeeperException e) {
    }
    // Make sure that we actually did get it in process at the leader; there can be extra sessionClose proposals.
    assertTrue(outstanding.size() > 0);
    Proposal p = findProposalOfType(outstanding, OpCode.create);
    LOG.info("Old leader id: {}. All proposals: {}", leader, outstanding);
    assertNotNull(p, "Old leader doesn't have 'create' proposal");
    // Make sure leader sync the proposal to disk.
    int sleepTime = 0;
    Long longLeader = (long) leader;
    while (!p.qvAcksetPairs.get(0).getAckset().contains(longLeader)) {
        if (sleepTime > 2000) {
            fail("Transaction not synced to disk within 1 second " + p.qvAcksetPairs.get(0).getAckset() + " expected " + leader);
        }
        Thread.sleep(100);
        sleepTime += 100;
    }
    // from DIFF sync.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i == leader) {
            continue;
        }
        mt[i].start();
        int sleepCount = 0;
        while (mt[i].getQuorumPeer() == null) {
            ++sleepCount;
            if (sleepCount > 100) {
                fail("Can't start follower " + i + " !");
            }
            Thread.sleep(100);
        }
        ((CustomQuorumPeer) mt[i].getQuorumPeer()).setInjectError(true);
        LOG.info("Follower {} started.", i);
    }
    // Verify leader can see it. The fact that leader can see it implies that
    // leader should, at this point in time, get a quorum of ACK of NEWLEADER
    // from two followers so leader can start serving requests; this also implies
    // that DIFF sync from leader to followers are finished at this point in time.
    // We then verify later that followers should have the same view after we shutdown
    // this leader, otherwise it's a violation of ZAB / sequential consistency.
    int c = 0;
    while (c < 100) {
        ++c;
        try {
            Stat stat = zk.exists("/zk" + leader, false);
            assertNotNull(stat, "server " + leader + " should have /zk");
            break;
        } catch (KeeperException.ConnectionLossException e) {
        }
        Thread.sleep(100);
    }
    // Shutdown all servers
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i].shutdown();
    }
    waitForOne(zk, States.CONNECTING);
    // to sync to disk because we made them fail at UPTODATE.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i == leader) {
            continue;
        }
        mt[i].start();
        int sleepCount = 0;
        while (mt[i].getQuorumPeer() == null) {
            ++sleepCount;
            if (sleepCount > 100) {
                fail("Can't start follower " + i + " !");
            }
            Thread.sleep(100);
        }
        ((CustomQuorumPeer) mt[i].getQuorumPeer()).setInjectError(false);
        LOG.info("Follower {} started again.", i);
    }
    int newLeader = findLeader(mt);
    assertNotEquals(newLeader, leader, "new leader is still the old leader " + leader + " !!");
    // This inconsistent view of the quorum exposed from leaders is a violation of ZAB.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i != newLeader) {
            continue;
        }
        zk.close();
        zk = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, watch);
        watch.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
        Stat val = zk.exists("/zk" + leader, false);
        assertNotNull(val, "Data inconsistency detected! " + "Server " + i + " should have a view of /zk" + leader + "!");
    }
    zk.close();
}
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) ZooKeeper(org.apache.zookeeper.ZooKeeper) Stat(org.apache.zookeeper.data.Stat) Proposal(org.apache.zookeeper.server.quorum.Leader.Proposal) KeeperException(org.apache.zookeeper.KeeperException) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 13 with Proposal

use of org.apache.zookeeper.server.quorum.Leader.Proposal in project zookeeper by apache.

the class LearnerHandlerTest method createProposal.

Proposal createProposal(long zxid) {
    Proposal p = new Proposal();
    p.packet = new QuorumPacket();
    p.packet.setZxid(zxid);
    p.packet.setType(Leader.PROPOSAL);
    return p;
}
Also used : Proposal(org.apache.zookeeper.server.quorum.Leader.Proposal)

Example 14 with Proposal

use of org.apache.zookeeper.server.quorum.Leader.Proposal in project zookeeper by apache.

the class GetProposalFromTxnTest method testGetProposalFromTxn.

/**
 * Test loading proposal from txnlog
 *
 * @throws Exception
 *             an exception might be thrown here
 */
@Test
public void testGetProposalFromTxn() throws Exception {
    File tmpDir = ClientBase.createTmpDir();
    ClientBase.setupTestEnv();
    ZooKeeperServer zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
    SyncRequestProcessor.setSnapCount(100);
    final int PORT = Integer.parseInt(HOSTPORT.split(":")[1]);
    ServerCnxnFactory f = ServerCnxnFactory.createFactory(PORT, -1);
    f.startup(zks);
    assertTrue(ClientBase.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT), "waiting for server being up ");
    ZooKeeper zk = ClientBase.createZKClient(HOSTPORT);
    // Generate transaction so we will have some txnlog
    Long[] zxids = new Long[MSG_COUNT];
    try {
        String data = "data";
        byte[] bytes = data.getBytes();
        for (int i = 0; i < MSG_COUNT; i++) {
            Stat stat = new Stat();
            zk.create("/invalidsnap-" + i, bytes, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            zk.getData("/invalidsnap-" + i, null, stat);
            zxids[i] = stat.getCzxid();
        }
    } finally {
        zk.close();
    }
    // shutdown and start zookeeper again
    f.shutdown();
    zks.shutdown();
    assertTrue(ClientBase.waitForServerDown(HOSTPORT, CONNECTION_TIMEOUT), "waiting for server to shutdown");
    zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
    zks.startdata();
    ZKDatabase db = zks.getZKDatabase();
    // Set sizeLimit to be very high number, so we can pull all transactions
    // from txnlog
    Iterator<Proposal> itr = db.getProposalsFromTxnLog(zxids[0], 10000000);
    int createCount = 0;
    ArrayList<Long> retrievedZxids = new ArrayList<Long>(MSG_COUNT);
    // Get zxid of create requests
    while (itr.hasNext()) {
        Proposal proposal = itr.next();
        TxnLogEntry logEntry = SerializeUtils.deserializeTxn(proposal.packet.getData());
        TxnHeader hdr = logEntry.getHeader();
        Record rec = logEntry.getTxn();
        if (hdr.getType() == OpCode.create) {
            retrievedZxids.add(hdr.getZxid());
            createCount++;
        }
    }
    // All zxid should match what we created
    assertTrue(Arrays.equals(zxids, retrievedZxids.toArray(new Long[0])), "Zxids missmatches");
    // There should be 2000 create requests
    assertTrue((createCount == MSG_COUNT), "create proposal count == " + MSG_COUNT);
    // We are requesting half the number of transaction from the snapshot
    // this should exceed threshold (ZKDatabase.snapshotSizeFactor)
    db.setSnapshotSizeFactor(0.33);
    long sizeLimit = db.calculateTxnLogSizeLimit();
    itr = db.getProposalsFromTxnLog(zxids[MSG_COUNT / 2], sizeLimit);
    assertFalse((itr.hasNext()), "Expect empty proposal");
    f.shutdown();
    zks.shutdown();
}
Also used : ArrayList(java.util.ArrayList) ZKDatabase(org.apache.zookeeper.server.ZKDatabase) ZooKeeper(org.apache.zookeeper.ZooKeeper) Stat(org.apache.zookeeper.data.Stat) TxnLogEntry(org.apache.zookeeper.server.TxnLogEntry) ServerCnxnFactory(org.apache.zookeeper.server.ServerCnxnFactory) Record(org.apache.jute.Record) File(java.io.File) ZooKeeperServer(org.apache.zookeeper.server.ZooKeeperServer) Proposal(org.apache.zookeeper.server.quorum.Leader.Proposal) TxnHeader(org.apache.zookeeper.txn.TxnHeader) Test(org.junit.jupiter.api.Test)

Example 15 with Proposal

use of org.apache.zookeeper.server.quorum.Leader.Proposal in project zookeeper by apache.

the class LearnerHandler method syncFollower.

/**
 * Determine if we need to sync with follower using DIFF/TRUNC/SNAP
 * and setup follower to receive packets from commit processor
 *
 * @param peerLastZxid
 * @param learnerMaster
 * @return true if snapshot transfer is needed.
 */
boolean syncFollower(long peerLastZxid, LearnerMaster learnerMaster) {
    /*
         * When leader election is completed, the leader will set its
         * lastProcessedZxid to be (epoch < 32). There will be no txn associated
         * with this zxid.
         *
         * The learner will set its lastProcessedZxid to the same value if
         * it get DIFF or SNAP from the learnerMaster. If the same learner come
         * back to sync with learnerMaster using this zxid, we will never find this
         * zxid in our history. In this case, we will ignore TRUNC logic and
         * always send DIFF if we have old enough history
         */
    boolean isPeerNewEpochZxid = (peerLastZxid & 0xffffffffL) == 0;
    // Keep track of the latest zxid which already queued
    long currentZxid = peerLastZxid;
    boolean needSnap = true;
    ZKDatabase db = learnerMaster.getZKDatabase();
    boolean txnLogSyncEnabled = db.isTxnLogSyncEnabled();
    ReentrantReadWriteLock lock = db.getLogLock();
    ReadLock rl = lock.readLock();
    try {
        rl.lock();
        long maxCommittedLog = db.getmaxCommittedLog();
        long minCommittedLog = db.getminCommittedLog();
        long lastProcessedZxid = db.getDataTreeLastProcessedZxid();
        LOG.info("Synchronizing with Learner sid: {} maxCommittedLog=0x{}" + " minCommittedLog=0x{} lastProcessedZxid=0x{}" + " peerLastZxid=0x{}", getSid(), Long.toHexString(maxCommittedLog), Long.toHexString(minCommittedLog), Long.toHexString(lastProcessedZxid), Long.toHexString(peerLastZxid));
        if (db.getCommittedLog().isEmpty()) {
            /*
                 * It is possible that committedLog is empty. In that case
                 * setting these value to the latest txn in learnerMaster db
                 * will reduce the case that we need to handle
                 *
                 * Here is how each case handle by the if block below
                 * 1. lastProcessZxid == peerZxid -> Handle by (2)
                 * 2. lastProcessZxid < peerZxid -> Handle by (3)
                 * 3. lastProcessZxid > peerZxid -> Handle by (5)
                 */
            minCommittedLog = lastProcessedZxid;
            maxCommittedLog = lastProcessedZxid;
        }
        if (forceSnapSync) {
            // Force learnerMaster to use snapshot to sync with follower
            LOG.warn("Forcing snapshot sync - should not see this in production");
        } else if (lastProcessedZxid == peerLastZxid) {
            // Follower is already sync with us, send empty diff
            LOG.info("Sending DIFF zxid=0x{} for peer sid: {}", Long.toHexString(peerLastZxid), getSid());
            queueOpPacket(Leader.DIFF, peerLastZxid);
            needOpPacket = false;
            needSnap = false;
        } else if (peerLastZxid > maxCommittedLog && !isPeerNewEpochZxid) {
            // Newer than committedLog, send trunc and done
            LOG.debug("Sending TRUNC to follower zxidToSend=0x{} for peer sid:{}", Long.toHexString(maxCommittedLog), getSid());
            queueOpPacket(Leader.TRUNC, maxCommittedLog);
            currentZxid = maxCommittedLog;
            needOpPacket = false;
            needSnap = false;
        } else if ((maxCommittedLog >= peerLastZxid) && (minCommittedLog <= peerLastZxid)) {
            // Follower is within commitLog range
            LOG.info("Using committedLog for peer sid: {}", getSid());
            Iterator<Proposal> itr = db.getCommittedLog().iterator();
            currentZxid = queueCommittedProposals(itr, peerLastZxid, null, maxCommittedLog);
            needSnap = false;
        } else if (peerLastZxid < minCommittedLog && txnLogSyncEnabled) {
            // Use txnlog and committedLog to sync
            // Calculate sizeLimit that we allow to retrieve txnlog from disk
            long sizeLimit = db.calculateTxnLogSizeLimit();
            // This method can return empty iterator if the requested zxid
            // is older than on-disk txnlog
            Iterator<Proposal> txnLogItr = db.getProposalsFromTxnLog(peerLastZxid, sizeLimit);
            if (txnLogItr.hasNext()) {
                LOG.info("Use txnlog and committedLog for peer sid: {}", getSid());
                currentZxid = queueCommittedProposals(txnLogItr, peerLastZxid, minCommittedLog, maxCommittedLog);
                if (currentZxid < minCommittedLog) {
                    LOG.info("Detected gap between end of txnlog: 0x{} and start of committedLog: 0x{}", Long.toHexString(currentZxid), Long.toHexString(minCommittedLog));
                    currentZxid = peerLastZxid;
                    // Clear out currently queued requests and revert
                    // to sending a snapshot.
                    queuedPackets.clear();
                    needOpPacket = true;
                } else {
                    LOG.debug("Queueing committedLog 0x{}", Long.toHexString(currentZxid));
                    Iterator<Proposal> committedLogItr = db.getCommittedLog().iterator();
                    currentZxid = queueCommittedProposals(committedLogItr, currentZxid, null, maxCommittedLog);
                    needSnap = false;
                }
            }
            // closing the resources
            if (txnLogItr instanceof TxnLogProposalIterator) {
                TxnLogProposalIterator txnProposalItr = (TxnLogProposalIterator) txnLogItr;
                txnProposalItr.close();
            }
        } else {
            LOG.warn("Unhandled scenario for peer sid: {} maxCommittedLog=0x{}" + " minCommittedLog=0x{} lastProcessedZxid=0x{}" + " peerLastZxid=0x{} txnLogSyncEnabled={}", getSid(), Long.toHexString(maxCommittedLog), Long.toHexString(minCommittedLog), Long.toHexString(lastProcessedZxid), Long.toHexString(peerLastZxid), txnLogSyncEnabled);
        }
        if (needSnap) {
            currentZxid = db.getDataTreeLastProcessedZxid();
        }
        LOG.debug("Start forwarding 0x{} for peer sid: {}", Long.toHexString(currentZxid), getSid());
        leaderLastZxid = learnerMaster.startForwarding(this, currentZxid);
    } finally {
        rl.unlock();
    }
    if (needOpPacket && !needSnap) {
        // This should never happen, but we should fall back to sending
        // snapshot just in case.
        LOG.error("Unhandled scenario for peer sid: {} fall back to use snapshot", getSid());
        needSnap = true;
    }
    return needSnap;
}
Also used : ReadLock(java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock) TxnLogProposalIterator(org.apache.zookeeper.server.TxnLogProposalIterator) Iterator(java.util.Iterator) TxnLogProposalIterator(org.apache.zookeeper.server.TxnLogProposalIterator) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) ZKDatabase(org.apache.zookeeper.server.ZKDatabase) Proposal(org.apache.zookeeper.server.quorum.Leader.Proposal)

Aggregations

Proposal (org.apache.zookeeper.server.quorum.Leader.Proposal)16 ZooKeeper (org.apache.zookeeper.ZooKeeper)6 Test (org.junit.jupiter.api.Test)6 File (java.io.File)4 KeeperException (org.apache.zookeeper.KeeperException)4 ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock)3 ReadLock (java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock)3 ServerCnxnFactory (org.apache.zookeeper.server.ServerCnxnFactory)3 ZKDatabase (org.apache.zookeeper.server.ZKDatabase)3 ZooKeeperServer (org.apache.zookeeper.server.ZooKeeperServer)3 IOException (java.io.IOException)2 Iterator (java.util.Iterator)2 Stat (org.apache.zookeeper.data.Stat)2 QuorumPeer (org.apache.zookeeper.server.quorum.QuorumPeer)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 LineNumberReader (java.io.LineNumberReader)1 StringReader (java.io.StringReader)1 InetSocketAddress (java.net.InetSocketAddress)1 ByteBuffer (java.nio.ByteBuffer)1 SocketChannel (java.nio.channels.SocketChannel)1