Search in sources :

Example 31 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class SessionUpgradeTest method testUpgradeWithEphemeral.

private void testUpgradeWithEphemeral(boolean testLeader) throws Exception {
    String nodePrefix = "/testUpgrade-" + (testLeader ? "leaderTest-" : "followerTest-");
    int leaderIdx = qb.getLeaderIndex();
    assertFalse(leaderIdx == -1, "No leader in quorum?");
    int followerIdx = (leaderIdx + 1) % 5;
    int otherFollowerIdx = (leaderIdx + 2) % 5;
    int testPeerIdx = testLeader ? leaderIdx : followerIdx;
    String[] hostPorts = qb.hostPort.split(",");
    CountdownWatcher watcher = new CountdownWatcher();
    DisconnectableZooKeeper zk = new DisconnectableZooKeeper(hostPorts[testPeerIdx], CONNECTION_TIMEOUT, watcher);
    // Create some ephemeral nodes. This should force the session to
    // be propagated to the other servers in the ensemble.
    for (int i = 0; i < 5; i++) {
        zk.create(nodePrefix + i, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
    // We should be able to reconnect with the same session id on a
    // different server, since it has been propagated.
    long localSessionId = zk.getSessionId();
    byte[] localSessionPwd = zk.getSessionPasswd().clone();
    zk = new DisconnectableZooKeeper(hostPorts[otherFollowerIdx], CONNECTION_TIMEOUT, watcher, localSessionId, localSessionPwd);
    // The created ephemeral nodes are still around.
    for (int i = 0; i < 5; i++) {
        assertNotNull(zk.exists(nodePrefix + i, null));
    // When we explicitly close the session, we should not be able to
    // reconnect with the same session id
    try {
        zk = new DisconnectableZooKeeper(hostPorts[otherFollowerIdx], CONNECTION_TIMEOUT, watcher, localSessionId, localSessionPwd);
        zk.exists(nodePrefix + "0", null);
        fail("Reconnecting to a closed session ID should fail.");
    } catch (KeeperException.SessionExpiredException e) {
    // And the ephemeral nodes will be gone since the session died.
    zk = new DisconnectableZooKeeper(hostPorts[testPeerIdx], CONNECTION_TIMEOUT, watcher);
    for (int i = 0; i < 5; i++) {
        assertNull(zk.exists(nodePrefix + i, null));
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) KeeperException(org.apache.zookeeper.KeeperException)

Example 32 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class ClientRequestTimeoutTest method testClientRequestTimeout.

@Timeout(value = 120)
public void testClientRequestTimeout() throws Exception {
    int requestTimeOut = 15000;
    System.setProperty("zookeeper.request.timeout", Integer.toString(requestTimeOut));
    final int[] clientPorts = new int[SERVER_COUNT];
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < SERVER_COUNT; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;" + clientPorts[i];
        sb.append(server + "\n");
    String currentQuorumCfgSection = sb.toString();
    MainThread[] mt = new MainThread[SERVER_COUNT];
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false);
    // ensure server started
    for (int i = 0; i < SERVER_COUNT; i++) {
        assertTrue(ClientBase.waitForServerUp("" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
    CountdownWatcher watch1 = new CountdownWatcher();
    CustomZooKeeper zk = new CustomZooKeeper(getCxnString(clientPorts), ClientBase.CONNECTION_TIMEOUT, watch1);
    String data = "originalData";
    // lets see one successful operation
    zk.create("/clientHang1", data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL);
    // now make environment for client hang
    dropPacket = true;
    dropPacketType = ZooDefs.OpCode.create;
    // Test synchronous API
    try {
        zk.create("/clientHang2", data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        fail("KeeperException is expected.");
    } catch (KeeperException exception) {
        assertEquals(KeeperException.Code.REQUESTTIMEOUT.intValue(), exception.code().intValue());
    // do cleanup
    for (int i = 0; i < SERVER_COUNT; i++) {
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 33 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class EpochWriteFailureTest method testAcceptedEpochWriteFailure.

/**
 * Test case for an acceptedEpoch write failure.
 * Expectation: during leader election, when the accepted epoch write to file
 * fails, it should not complete leader election, and it should not update the
 * run time values of acceptedEpoch or currentEpoch.
 */
@Timeout(value = 120)
public void testAcceptedEpochWriteFailure() throws Exception {
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < SERVER_COUNT; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;" + clientPorts[i];
    String currentQuorumCfgSection = sb.toString();
    for (int i = 0; i < SERVER_COUNT - 1; i++) {
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false);
    // ensure two servers started
    for (int i = 0; i < SERVER_COUNT - 1; i++) {
        assertTrue(ClientBase.waitForServerUp("" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
    CountdownWatcher watch1 = new CountdownWatcher();
    zk = new ZooKeeper("" + clientPorts[0], ClientBase.CONNECTION_TIMEOUT, watch1);
    String data = "originalData";
    zk.create("/epochIssue", data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    // initialize third server
    mt[2] = new MainThread(2, clientPorts[2], currentQuorumCfgSection, false) {

        public TestQPMain getTestQPMain() {
            return new MockTestQPMain();
    // This server has a problem: it fails while writing acceptedEpoch.
    // Verify that the problematic server does not start, as an acceptedEpoch update
    // failure is injected and it keeps on trying to join the quorum.
    assertFalse(ClientBase.waitForServerUp("" + clientPorts[2], CONNECTION_TIMEOUT / 2), "verify server 2 not started");
    QuorumPeer quorumPeer = mt[2].getQuorumPeer();
    assertEquals(0, quorumPeer.getAcceptedEpoch(), "acceptedEpoch must not have changed");
    assertEquals(0, quorumPeer.getCurrentEpoch(), "currentEpoch must not have changed");
Also used : ZooKeeper(org.apache.zookeeper.ZooKeeper) CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 34 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class EphemeralNodeDeletionTest method testEphemeralNodeDeletion.

/**
 * Test case for the scenario where a ZooKeeper ephemeral node is never deleted
 * if a follower fails while reading the proposal packet.
 */
@Timeout(value = 120)
public void testEphemeralNodeDeletion() throws Exception {
    final int[] clientPorts = new int[SERVER_COUNT];
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < SERVER_COUNT; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;" + clientPorts[i];
        sb.append(server + "\n");
    String currentQuorumCfgSection = sb.toString();
    // start all the servers
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false) {

            public TestQPMain getTestQPMain() {
                return new MockTestQPMain();
    // ensure all servers started
    for (int i = 0; i < SERVER_COUNT; i++) {
        assertTrue(ClientBase.waitForServerUp("" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
    CountdownWatcher watch = new CountdownWatcher();
    ZooKeeper zk = new ZooKeeper("" + clientPorts[1], ClientBase.CONNECTION_TIMEOUT, watch);
    // Now the problem scenario starts.
    Stat firstEphemeralNode = new Stat();
    // 1: create ephemeral node
    String nodePath = "/e1";
    zk.create(nodePath, "1".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, firstEphemeralNode);
    assertEquals(zk.getSessionId(), firstEphemeralNode.getEphemeralOwner(), "Current session and ephemeral owner should be same");
    // 2: inject network problem in one of the follower
    CustomQuorumPeer follower = (CustomQuorumPeer) getByServerState(mt, ServerState.FOLLOWING);
    // 3: close the session so that ephemeral node is deleted
    // remove the error
    assertTrue(ClientBase.waitForServerUp("" + follower.getClientPort(), CONNECTION_TIMEOUT), "Faulted Follower should have joined quorum by now");
    QuorumPeer leader = getByServerState(mt, ServerState.LEADING);
    assertNotNull(leader, "Leader should not be null");
    assertTrue(ClientBase.waitForServerUp("" + leader.getClientPort(), CONNECTION_TIMEOUT), "Leader must be running");
    watch = new CountdownWatcher();
    zk = new ZooKeeper("" + leader.getClientPort(), ClientBase.CONNECTION_TIMEOUT, watch);
    Stat exists = zk.exists(nodePath, false);
    assertNull(exists, "Node must have been deleted from leader");
    CountdownWatcher followerWatch = new CountdownWatcher();
    ZooKeeper followerZK = new ZooKeeper("" + follower.getClientPort(), ClientBase.CONNECTION_TIMEOUT, followerWatch);
    Stat nodeAtFollower = followerZK.exists(nodePath, false);
    // Problem 1: Follower had one extra ephemeral node /e1
    assertNull(nodeAtFollower, "ephemeral node must not exist");
    // Create the node with another session
    Stat currentEphemeralNode = new Stat();
    zk.create(nodePath, "2".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, currentEphemeralNode);
    // close the session and newly created ephemeral node should be deleted
    SyncCallback cb = new SyncCallback();
    followerZK.sync(nodePath, cb, null);
    nodeAtFollower = followerZK.exists(nodePath, false);
    // Problem 2: Before fix, after session close the ephemeral node
    // was not getting deleted. But now after the fix after session close
    // ephemeral node is getting deleted.
    assertNull(nodeAtFollower, "After session close ephemeral node must be deleted");
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) ZooKeeper(org.apache.zookeeper.ZooKeeper) Stat(org.apache.zookeeper.data.Stat) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 35 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class DIFFSyncConsistencyTest method testInconsistentDueToUncommittedLog.

@Timeout(value = 120)
public void testInconsistentDueToUncommittedLog() throws Exception {
    final int LEADER_TIMEOUT_MS = 10_000;
    final int[] clientPorts = new int[SERVER_COUNT];
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < SERVER_COUNT; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;" + clientPorts[i];
        sb.append(server + "\n");
    String currentQuorumCfgSection = sb.toString();
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false) {

            public TestQPMain getTestQPMain() {
                return new MockTestQPMain();
    for (int i = 0; i < SERVER_COUNT; i++) {
        assertTrue(ClientBase.waitForServerUp("" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
    int leader = findLeader(mt);
    CountdownWatcher watch = new CountdownWatcher();
    ZooKeeper zk = new ZooKeeper("" + clientPorts[leader], ClientBase.CONNECTION_TIMEOUT, watch);
    Map<Long, Proposal> outstanding = mt[leader].main.quorumPeer.leader.outstandingProposals;
    // Increase the tick time to delay the leader going to looking, which allows us to propose a transaction
    // while the other followers are offline.
    int previousTick = mt[leader].main.quorumPeer.tickTime;
    mt[leader].main.quorumPeer.tickTime = LEADER_TIMEOUT_MS;
    // Let the previous tick on the leader exhaust itself so the new tick time takes effect
    Thread.sleep(previousTick);"LEADER ELECTED {}", leader);
    // In other words, we want to make sure the followers get the proposal later through DIFF sync.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i != leader) {
    // Send a create request to old leader and make sure it's synced to disk.
    try {
        zk.create("/zk" + leader, "zk".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        fail("create /zk" + leader + " should have failed");
    } catch (KeeperException e) {
    // Make sure that we actually did get it in process at the leader; there can be extra sessionClose proposals.
    assertTrue(outstanding.size() > 0);
    Proposal p = findProposalOfType(outstanding, OpCode.create);"Old leader id: {}. All proposals: {}", leader, outstanding);
    assertNotNull(p, "Old leader doesn't have 'create' proposal");
    // Make sure leader sync the proposal to disk.
    int sleepTime = 0;
    Long longLeader = (long) leader;
    while (!p.qvAcksetPairs.get(0).getAckset().contains(longLeader)) {
        if (sleepTime > 2000) {
            fail("Transaction not synced to disk within 1 second " + p.qvAcksetPairs.get(0).getAckset() + " expected " + leader);
        sleepTime += 100;
    // from DIFF sync.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i == leader) {
        int sleepCount = 0;
        while (mt[i].getQuorumPeer() == null) {
            if (sleepCount > 100) {
                fail("Can't start follower " + i + " !");
        ((CustomQuorumPeer) mt[i].getQuorumPeer()).setInjectError(true);"Follower {} started.", i);
    // Verify leader can see it. The fact that leader can see it implies that
    // leader should, at this point in time, get a quorum of ACK of NEWLEADER
    // from two followers so leader can start serving requests; this also implies
    // that DIFF sync from leader to followers are finished at this point in time.
    // We then verify later that followers should have the same view after we shutdown
    // this leader, otherwise it's a violation of ZAB / sequential consistency.
    int c = 0;
    while (c < 100) {
        try {
            Stat stat = zk.exists("/zk" + leader, false);
            assertNotNull(stat, "server " + leader + " should have /zk");
        } catch (KeeperException.ConnectionLossException e) {
    // Shutdown all servers
    for (int i = 0; i < SERVER_COUNT; i++) {
    waitForOne(zk, States.CONNECTING);
    // to sync to disk because we made them fail at UPTODATE.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i == leader) {
        int sleepCount = 0;
        while (mt[i].getQuorumPeer() == null) {
            if (sleepCount > 100) {
                fail("Can't start follower " + i + " !");
        ((CustomQuorumPeer) mt[i].getQuorumPeer()).setInjectError(false);"Follower {} started again.", i);
    int newLeader = findLeader(mt);
    assertNotEquals(newLeader, leader, "new leader is still the old leader " + leader + " !!");
    // This inconsistent view of the quorum exposed from leaders is a violation of ZAB.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i != newLeader) {
        zk = new ZooKeeper("" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, watch);
        Stat val = zk.exists("/zk" + leader, false);
        assertNotNull(val, "Data inconsistency detected! " + "Server " + i + " should have a view of /zk" + leader + "!");
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) ZooKeeper(org.apache.zookeeper.ZooKeeper) Stat(org.apache.zookeeper.data.Stat) Proposal(org.apache.zookeeper.server.quorum.Leader.Proposal) KeeperException(org.apache.zookeeper.KeeperException) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)


CountdownWatcher (org.apache.zookeeper.test.ClientBase.CountdownWatcher)43 ZooKeeper (org.apache.zookeeper.ZooKeeper)40 Test (org.junit.jupiter.api.Test)33 Timeout (org.junit.jupiter.api.Timeout)26 HashMap (java.util.HashMap)14 KeeperException (org.apache.zookeeper.KeeperException)9 TimeoutException (java.util.concurrent.TimeoutException)7 Stat (org.apache.zookeeper.data.Stat) ClientTest (org.apache.zookeeper.test.ClientTest)5 IOException (java.io.IOException) File (java.io.File) TestableZooKeeper (org.apache.zookeeper.TestableZooKeeper)3 WatchedEvent (org.apache.zookeeper.WatchedEvent)3 ZooKeeperAdmin (org.apache.zookeeper.admin.ZooKeeperAdmin)3 ZKDatabase (org.apache.zookeeper.server.ZKDatabase)3 Collection (java.util.Collection)2 Set (java.util.Set)2 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)2 Semaphore (java.util.concurrent.Semaphore)2 TimeUnit (java.util.concurrent.TimeUnit)2