Search in sources :

Example 31 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class SessionUpgradeTest method testUpgradeWithEphemeral.

/**
 * Checks that a session which owns ephemeral nodes can be resumed on a
 * different server, and that explicitly closing the session makes it
 * impossible to resume and removes its ephemeral nodes.
 *
 * @param testLeader {@code true} to connect the first client to the leader,
 *                   {@code false} to connect it to the peer after the leader
 * @throws Exception if a ZooKeeper operation or a connection wait fails unexpectedly
 */
private void testUpgradeWithEphemeral(boolean testLeader) throws Exception {
    String nodePrefix = "/testUpgrade-" + (testLeader ? "leaderTest-" : "followerTest-");
    int leaderIdx = qb.getLeaderIndex();
    assertFalse(leaderIdx == -1, "No leader in quorum?");
    // The two peers after the leader (wrapping at 5) serve as followers.
    // NOTE(review): the modulus presumably matches a five-member ensemble in qb - confirm.
    int followerIdx = (leaderIdx + 1) % 5;
    int otherFollowerIdx = (leaderIdx + 2) % 5;
    int testPeerIdx = testLeader ? leaderIdx : followerIdx;
    String[] hostPorts = qb.hostPort.split(",");
    CountdownWatcher watcher = new CountdownWatcher();
    DisconnectableZooKeeper zk = new DisconnectableZooKeeper(hostPorts[testPeerIdx], CONNECTION_TIMEOUT, watcher);
    watcher.waitForConnected(CONNECTION_TIMEOUT);
    // Create some ephemeral nodes. This should force the session to
    // be propagated to the other servers in the ensemble.
    for (int i = 0; i < 5; i++) {
        zk.create(nodePrefix + i, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
    }
    // We should be able to reconnect with the same session id on a
    // different server, since it has been propagated.
    long localSessionId = zk.getSessionId();
    byte[] localSessionPwd = zk.getSessionPasswd().clone();
    zk.disconnect();
    watcher.reset();
    zk = new DisconnectableZooKeeper(hostPorts[otherFollowerIdx], CONNECTION_TIMEOUT, watcher, localSessionId, localSessionPwd);
    watcher.waitForConnected(CONNECTION_TIMEOUT);
    // The created ephemeral nodes are still around.
    for (int i = 0; i < 5; i++) {
        assertNotNull(zk.exists(nodePrefix + i, null));
    }
    // When we explicitly close the session, we should not be able to
    // reconnect with the same session id
    zk.close();
    watcher.reset();
    zk = new DisconnectableZooKeeper(hostPorts[otherFollowerIdx], CONNECTION_TIMEOUT, watcher, localSessionId, localSessionPwd);
    try {
        zk.exists(nodePrefix + "0", null);
        fail("Reconnecting to a closed session ID should fail.");
    } catch (KeeperException.SessionExpiredException expected) {
        // Expected: the session was closed above, so it cannot be resumed.
    } finally {
        // Release this client's resources before the reference is replaced below.
        zk.close();
    }
    watcher.reset();
    // And the ephemeral nodes will be gone since the session died.
    zk = new DisconnectableZooKeeper(hostPorts[testPeerIdx], CONNECTION_TIMEOUT, watcher);
    try {
        watcher.waitForConnected(CONNECTION_TIMEOUT);
        for (int i = 0; i < 5; i++) {
            assertNull(zk.exists(nodePrefix + i, null));
        }
    } finally {
        // Close the verification client even when an assertion fails.
        zk.close();
    }
}
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) KeeperException(org.apache.zookeeper.KeeperException)

Example 32 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class ClientRequestTimeoutTest method testClientRequestTimeout.

/**
 * Starts a quorum, performs one successful create, then enables packet
 * dropping for create requests and checks that a synchronous create fails
 * with {@code REQUESTTIMEOUT}.
 *
 * <p>The {@code zookeeper.request.timeout} system property is restored to its
 * previous value, and the client and servers are shut down, even if the test
 * fails part-way through.
 *
 * @throws Exception if server start-up, connection or cleanup fails unexpectedly
 */
@Test
@Timeout(value = 120)
public void testClientRequestTimeout() throws Exception {
    final String timeoutProperty = "zookeeper.request.timeout";
    int requestTimeOut = 15000;
    // System properties are JVM-wide: remember the old value so it can be restored.
    String previousTimeout = System.getProperty(timeoutProperty);
    System.setProperty(timeoutProperty, Integer.toString(requestTimeOut));
    MainThread[] mt = new MainThread[SERVER_COUNT];
    CustomZooKeeper zk = null;
    try {
        final int[] clientPorts = new int[SERVER_COUNT];
        StringBuilder sb = new StringBuilder();
        String server;
        for (int i = 0; i < SERVER_COUNT; i++) {
            clientPorts[i] = PortAssignment.unique();
            server = "server." + i + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;127.0.0.1:" + clientPorts[i];
            sb.append(server).append("\n");
        }
        String currentQuorumCfgSection = sb.toString();
        for (int i = 0; i < SERVER_COUNT; i++) {
            mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false);
            mt[i].start();
        }
        // ensure server started
        for (int i = 0; i < SERVER_COUNT; i++) {
            assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
        }
        CountdownWatcher watch1 = new CountdownWatcher();
        zk = new CustomZooKeeper(getCxnString(clientPorts), ClientBase.CONNECTION_TIMEOUT, watch1);
        watch1.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
        String data = "originalData";
        // lets see one successful operation
        zk.create("/clientHang1", data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL);
        // now make environment for client hang
        // NOTE(review): dropPacket/dropPacketType are left set after this test - confirm nothing else depends on them.
        dropPacket = true;
        dropPacketType = ZooDefs.OpCode.create;
        // Test synchronous API
        try {
            zk.create("/clientHang2", data.getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            fail("KeeperException is expected.");
        } catch (KeeperException exception) {
            assertEquals(KeeperException.Code.REQUESTTIMEOUT.intValue(), exception.code().intValue());
        }
    } finally {
        // do cleanup: client first, then servers, and finally the system property
        try {
            if (zk != null) {
                zk.close();
            }
        } finally {
            try {
                for (MainThread peer : mt) {
                    // Entries stay null if start-up failed before reaching them.
                    if (peer != null) {
                        peer.shutdown();
                    }
                }
            } finally {
                if (previousTimeout == null) {
                    System.clearProperty(timeoutProperty);
                } else {
                    System.setProperty(timeoutProperty, previousTimeout);
                }
            }
        }
    }
}
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 33 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class EpochWriteFailureTest method testAcceptedEpochWriteFailure.

/**
 * Test case for https://issues.apache.org/jira/browse/ZOOKEEPER-2307
 *
 * <p>Expectation: when writing the accepted epoch to file fails during leader
 * election, the server must not complete leader election and must not update
 * the run-time values of acceptedEpoch and currentEpoch.
 */
@Test
@Timeout(value = 120)
public void testAcceptedEpochWriteFailure() throws Exception {
    // Build the quorum configuration: admin server off, one "server.N" line per peer.
    StringBuilder quorumCfg = new StringBuilder();
    quorumCfg.append("admin.enableServer=false").append("\n");
    for (int id = 0; id < SERVER_COUNT; id++) {
        clientPorts[id] = PortAssignment.unique();
        String serverLine = "server." + id + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;127.0.0.1:" + clientPorts[id];
        quorumCfg.append(serverLine).append("\n");
    }
    String currentQuorumCfgSection = quorumCfg.toString();

    // Start every server except the last one.
    final int regularServers = SERVER_COUNT - 1;
    for (int id = 0; id < regularServers; id++) {
        MainThread regular = new MainThread(id, clientPorts[id], currentQuorumCfgSection, false);
        mt[id] = regular;
        regular.start();
    }
    // Wait until those servers accept client connections.
    for (int id = 0; id < regularServers; id++) {
        boolean up = ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[id], CONNECTION_TIMEOUT);
        assertTrue(up, "waiting for server " + id + " being up");
    }

    // Connect to server 0 and write one znode.
    CountdownWatcher connectedWatcher = new CountdownWatcher();
    zk = new ZooKeeper("127.0.0.1:" + clientPorts[0], ClientBase.CONNECTION_TIMEOUT, connectedWatcher);
    connectedWatcher.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
    zk.create("/epochIssue", "originalData".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);

    // The third server uses the mocked quorum main, which fails while writing acceptedEpoch.
    MainThread faultyServer = new MainThread(2, clientPorts[2], currentQuorumCfgSection, false) {

        @Override
        public TestQPMain getTestQPMain() {
            return new MockTestQPMain();
        }
    };
    mt[2] = faultyServer;
    faultyServer.start();

    // The faulty server must not come up: the injected acceptedEpoch write
    // failure keeps it retrying to join the quorum.
    boolean faultyServerUp = ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[2], CONNECTION_TIMEOUT / 2);
    assertFalse(faultyServerUp, "verify server 2 not started");

    // Neither epoch value may have moved from zero.
    QuorumPeer faultyPeer = faultyServer.getQuorumPeer();
    assertEquals(0, faultyPeer.getAcceptedEpoch(), "acceptedEpoch must not have changed");
    assertEquals(0, faultyPeer.getCurrentEpoch(), "currentEpoch must not have changed");
}
Also used : ZooKeeper(org.apache.zookeeper.ZooKeeper) CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 34 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class EphemeralNodeDeletionTest method testEphemeralNodeDeletion.

/**
 * Test case for https://issues.apache.org/jira/browse/ZOOKEEPER-2355.
 * ZooKeeper ephemeral node is never deleted if follower fail while reading
 * the proposal packet.
 *
 * <p>The test creates an ephemeral node, injects an error into a follower,
 * closes the owning session, and then checks that the node is absent on both
 * the leader and the previously faulted follower. It repeats the check for a
 * second ephemeral node created and closed afterwards.
 *
 * @throws Exception if server start-up, connection or a ZooKeeper operation fails unexpectedly
 */
@Test
@Timeout(value = 120)
public void testEphemeralNodeDeletion() throws Exception {
    final int[] clientPorts = new int[SERVER_COUNT];
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < SERVER_COUNT; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;127.0.0.1:" + clientPorts[i];
        sb.append(server + "\n");
    }
    String currentQuorumCfgSection = sb.toString();
    // start all the servers
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false) {

            @Override
            public TestQPMain getTestQPMain() {
                return new MockTestQPMain();
            }
        };
        mt[i].start();
    }
    // ensure all servers started
    for (int i = 0; i < SERVER_COUNT; i++) {
        assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
    }
    CountdownWatcher watch = new CountdownWatcher();
    ZooKeeper zk = new ZooKeeper("127.0.0.1:" + clientPorts[1], ClientBase.CONNECTION_TIMEOUT, watch);
    watch.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
    // now the problem scenario starts
    Stat firstEphemeralNode = new Stat();
    // 1: create ephemeral node
    String nodePath = "/e1";
    zk.create(nodePath, "1".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, firstEphemeralNode);
    assertEquals(zk.getSessionId(), firstEphemeralNode.getEphemeralOwner(), "Current session and ephemeral owner should be same");
    // 2: inject network problem in one of the follower
    CustomQuorumPeer follower = (CustomQuorumPeer) getByServerState(mt, ServerState.FOLLOWING);
    // Fail with a clear message rather than a NullPointerException when no
    // follower is found, mirroring the leader check further down.
    assertNotNull(follower, "Follower should not be null");
    follower.setInjectError(true);
    // 3: close the session so that ephemeral node is deleted
    zk.close();
    // remove the error
    follower.setInjectError(false);
    assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + follower.getClientPort(), CONNECTION_TIMEOUT), "Faulted Follower should have joined quorum by now");
    QuorumPeer leader = getByServerState(mt, ServerState.LEADING);
    assertNotNull(leader, "Leader should not be null");
    assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + leader.getClientPort(), CONNECTION_TIMEOUT), "Leader must be running");
    watch = new CountdownWatcher();
    zk = new ZooKeeper("127.0.0.1:" + leader.getClientPort(), ClientBase.CONNECTION_TIMEOUT, watch);
    watch.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
    Stat exists = zk.exists(nodePath, false);
    assertNull(exists, "Node must have been deleted from leader");
    CountdownWatcher followerWatch = new CountdownWatcher();
    ZooKeeper followerZK = new ZooKeeper("127.0.0.1:" + follower.getClientPort(), ClientBase.CONNECTION_TIMEOUT, followerWatch);
    followerWatch.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
    Stat nodeAtFollower = followerZK.exists(nodePath, false);
    // Problem 1: Follower had one extra ephemeral node /e1
    assertNull(nodeAtFollower, "ephemeral node must not exist");
    // Create the node with another session
    Stat currentEphemeralNode = new Stat();
    zk.create(nodePath, "2".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, currentEphemeralNode);
    // close the session and newly created ephemeral node should be deleted
    zk.close();
    // Sync the follower client before reading so it observes the session close.
    // NOTE(review): the result of await(...) is ignored; if it reports a timeout,
    // the read below may run before the sync completed - consider asserting on it.
    SyncCallback cb = new SyncCallback();
    followerZK.sync(nodePath, cb, null);
    cb.sync.await(CONNECTION_TIMEOUT, TimeUnit.MILLISECONDS);
    nodeAtFollower = followerZK.exists(nodePath, false);
    // Problem 2: Before fix, after session close the ephemeral node
    // was not getting deleted. But now after the fix after session close
    // ephemeral node is getting deleted.
    assertNull(nodeAtFollower, "After session close ephemeral node must be deleted");
    followerZK.close();
}
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) ZooKeeper(org.apache.zookeeper.ZooKeeper) Stat(org.apache.zookeeper.data.Stat) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Example 35 with CountdownWatcher

use of org.apache.zookeeper.test.ClientBase.CountdownWatcher in project zookeeper by apache.

the class DIFFSyncConsistencyTest method testInconsistentDueToUncommittedLog.

/**
 * Exercises a scenario in which the leader logs a create proposal while all
 * followers are offline, the followers then rejoin with error injection
 * enabled, and finally a new leader is elected without the old one. The test
 * asserts that the new leader still exposes the znode that was visible on the
 * old leader.
 *
 * @throws Exception if server start-up, connection or a ZooKeeper operation fails unexpectedly
 */
@Test
@Timeout(value = 120)
public void testInconsistentDueToUncommittedLog() throws Exception {
    final int LEADER_TIMEOUT_MS = 10_000;
    final int[] clientPorts = new int[SERVER_COUNT];
    StringBuilder sb = new StringBuilder();
    String server;
    for (int i = 0; i < SERVER_COUNT; i++) {
        clientPorts[i] = PortAssignment.unique();
        server = "server." + i + "=127.0.0.1:" + PortAssignment.unique() + ":" + PortAssignment.unique() + ":participant;127.0.0.1:" + clientPorts[i];
        sb.append(server + "\n");
    }
    String currentQuorumCfgSection = sb.toString();
    // Start every server with the mocked quorum main so errors can be injected later.
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, false) {

            @Override
            public TestQPMain getTestQPMain() {
                return new MockTestQPMain();
            }
        };
        mt[i].start();
    }
    for (int i = 0; i < SERVER_COUNT; i++) {
        assertTrue(ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], CONNECTION_TIMEOUT), "waiting for server " + i + " being up");
    }
    int leader = findLeader(mt);
    CountdownWatcher watch = new CountdownWatcher();
    ZooKeeper zk = new ZooKeeper("127.0.0.1:" + clientPorts[leader], ClientBase.CONNECTION_TIMEOUT, watch);
    watch.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
    Map<Long, Proposal> outstanding = mt[leader].main.quorumPeer.leader.outstandingProposals;
    // Increase the tick time to delay the leader going to looking to allow us proposal a transaction while other
    // followers are offline.
    int previousTick = mt[leader].main.quorumPeer.tickTime;
    mt[leader].main.quorumPeer.tickTime = LEADER_TIMEOUT_MS;
    // Let the previous tick on the leader exhaust itself so the new tick time takes effect
    Thread.sleep(previousTick);
    LOG.info("LEADER ELECTED {}", leader);
    // Shut down every server except the leader so the create below is proposed while the followers are offline.
    // In other words, we want to make sure the followers get the proposal later through DIFF sync.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i != leader) {
            mt[i].shutdown();
        }
    }
    // Send a create request to old leader and make sure it's synced to disk.
    try {
        zk.create("/zk" + leader, "zk".getBytes(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
        fail("create /zk" + leader + " should have failed");
    } catch (KeeperException e) {
        // Expected: the create must not succeed while the followers are down.
    }
    // Make sure that we actually did get it in process at the leader; there can be extra sessionClose proposals.
    assertTrue(outstanding.size() > 0);
    Proposal p = findProposalOfType(outstanding, OpCode.create);
    LOG.info("Old leader id: {}. All proposals: {}", leader, outstanding);
    assertNotNull(p, "Old leader doesn't have 'create' proposal");
    // Make sure leader sync the proposal to disk.
    int sleepTime = 0;
    Long longLeader = (long) leader;
    while (!p.qvAcksetPairs.get(0).getAckset().contains(longLeader)) {
        // Polls every 100 ms and gives up once more than 2000 ms have been spent waiting.
        if (sleepTime > 2000) {
            fail("Transaction not synced to disk within 2 seconds " + p.qvAcksetPairs.get(0).getAckset() + " expected " + leader);
        }
        Thread.sleep(100);
        sleepTime += 100;
    }
    // Restart the followers with error injection enabled; they are presumably expected to receive the
    // pending proposal from DIFF sync.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i == leader) {
            continue;
        }
        mt[i].start();
        // Wait (up to 100 polls of 100 ms) for the quorum peer object to become available.
        int sleepCount = 0;
        while (mt[i].getQuorumPeer() == null) {
            ++sleepCount;
            if (sleepCount > 100) {
                fail("Can't start follower " + i + " !");
            }
            Thread.sleep(100);
        }
        ((CustomQuorumPeer) mt[i].getQuorumPeer()).setInjectError(true);
        LOG.info("Follower {} started.", i);
    }
    // Verify leader can see it. The fact that leader can see it implies that
    // leader should, at this point in time, get a quorum of ACK of NEWLEADER
    // from two followers so leader can start serving requests; this also implies
    // that DIFF sync from leader to followers are finished at this point in time.
    // We then verify later that followers should have the same view after we shutdown
    // this leader, otherwise it's a violation of ZAB / sequential consistency.
    // NOTE(review): if all 100 attempts hit ConnectionLoss, the loop ends without any assertion having run.
    int c = 0;
    while (c < 100) {
        ++c;
        try {
            Stat stat = zk.exists("/zk" + leader, false);
            assertNotNull(stat, "server " + leader + " should have /zk");
            break;
        } catch (KeeperException.ConnectionLossException e) {
            // The client may still be reconnecting; retry after a short pause.
        }
        Thread.sleep(100);
    }
    // Shutdown all servers
    for (int i = 0; i < SERVER_COUNT; i++) {
        mt[i].shutdown();
    }
    waitForOne(zk, States.CONNECTING);
    // Restart only the followers (the old leader stays down), this time with error injection disabled.
    // NOTE(review): the first line of the original comment is missing; the surviving text reads
    // "... to sync to disk because we made them fail at UPTODATE." - confirm against upstream.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i == leader) {
            continue;
        }
        mt[i].start();
        int sleepCount = 0;
        while (mt[i].getQuorumPeer() == null) {
            ++sleepCount;
            if (sleepCount > 100) {
                fail("Can't start follower " + i + " !");
            }
            Thread.sleep(100);
        }
        ((CustomQuorumPeer) mt[i].getQuorumPeer()).setInjectError(false);
        LOG.info("Follower {} started again.", i);
    }
    int newLeader = findLeader(mt);
    assertNotEquals(newLeader, leader, "new leader is still the old leader " + leader + " !!");
    // Connect to the new leader and check that it has the znode that was visible on the old leader.
    // A missing znode would be an inconsistent view of the quorum exposed from leaders, which is a violation of ZAB.
    for (int i = 0; i < SERVER_COUNT; i++) {
        if (i != newLeader) {
            continue;
        }
        zk.close();
        zk = new ZooKeeper("127.0.0.1:" + clientPorts[i], ClientBase.CONNECTION_TIMEOUT, watch);
        watch.waitForConnected(ClientBase.CONNECTION_TIMEOUT);
        Stat val = zk.exists("/zk" + leader, false);
        assertNotNull(val, "Data inconsistency detected! " + "Server " + i + " should have a view of /zk" + leader + "!");
    }
    zk.close();
}
Also used : CountdownWatcher(org.apache.zookeeper.test.ClientBase.CountdownWatcher) ZooKeeper(org.apache.zookeeper.ZooKeeper) Stat(org.apache.zookeeper.data.Stat) Proposal(org.apache.zookeeper.server.quorum.Leader.Proposal) KeeperException(org.apache.zookeeper.KeeperException) Test(org.junit.jupiter.api.Test) Timeout(org.junit.jupiter.api.Timeout)

Aggregations

CountdownWatcher (org.apache.zookeeper.test.ClientBase.CountdownWatcher)43 ZooKeeper (org.apache.zookeeper.ZooKeeper)40 Test (org.junit.jupiter.api.Test)33 Timeout (org.junit.jupiter.api.Timeout)26 HashMap (java.util.HashMap)14 KeeperException (org.apache.zookeeper.KeeperException)9 TimeoutException (java.util.concurrent.TimeoutException)7 Stat (org.apache.zookeeper.data.Stat)5 ClientTest (org.apache.zookeeper.test.ClientTest)5 IOException (java.io.IOException)4 File (java.io.File)3 TestableZooKeeper (org.apache.zookeeper.TestableZooKeeper)3 WatchedEvent (org.apache.zookeeper.WatchedEvent)3 ZooKeeperAdmin (org.apache.zookeeper.admin.ZooKeeperAdmin)3 ZKDatabase (org.apache.zookeeper.server.ZKDatabase)3 Collection (java.util.Collection)2 Set (java.util.Set)2 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)2 Semaphore (java.util.concurrent.Semaphore)2 TimeUnit (java.util.concurrent.TimeUnit)2