Search in sources :

Example 1 with Vote

use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.

the class LeaderElection method countVotes.

/**
 * Tallies the votes gathered during one election round.
 *
 * <p>Only votes whose {@code getId()} is contained in {@code heardFrom} are
 * counted. Among those, every vote for the same id is rewritten to carry the
 * largest zxid reported for that id, so that votes for one candidate compare
 * equal in the count table.
 *
 * @param votes     votes received this round, keyed by the responder's address
 * @param heardFrom ids of the peers that answered this round
 * @return the tally: {@code numValidVotes}, the preferred vote by
 *         (zxid, id) ordering with its running {@code count}, and the most
 *         frequent vote ({@code winner}) with its {@code winningCount}
 */
protected ElectionResult countVotes(HashMap<InetSocketAddress, Vote> votes, HashSet<Long> heardFrom) {
    final ElectionResult tally = new ElectionResult();
    // Start from placeholder votes that any real vote will beat.
    tally.vote = new Vote(Long.MIN_VALUE, Long.MIN_VALUE);
    tally.winner = new Vote(Long.MIN_VALUE, Long.MIN_VALUE);

    // Pass 1: keep only votes for ids we heard from, and remember the
    // largest zxid seen for each id (peers may report different zxids
    // for the same server depending on timing).
    final HashMap<InetSocketAddress, Vote> accepted = new HashMap<InetSocketAddress, Vote>();
    final Map<Long, Long> highestZxidById = new HashMap<Long, Long>();
    for (Map.Entry<InetSocketAddress, Vote> received : votes.entrySet()) {
        final Vote ballot = received.getValue();
        if (!heardFrom.contains(ballot.getId())) {
            continue;
        }
        accepted.put(received.getKey(), ballot);
        final Long known = highestZxidById.get(ballot.getId());
        if (known == null || known < ballot.getZxid()) {
            highestZxidById.put(ballot.getId(), ballot.getZxid());
        }
    }

    // Pass 2: raise every accepted vote to the largest zxid seen for its id.
    // Replacing the value through Map.Entry.setValue is permitted while
    // iterating over the entry set.
    for (Map.Entry<InetSocketAddress, Vote> slot : accepted.entrySet()) {
        final Vote ballot = slot.getValue();
        final Long highest = highestZxidById.get(ballot.getId());
        if (ballot.getZxid() < highest) {
            slot.setValue(new Vote(ballot.getId(), highest, ballot.getElectionEpoch(), ballot.getPeerEpoch(), ballot.getState()));
        }
    }

    tally.numValidVotes = accepted.size();

    // Pass 3: count occurrences of each vote and track the preferred vote
    // (highest zxid, ties broken by highest id).
    final HashMap<Vote, Integer> occurrences = new HashMap<Vote, Integer>();
    for (Vote ballot : accepted.values()) {
        final Integer seen = occurrences.get(ballot);
        occurrences.put(ballot, seen == null ? 1 : seen + 1);

        final Vote preferred = tally.vote;
        if (ballot.getId() == preferred.getId()) {
            tally.count++;
            continue;
        }
        if (ballot.getZxid() > preferred.getZxid()
                || (ballot.getZxid() == preferred.getZxid() && ballot.getId() > preferred.getId())) {
            tally.vote = ballot;
            tally.count = 1;
        }
    }

    // Pass 4: pick the vote with the most occurrences and log the table.
    tally.winningCount = 0;
    LOG.info("Election tally: ");
    for (Map.Entry<Vote, Integer> line : occurrences.entrySet()) {
        final Vote candidate = line.getKey();
        final Integer votesFor = line.getValue();
        if (votesFor > tally.winningCount) {
            tally.winningCount = votesFor;
            tally.winner = candidate;
        }
        LOG.info(candidate.getId() + "\t-> " + votesFor);
    }
    return tally;
}
Also used : Vote(org.apache.zookeeper.server.quorum.Vote) HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with Vote

use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.

the class LeaderElection method lookForLeader.

/**
 * Invoked in QuorumPeer to find or elect a new leader.
 *
 * <p>Each round sends a 4-byte xid to every server in the voting view over
 * UDP and reads a 28-byte reply (xid, responder id, voted id, voted zxid).
 * The replies are tallied with {@code countVotes}; the loop ends once one
 * vote is held by more than half of the voting view.
 *
 * <p>The election socket is released in the {@code finally} block, so it is
 * closed on every exit path (leader found, peer no longer running, or an
 * exception) and stays usable for further rounds when an elected result is
 * rejected.
 *
 * @return the winning vote, or {@code null} if {@code self.isRunning()}
 *         becomes false before a leader is found
 * @throws InterruptedException if the thread is interrupted while sleeping
 */
public Vote lookForLeader() throws InterruptedException {
    try {
        self.jmxLeaderElectionBean = new LeaderElectionBean();
        MBeanRegistry.getInstance().register(self.jmxLeaderElectionBean, self.jmxLocalPeerBean);
    } catch (Exception e) {
        LOG.warn("Failed to register with JMX", e);
        self.jmxLeaderElectionBean = null;
    }
    // Declared outside the try so that the finally block can always close it.
    DatagramSocket s = null;
    try {
        /* The current vote for the leader. Initially me! */
        self.setCurrentVote(new Vote(self.getId(), self.getLastLoggedZxid()));
        // We are going to look for a leader by casting a vote for ourself
        byte[] requestBytes = new byte[4];
        ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes);
        byte[] responseBytes = new byte[28];
        ByteBuffer responseBuffer = ByteBuffer.wrap(responseBytes);
        try {
            s = new DatagramSocket();
            // Bound the wait for each reply so an unresponsive server only
            // delays the round briefly.
            s.setSoTimeout(200);
        } catch (SocketException e1) {
            LOG.error("Socket exception when creating socket for leader election", e1);
            System.exit(4);
        }
        DatagramPacket requestPacket = new DatagramPacket(requestBytes, requestBytes.length);
        DatagramPacket responsePacket = new DatagramPacket(responseBytes, responseBytes.length);
        int xid = epochGen.nextInt();
        while (self.isRunning()) {
            HashMap<InetSocketAddress, Vote> votes = new HashMap<InetSocketAddress, Vote>(self.getVotingView().size());
            requestBuffer.clear();
            requestBuffer.putInt(xid);
            requestPacket.setLength(4);
            HashSet<Long> heardFrom = new HashSet<Long>();
            for (QuorumServer server : self.getVotingView().values()) {
                LOG.info("Server address: " + server.addr);
                try {
                    requestPacket.setSocketAddress(server.addr);
                } catch (IllegalArgumentException e) {
                    // Rethrow with the offending address in the message so
                    // that this detail is captured.
                    throw new IllegalArgumentException("Unable to set socket address on packet, msg:" + e.getMessage() + " with addr:" + server.addr, e);
                }
                try {
                    s.send(requestPacket);
                    // Reset the length: a previous receive may have shrunk it.
                    responsePacket.setLength(responseBytes.length);
                    s.receive(responsePacket);
                    if (responsePacket.getLength() != responseBytes.length) {
                        LOG.error("Got a short response: " + responsePacket.getLength());
                        continue;
                    }
                    responseBuffer.clear();
                    int recvedXid = responseBuffer.getInt();
                    if (recvedXid != xid) {
                        LOG.error("Got bad xid: expected " + xid + " got " + recvedXid);
                        continue;
                    }
                    long peerId = responseBuffer.getLong();
                    heardFrom.add(peerId);
                    // Arguments are evaluated left to right: voted id, then zxid.
                    Vote vote = new Vote(responseBuffer.getLong(), responseBuffer.getLong());
                    InetSocketAddress addr = (InetSocketAddress) responsePacket.getSocketAddress();
                    votes.put(addr, vote);
                } catch (IOException e) {
                    // Errors are okay, since hosts may be down
                    LOG.warn("Ignoring exception while looking for leader", e);
                }
            }
            ElectionResult result = countVotes(votes, heardFrom);
            if (result.numValidVotes == 0) {
                // No valid votes were received this round: go back to
                // voting for ourselves.
                self.setCurrentVote(new Vote(self.getId(), self.getLastLoggedZxid()));
            } else {
                if (result.winner.getId() >= 0) {
                    self.setCurrentVote(result.vote);
                    // To do: this doesn't use a quorum verifier
                    if (result.winningCount > (self.getVotingView().size() / 2)) {
                        self.setCurrentVote(result.winner);
                        Vote current = self.getCurrentVote();
                        LOG.info("Found leader: my type is: " + self.getLearnerType());
                        /*
                         * We want to make sure we implement the state machine
                         * correctly. If we are a PARTICIPANT, once a leader
                         * is elected we can move either to LEADING or
                         * FOLLOWING. However if we are an OBSERVER, it is an
                         * error to be elected as a Leader.
                         */
                        if (self.getLearnerType() == LearnerType.OBSERVER) {
                            if (current.getId() == self.getId()) {
                                // This should never happen! Keep looping; the
                                // socket is still open for the next round.
                                LOG.error("OBSERVER elected as leader!");
                                Thread.sleep(100);
                            } else {
                                self.setPeerState(ServerState.OBSERVING);
                                Thread.sleep(100);
                                return current;
                            }
                        } else {
                            self.setPeerState((current.getId() == self.getId()) ? ServerState.LEADING : ServerState.FOLLOWING);
                            if (self.getPeerState() == ServerState.FOLLOWING) {
                                Thread.sleep(100);
                            }
                            return current;
                        }
                    }
                }
            }
            Thread.sleep(1000);
        }
        return null;
    } finally {
        // Release the election socket on every exit path.
        if (s != null) {
            s.close();
        }
        try {
            if (self.jmxLeaderElectionBean != null) {
                MBeanRegistry.getInstance().unregister(self.jmxLeaderElectionBean);
            }
        } catch (Exception e) {
            LOG.warn("Failed to unregister with JMX", e);
        }
        self.jmxLeaderElectionBean = null;
    }
}
Also used : SocketException(java.net.SocketException) Vote(org.apache.zookeeper.server.quorum.Vote) HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) QuorumServer(org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) IOException(java.io.IOException) SocketException(java.net.SocketException) DatagramSocket(java.net.DatagramSocket) DatagramPacket(java.net.DatagramPacket) HashSet(java.util.HashSet)

Example 3 with Vote

use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.

the class FLETest method testJoinInconsistentEnsemble.

/*
 * For ZOOKEEPER-1732 verify that it is possible to join an ensemble with
 * inconsistent election round information.
 *
 * Two of three configured peers are started and must form a cluster. The
 * current vote of one of them is then replaced by a copy with a larger zxid
 * and election epoch, and the third peer is started and must still join.
 *
 * Every peer that was started is shut down in a finally block, so a failed
 * assertion does not leave peers running.
 */
@Test
public void testJoinInconsistentEnsemble() throws Exception {
    int sid;
    QuorumPeer peer;
    int waitTime = 10 * 1000;
    ArrayList<QuorumPeer> peerList = new ArrayList<QuorumPeer>();
    try {
        for (sid = 0; sid < 3; sid++) {
            peers.put(Long.valueOf(sid), new QuorumServer(sid, new InetSocketAddress("127.0.0.1", PortAssignment.unique()), new InetSocketAddress("127.0.0.1", PortAssignment.unique())));
            tmpdir[sid] = ClientBase.createTmpDir();
            port[sid] = PortAssignment.unique();
        }
        // start 2 peers and verify if they form the cluster
        for (sid = 0; sid < 2; sid++) {
            peer = new QuorumPeer(peers, tmpdir[sid], tmpdir[sid], port[sid], 3, sid, 2000, 2, 2, 2);
            LOG.info("Starting peer {}", peer.getId());
            peer.start();
            peerList.add(sid, peer);
        }
        peer = peerList.get(0);
        VerifyState v1 = new VerifyState(peerList.get(0));
        v1.start();
        v1.join(waitTime);
        assertFalse(!v1.isSuccess(), "Unable to form cluster in " + waitTime + " ms");
        // Change the election round for one of the members of the ensemble
        long leaderSid = peer.getCurrentVote().getId();
        long zxid = peer.getCurrentVote().getZxid();
        long electionEpoch = peer.getCurrentVote().getElectionEpoch();
        ServerState state = peer.getCurrentVote().getState();
        long peerEpoch = peer.getCurrentVote().getPeerEpoch();
        Vote newVote = new Vote(leaderSid, zxid + 100, electionEpoch + 100, peerEpoch, state);
        peer.setCurrentVote(newVote);
        // Start 3rd peer and check if it joins the quorum
        // (sid is 2 here, left over from the loop above)
        peer = new QuorumPeer(peers, tmpdir[2], tmpdir[2], port[2], 3, 2, 2000, 2, 2, 2);
        LOG.info("Starting peer {}", peer.getId());
        peer.start();
        peerList.add(sid, peer);
        v1 = new VerifyState(peer);
        v1.start();
        v1.join(waitTime);
        if (v1.isAlive()) {
            fail("Peer " + peer.getId() + " failed to join the cluster " + "within " + waitTime + " ms");
        }
    } finally {
        // cleanup: shut down whichever peers were actually started, even
        // when an assertion above failed
        for (QuorumPeer started : peerList) {
            if (started != null) {
                started.shutdown();
            }
        }
    }
}
Also used : Vote(org.apache.zookeeper.server.quorum.Vote) QuorumServer(org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer) InetSocketAddress(java.net.InetSocketAddress) ServerState(org.apache.zookeeper.server.quorum.QuorumPeer.ServerState) QuorumPeer(org.apache.zookeeper.server.quorum.QuorumPeer) ArrayList(java.util.ArrayList) Test(org.junit.jupiter.api.Test)

Example 4 with Vote

use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.

the class LENonTerminateTest method mockServer.

/**
 * MockServer plays the role of peer C. It binds a UDP socket on the port of
 * the server registered under id 2, answers two vote requests with a vote
 * for itself ({@code Vote(2, 1)}), and then returns.
 *
 * <p>The socket is opened in a try-with-resources statement so that it is
 * closed when the method returns or when {@code receive}/{@code send} throws.
 *
 * @throws InterruptedException declared for callers; not thrown by the
 *         visible body
 * @throws IOException if the socket cannot be bound or a receive/send fails
 */
void mockServer() throws InterruptedException, IOException {
    byte[] b = new byte[36];
    ByteBuffer responseBuffer = ByteBuffer.wrap(b);
    DatagramPacket packet = new DatagramPacket(b, b.length);
    QuorumServer server = peers.get(Long.valueOf(2));
    try (DatagramSocket udpSocket = new DatagramSocket(server.addr.getPort())) {
        LOG.info("In MockServer");
        // Signal that the socket is bound and the mock is ready to receive.
        mockLatch.countDown();
        Vote current = new Vote(2, 1);
        for (int i = 0; i < 2; ++i) {
            udpSocket.receive(packet);
            responseBuffer.rewind();
            LOG.info("Received " + responseBuffer.getInt() + " " + responseBuffer.getLong() + " " + responseBuffer.getLong());
            LOG.info("From " + packet.getSocketAddress());
            responseBuffer.clear();
            // Skip the xid so that it is echoed back unchanged
            responseBuffer.getInt();
            // Responder id, then the vote (id, zxid)
            responseBuffer.putLong(2);
            responseBuffer.putLong(current.getId());
            responseBuffer.putLong(current.getZxid());
            // setData resets the packet length to the full buffer; the
            // packet keeps the sender's address, so the reply goes back to it.
            packet.setData(b);
            udpSocket.send(packet);
        }
    }
}
Also used : Vote(org.apache.zookeeper.server.quorum.Vote) DatagramSocket(java.net.DatagramSocket) QuorumServer(org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer) DatagramPacket(java.net.DatagramPacket) ByteBuffer(java.nio.ByteBuffer)

Example 5 with Vote

use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.

the class AuthFastLeaderElection method termPredicate.

/**
 * Checks whether a proposal is backed by a strict majority of the voting view.
 *
 * @param votes votes received so far, keyed by sender address
 * @param l     proposed leader id to match against each vote's id
 * @param zxid  proposed zxid to match against each vote's zxid
 * @return {@code true} if the number of votes matching both {@code l} and
 *         {@code zxid} exceeds half the size of the voting view
 */
private boolean termPredicate(HashMap<InetSocketAddress, Vote> votes, long l, long zxid) {
    int supporters = 0;
    // Count only the votes that agree on both the leader id and the zxid.
    for (Vote ballot : votes.values()) {
        final boolean sameProposal = ballot.getId() == l && ballot.getZxid() == zxid;
        if (sameProposal) {
            supporters++;
        }
    }
    final int halfOfView = self.getVotingView().size() / 2;
    return supporters > halfOfView;
}
Also used : Vote(org.apache.zookeeper.server.quorum.Vote)

Aggregations

Vote (org.apache.zookeeper.server.quorum.Vote)7 InetSocketAddress (java.net.InetSocketAddress)4 HashMap (java.util.HashMap)3 QuorumServer (org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer)3 IOException (java.io.IOException)2 DatagramPacket (java.net.DatagramPacket)2 DatagramSocket (java.net.DatagramSocket)2 SocketException (java.net.SocketException)2 ByteBuffer (java.nio.ByteBuffer)2 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 QuorumPeer (org.apache.zookeeper.server.quorum.QuorumPeer)1 ServerState (org.apache.zookeeper.server.quorum.QuorumPeer.ServerState)1 Test (org.junit.jupiter.api.Test)1