Use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.
Class LeaderElection, method countVotes.
/**
 * Tallies the votes collected in one polling round.
 *
 * <p>Votes proposing an id that is not in {@code heardFrom} are discarded.
 * The remaining votes are normalized so that every vote for a given id
 * carries the largest zxid reported for that id, and are then counted.
 *
 * @param votes     the votes received, keyed by the address that sent them
 * @param heardFrom the ids of the servers that answered in this round
 * @return the tally: the number of valid votes, the best vote by
 *         (zxid, id) together with its count, and the most frequent vote
 *         together with its count
 */
protected ElectionResult countVotes(HashMap<InetSocketAddress, Vote> votes, HashSet<Long> heardFrom) {
    final ElectionResult result = new ElectionResult();
    // Seed both slots with a placeholder vote (minimum id and zxid).
    result.vote = new Vote(Long.MIN_VALUE, Long.MIN_VALUE);
    result.winner = new Vote(Long.MIN_VALUE, Long.MIN_VALUE);

    // Pass 1: keep only votes for machines we heard from, and remember the
    // largest zxid reported for each proposed id. Peers may report
    // different zxids for the same server depending on timing.
    final HashMap<InetSocketAddress, Vote> validVotes = new HashMap<InetSocketAddress, Vote>();
    final Map<Long, Long> maxZxids = new HashMap<Long, Long>();
    for (Map.Entry<InetSocketAddress, Vote> received : votes.entrySet()) {
        final Vote candidate = received.getValue();
        if (!heardFrom.contains(candidate.getId())) {
            continue;
        }
        validVotes.put(received.getKey(), candidate);
        final Long best = maxZxids.get(candidate.getId());
        if (best == null || best < candidate.getZxid()) {
            maxZxids.put(candidate.getId(), candidate.getZxid());
        }
    }

    // Pass 2: make all zxids for a given vote id equal to the largest zxid
    // seen for that id.
    for (Map.Entry<InetSocketAddress, Vote> slot : validVotes.entrySet()) {
        final Vote stale = slot.getValue();
        final Long newest = maxZxids.get(stale.getId());
        if (stale.getZxid() < newest) {
            // Replacing the value through the entry is permitted while
            // iterating (see the Map.Entry#setValue contract).
            slot.setValue(new Vote(stale.getId(), newest, stale.getElectionEpoch(), stale.getPeerEpoch(), stale.getState()));
        }
    }
    result.numValidVotes = validVotes.size();

    // Pass 3: count identical votes and track the best vote by (zxid, id).
    final HashMap<Vote, Integer> countTable = new HashMap<Vote, Integer>();
    for (Vote ballot : validVotes.values()) {
        final Integer seen = countTable.get(ballot);
        countTable.put(ballot, seen == null ? 1 : seen + 1);

        if (ballot.getId() == result.vote.getId()) {
            result.count++;
            continue;
        }
        final boolean higherZxid = ballot.getZxid() > result.vote.getZxid();
        final boolean sameZxidHigherId = ballot.getZxid() == result.vote.getZxid() && ballot.getId() > result.vote.getId();
        if (higherZxid || sameZxidHigherId) {
            result.vote = ballot;
            result.count = 1;
        }
    }

    // Pass 4: the winner is the vote that occurs most often.
    result.winningCount = 0;
    LOG.info("Election tally: ");
    for (Map.Entry<Vote, Integer> tally : countTable.entrySet()) {
        final Vote proposal = tally.getKey();
        final Integer supporters = tally.getValue();
        if (supporters > result.winningCount) {
            result.winningCount = supporters;
            result.winner = proposal;
        }
        LOG.info(proposal.getId() + "\t-> " + supporters);
    }
    return result;
}
Use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.
Class LeaderElection, method lookForLeader.
/**
 * Invoked in QuorumPeer to find or elect a new leader.
 *
 * <p>Each round sends a 4-byte request carrying an xid to every server in
 * the voting view over UDP and expects a 28-byte reply: the echoed xid
 * (int), a peer id (long) and that peer's current vote (id and zxid, both
 * long). The replies are tallied with {@code countVotes}; once one vote is
 * held by more than half of the voting view, the peer state is updated and
 * that vote is returned. Otherwise the round is repeated after a one second
 * pause for as long as the peer is running.
 *
 * <p>The UDP socket and the JMX election bean are released on every exit
 * path, including exceptional ones.
 *
 * @return the winning vote, or {@code null} if the peer stops running
 *         before a leader is found
 * @throws InterruptedException if the thread is interrupted while sleeping
 */
public Vote lookForLeader() throws InterruptedException {
    try {
        self.jmxLeaderElectionBean = new LeaderElectionBean();
        MBeanRegistry.getInstance().register(self.jmxLeaderElectionBean, self.jmxLocalPeerBean);
    } catch (Exception e) {
        LOG.warn("Failed to register with JMX", e);
        self.jmxLeaderElectionBean = null;
    }
    // Declared outside the try block so that the finally block can always
    // release it, whichever way this method exits.
    DatagramSocket s = null;
    try {
        // The current vote for the leader. Initially me!
        self.setCurrentVote(new Vote(self.getId(), self.getLastLoggedZxid()));
        // We are going to look for a leader by casting a vote for ourself
        byte[] requestBytes = new byte[4];
        ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes);
        byte[] responseBytes = new byte[28];
        ByteBuffer responseBuffer = ByteBuffer.wrap(responseBytes);
        try {
            s = new DatagramSocket();
            s.setSoTimeout(200);
        } catch (SocketException e1) {
            LOG.error("Socket exception when creating socket for leader election", e1);
            System.exit(4);
        }
        DatagramPacket requestPacket = new DatagramPacket(requestBytes, requestBytes.length);
        DatagramPacket responsePacket = new DatagramPacket(responseBytes, responseBytes.length);
        // The same xid is reused for every round; replies that carry a
        // different xid are rejected below.
        int xid = epochGen.nextInt();
        while (self.isRunning()) {
            HashMap<InetSocketAddress, Vote> votes = new HashMap<InetSocketAddress, Vote>(self.getVotingView().size());
            requestBuffer.clear();
            requestBuffer.putInt(xid);
            requestPacket.setLength(4);
            HashSet<Long> heardFrom = new HashSet<Long>();
            for (QuorumServer server : self.getVotingView().values()) {
                LOG.info("Server address: " + server.addr);
                try {
                    requestPacket.setSocketAddress(server.addr);
                } catch (IllegalArgumentException e) {
                    // Wrap the exception so that the message includes the
                    // address that caused it, in order to capture this
                    // critical detail.
                    throw new IllegalArgumentException("Unable to set socket address on packet, msg:" + e.getMessage() + " with addr:" + server.addr, e);
                }
                try {
                    s.send(requestPacket);
                    // Restore the full buffer length before each receive.
                    responsePacket.setLength(responseBytes.length);
                    s.receive(responsePacket);
                    if (responsePacket.getLength() != responseBytes.length) {
                        LOG.error("Got a short response: " + responsePacket.getLength());
                        continue;
                    }
                    responseBuffer.clear();
                    int recvedXid = responseBuffer.getInt();
                    if (recvedXid != xid) {
                        LOG.error("Got bad xid: expected " + xid + " got " + recvedXid);
                        continue;
                    }
                    long peerId = responseBuffer.getLong();
                    heardFrom.add(peerId);
                    // Arguments are evaluated left to right: vote id first,
                    // then vote zxid.
                    Vote vote = new Vote(responseBuffer.getLong(), responseBuffer.getLong());
                    InetSocketAddress addr = (InetSocketAddress) responsePacket.getSocketAddress();
                    votes.put(addr, vote);
                } catch (IOException e) {
                    LOG.warn("Ignoring exception while looking for leader", e);
                    // Errors are okay, since hosts may be
                    // down
                }
            }
            ElectionResult result = countVotes(votes, heardFrom);
            // If no valid votes were received, reset to voting for
            // ourselves, as otherwise we may hang on to a vote for a dead
            // peer.
            if (result.numValidVotes == 0) {
                self.setCurrentVote(new Vote(self.getId(), self.getLastLoggedZxid()));
            } else {
                if (result.winner.getId() >= 0) {
                    self.setCurrentVote(result.vote);
                    // To do: this doesn't use a quorum verifier
                    if (result.winningCount > (self.getVotingView().size() / 2)) {
                        self.setCurrentVote(result.winner);
                        // The socket is deliberately NOT closed here: the
                        // "OBSERVER elected as leader" branch below keeps
                        // looping and still needs it. The finally block
                        // closes it on every exit path.
                        Vote current = self.getCurrentVote();
                        LOG.info("Found leader: my type is: " + self.getLearnerType());
                        /*
                         * We want to make sure we implement the state machine
                         * correctly. If we are a PARTICIPANT, once a leader
                         * is elected we can move either to LEADING or
                         * FOLLOWING. However if we are an OBSERVER, it is an
                         * error to be elected as a Leader.
                         */
                        if (self.getLearnerType() == LearnerType.OBSERVER) {
                            if (current.getId() == self.getId()) {
                                // This should never happen!
                                LOG.error("OBSERVER elected as leader!");
                                Thread.sleep(100);
                            } else {
                                self.setPeerState(ServerState.OBSERVING);
                                Thread.sleep(100);
                                return current;
                            }
                        } else {
                            self.setPeerState((current.getId() == self.getId()) ? ServerState.LEADING : ServerState.FOLLOWING);
                            if (self.getPeerState() == ServerState.FOLLOWING) {
                                Thread.sleep(100);
                            }
                            return current;
                        }
                    }
                }
            }
            Thread.sleep(1000);
        }
        return null;
    } finally {
        if (s != null) {
            s.close();
        }
        try {
            if (self.jmxLeaderElectionBean != null) {
                MBeanRegistry.getInstance().unregister(self.jmxLeaderElectionBean);
            }
        } catch (Exception e) {
            LOG.warn("Failed to unregister with JMX", e);
        }
        self.jmxLeaderElectionBean = null;
    }
}
Use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.
Class FLETest, method testJoinInconsistentEnsemble.
/*
 * For ZOOKEEPER-1732 verify that it is possible to join an ensemble with
 * inconsistent election round information.
 *
 * Two peers are started and must form a cluster; the current vote of one of
 * them is then replaced by a copy with a larger zxid and election epoch, and
 * a third peer is started and must finish its VerifyState check within the
 * wait time. Every peer that was started is shut down even when the test
 * fails.
 */
@Test
public void testJoinInconsistentEnsemble() throws Exception {
    int sid;
    QuorumPeer peer;
    int waitTime = 10 * 1000;
    ArrayList<QuorumPeer> peerList = new ArrayList<QuorumPeer>();
    for (sid = 0; sid < 3; sid++) {
        peers.put(Long.valueOf(sid), new QuorumServer(sid, new InetSocketAddress("127.0.0.1", PortAssignment.unique()), new InetSocketAddress("127.0.0.1", PortAssignment.unique())));
        tmpdir[sid] = ClientBase.createTmpDir();
        port[sid] = PortAssignment.unique();
    }
    try {
        // start 2 peers and verify if they form the cluster
        for (sid = 0; sid < 2; sid++) {
            peer = new QuorumPeer(peers, tmpdir[sid], tmpdir[sid], port[sid], 3, sid, 2000, 2, 2, 2);
            LOG.info("Starting peer {}", peer.getId());
            peer.start();
            peerList.add(sid, peer);
        }
        peer = peerList.get(0);
        VerifyState v1 = new VerifyState(peerList.get(0));
        v1.start();
        v1.join(waitTime);
        assertFalse(!v1.isSuccess(), "Unable to form cluster in " + waitTime + " ms");
        // Change the election round for one of the members of the ensemble
        long leaderSid = peer.getCurrentVote().getId();
        long zxid = peer.getCurrentVote().getZxid();
        long electionEpoch = peer.getCurrentVote().getElectionEpoch();
        ServerState state = peer.getCurrentVote().getState();
        long peerEpoch = peer.getCurrentVote().getPeerEpoch();
        Vote newVote = new Vote(leaderSid, zxid + 100, electionEpoch + 100, peerEpoch, state);
        peer.setCurrentVote(newVote);
        // Start 3rd peer and check if it joins the quorum
        // (sid is 2 here, left over from the loop above)
        peer = new QuorumPeer(peers, tmpdir[2], tmpdir[2], port[2], 3, 2, 2000, 2, 2, 2);
        LOG.info("Starting peer {}", peer.getId());
        peer.start();
        peerList.add(sid, peer);
        v1 = new VerifyState(peer);
        v1.start();
        v1.join(waitTime);
        if (v1.isAlive()) {
            fail("Peer " + peer.getId() + " failed to join the cluster " + "within " + waitTime + " ms");
        }
    } finally {
        // cleanup: runs on failure too, and only touches the peers that
        // were actually started, so nothing is left running and no index
        // is assumed to exist.
        for (QuorumPeer started : peerList) {
            if (started != null) {
                started.shutdown();
            }
        }
    }
}
Use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.
Class LENonTerminateTest, method mockServer.
/**
 * MockServer plays the role of peer C. It binds a UDP socket to the port of
 * the server registered under id 2, answers two requests for votes with a
 * vote for itself (id 2, zxid 1) and then returns, so later requests get no
 * answer.
 *
 * <p>The socket is closed when the method returns or throws.
 *
 * @throws InterruptedException declared for callers; not thrown by the code
 *         visible here
 * @throws IOException if the socket cannot be opened or a receive/send fails
 */
void mockServer() throws InterruptedException, IOException {
    byte[] b = new byte[36];
    ByteBuffer responseBuffer = ByteBuffer.wrap(b);
    DatagramPacket packet = new DatagramPacket(b, b.length);
    QuorumServer server = peers.get(Long.valueOf(2));
    try (DatagramSocket udpSocket = new DatagramSocket(server.addr.getPort())) {
        LOG.info("In MockServer");
        // Signal that the socket is bound and the mock is ready to receive.
        mockLatch.countDown();
        Vote current = new Vote(2, 1);
        for (int i = 0; i < 2; ++i) {
            udpSocket.receive(packet);
            responseBuffer.rewind();
            LOG.info("Received " + responseBuffer.getInt() + " " + responseBuffer.getLong() + " " + responseBuffer.getLong());
            LOG.info("From " + packet.getSocketAddress());
            responseBuffer.clear();
            // Skip the xid, leaving the received value in place so that it
            // is echoed back to the sender.
            responseBuffer.getInt();
            responseBuffer.putLong(2);
            responseBuffer.putLong(current.getId());
            responseBuffer.putLong(current.getZxid());
            // receive() shrank the packet length to the size of the
            // request; setData resets it to the full buffer length. The
            // packet still carries the sender's address, so send() replies
            // to the requester.
            packet.setData(b);
            udpSocket.send(packet);
        }
    }
}
Use of org.apache.zookeeper.server.quorum.Vote in project zookeeper by apache.
Class AuthFastLeaderElection, method termPredicate.
/**
 * Checks whether the proposal {@code (l, zxid)} is backed by more than half
 * of the voting view.
 *
 * @param votes the votes received so far, keyed by sender address
 * @param l     the proposed leader id to look for
 * @param zxid  the proposed zxid to look for
 * @return {@code true} if the number of votes matching both {@code l} and
 *         {@code zxid} exceeds half the size of the voting view
 */
private boolean termPredicate(HashMap<InetSocketAddress, Vote> votes, long l, long zxid) {
    int matching = 0;
    for (Vote cast : votes.values()) {
        // Only votes that agree on both the id and the zxid are counted.
        if (cast.getId() != l || cast.getZxid() != zxid) {
            continue;
        }
        matching++;
    }
    final int half = self.getVotingView().size() / 2;
    return matching > half;
}
Aggregations