use of org.apache.zookeeper.server.quorum.flexible.QuorumVerifier in project zookeeper by apache.
the class PrepRequestProcessor method pRequest2Txn.
/**
* This method will be called inside the ProcessRequestThread, which is a
* singleton, so there will be a single thread calling this code.
*
* @param type
* @param zxid
* @param request
* @param record
*/
protected void pRequest2Txn(int type, long zxid, Request request, Record record, boolean deserialize) throws KeeperException, IOException, RequestProcessorException {
request.setHdr(new TxnHeader(request.sessionId, request.cxid, zxid, Time.currentWallTime(), type));
switch(type) {
case OpCode.create:
case OpCode.create2:
case OpCode.createTTL:
case OpCode.createContainer:
{
pRequest2TxnCreate(type, request, record, deserialize);
break;
}
case OpCode.deleteContainer:
{
String path = new String(request.request.array());
String parentPath = getParentPathAndValidate(path);
ChangeRecord parentRecord = getRecordForPath(parentPath);
ChangeRecord nodeRecord = getRecordForPath(path);
if (nodeRecord.childCount > 0) {
throw new KeeperException.NotEmptyException(path);
}
if (EphemeralType.get(nodeRecord.stat.getEphemeralOwner()) == EphemeralType.NORMAL) {
throw new KeeperException.BadVersionException(path);
}
request.setTxn(new DeleteTxn(path));
parentRecord = parentRecord.duplicate(request.getHdr().getZxid());
parentRecord.childCount--;
addChangeRecord(parentRecord);
addChangeRecord(new ChangeRecord(request.getHdr().getZxid(), path, null, -1, null));
break;
}
case OpCode.delete:
zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
DeleteRequest deleteRequest = (DeleteRequest) record;
if (deserialize)
ByteBufferInputStream.byteBuffer2Record(request.request, deleteRequest);
String path = deleteRequest.getPath();
String parentPath = getParentPathAndValidate(path);
ChangeRecord parentRecord = getRecordForPath(parentPath);
ChangeRecord nodeRecord = getRecordForPath(path);
checkACL(zks, request.cnxn, parentRecord.acl, ZooDefs.Perms.DELETE, request.authInfo, path, null);
checkAndIncVersion(nodeRecord.stat.getVersion(), deleteRequest.getVersion(), path);
if (nodeRecord.childCount > 0) {
throw new KeeperException.NotEmptyException(path);
}
request.setTxn(new DeleteTxn(path));
parentRecord = parentRecord.duplicate(request.getHdr().getZxid());
parentRecord.childCount--;
addChangeRecord(parentRecord);
addChangeRecord(new ChangeRecord(request.getHdr().getZxid(), path, null, -1, null));
break;
case OpCode.setData:
zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
SetDataRequest setDataRequest = (SetDataRequest) record;
if (deserialize)
ByteBufferInputStream.byteBuffer2Record(request.request, setDataRequest);
path = setDataRequest.getPath();
validatePath(path, request.sessionId);
nodeRecord = getRecordForPath(path);
checkACL(zks, request.cnxn, nodeRecord.acl, ZooDefs.Perms.WRITE, request.authInfo, path, null);
int newVersion = checkAndIncVersion(nodeRecord.stat.getVersion(), setDataRequest.getVersion(), path);
request.setTxn(new SetDataTxn(path, setDataRequest.getData(), newVersion));
nodeRecord = nodeRecord.duplicate(request.getHdr().getZxid());
nodeRecord.stat.setVersion(newVersion);
addChangeRecord(nodeRecord);
break;
case OpCode.reconfig:
if (!QuorumPeerConfig.isReconfigEnabled()) {
LOG.error("Reconfig operation requested but reconfig feature is disabled.");
throw new KeeperException.ReconfigDisabledException();
}
if (skipACL) {
LOG.warn("skipACL is set, reconfig operation will skip ACL checks!");
}
zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
ReconfigRequest reconfigRequest = (ReconfigRequest) record;
LeaderZooKeeperServer lzks;
try {
lzks = (LeaderZooKeeperServer) zks;
} catch (ClassCastException e) {
// standalone mode - reconfiguration currently not supported
throw new KeeperException.UnimplementedException();
}
QuorumVerifier lastSeenQV = lzks.self.getLastSeenQuorumVerifier();
// check that there's no reconfig in progress
if (lastSeenQV.getVersion() != lzks.self.getQuorumVerifier().getVersion()) {
throw new KeeperException.ReconfigInProgress();
}
long configId = reconfigRequest.getCurConfigId();
if (configId != -1 && configId != lzks.self.getLastSeenQuorumVerifier().getVersion()) {
String msg = "Reconfiguration from version " + configId + " failed -- last seen version is " + lzks.self.getLastSeenQuorumVerifier().getVersion();
throw new KeeperException.BadVersionException(msg);
}
String newMembers = reconfigRequest.getNewMembers();
if (newMembers != null) {
//non-incremental membership change
LOG.info("Non-incremental reconfig");
// Input may be delimited by either commas or newlines so convert to common newline separated format
newMembers = newMembers.replaceAll(",", "\n");
try {
Properties props = new Properties();
props.load(new StringReader(newMembers));
request.qv = QuorumPeerConfig.parseDynamicConfig(props, lzks.self.getElectionType(), true, false);
request.qv.setVersion(request.getHdr().getZxid());
} catch (IOException e) {
throw new KeeperException.BadArgumentsException(e.getMessage());
} catch (ConfigException e) {
throw new KeeperException.BadArgumentsException(e.getMessage());
}
} else {
//incremental change - must be a majority quorum system
LOG.info("Incremental reconfig");
List<String> joiningServers = null;
String joiningServersString = reconfigRequest.getJoiningServers();
if (joiningServersString != null) {
joiningServers = StringUtils.split(joiningServersString, ",");
}
List<String> leavingServers = null;
String leavingServersString = reconfigRequest.getLeavingServers();
if (leavingServersString != null) {
leavingServers = StringUtils.split(leavingServersString, ",");
}
if (!(lastSeenQV instanceof QuorumMaj)) {
String msg = "Incremental reconfiguration requested but last configuration seen has a non-majority quorum system";
LOG.warn(msg);
throw new KeeperException.BadArgumentsException(msg);
}
Map<Long, QuorumServer> nextServers = new HashMap<Long, QuorumServer>(lastSeenQV.getAllMembers());
try {
if (leavingServers != null) {
for (String leaving : leavingServers) {
long sid = Long.parseLong(leaving);
nextServers.remove(sid);
}
}
if (joiningServers != null) {
for (String joiner : joiningServers) {
// joiner should have the following format: server.x = server_spec;client_spec
String[] parts = StringUtils.split(joiner, "=").toArray(new String[0]);
if (parts.length != 2) {
throw new KeeperException.BadArgumentsException("Wrong format of server string");
}
// extract server id x from first part of joiner: server.x
Long sid = Long.parseLong(parts[0].substring(parts[0].lastIndexOf('.') + 1));
QuorumServer qs = new QuorumServer(sid, parts[1]);
if (qs.clientAddr == null || qs.electionAddr == null || qs.addr == null) {
throw new KeeperException.BadArgumentsException("Wrong format of server string - each server should have 3 ports specified");
}
// check duplication of addresses and ports
for (QuorumServer nqs : nextServers.values()) {
if (qs.id == nqs.id) {
continue;
}
qs.checkAddressDuplicate(nqs);
}
nextServers.remove(qs.id);
nextServers.put(Long.valueOf(qs.id), qs);
}
}
} catch (ConfigException e) {
throw new KeeperException.BadArgumentsException("Reconfiguration failed");
}
request.qv = new QuorumMaj(nextServers);
request.qv.setVersion(request.getHdr().getZxid());
}
if (QuorumPeerConfig.isStandaloneEnabled() && request.qv.getVotingMembers().size() < 2) {
String msg = "Reconfig failed - new configuration must include at least 2 followers";
LOG.warn(msg);
throw new KeeperException.BadArgumentsException(msg);
} else if (request.qv.getVotingMembers().size() < 1) {
String msg = "Reconfig failed - new configuration must include at least 1 follower";
LOG.warn(msg);
throw new KeeperException.BadArgumentsException(msg);
}
if (!lzks.getLeader().isQuorumSynced(request.qv)) {
String msg2 = "Reconfig failed - there must be a connected and synced quorum in new configuration";
LOG.warn(msg2);
throw new KeeperException.NewConfigNoQuorum();
}
nodeRecord = getRecordForPath(ZooDefs.CONFIG_NODE);
checkACL(zks, request.cnxn, nodeRecord.acl, ZooDefs.Perms.WRITE, request.authInfo, null, null);
request.setTxn(new SetDataTxn(ZooDefs.CONFIG_NODE, request.qv.toString().getBytes(), -1));
nodeRecord = nodeRecord.duplicate(request.getHdr().getZxid());
nodeRecord.stat.setVersion(-1);
addChangeRecord(nodeRecord);
break;
case OpCode.setACL:
zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
SetACLRequest setAclRequest = (SetACLRequest) record;
if (deserialize)
ByteBufferInputStream.byteBuffer2Record(request.request, setAclRequest);
path = setAclRequest.getPath();
validatePath(path, request.sessionId);
List<ACL> listACL = fixupACL(path, request.authInfo, setAclRequest.getAcl());
nodeRecord = getRecordForPath(path);
checkACL(zks, request.cnxn, nodeRecord.acl, ZooDefs.Perms.ADMIN, request.authInfo, path, listACL);
newVersion = checkAndIncVersion(nodeRecord.stat.getAversion(), setAclRequest.getVersion(), path);
request.setTxn(new SetACLTxn(path, listACL, newVersion));
nodeRecord = nodeRecord.duplicate(request.getHdr().getZxid());
nodeRecord.stat.setAversion(newVersion);
addChangeRecord(nodeRecord);
break;
case OpCode.createSession:
request.request.rewind();
int to = request.request.getInt();
request.setTxn(new CreateSessionTxn(to));
request.request.rewind();
if (request.isLocalSession()) {
// This will add to local session tracker if it is enabled
zks.sessionTracker.addSession(request.sessionId, to);
} else {
// Explicitly add to global session if the flag is not set
zks.sessionTracker.addGlobalSession(request.sessionId, to);
}
zks.setOwner(request.sessionId, request.getOwner());
break;
case OpCode.closeSession:
// We don't want to do this check since the session expiration thread
// queues up this operation without being the session owner.
// this request is the last of the session so it should be ok
//zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
Set<String> es = zks.getZKDatabase().getEphemerals(request.sessionId);
synchronized (zks.outstandingChanges) {
for (ChangeRecord c : zks.outstandingChanges) {
if (c.stat == null) {
// Doing a delete
es.remove(c.path);
} else if (c.stat.getEphemeralOwner() == request.sessionId) {
es.add(c.path);
}
}
for (String path2Delete : es) {
addChangeRecord(new ChangeRecord(request.getHdr().getZxid(), path2Delete, null, 0, null));
}
zks.sessionTracker.setSessionClosing(request.sessionId);
}
LOG.info("Processed session termination for sessionid: 0x" + Long.toHexString(request.sessionId));
break;
case OpCode.check:
zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
CheckVersionRequest checkVersionRequest = (CheckVersionRequest) record;
if (deserialize)
ByteBufferInputStream.byteBuffer2Record(request.request, checkVersionRequest);
path = checkVersionRequest.getPath();
validatePath(path, request.sessionId);
nodeRecord = getRecordForPath(path);
checkACL(zks, request.cnxn, nodeRecord.acl, ZooDefs.Perms.READ, request.authInfo, path, null);
request.setTxn(new CheckVersionTxn(path, checkAndIncVersion(nodeRecord.stat.getVersion(), checkVersionRequest.getVersion(), path)));
break;
default:
LOG.warn("unknown type " + type);
break;
}
}
use of org.apache.zookeeper.server.quorum.flexible.QuorumVerifier in project zookeeper by apache.
the class Leader method lead.
/**
* This method is main function that is called to lead
*
* @throws IOException
* @throws InterruptedException
*/
void lead() throws IOException, InterruptedException {
self.end_fle = Time.currentElapsedTime();
long electionTimeTaken = self.end_fle - self.start_fle;
self.setElectionTimeTaken(electionTimeTaken);
LOG.info("LEADING - LEADER ELECTION TOOK - {} {}", electionTimeTaken, QuorumPeer.FLE_TIME_UNIT);
self.start_fle = 0;
self.end_fle = 0;
zk.registerJMX(new LeaderBean(this, zk), self.jmxLocalPeerBean);
try {
self.tick.set(0);
zk.loadData();
leaderStateSummary = new StateSummary(self.getCurrentEpoch(), zk.getLastProcessedZxid());
// Start thread that waits for connection requests from
// new followers.
cnxAcceptor = new LearnerCnxAcceptor();
cnxAcceptor.start();
long epoch = getEpochToPropose(self.getId(), self.getAcceptedEpoch());
zk.setZxid(ZxidUtils.makeZxid(epoch, 0));
synchronized (this) {
lastProposed = zk.getZxid();
}
newLeaderProposal.packet = new QuorumPacket(NEWLEADER, zk.getZxid(), null, null);
if ((newLeaderProposal.packet.getZxid() & 0xffffffffL) != 0) {
LOG.info("NEWLEADER proposal has Zxid of " + Long.toHexString(newLeaderProposal.packet.getZxid()));
}
QuorumVerifier lastSeenQV = self.getLastSeenQuorumVerifier();
QuorumVerifier curQV = self.getQuorumVerifier();
if (curQV.getVersion() == 0 && curQV.getVersion() == lastSeenQV.getVersion()) {
// the last-seen-quorumverifier of the leader, which we change below
try {
QuorumVerifier newQV = self.configFromString(curQV.toString());
newQV.setVersion(zk.getZxid());
self.setLastSeenQuorumVerifier(newQV, true);
} catch (Exception e) {
throw new IOException(e);
}
}
newLeaderProposal.addQuorumVerifier(self.getQuorumVerifier());
if (self.getLastSeenQuorumVerifier().getVersion() > self.getQuorumVerifier().getVersion()) {
newLeaderProposal.addQuorumVerifier(self.getLastSeenQuorumVerifier());
}
// We have to get at least a majority of servers in sync with
// us. We do this by waiting for the NEWLEADER packet to get
// acknowledged
waitForEpochAck(self.getId(), leaderStateSummary);
self.setCurrentEpoch(epoch);
try {
waitForNewLeaderAck(self.getId(), zk.getZxid(), LearnerType.PARTICIPANT);
} catch (InterruptedException e) {
shutdown("Waiting for a quorum of followers, only synced with sids: [ " + newLeaderProposal.ackSetsToString() + " ]");
HashSet<Long> followerSet = new HashSet<Long>();
for (LearnerHandler f : getLearners()) {
if (self.getQuorumVerifier().getVotingMembers().containsKey(f.getSid())) {
followerSet.add(f.getSid());
}
}
boolean initTicksShouldBeIncreased = true;
for (Proposal.QuorumVerifierAcksetPair qvAckset : newLeaderProposal.qvAcksetPairs) {
if (!qvAckset.getQuorumVerifier().containsQuorum(followerSet)) {
initTicksShouldBeIncreased = false;
break;
}
}
if (initTicksShouldBeIncreased) {
LOG.warn("Enough followers present. " + "Perhaps the initTicks need to be increased.");
}
return;
}
startZkServer();
/**
* WARNING: do not use this for anything other than QA testing
* on a real cluster. Specifically to enable verification that quorum
* can handle the lower 32bit roll-over issue identified in
* ZOOKEEPER-1277. Without this option it would take a very long
* time (on order of a month say) to see the 4 billion writes
* necessary to cause the roll-over to occur.
*
* This field allows you to override the zxid of the server. Typically
* you'll want to set it to something like 0xfffffff0 and then
* start the quorum, run some operations and see the re-election.
*/
String initialZxid = System.getProperty("zookeeper.testingonly.initialZxid");
if (initialZxid != null) {
long zxid = Long.parseLong(initialZxid);
zk.setZxid((zk.getZxid() & 0xffffffff00000000L) | zxid);
}
if (!System.getProperty("zookeeper.leaderServes", "yes").equals("no")) {
self.setZooKeeperServer(zk);
}
self.adminServer.setZooKeeperServer(zk);
// Everything is a go, simply start counting the ticks
// WARNING: I couldn't find any wait statement on a synchronized
// block that would be notified by this notifyAll() call, so
// I commented it out
//synchronized (this) {
// notifyAll();
//}
// We ping twice a tick, so we only update the tick every other
// iteration
boolean tickSkip = true;
// If not null then shutdown this leader
String shutdownMessage = null;
while (true) {
synchronized (this) {
long start = Time.currentElapsedTime();
long cur = start;
long end = start + self.tickTime / 2;
while (cur < end) {
wait(end - cur);
cur = Time.currentElapsedTime();
}
if (!tickSkip) {
self.tick.incrementAndGet();
}
// We use an instance of SyncedLearnerTracker to
// track synced learners to make sure we still have a
// quorum of current (and potentially next pending) view.
SyncedLearnerTracker syncedAckSet = new SyncedLearnerTracker();
syncedAckSet.addQuorumVerifier(self.getQuorumVerifier());
if (self.getLastSeenQuorumVerifier() != null && self.getLastSeenQuorumVerifier().getVersion() > self.getQuorumVerifier().getVersion()) {
syncedAckSet.addQuorumVerifier(self.getLastSeenQuorumVerifier());
}
syncedAckSet.addAck(self.getId());
for (LearnerHandler f : getLearners()) {
if (f.synced()) {
syncedAckSet.addAck(f.getSid());
}
}
// check leader running status
if (!this.isRunning()) {
// set shutdown flag
shutdownMessage = "Unexpected internal error";
break;
}
if (!tickSkip && !syncedAckSet.hasAllQuorums()) {
// Lost quorum of last committed and/or last proposed
// config, set shutdown flag
shutdownMessage = "Not sufficient followers synced, only synced with sids: [ " + syncedAckSet.ackSetsToString() + " ]";
break;
}
tickSkip = !tickSkip;
}
for (LearnerHandler f : getLearners()) {
f.ping();
}
}
if (shutdownMessage != null) {
shutdown(shutdownMessage);
// leader goes in looking state
}
} finally {
zk.unregisterJMX(this);
}
}
use of org.apache.zookeeper.server.quorum.flexible.QuorumVerifier in project zookeeper by apache.
the class Leader method waitForEpochAck.
public void waitForEpochAck(long id, StateSummary ss) throws IOException, InterruptedException {
synchronized (electingFollowers) {
if (electionFinished) {
return;
}
if (ss.getCurrentEpoch() != -1) {
if (ss.isMoreRecentThan(leaderStateSummary)) {
throw new IOException("Follower is ahead of the leader, leader summary: " + leaderStateSummary.getCurrentEpoch() + " (current epoch), " + leaderStateSummary.getLastZxid() + " (last zxid)");
}
if (ss.getLastZxid() != -1) {
electingFollowers.add(id);
}
}
QuorumVerifier verifier = self.getQuorumVerifier();
if (electingFollowers.contains(self.getId()) && verifier.containsQuorum(electingFollowers)) {
electionFinished = true;
electingFollowers.notifyAll();
} else {
long start = Time.currentElapsedTime();
long cur = start;
long end = start + self.getInitLimit() * self.getTickTime();
while (!electionFinished && cur < end) {
electingFollowers.wait(end - cur);
cur = Time.currentElapsedTime();
}
if (!electionFinished) {
throw new InterruptedException("Timeout while waiting for epoch to be acked by quorum");
}
}
}
}
use of org.apache.zookeeper.server.quorum.flexible.QuorumVerifier in project zookeeper by apache.
the class Leader method startZkServer.
/**
* Start up Leader ZooKeeper server and initialize zxid to the new epoch
*/
private synchronized void startZkServer() {
// Update lastCommitted and Db's zxid to a value representing the new epoch
lastCommitted = zk.getZxid();
LOG.info("Have quorum of supporters, sids: [ " + newLeaderProposal.ackSetsToString() + " ]; starting up and setting last processed zxid: 0x{}", Long.toHexString(zk.getZxid()));
/*
* ZOOKEEPER-1324. the leader sends the new config it must complete
* to others inside a NEWLEADER message (see LearnerHandler where
* the NEWLEADER message is constructed), and once it has enough
* acks we must execute the following code so that it applies the
* config to itself.
*/
QuorumVerifier newQV = self.getLastSeenQuorumVerifier();
Long designatedLeader = getDesignatedLeader(newLeaderProposal, zk.getZxid());
self.processReconfig(newQV, designatedLeader, zk.getZxid(), true);
if (designatedLeader != self.getId()) {
allowedToCommit = false;
}
zk.startup();
/*
* Update the election vote here to ensure that all members of the
* ensemble report the same vote to new servers that start up and
* send leader election notifications to the ensemble.
*
* @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
*/
self.updateElectionVote(getEpoch());
zk.getZKDatabase().setlastProcessedZxid(zk.getZxid());
}
use of org.apache.zookeeper.server.quorum.flexible.QuorumVerifier in project zookeeper by apache.
the class Observer method processPacket.
/**
* Controls the response of an observer to the receipt of a quorumpacket
* @param qp
* @throws Exception
*/
protected void processPacket(QuorumPacket qp) throws Exception {
switch(qp.getType()) {
case Leader.PING:
ping(qp);
break;
case Leader.PROPOSAL:
LOG.warn("Ignoring proposal");
break;
case Leader.COMMIT:
LOG.warn("Ignoring commit");
break;
case Leader.UPTODATE:
LOG.error("Received an UPTODATE message after Observer started");
break;
case Leader.REVALIDATE:
revalidate(qp);
break;
case Leader.SYNC:
((ObserverZooKeeperServer) zk).sync();
break;
case Leader.INFORM:
TxnHeader hdr = new TxnHeader();
Record txn = SerializeUtils.deserializeTxn(qp.getData(), hdr);
Request request = new Request(hdr.getClientId(), hdr.getCxid(), hdr.getType(), hdr, txn, 0);
ObserverZooKeeperServer obs = (ObserverZooKeeperServer) zk;
obs.commitRequest(request);
break;
case Leader.INFORMANDACTIVATE:
hdr = new TxnHeader();
// get new designated leader from (current) leader's message
ByteBuffer buffer = ByteBuffer.wrap(qp.getData());
long suggestedLeaderId = buffer.getLong();
byte[] remainingdata = new byte[buffer.remaining()];
buffer.get(remainingdata);
txn = SerializeUtils.deserializeTxn(remainingdata, hdr);
QuorumVerifier qv = self.configFromString(new String(((SetDataTxn) txn).getData()));
request = new Request(hdr.getClientId(), hdr.getCxid(), hdr.getType(), hdr, txn, 0);
obs = (ObserverZooKeeperServer) zk;
boolean majorChange = self.processReconfig(qv, suggestedLeaderId, qp.getZxid(), true);
obs.commitRequest(request);
if (majorChange) {
throw new Exception("changes proposed in reconfig");
}
break;
default:
LOG.warn("Unknown packet type: {}", LearnerHandler.packetToString(qp));
break;
}
}
Aggregations