Use of org.apache.zookeeper.server.TxnLogEntry in the Apache ZooKeeper project.
Class Observer, method processPacket.
/**
 * Dispatches a quorum packet received by this observer according to its type.
 *
 * <p>PING, REVALIDATE and SYNC are forwarded to their handlers. PROPOSAL,
 * COMMIT and UPTODATE are only logged. INFORM and INFORMANDACTIVATE carry a
 * serialized transaction, which is rebuilt into a {@code Request} and
 * committed on the observer's server.
 *
 * @param qp the packet to process
 * @throws Exception if handling the packet fails, or if an INFORMANDACTIVATE
 *         packet leads {@code processReconfig} to report a major change
 */
protected void processPacket(QuorumPacket qp) throws Exception {
    switch (qp.getType()) {
    case Leader.PING: {
        ping(qp);
        break;
    }
    case Leader.PROPOSAL: {
        LOG.warn("Ignoring proposal");
        break;
    }
    case Leader.COMMIT: {
        LOG.warn("Ignoring commit");
        break;
    }
    case Leader.UPTODATE: {
        LOG.error("Received an UPTODATE message after Observer started");
        break;
    }
    case Leader.REVALIDATE: {
        revalidate(qp);
        break;
    }
    case Leader.SYNC: {
        ((ObserverZooKeeperServer) zk).sync();
        break;
    }
    case Leader.INFORM: {
        ServerMetrics.getMetrics().LEARNER_COMMIT_RECEIVED_COUNT.add(1);
        // The whole payload is the serialized transaction.
        final TxnLogEntry informEntry = SerializeUtils.deserializeTxn(qp.getData());
        final TxnHeader informHdr = informEntry.getHeader();
        final Record informTxn = informEntry.getTxn();
        final TxnDigest informDigest = informEntry.getDigest();
        final Request informRequest = new Request(
                informHdr.getClientId(), informHdr.getCxid(), informHdr.getType(), informHdr, informTxn, 0);
        informRequest.logLatency(ServerMetrics.getMetrics().COMMIT_PROPAGATION_LATENCY);
        informRequest.setTxnDigest(informDigest);
        final ObserverZooKeeperServer informTarget = (ObserverZooKeeperServer) zk;
        informTarget.commitRequest(informRequest);
        break;
    }
    case Leader.INFORMANDACTIVATE: {
        // Payload layout: 8-byte suggested leader id, then the serialized transaction.
        final ByteBuffer payload = ByteBuffer.wrap(qp.getData());
        final long suggestedLeaderId = payload.getLong();
        final byte[] txnBytes = new byte[payload.remaining()];
        payload.get(txnBytes);

        final TxnLogEntry reconfigEntry = SerializeUtils.deserializeTxn(txnBytes);
        final TxnHeader reconfigHdr = reconfigEntry.getHeader();
        final Record reconfigTxn = reconfigEntry.getTxn();
        final TxnDigest reconfigDigest = reconfigEntry.getDigest();

        // The transaction body holds the new configuration as UTF-8 text.
        final String configText = new String(((SetDataTxn) reconfigTxn).getData(), UTF_8);
        final QuorumVerifier newConfig = self.configFromString(configText);

        final Request reconfigRequest = new Request(
                reconfigHdr.getClientId(), reconfigHdr.getCxid(), reconfigHdr.getType(), reconfigHdr, reconfigTxn, 0);
        reconfigRequest.setTxnDigest(reconfigDigest);

        final ObserverZooKeeperServer reconfigTarget = (ObserverZooKeeperServer) zk;
        final boolean majorChange = self.processReconfig(newConfig, suggestedLeaderId, qp.getZxid(), true);
        // The request is committed even when the reconfig is a major change;
        // the exception is raised only afterwards.
        reconfigTarget.commitRequest(reconfigRequest);
        if (majorChange) {
            throw new Exception("changes proposed in reconfig");
        }
        break;
    }
    default: {
        LOG.warn("Unknown packet type: {}", LearnerHandler.packetToString(qp));
        break;
    }
    }
}
Use of org.apache.zookeeper.server.TxnLogEntry in the Apache ZooKeeper project.
Class SerializeUtils, method deserializeTxn.
/**
 * Deserializes a serialized transaction into its header, body and optional digest.
 *
 * <p>The header is read first; its type selects the record class used to
 * decode the body. Two legacy layouts are tolerated when the body ends early:
 * a {@code create} transaction is re-read as a {@code CreateTxnV0} and
 * converted (with parent cversion {@code -1}), and a {@code closeSession}
 * transaction is treated as having no body.
 *
 * @param txnBytes the serialized header, followed by the transaction body and
 *        optionally by a digest
 * @return an entry holding the transaction (may be {@code null} for
 *         {@code closeSession}), the header, and the digest ({@code null}
 *         when digests are disabled or none is present in the bytes)
 * @throws IOException if the header type is not supported or the bytes cannot
 *         be decoded
 */
public static TxnLogEntry deserializeTxn(byte[] txnBytes) throws IOException {
    TxnHeader hdr = new TxnHeader();
    final ByteArrayInputStream bais = new ByteArrayInputStream(txnBytes);
    InputArchive ia = BinaryInputArchive.getArchive(bais);
    hdr.deserialize(ia, "hdr");
    // Remember the position right after the header so the body can be re-read
    // in the legacy (V0) create format if needed.
    bais.mark(bais.available());
    Record txn = null;
    switch (hdr.getType()) {
    case OpCode.createSession:
        // This isn't really an error txn; it just has the same
        // format. The error represents the timeout
        txn = new CreateSessionTxn();
        break;
    case OpCode.closeSession:
        txn = ZooKeeperServer.isCloseSessionTxnEnabled() ? new CloseSessionTxn() : null;
        break;
    case OpCode.create:
    case OpCode.create2:
        txn = new CreateTxn();
        break;
    case OpCode.createTTL:
        txn = new CreateTTLTxn();
        break;
    case OpCode.createContainer:
        txn = new CreateContainerTxn();
        break;
    case OpCode.delete:
    case OpCode.deleteContainer:
        txn = new DeleteTxn();
        break;
    case OpCode.reconfig:
    case OpCode.setData:
        txn = new SetDataTxn();
        break;
    case OpCode.setACL:
        txn = new SetACLTxn();
        break;
    case OpCode.error:
        txn = new ErrorTxn();
        break;
    case OpCode.multi:
        txn = new MultiTxn();
        break;
    default:
        // Plain concatenation: the previous message embedded a literal "%d"
        // that was never passed through a formatter.
        throw new IOException("Unsupported Txn with type=" + hdr.getType());
    }
    if (txn != null) {
        try {
            txn.deserialize(ia, "txn");
        } catch (EOFException e) {
            // perhaps this is a V0 Create
            if (hdr.getType() == OpCode.create) {
                CreateTxn create = (CreateTxn) txn;
                // Rewind to just after the header and decode with the old layout.
                bais.reset();
                CreateTxnV0 createv0 = new CreateTxnV0();
                createv0.deserialize(ia, "txn");
                // cool now make it V1. a -1 parentCVersion will
                // trigger fixup processing in processTxn
                create.setPath(createv0.getPath());
                create.setData(createv0.getData());
                create.setAcl(createv0.getAcl());
                create.setEphemeral(createv0.getEphemeral());
                create.setParentCVersion(-1);
            } else if (hdr.getType() == OpCode.closeSession) {
                // perhaps this is before CloseSessionTxn was added,
                // ignore it and reset txn to null
                txn = null;
            } else {
                throw e;
            }
        }
    }
    TxnDigest digest = null;
    if (ZooKeeperServer.isDigestEnabled()) {
        digest = new TxnDigest();
        try {
            digest.deserialize(ia, "digest");
        } catch (EOFException exception) {
            // may not have digest in the txn
            digest = null;
        }
    }
    return new TxnLogEntry(txn, hdr, digest);
}
Use of org.apache.zookeeper.server.TxnLogEntry in the Apache ZooKeeper project.
Class LogChopper, method chop.
/**
 * Copies a transaction log from {@code is} to {@code os}, stopping after the
 * record with the given zxid.
 *
 * <p>The file header is validated and copied first. Each record is then
 * CRC-checked and written to {@code os} as long as its zxid does not exceed
 * {@code zxid}. Gaps between consecutive zxids are reported on standard
 * output. Note that records may already have been written to {@code os} even
 * when the method returns {@code false}.
 *
 * @param is stream positioned at the start of a transaction log file
 * @param os stream receiving the chopped log
 * @param zxid the last zxid to keep
 * @return {@code true} if a record with a larger zxid was reached after at
 *         least one record was copied and {@code zxid} itself was seen;
 *         {@code false} if the magic number is invalid, the end of the log
 *         was reached first, or the log does not contain {@code zxid}
 * @throws IOException if a record's checksum does not match, or (as
 *         {@link EOFException}) if a record is not followed by its
 *         end-of-record marker
 */
public static boolean chop(InputStream is, OutputStream os, long zxid) throws IOException {
    BinaryInputArchive logStream = BinaryInputArchive.getArchive(is);
    BinaryOutputArchive choppedStream = BinaryOutputArchive.getArchive(os);
    FileHeader fhdr = new FileHeader();
    fhdr.deserialize(logStream, "fileheader");
    if (fhdr.getMagic() != FileTxnLog.TXNLOG_MAGIC) {
        System.err.println("Invalid magic number in txn log file");
        return false;
    }
    System.out.println("ZooKeeper Transactional Log File with dbid " + fhdr.getDbid() + " txnlog format version " + fhdr.getVersion());
    fhdr.serialize(choppedStream, "fileheader");
    int count = 0;
    boolean hasZxid = false;
    long previousZxid = -1;
    while (true) {
        long crcValue;
        byte[] bytes;
        try {
            crcValue = logStream.readLong("crcvalue");
            bytes = logStream.readBuffer("txnEntry");
        } catch (EOFException e) {
            System.out.println("EOF reached after " + count + " txns.");
            // returning false because nothing was chopped
            return false;
        }
        if (bytes.length == 0) {
            // Since we preallocate, we define EOF to be an
            // empty transaction
            System.out.println("EOF reached after " + count + " txns.");
            // returning false because nothing was chopped
            return false;
        }
        Checksum crc = new Adler32();
        crc.update(bytes, 0, bytes.length);
        if (crcValue != crc.getValue()) {
            throw new IOException("CRC doesn't match " + crcValue + " vs " + crc.getValue());
        }
        // Only the header is needed here; the transaction body is copied as raw bytes.
        TxnLogEntry entry = SerializeUtils.deserializeTxn(bytes);
        TxnHeader hdr = entry.getHeader();
        if (logStream.readByte("EOR") != 'B') {
            System.out.println("Last transaction was partial.");
            throw new EOFException("Last transaction was partial.");
        }
        final long txnZxid = hdr.getZxid();
        if (txnZxid == zxid) {
            hasZxid = true;
        }
        // logging the gap to make the inconsistency investigation easier
        if (previousZxid != -1 && txnZxid != previousZxid + 1) {
            long txnEpoch = ZxidUtils.getEpochFromZxid(txnZxid);
            long txnCounter = ZxidUtils.getCounterFromZxid(txnZxid);
            long previousEpoch = ZxidUtils.getEpochFromZxid(previousZxid);
            if (txnEpoch == previousEpoch) {
                System.out.println(String.format("There is intra-epoch gap between %x and %x", previousZxid, txnZxid));
            } else if (txnCounter != 1) {
                // A new epoch whose first seen counter is not 1 is also reported as a gap.
                System.out.println(String.format("There is inter-epoch gap between %x and %x", previousZxid, txnZxid));
            }
        }
        previousZxid = txnZxid;
        if (txnZxid > zxid) {
            // First record past the cut point: decide whether the chop is valid.
            if (count == 0 || !hasZxid) {
                System.out.println(String.format("This log does not contain zxid %x", zxid));
                return false;
            }
            System.out.println(String.format("Chopping at %x new log has %d records", zxid, count));
            return true;
        }
        // Record is at or before the cut point: copy it through unchanged.
        choppedStream.writeLong(crcValue, "crcvalue");
        choppedStream.writeBuffer(bytes, "txnEntry");
        choppedStream.writeByte((byte) 'B', "EOR");
        count++;
    }
}
Use of org.apache.zookeeper.server.TxnLogEntry in the Apache ZooKeeper project.
Class GetProposalFromTxnTest, method testGetProposalFromTxn.
/**
 * Tests loading proposals from the transaction log.
 *
 * <p>The test creates {@code MSG_COUNT} znodes on a standalone server,
 * restarts the server's database, and checks that the create transactions
 * read back from the txn log carry the same zxids in the same order. It then
 * lowers the snapshot size factor and expects no proposals to be returned
 * for a request starting halfway through the created zxids.
 *
 * @throws Exception
 *             if server setup, client operations or txn deserialization fail
 */
@Test
public void testGetProposalFromTxn() throws Exception {
    File tmpDir = ClientBase.createTmpDir();
    ClientBase.setupTestEnv();
    ZooKeeperServer zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
    SyncRequestProcessor.setSnapCount(100);
    final int PORT = Integer.parseInt(HOSTPORT.split(":")[1]);
    ServerCnxnFactory f = ServerCnxnFactory.createFactory(PORT, -1);
    f.startup(zks);
    assertTrue(ClientBase.waitForServerUp(HOSTPORT, CONNECTION_TIMEOUT), "waiting for server being up ");
    ZooKeeper zk = ClientBase.createZKClient(HOSTPORT);
    // Generate transaction so we will have some txnlog
    Long[] zxids = new Long[MSG_COUNT];
    try {
        String data = "data";
        byte[] bytes = data.getBytes();
        for (int i = 0; i < MSG_COUNT; i++) {
            Stat stat = new Stat();
            zk.create("/invalidsnap-" + i, bytes, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            // Read the node back to learn the zxid assigned to its creation.
            zk.getData("/invalidsnap-" + i, null, stat);
            zxids[i] = stat.getCzxid();
        }
    } finally {
        zk.close();
    }
    // shutdown and start zookeeper again
    f.shutdown();
    zks.shutdown();
    assertTrue(ClientBase.waitForServerDown(HOSTPORT, CONNECTION_TIMEOUT), "waiting for server to shutdown");
    zks = new ZooKeeperServer(tmpDir, tmpDir, 3000);
    zks.startdata();
    ZKDatabase db = zks.getZKDatabase();
    // Set sizeLimit to be very high number, so we can pull all transactions
    // from txnlog
    Iterator<Proposal> itr = db.getProposalsFromTxnLog(zxids[0], 10000000);
    int createCount = 0;
    ArrayList<Long> retrievedZxids = new ArrayList<>(MSG_COUNT);
    // Get zxid of create requests
    while (itr.hasNext()) {
        Proposal proposal = itr.next();
        TxnLogEntry logEntry = SerializeUtils.deserializeTxn(proposal.packet.getData());
        // Only the header is inspected; the transaction body is not needed.
        TxnHeader hdr = logEntry.getHeader();
        if (hdr.getType() == OpCode.create) {
            retrievedZxids.add(hdr.getZxid());
            createCount++;
        }
    }
    // All zxid should match what we created
    assertTrue(Arrays.equals(zxids, retrievedZxids.toArray(new Long[0])), "Zxids missmatches");
    // There should be MSG_COUNT create requests
    assertTrue((createCount == MSG_COUNT), "create proposal count == " + MSG_COUNT);
    // We are requesting half the number of transaction from the snapshot
    // this should exceed threshold (ZKDatabase.snapshotSizeFactor)
    db.setSnapshotSizeFactor(0.33);
    long sizeLimit = db.calculateTxnLogSizeLimit();
    itr = db.getProposalsFromTxnLog(zxids[MSG_COUNT / 2], sizeLimit);
    assertFalse((itr.hasNext()), "Expect empty proposal");
    // NOTE(review): f was already shut down before the restart above; this
    // second call looks redundant - confirm before removing.
    f.shutdown();
    zks.shutdown();
}
Use of org.apache.zookeeper.server.TxnLogEntry in the Apache ZooKeeper project.
Class Learner, method syncWithLeader.
/**
 * Finally, synchronize our history with the Leader (if Follower)
 * or the LearnerMaster (if Observer).
 *
 * <p>The first packet selects the sync mode (DIFF, SNAP or TRUNC). The method
 * then consumes packets until UPTODATE, either applying transactions directly
 * to the in-memory database or queueing them so they can be logged and
 * committed once the server has started serving.
 *
 * @param newLeaderZxid zxid announced by the new leader; its epoch becomes
 *        the current epoch and it is echoed back in the NEWLEADER ack
 * @throws Exception if reading or writing packets fails, if a received
 *         snapshot is not followed by the expected signature
 *         ({@link IOException}), or if a reconfiguration processed during
 *         sync is reported as a major change
 */
protected void syncWithLeader(long newLeaderZxid) throws Exception {
    QuorumPacket ack = new QuorumPacket(Leader.ACK, 0, null, null);
    QuorumPacket qp = new QuorumPacket();
    long newEpoch = ZxidUtils.getEpochFromZxid(newLeaderZxid);
    QuorumVerifier newLeaderQV = null;
    // In the DIFF case we don't need to do a snapshot because the transactions will sync on top of any existing snapshot
    // For SNAP and TRUNC the snapshot is needed to save that history
    boolean snapshotNeeded = true;
    boolean syncSnapshot = false;
    readPacket(qp);
    // Zxids that have been committed but not yet applied, and proposals
    // received but not yet logged/applied.
    Deque<Long> packetsCommitted = new ArrayDeque<>();
    Deque<PacketInFlight> packetsNotCommitted = new ArrayDeque<>();
    synchronized (zk) {
        if (qp.getType() == Leader.DIFF) {
            LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid()));
            self.setSyncMode(QuorumPeer.SyncMode.DIFF);
            if (zk.shouldForceWriteInitialSnapshotAfterLeaderElection()) {
                LOG.info("Forcing a snapshot write as part of upgrading from an older Zookeeper. This should only happen while upgrading.");
                snapshotNeeded = true;
                syncSnapshot = true;
            } else {
                snapshotNeeded = false;
            }
        } else if (qp.getType() == Leader.SNAP) {
            self.setSyncMode(QuorumPeer.SyncMode.SNAP);
            LOG.info("Getting a snapshot from leader 0x{}", Long.toHexString(qp.getZxid()));
            // The leader is going to dump the database
            // db is clear as part of deserializeSnapshot()
            zk.getZKDatabase().deserializeSnapshot(leaderIs);
            // When reconfig is disabled, reset the config node from the local
            // config after loading the snapshot (the original note refers to
            // inconsistency of config node content during rolling restart).
            if (!self.isReconfigEnabled()) {
                LOG.debug("Reset config node content from local config after deserialization of snapshot.");
                zk.getZKDatabase().initConfigInZKDatabase(self.getQuorumVerifier());
            }
            // The snapshot stream must be terminated by a fixed signature string.
            String signature = leaderIs.readString("signature");
            if (!signature.equals("BenWasHere")) {
                LOG.error("Missing signature. Got {}", signature);
                throw new IOException("Missing signature");
            }
            zk.getZKDatabase().setlastProcessedZxid(qp.getZxid());
            // immediately persist the latest snapshot when there is txn log gap
            syncSnapshot = true;
        } else if (qp.getType() == Leader.TRUNC) {
            // we need to truncate the log to the lastzxid of the leader
            self.setSyncMode(QuorumPeer.SyncMode.TRUNC);
            LOG.warn("Truncating log to get in sync with the leader 0x{}", Long.toHexString(qp.getZxid()));
            boolean truncated = zk.getZKDatabase().truncateLog(qp.getZxid());
            if (!truncated) {
                // not able to truncate the log
                LOG.error("Not able to truncate the log 0x{}", Long.toHexString(qp.getZxid()));
                ServiceUtils.requestSystemExit(ExitCode.QUORUM_PACKET_ERROR.getValue());
            }
            zk.getZKDatabase().setlastProcessedZxid(qp.getZxid());
        } else {
            LOG.error("Got unexpected packet from leader: {}, exiting ... ", LearnerHandler.packetToString(qp));
            ServiceUtils.requestSystemExit(ExitCode.QUORUM_PACKET_ERROR.getValue());
        }
        zk.getZKDatabase().initConfigInZKDatabase(self.getQuorumVerifier());
        zk.createSessionTracker();
        long lastQueued = 0;
        // in Zab V1.0 (ZK 3.4+) we might take a snapshot when we get the NEWLEADER message, but in pre V1.0
        // we take the snapshot on the UPDATE message, since Zab V1.0 also gets the UPDATE (after the NEWLEADER)
        // we need to make sure that we don't take the snapshot twice.
        boolean isPreZAB1_0 = true;
        // If we are not going to take the snapshot be sure the transactions are not applied in memory
        // but written out to the transaction log
        boolean writeToTxnLog = !snapshotNeeded;
        TxnLogEntry logEntry;
        // we are now going to start getting transactions to apply followed by an UPTODATE
        outerLoop: while (self.isRunning()) {
            readPacket(qp);
            switch (qp.getType()) {
            case Leader.PROPOSAL:
                PacketInFlight pif = new PacketInFlight();
                logEntry = SerializeUtils.deserializeTxn(qp.getData());
                pif.hdr = logEntry.getHeader();
                pif.rec = logEntry.getTxn();
                pif.digest = logEntry.getDigest();
                if (pif.hdr.getZxid() != lastQueued + 1) {
                    LOG.warn("Got zxid 0x{} expected 0x{}", Long.toHexString(pif.hdr.getZxid()), Long.toHexString(lastQueued + 1));
                }
                lastQueued = pif.hdr.getZxid();
                if (pif.hdr.getType() == OpCode.reconfig) {
                    SetDataTxn setDataTxn = (SetDataTxn) pif.rec;
                    QuorumVerifier qv = self.configFromString(new String(setDataTxn.getData(), UTF_8));
                    self.setLastSeenQuorumVerifier(qv, true);
                }
                packetsNotCommitted.add(pif);
                break;
            case Leader.COMMIT:
            case Leader.COMMITANDACTIVATE:
                // NOTE(review): peekFirst() returns null when no proposal is
                // pending, in which case the dereference below would throw a
                // NullPointerException - confirm a commit can never arrive
                // without a prior proposal.
                pif = packetsNotCommitted.peekFirst();
                if (pif.hdr.getZxid() == qp.getZxid() && qp.getType() == Leader.COMMITANDACTIVATE) {
                    QuorumVerifier qv = self.configFromString(new String(((SetDataTxn) pif.rec).getData(), UTF_8));
                    boolean majorChange = self.processReconfig(qv, ByteBuffer.wrap(qp.getData()).getLong(), qp.getZxid(), true);
                    if (majorChange) {
                        throw new Exception("changes proposed in reconfig");
                    }
                }
                if (!writeToTxnLog) {
                    if (pif.hdr.getZxid() != qp.getZxid()) {
                        LOG.warn("Committing 0x{}, but next proposal is 0x{}", Long.toHexString(qp.getZxid()), Long.toHexString(pif.hdr.getZxid()));
                    } else {
                        zk.processTxn(pif.hdr, pif.rec);
                        packetsNotCommitted.remove();
                    }
                } else {
                    packetsCommitted.add(qp.getZxid());
                }
                break;
            case Leader.INFORM:
            case Leader.INFORMANDACTIVATE:
                PacketInFlight packet = new PacketInFlight();
                if (qp.getType() == Leader.INFORMANDACTIVATE) {
                    // Payload layout: 8-byte suggested leader id, then the serialized txn.
                    ByteBuffer buffer = ByteBuffer.wrap(qp.getData());
                    long suggestedLeaderId = buffer.getLong();
                    byte[] remainingdata = new byte[buffer.remaining()];
                    buffer.get(remainingdata);
                    logEntry = SerializeUtils.deserializeTxn(remainingdata);
                    packet.hdr = logEntry.getHeader();
                    packet.rec = logEntry.getTxn();
                    packet.digest = logEntry.getDigest();
                    QuorumVerifier qv = self.configFromString(new String(((SetDataTxn) packet.rec).getData(), UTF_8));
                    boolean majorChange = self.processReconfig(qv, suggestedLeaderId, qp.getZxid(), true);
                    if (majorChange) {
                        throw new Exception("changes proposed in reconfig");
                    }
                } else {
                    logEntry = SerializeUtils.deserializeTxn(qp.getData());
                    packet.rec = logEntry.getTxn();
                    packet.hdr = logEntry.getHeader();
                    packet.digest = logEntry.getDigest();
                    // Log warning message if txn comes out-of-order
                    if (packet.hdr.getZxid() != lastQueued + 1) {
                        LOG.warn("Got zxid 0x{} expected 0x{}", Long.toHexString(packet.hdr.getZxid()), Long.toHexString(lastQueued + 1));
                    }
                    lastQueued = packet.hdr.getZxid();
                }
                if (!writeToTxnLog) {
                    // Apply to db directly if we haven't taken the snapshot
                    zk.processTxn(packet.hdr, packet.rec);
                } else {
                    packetsNotCommitted.add(packet);
                    packetsCommitted.add(qp.getZxid());
                }
                break;
            case Leader.UPTODATE:
                LOG.info("Learner received UPTODATE message");
                if (newLeaderQV != null) {
                    boolean majorChange = self.processReconfig(newLeaderQV, null, null, true);
                    if (majorChange) {
                        throw new Exception("changes proposed in reconfig");
                    }
                }
                if (isPreZAB1_0) {
                    zk.takeSnapshot(syncSnapshot);
                    self.setCurrentEpoch(newEpoch);
                }
                self.setZooKeeperServer(zk);
                self.adminServer.setZooKeeperServer(zk);
                break outerLoop;
            case Leader.NEWLEADER:
                // Getting NEWLEADER here instead of in discovery
                // means this is Zab 1.0
                LOG.info("Learner received NEWLEADER message");
                if (qp.getData() != null && qp.getData().length > 1) {
                    try {
                        QuorumVerifier qv = self.configFromString(new String(qp.getData(), UTF_8));
                        self.setLastSeenQuorumVerifier(qv, true);
                        newLeaderQV = qv;
                    } catch (Exception e) {
                        // Best effort: keep syncing without the leader's config,
                        // but report the failure through the logger rather than stderr.
                        LOG.warn("Unable to parse or apply the configuration carried by the NEWLEADER packet", e);
                    }
                }
                if (snapshotNeeded) {
                    zk.takeSnapshot(syncSnapshot);
                }
                self.setCurrentEpoch(newEpoch);
                writeToTxnLog = true;
                // Anything after this needs to go to the transaction log, not applied directly in memory
                isPreZAB1_0 = false;
                // ZOOKEEPER-3911: make sure sync the uncommitted logs before commit them (ACK NEWLEADER).
                sock.setSoTimeout(self.tickTime * self.syncLimit);
                self.setSyncMode(QuorumPeer.SyncMode.NONE);
                zk.startupWithoutServing();
                if (zk instanceof FollowerZooKeeperServer) {
                    FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
                    for (PacketInFlight p : packetsNotCommitted) {
                        fzk.logRequest(p.hdr, p.rec, p.digest);
                    }
                    packetsNotCommitted.clear();
                }
                writePacket(new QuorumPacket(Leader.ACK, newLeaderZxid, null, null), true);
                break;
            }
        }
    }
    ack.setZxid(ZxidUtils.makeZxid(newEpoch, 0));
    writePacket(ack, true);
    zk.startServing();
    /*
     * Update the election vote here to ensure that all members of the
     * ensemble report the same vote to new servers that start up and
     * send leader election notifications to the ensemble.
     *
     * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
     */
    self.updateElectionVote(newEpoch);
    // We need to log the stuff that came in between the snapshot and the uptodate
    if (zk instanceof FollowerZooKeeperServer) {
        FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
        for (PacketInFlight p : packetsNotCommitted) {
            fzk.logRequest(p.hdr, p.rec, p.digest);
        }
        for (Long zxid : packetsCommitted) {
            fzk.commit(zxid);
        }
    } else if (zk instanceof ObserverZooKeeperServer) {
        // Similar to follower, we need to log requests between the snapshot
        // and UPTODATE
        ObserverZooKeeperServer ozk = (ObserverZooKeeperServer) zk;
        for (PacketInFlight p : packetsNotCommitted) {
            // NOTE(review): peekFirst() returns null once packetsCommitted is
            // empty; the comparison below would then fail on unboxing -
            // confirm there is always a commit for each remaining packet.
            Long zxid = packetsCommitted.peekFirst();
            if (p.hdr.getZxid() != zxid) {
                // log warning message if there is no matching commit
                // old leader send outstanding proposal to observer
                LOG.warn("Committing 0x{}, but next proposal is 0x{}", Long.toHexString(zxid), Long.toHexString(p.hdr.getZxid()));
                continue;
            }
            packetsCommitted.remove();
            Request request = new Request(null, p.hdr.getClientId(), p.hdr.getCxid(), p.hdr.getType(), null, null);
            request.setTxn(p.rec);
            request.setHdr(p.hdr);
            request.setTxnDigest(p.digest);
            ozk.commitRequest(request);
        }
    } else {
        // New server type need to handle in-flight packets
        throw new UnsupportedOperationException("Unknown server type");
    }
}
Aggregations