Search in sources :

Example 16 with TxnHeader

use of org.apache.zookeeper.txn.TxnHeader in project zookeeper by apache.

the class Zab1_0Test method testPopulatedLeaderConversation.

public void testPopulatedLeaderConversation(PopulatedLeaderConversation conversation, int ops) throws Exception {
    Socket[] pair = getSocketPair();
    Socket leaderSocket = pair[0];
    Socket followerSocket = pair[1];
    File tmpDir = File.createTempFile("test", "dir", testData);
    tmpDir.delete();
    tmpDir.mkdir();
    LeadThread leadThread = null;
    Leader leader = null;
    try {
        // Setup a database with two znodes
        FileTxnSnapLog snapLog = new FileTxnSnapLog(tmpDir, tmpDir);
        ZKDatabase zkDb = new ZKDatabase(snapLog);
        Assert.assertTrue(ops >= 1);
        long zxid = ZxidUtils.makeZxid(1, 0);
        for (int i = 1; i <= ops; i++) {
            zxid = ZxidUtils.makeZxid(1, i);
            String path = "/foo-" + i;
            zkDb.processTxn(new TxnHeader(13, 1000 + i, zxid, 30 + i, ZooDefs.OpCode.create), new CreateTxn(path, "fpjwasalsohere".getBytes(), ZooDefs.Ids.OPEN_ACL_UNSAFE, false, 1));
            Stat stat = new Stat();
            Assert.assertEquals("fpjwasalsohere", new String(zkDb.getData(path, stat, null)));
        }
        Assert.assertTrue(zxid > ZxidUtils.makeZxid(1, 0));
        // Generate snapshot and close files.
        snapLog.save(zkDb.getDataTree(), zkDb.getSessionWithTimeOuts());
        snapLog.close();
        QuorumPeer peer = createQuorumPeer(tmpDir);
        leader = createLeader(tmpDir, peer);
        peer.leader = leader;
        // Set the last accepted epoch and current epochs to be 1
        peer.setAcceptedEpoch(1);
        peer.setCurrentEpoch(1);
        leadThread = new LeadThread(leader);
        leadThread.start();
        while (leader.cnxAcceptor == null || !leader.cnxAcceptor.isAlive()) {
            Thread.sleep(20);
        }
        LearnerHandler lh = new LearnerHandler(leaderSocket, leader);
        lh.start();
        leaderSocket.setSoTimeout(4000);
        InputArchive ia = BinaryInputArchive.getArchive(followerSocket.getInputStream());
        OutputArchive oa = BinaryOutputArchive.getArchive(followerSocket.getOutputStream());
        conversation.converseWithLeader(ia, oa, leader, zxid);
    } finally {
        if (leader != null) {
            leader.shutdown("end of test");
        }
        if (leadThread != null) {
            leadThread.interrupt();
            leadThread.join();
        }
        TestUtils.deleteFileRecursively(tmpDir);
    }
}
Also used : InputArchive(org.apache.jute.InputArchive) BinaryInputArchive(org.apache.jute.BinaryInputArchive) ZKDatabase(org.apache.zookeeper.server.ZKDatabase) FileTxnSnapLog(org.apache.zookeeper.server.persistence.FileTxnSnapLog) CreateTxn(org.apache.zookeeper.txn.CreateTxn) Stat(org.apache.zookeeper.data.Stat) BinaryOutputArchive(org.apache.jute.BinaryOutputArchive) OutputArchive(org.apache.jute.OutputArchive) File(java.io.File) ServerSocket(java.net.ServerSocket) Socket(java.net.Socket) TxnHeader(org.apache.zookeeper.txn.TxnHeader)

Example 17 with TxnHeader

use of org.apache.zookeeper.txn.TxnHeader in project zookeeper by apache.

the class TruncateTest method testTruncationStreamReset.

@Test
public void testTruncationStreamReset() throws Exception {
    File tmpdir = ClientBase.createTmpDir();
    FileTxnSnapLog snaplog = new FileTxnSnapLog(tmpdir, tmpdir);
    ZKDatabase zkdb = new ZKDatabase(snaplog);
    // make sure to snapshot, so that we have something there when
    // truncateLog reloads the db
    snaplog.save(zkdb.getDataTree(), zkdb.getSessionWithTimeOuts());
    for (int i = 1; i <= 100; i++) {
        append(zkdb, i);
    }
    zkdb.truncateLog(1);
    append(zkdb, 200);
    zkdb.close();
    // verify that the truncation and subsequent append were processed
    // correctly
    FileTxnLog txnlog = new FileTxnLog(new File(tmpdir, "version-2"));
    TxnIterator iter = txnlog.read(1);
    TxnHeader hdr = iter.getHeader();
    Record txn = iter.getTxn();
    Assert.assertEquals(1, hdr.getZxid());
    Assert.assertTrue(txn instanceof SetDataTxn);
    iter.next();
    hdr = iter.getHeader();
    txn = iter.getTxn();
    Assert.assertEquals(200, hdr.getZxid());
    Assert.assertTrue(txn instanceof SetDataTxn);
    iter.close();
    ClientBase.recursiveDelete(tmpdir);
}
Also used : FileTxnLog(org.apache.zookeeper.server.persistence.FileTxnLog) Record(org.apache.jute.Record) SetDataTxn(org.apache.zookeeper.txn.SetDataTxn) File(java.io.File) ZKDatabase(org.apache.zookeeper.server.ZKDatabase) TxnIterator(org.apache.zookeeper.server.persistence.TxnLog.TxnIterator) FileTxnSnapLog(org.apache.zookeeper.server.persistence.FileTxnSnapLog) TxnHeader(org.apache.zookeeper.txn.TxnHeader) Test(org.junit.Test)

Example 18 with TxnHeader

use of org.apache.zookeeper.txn.TxnHeader in project zookeeper by apache.

the class FileTxnLog method getLastLoggedZxid.

/**
     * get the last zxid that was logged in the transaction logs
     * @return the last zxid logged in the transaction logs
     */
public long getLastLoggedZxid() {
    File[] files = getLogFiles(logDir.listFiles(), 0);
    long maxLog = files.length > 0 ? Util.getZxidFromName(files[files.length - 1].getName(), "log") : -1;
    // if a log file is more recent we must scan it to find
    // the highest zxid
    long zxid = maxLog;
    TxnIterator itr = null;
    try {
        FileTxnLog txn = new FileTxnLog(logDir);
        itr = txn.read(maxLog);
        while (true) {
            if (!itr.next())
                break;
            TxnHeader hdr = itr.getHeader();
            zxid = hdr.getZxid();
        }
    } catch (IOException e) {
        LOG.warn("Unexpected exception", e);
    } finally {
        close(itr);
    }
    return zxid;
}
Also used : IOException(java.io.IOException) RandomAccessFile(java.io.RandomAccessFile) File(java.io.File) TxnIterator(org.apache.zookeeper.server.persistence.TxnLog.TxnIterator) TxnHeader(org.apache.zookeeper.txn.TxnHeader)

Example 19 with TxnHeader

use of org.apache.zookeeper.txn.TxnHeader in project zookeeper by apache.

the class FileTxnSnapLog method restore.

/**
     * this function restores the server
     * database after reading from the
     * snapshots and transaction logs
     * @param dt the datatree to be restored
     * @param sessions the sessions to be restored
     * @param listener the playback listener to run on the
     * database restoration
     * @return the highest zxid restored
     * @throws IOException
     */
public long restore(DataTree dt, Map<Long, Integer> sessions, PlayBackListener listener) throws IOException {
    long deserializeResult = snapLog.deserialize(dt, sessions);
    FileTxnLog txnLog = new FileTxnLog(dataDir);
    boolean trustEmptyDB;
    File initFile = new File(dataDir.getParent(), "initialize");
    if (Files.deleteIfExists(initFile.toPath())) {
        LOG.info("Initialize file found, an empty database will not block voting participation");
        trustEmptyDB = true;
    } else {
        trustEmptyDB = autoCreateDB;
    }
    if (-1L == deserializeResult) {
        /* this means that we couldn't find any snapshot, so we need to
             * initialize an empty database (reported in ZOOKEEPER-2325) */
        if (txnLog.getLastLoggedZxid() != -1) {
            throw new IOException("No snapshot found, but there are log entries. " + "Something is broken!");
        }
        if (trustEmptyDB) {
            /* TODO: (br33d) we should either put a ConcurrentHashMap on restore()
                 *       or use Map on save() */
            save(dt, (ConcurrentHashMap<Long, Integer>) sessions);
            /* return a zxid of 0, since we know the database is empty */
            return 0L;
        } else {
            /* return a zxid of -1, since we are possibly missing data */
            LOG.warn("Unexpected empty data tree, setting zxid to -1");
            dt.lastProcessedZxid = -1L;
            return -1L;
        }
    }
    TxnIterator itr = txnLog.read(dt.lastProcessedZxid + 1);
    long highestZxid = dt.lastProcessedZxid;
    TxnHeader hdr;
    try {
        while (true) {
            // iterator points to
            // the first valid txn when initialized
            hdr = itr.getHeader();
            if (hdr == null) {
                //empty logs
                return dt.lastProcessedZxid;
            }
            if (hdr.getZxid() < highestZxid && highestZxid != 0) {
                LOG.error("{}(higestZxid) > {}(next log) for type {}", new Object[] { highestZxid, hdr.getZxid(), hdr.getType() });
            } else {
                highestZxid = hdr.getZxid();
            }
            try {
                processTransaction(hdr, dt, sessions, itr.getTxn());
            } catch (KeeperException.NoNodeException e) {
                throw new IOException("Failed to process transaction type: " + hdr.getType() + " error: " + e.getMessage(), e);
            }
            listener.onTxnLoaded(hdr, itr.getTxn());
            if (!itr.next())
                break;
        }
    } finally {
        if (itr != null) {
            itr.close();
        }
    }
    return highestZxid;
}
Also used : IOException(java.io.IOException) File(java.io.File) TxnIterator(org.apache.zookeeper.server.persistence.TxnLog.TxnIterator) KeeperException(org.apache.zookeeper.KeeperException) TxnHeader(org.apache.zookeeper.txn.TxnHeader)

Example 20 with TxnHeader

use of org.apache.zookeeper.txn.TxnHeader in project zookeeper by apache.

the class Learner method syncWithLeader.

/**
     * Finally, synchronize our history with the Leader. 
     * @param newLeaderZxid
     * @throws IOException
     * @throws InterruptedException
     */
protected void syncWithLeader(long newLeaderZxid) throws Exception {
    QuorumPacket ack = new QuorumPacket(Leader.ACK, 0, null, null);
    QuorumPacket qp = new QuorumPacket();
    long newEpoch = ZxidUtils.getEpochFromZxid(newLeaderZxid);
    QuorumVerifier newLeaderQV = null;
    // In the DIFF case we don't need to do a snapshot because the transactions will sync on top of any existing snapshot
    // For SNAP and TRUNC the snapshot is needed to save that history
    boolean snapshotNeeded = true;
    readPacket(qp);
    LinkedList<Long> packetsCommitted = new LinkedList<Long>();
    LinkedList<PacketInFlight> packetsNotCommitted = new LinkedList<PacketInFlight>();
    synchronized (zk) {
        if (qp.getType() == Leader.DIFF) {
            LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid()));
            snapshotNeeded = false;
        } else if (qp.getType() == Leader.SNAP) {
            LOG.info("Getting a snapshot from leader");
            // The leader is going to dump the database
            // db is clear as part of deserializeSnapshot()
            zk.getZKDatabase().deserializeSnapshot(leaderIs);
            String signature = leaderIs.readString("signature");
            if (!signature.equals("BenWasHere")) {
                LOG.error("Missing signature. Got " + signature);
                throw new IOException("Missing signature");
            }
        } else if (qp.getType() == Leader.TRUNC) {
            //we need to truncate the log to the lastzxid of the leader
            LOG.warn("Truncating log to get in sync with the leader 0x" + Long.toHexString(qp.getZxid()));
            boolean truncated = zk.getZKDatabase().truncateLog(qp.getZxid());
            if (!truncated) {
                // not able to truncate the log
                LOG.error("Not able to truncate the log " + Long.toHexString(qp.getZxid()));
                System.exit(13);
            }
        } else {
            LOG.error("Got unexpected packet from leader: {}, exiting ... ", LearnerHandler.packetToString(qp));
            System.exit(13);
        }
        zk.getZKDatabase().initConfigInZKDatabase(self.getQuorumVerifier());
        zk.getZKDatabase().setlastProcessedZxid(qp.getZxid());
        zk.createSessionTracker();
        long lastQueued = 0;
        // in Zab V1.0 (ZK 3.4+) we might take a snapshot when we get the NEWLEADER message, but in pre V1.0
        // we take the snapshot on the UPDATE message, since Zab V1.0 also gets the UPDATE (after the NEWLEADER)
        // we need to make sure that we don't take the snapshot twice.
        boolean isPreZAB1_0 = true;
        //If we are not going to take the snapshot be sure the transactions are not applied in memory
        // but written out to the transaction log
        boolean writeToTxnLog = !snapshotNeeded;
        // we are now going to start getting transactions to apply followed by an UPTODATE
        outerLoop: while (self.isRunning()) {
            readPacket(qp);
            switch(qp.getType()) {
                case Leader.PROPOSAL:
                    PacketInFlight pif = new PacketInFlight();
                    pif.hdr = new TxnHeader();
                    pif.rec = SerializeUtils.deserializeTxn(qp.getData(), pif.hdr);
                    if (pif.hdr.getZxid() != lastQueued + 1) {
                        LOG.warn("Got zxid 0x" + Long.toHexString(pif.hdr.getZxid()) + " expected 0x" + Long.toHexString(lastQueued + 1));
                    }
                    lastQueued = pif.hdr.getZxid();
                    if (pif.hdr.getType() == OpCode.reconfig) {
                        SetDataTxn setDataTxn = (SetDataTxn) pif.rec;
                        QuorumVerifier qv = self.configFromString(new String(setDataTxn.getData()));
                        self.setLastSeenQuorumVerifier(qv, true);
                    }
                    packetsNotCommitted.add(pif);
                    break;
                case Leader.COMMIT:
                case Leader.COMMITANDACTIVATE:
                    pif = packetsNotCommitted.peekFirst();
                    if (pif.hdr.getZxid() == qp.getZxid() && qp.getType() == Leader.COMMITANDACTIVATE) {
                        QuorumVerifier qv = self.configFromString(new String(((SetDataTxn) pif.rec).getData()));
                        boolean majorChange = self.processReconfig(qv, ByteBuffer.wrap(qp.getData()).getLong(), qp.getZxid(), true);
                        if (majorChange) {
                            throw new Exception("changes proposed in reconfig");
                        }
                    }
                    if (!writeToTxnLog) {
                        if (pif.hdr.getZxid() != qp.getZxid()) {
                            LOG.warn("Committing " + qp.getZxid() + ", but next proposal is " + pif.hdr.getZxid());
                        } else {
                            zk.processTxn(pif.hdr, pif.rec);
                            packetsNotCommitted.remove();
                        }
                    } else {
                        packetsCommitted.add(qp.getZxid());
                    }
                    break;
                case Leader.INFORM:
                case Leader.INFORMANDACTIVATE:
                    PacketInFlight packet = new PacketInFlight();
                    packet.hdr = new TxnHeader();
                    if (qp.getType() == Leader.INFORMANDACTIVATE) {
                        ByteBuffer buffer = ByteBuffer.wrap(qp.getData());
                        long suggestedLeaderId = buffer.getLong();
                        byte[] remainingdata = new byte[buffer.remaining()];
                        buffer.get(remainingdata);
                        packet.rec = SerializeUtils.deserializeTxn(remainingdata, packet.hdr);
                        QuorumVerifier qv = self.configFromString(new String(((SetDataTxn) packet.rec).getData()));
                        boolean majorChange = self.processReconfig(qv, suggestedLeaderId, qp.getZxid(), true);
                        if (majorChange) {
                            throw new Exception("changes proposed in reconfig");
                        }
                    } else {
                        packet.rec = SerializeUtils.deserializeTxn(qp.getData(), packet.hdr);
                        // Log warning message if txn comes out-of-order
                        if (packet.hdr.getZxid() != lastQueued + 1) {
                            LOG.warn("Got zxid 0x" + Long.toHexString(packet.hdr.getZxid()) + " expected 0x" + Long.toHexString(lastQueued + 1));
                        }
                        lastQueued = packet.hdr.getZxid();
                    }
                    if (!writeToTxnLog) {
                        // Apply to db directly if we haven't taken the snapshot
                        zk.processTxn(packet.hdr, packet.rec);
                    } else {
                        packetsNotCommitted.add(packet);
                        packetsCommitted.add(qp.getZxid());
                    }
                    break;
                case Leader.UPTODATE:
                    LOG.info("Learner received UPTODATE message");
                    if (newLeaderQV != null) {
                        boolean majorChange = self.processReconfig(newLeaderQV, null, null, true);
                        if (majorChange) {
                            throw new Exception("changes proposed in reconfig");
                        }
                    }
                    if (isPreZAB1_0) {
                        zk.takeSnapshot();
                        self.setCurrentEpoch(newEpoch);
                    }
                    self.setZooKeeperServer(zk);
                    self.adminServer.setZooKeeperServer(zk);
                    break outerLoop;
                case // Getting NEWLEADER here instead of in discovery 
                Leader.NEWLEADER:
                    // means this is Zab 1.0
                    LOG.info("Learner received NEWLEADER message");
                    if (qp.getData() != null && qp.getData().length > 1) {
                        try {
                            QuorumVerifier qv = self.configFromString(new String(qp.getData()));
                            self.setLastSeenQuorumVerifier(qv, true);
                            newLeaderQV = qv;
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    }
                    if (snapshotNeeded) {
                        zk.takeSnapshot();
                    }
                    self.setCurrentEpoch(newEpoch);
                    //Anything after this needs to go to the transaction log, not applied directly in memory
                    writeToTxnLog = true;
                    isPreZAB1_0 = false;
                    writePacket(new QuorumPacket(Leader.ACK, newLeaderZxid, null, null), true);
                    break;
            }
        }
    }
    ack.setZxid(ZxidUtils.makeZxid(newEpoch, 0));
    writePacket(ack, true);
    sock.setSoTimeout(self.tickTime * self.syncLimit);
    zk.startup();
    /*
         * Update the election vote here to ensure that all members of the
         * ensemble report the same vote to new servers that start up and
         * send leader election notifications to the ensemble.
         * 
         * @see https://issues.apache.org/jira/browse/ZOOKEEPER-1732
         */
    self.updateElectionVote(newEpoch);
    // We need to log the stuff that came in between the snapshot and the uptodate
    if (zk instanceof FollowerZooKeeperServer) {
        FollowerZooKeeperServer fzk = (FollowerZooKeeperServer) zk;
        for (PacketInFlight p : packetsNotCommitted) {
            fzk.logRequest(p.hdr, p.rec);
        }
        for (Long zxid : packetsCommitted) {
            fzk.commit(zxid);
        }
    } else if (zk instanceof ObserverZooKeeperServer) {
        // Similar to follower, we need to log requests between the snapshot
        // and UPTODATE
        ObserverZooKeeperServer ozk = (ObserverZooKeeperServer) zk;
        for (PacketInFlight p : packetsNotCommitted) {
            Long zxid = packetsCommitted.peekFirst();
            if (p.hdr.getZxid() != zxid) {
                // log warning message if there is no matching commit
                // old leader send outstanding proposal to observer
                LOG.warn("Committing " + Long.toHexString(zxid) + ", but next proposal is " + Long.toHexString(p.hdr.getZxid()));
                continue;
            }
            packetsCommitted.remove();
            Request request = new Request(null, p.hdr.getClientId(), p.hdr.getCxid(), p.hdr.getType(), null, null);
            request.setTxn(p.rec);
            request.setHdr(p.hdr);
            ozk.commitRequest(request);
        }
    } else {
        // New server type need to handle in-flight packets
        throw new UnsupportedOperationException("Unknown server type");
    }
}
Also used : Request(org.apache.zookeeper.server.Request) IOException(java.io.IOException) SetDataTxn(org.apache.zookeeper.txn.SetDataTxn) ByteBuffer(java.nio.ByteBuffer) QuorumVerifier(org.apache.zookeeper.server.quorum.flexible.QuorumVerifier) LinkedList(java.util.LinkedList) ConnectException(java.net.ConnectException) IOException(java.io.IOException) TxnHeader(org.apache.zookeeper.txn.TxnHeader)

Aggregations

TxnHeader (org.apache.zookeeper.txn.TxnHeader)24 Record (org.apache.jute.Record)15 IOException (java.io.IOException)13 File (java.io.File)9 SetDataTxn (org.apache.zookeeper.txn.SetDataTxn)8 ByteBuffer (java.nio.ByteBuffer)7 CreateTxn (org.apache.zookeeper.txn.CreateTxn)6 Test (org.junit.Test)6 BinaryOutputArchive (org.apache.jute.BinaryOutputArchive)5 KeeperException (org.apache.zookeeper.KeeperException)5 Request (org.apache.zookeeper.server.Request)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 BinaryInputArchive (org.apache.jute.BinaryInputArchive)4 Stat (org.apache.zookeeper.data.Stat)4 ZKDatabase (org.apache.zookeeper.server.ZKDatabase)4 FileTxnSnapLog (org.apache.zookeeper.server.persistence.FileTxnSnapLog)4 QuorumVerifier (org.apache.zookeeper.server.quorum.flexible.QuorumVerifier)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 EOFException (java.io.EOFException)3 ArrayList (java.util.ArrayList)3