Use of com.zimbra.cs.redolog.op.CommitTxn in project zm-mailbox by Zimbra.
The class RedoLogManager, method getChangedMailboxesSince:
/**
 * Returns the set of mailboxes that had any committed changes since a
 * particular CommitId in the past, by scanning redologs. Also returns
 * the last CommitId seen during the scanning process.
 * @param cid
 * @return can be null if server is shutting down
 * @throws IOException
 * @throws MailServiceException
 */
public Pair<Set<Integer>, CommitId> getChangedMailboxesSince(CommitId cid) throws IOException, MailServiceException {
    Set<Integer> mailboxes = new HashSet<Integer>();
    // Grab a read lock to prevent rollover.
    ReadLock readLock = mRWLock.readLock();
    try {
        readLock.lockInterruptibly();
    } catch (InterruptedException e) {
        synchronized (mShuttingDownGuard) {
            if (!mShuttingDown)
                ZimbraLog.redolog.error("InterruptedException during redo log scan for CommitId", e);
            else
                ZimbraLog.redolog.debug("Redo log scan for CommitId interrupted for shutdown");
        }
        return null;
    }
    File linkDir = null;
    File[] logs;
    try {
        try {
            long seq = cid.getRedoSeq();
            File[] archived = getArchivedLogsFromSequence(seq);
            if (archived != null) {
                logs = new File[archived.length + 1];
                System.arraycopy(archived, 0, logs, 0, archived.length);
                logs[archived.length] = mLogFile;
            } else {
                logs = new File[] { mLogFile };
            }
            // Make sure the first log has the sequence in cid.
            FileLogReader firstLog = new FileLogReader(logs[0]);
            if (firstLog.getHeader().getSequence() != seq) {
                // Most likely, the CommitId is too old.
                throw MailServiceException.INVALID_COMMIT_ID(cid.toString());
            }
            // Create a temp directory and make hard links to all redologs.
            // This prevents the logs from disappearing while being scanned.
            String dirName = "tmp-scan-" + System.currentTimeMillis();
            linkDir = new File(mLogFile.getParentFile(), dirName);
            if (linkDir.exists()) {
                int suffix = 1;
                while (linkDir.exists()) {
                    linkDir = new File(mLogFile.getParentFile(), dirName + "-" + suffix);
                    suffix++;  // advance the suffix; without this the loop would never terminate
                }
            }
            if (!linkDir.mkdir())
                throw new IOException("Unable to create temp dir " + linkDir.getAbsolutePath());
            for (int i = 0; i < logs.length; i++) {
                File src = logs[i];
                File dest = new File(linkDir, logs[i].getName());
                IO.link(src.getAbsolutePath(), dest.getAbsolutePath());
                logs[i] = dest;
            }
        } finally {
            // We can let rollover happen now.
            readLock.unlock();
        }
        // Scan redologs to get the list of IDs of mailboxes that have
        // committed changes since the given commit id.
        long lastSeq = -1;
        CommitTxn lastCommitTxn = null;
        boolean foundMarker = false;
        for (File logfile : logs) {
            FileLogReader logReader = new FileLogReader(logfile);
            logReader.open();
            lastSeq = logReader.getHeader().getSequence();
            try {
                RedoableOp op = null;
                while ((op = logReader.getNextOp()) != null) {
                    if (ZimbraLog.redolog.isDebugEnabled())
                        ZimbraLog.redolog.debug("Read: " + op);
                    if (!(op instanceof CommitTxn))
                        continue;
                    lastCommitTxn = (CommitTxn) op;
                    if (foundMarker) {
                        int mboxId = op.getMailboxId();
                        if (mboxId > 0)
                            mailboxes.add(mboxId);
                    } else {
                        if (cid.matches(lastCommitTxn))
                            foundMarker = true;
                    }
                }
            } catch (IOException e) {
                ZimbraLog.redolog.warn("IOException while reading redolog file", e);
            } finally {
                logReader.close();
            }
        }
        if (!foundMarker) {
            // Most likely, the CommitId is too old.
            throw MailServiceException.INVALID_COMMIT_ID(cid.toString());
        }
        CommitId lastCommitId = new CommitId(lastSeq, lastCommitTxn);
        return new Pair<Set<Integer>, CommitId>(mailboxes, lastCommitId);
    } finally {
        if (linkDir != null) {
            // Clean up the temp dir with links.
            try {
                if (linkDir.exists())
                    FileUtil.deleteDir(linkDir);
            } catch (IOException e) {
                ZimbraLog.redolog.warn("Unable to delete temporary directory " + linkDir.getAbsolutePath(), e);
            }
        }
    }
}
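A minimal calling sketch for the method above, assuming a RedoLogManager instance and a previously saved CommitId are at hand; the variable names are illustrative, not taken from the project:

    // Hedged usage sketch: find mailboxes changed since the last known commit.
    Pair<Set<Integer>, CommitId> result = redoLogMgr.getChangedMailboxesSince(lastSeenCommitId);
    if (result == null) {
        // Server is shutting down; try again later.
    } else {
        Set<Integer> changedMailboxes = result.getFirst(); // mailbox IDs with committed changes
        lastSeenCommitId = result.getSecond();             // resume point for the next incremental scan
    }

If the supplied CommitId cannot be located in the retained redologs (typically because it is too old), the method throws MailServiceException.INVALID_COMMIT_ID, so callers should be prepared to fall back to a full resynchronization.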
Use of com.zimbra.cs.redolog.op.CommitTxn in project zm-mailbox by Zimbra.
The class RedoPlayer, method runCrashRecovery:
/**
 *
 * @param redoLogMgr
 * @param postStartupRecoveryOps operations to recover/redo after startup
 *                               completes and clients are allowed to
 *                               connect
 * @return number of operations redone (regardless of their success)
 * @throws Exception
 */
public int runCrashRecovery(RedoLogManager redoLogMgr, List<RedoableOp> postStartupRecoveryOps) throws Exception {
    File redoLog = redoLogMgr.getLogFile();
    if (!redoLog.exists())
        return 0;
    long lookBackTstamp = Long.MAX_VALUE;
    long lookBackDuration = RedoConfig.redoLogCrashRecoveryLookbackSec() * 1000;
    if (lookBackDuration > 0) {
        // Guess the last op's timestamp. Use the log file's last modified time. Sanity check it by
        // going no earlier than the create time written in the log. We can't rely on the last
        // op time field in the header because that is only accurate when the file was closed normally
        // but in crash recovery we're not dealing with a normally closed log.
        long logLastModTime = redoLog.lastModified();
        long logCreateTime = (new FileLogReader(redoLog)).getHeader().getCreateTime();
        long lastOpTstamp = Math.max(logLastModTime, logCreateTime);
        lookBackTstamp = lastOpTstamp - lookBackDuration;
    }
    // scanLog can truncate the current redo.log if it finds junk data at the end
    // from the previous crash. Close log writer before scanning and reopen after
    // so we don't accidentally undo the truncation on the next write to the log.
    LogWriter logWriter = redoLogMgr.getLogWriter();
    logWriter.close();
    scanLog(redoLog, false, null, Long.MIN_VALUE, Long.MAX_VALUE, lookBackTstamp);
    logWriter.open();
    int numOps;
    synchronized (mOpsMapGuard) {
        numOps = mOpsMap.size();
    }
    if (numOps == 0) {
        ZimbraLog.redolog.info("No uncommitted transactions to redo");
        return 0;
    }
    synchronized (mOpsMapGuard) {
        Set entrySet = mOpsMap.entrySet();
        ZimbraLog.redolog.info("Redoing " + numOps + " uncommitted transactions");
        for (Iterator it = entrySet.iterator(); it.hasNext(); ) {
            Map.Entry entry = (Entry) it.next();
            RedoableOp op = (RedoableOp) entry.getValue();
            if (op == null)
                continue;
            if (op.deferCrashRecovery()) {
                ZimbraLog.redolog.info("Deferring crash recovery to after startup: " + op);
                postStartupRecoveryOps.add(op);
                continue;
            }
            if (ZimbraLog.redolog.isInfoEnabled())
                ZimbraLog.redolog.info("REDOING: " + op);
            boolean success = false;
            try {
                op.redo();
                success = true;
            } catch (Exception e) {
                ZimbraLog.redolog.error("Redo failed for [" + op + "]." +
                        " Backend state of affected item is indeterminate." +
                        " Marking operation as aborted and moving on.", e);
            } finally {
                if (success) {
                    CommitTxn commit = new CommitTxn(op);
                    redoLogMgr.logOnly(commit, true);
                } else {
                    AbortTxn abort = new AbortTxn(op);
                    redoLogMgr.logOnly(abort, true);
                }
            }
        }
        mOpsMap.clear();
    }
    return numOps;
}
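A hedged sketch of how crash recovery might be driven at server startup; the wiring below (the redoPlayer/redoLogMgr variables and replaying deferred ops with a plain redo() call) is illustrative rather than the project's actual startup code:

    // Hypothetical startup sequence: recover first, then handle deferred ops.
    List<RedoableOp> deferred = new ArrayList<RedoableOp>();
    int redone = redoPlayer.runCrashRecovery(redoLogMgr, deferred);
    ZimbraLog.redolog.info("Crash recovery redid " + redone + " operations");
    // ... finish startup, allow clients to connect ...
    for (RedoableOp op : deferred) {
        op.redo();  // ops that requested deferCrashRecovery() run after startup
    }

Note that runCrashRecovery itself logs a CommitTxn or AbortTxn for every op it replays, so the redolog records the outcome of recovery just as it records normal operations.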
Use of com.zimbra.cs.redolog.op.CommitTxn in project zm-mailbox by Zimbra.
The class RedoPlayer, method processOp:
private final void processOp(RedoableOp op, boolean redoCommitted, Map<Integer, Integer> mboxIDsMap,
        long startTime, long endTime, long ignoreCommitsAtOrAfter) throws ServiceException {
    if (op.isStartMarker()) {
        synchronized (mOpsMapGuard) {
            mOpsMap.put(op.getTransactionId(), op);
            if (mHasOrphanOps) {
                RedoableOp x = mOrphanOps.remove(op.getTransactionId());
                if (x != null)
                    ZimbraLog.redolog.error("Detected out-of-order insertion of change record for orphans commit/abort: change=" + op + ", orphan=" + x);
            }
        }
    } else {
        // A checkpoint record lists the transactions that were still active
        // (uncommitted) at the time it was written; sanity-check the in-memory
        // uncommitted-ops map against those listed in the checkpoint.
        if (op instanceof Checkpoint) {
            Checkpoint ckpt = (Checkpoint) op;
            Set txns = ckpt.getActiveTxns();
            if (txns.size() > 0) {
                synchronized (mOpsMapGuard) {
                    if (mOpsMap.size() != txns.size()) {
                        // Unexpected discrepancy
                        if (ZimbraLog.redolog.isDebugEnabled()) {
                            StringBuffer sb1 = new StringBuffer("Current Uncommitted Ops: ");
                            StringBuffer sb2 = new StringBuffer("Checkpoint Uncommitted Ops: ");
                            int i = 0;
                            for (Iterator it = mOpsMap.keySet().iterator(); it.hasNext(); i++) {
                                TransactionId id = (TransactionId) it.next();
                                if (i > 0)
                                    sb1.append(", ");
                                sb1.append(id);
                            }
                            i = 0;
                            for (Iterator it = txns.iterator(); it.hasNext(); i++) {
                                TransactionId id = (TransactionId) it.next();
                                if (i > 0)
                                    sb2.append(", ");
                                sb2.append(id);
                            }
                            ZimbraLog.redolog.info("Checkpoint discrepancy: # current uncommitted ops = " + mOpsMap.size() +
                                    ", # checkpoint uncommitted ops = " + txns.size() +
                                    "\nMAP DUMP:\n" + sb1.toString() + "\n" + sb2.toString());
                        }
                    }
                }
            } else {
                synchronized (mOpsMapGuard) {
                    if (mOpsMap.size() != 0) {
                        // Unexpected discrepancy
                        if (ZimbraLog.redolog.isDebugEnabled()) {
                            StringBuffer sb1 = new StringBuffer("Current Uncommitted Ops: ");
                            int i = 0;
                            for (Iterator it = mOpsMap.keySet().iterator(); it.hasNext(); i++) {
                                TransactionId id = (TransactionId) it.next();
                                if (i > 0)
                                    sb1.append(", ");
                                sb1.append(id);
                            }
                            ZimbraLog.redolog.info("Checkpoint discrepancy: # current uncommitted ops = " + mOpsMap.size() +
                                    " instead of 0\nMAP DUMP:\n" + sb1.toString());
                        }
                    }
                }
            }
        } else if (op.isEndMarker()) {
            // Ignore if op is a commit and its timestamp is at or after ignoreCommitsAtOrAfter.
            // In other words, don't ignore if op is a rollback OR its timestamp is before ignoreCommitsAtOrAfter.
            boolean isCommitOp = op instanceof CommitTxn;
            long opTstamp = op.getTimestamp();
            if (!isCommitOp || opTstamp < ignoreCommitsAtOrAfter) {
                // Encountered COMMIT or ABORT. Discard the
                // corresponding op from map, and optionally execute the committed op.
                RedoableOp prepareOp;
                synchronized (mOpsMapGuard) {
                    prepareOp = (RedoableOp) mOpsMap.remove(op.getTransactionId());
                    if (prepareOp == null) {
                        mHasOrphanOps = true;
                        ZimbraLog.redolog.error("Commit/abort record encountered before corresponding change record (" + op + ")");
                        TransactionId tid = op.getTransactionId();
                        RedoableOp x = (RedoableOp) mOrphanOps.get(tid);
                        if (x != null)
                            ZimbraLog.redolog.error("Op [" + op + "] is already in orphans map: value=" + x);
                        mOrphanOps.put(tid, op);
                    }
                }
                if (redoCommitted && prepareOp != null && isCommitOp &&
                        (startTime == -1 || prepareOp.getTimestamp() >= startTime) && opTstamp < endTime) {
                    boolean allowRedo = false;
                    if (mboxIDsMap == null) {
                        // Caller doesn't care which mailbox(es) the op is for.
                        allowRedo = true;
                    } else {
                        int opMailboxId = prepareOp.getMailboxId();
                        if (prepareOp instanceof StoreIncomingBlob) {
                            assert (opMailboxId == RedoableOp.MAILBOX_ID_ALL);
                            // special case for StoreIncomingBlob op that has
                            // a list of mailbox IDs.
                            StoreIncomingBlob storeOp = (StoreIncomingBlob) prepareOp;
                            List<Integer> list = storeOp.getMailboxIdList();
                            if (list != null) {
                                Set<Integer> opMboxIds = new HashSet<Integer>(list);
                                for (Map.Entry<Integer, Integer> entry : mboxIDsMap.entrySet()) {
                                    if (opMboxIds.contains(entry.getKey())) {
                                        allowRedo = true;
                                        // Replace the mailbox ID list in the op. We're
                                        // replaying it only for the target mailbox ID we're
                                        // interested in.
                                        List<Integer> newList = new ArrayList<Integer>(mboxIDsMap.values());
                                        storeOp.setMailboxIdList(newList);
                                        break;
                                    }
                                }
                            } else {
                                // Prior to redolog version 1.0 StoreIncomingBlob
                                // didn't keep track of mailbox list. Always recreate
                                // the blob since we don't know which mailboxes will
                                // need it.
                                allowRedo = true;
                            }
                        } else if (opMailboxId == RedoableOp.MAILBOX_ID_ALL) {
                            // This case should be checked after StoreIncomingBlob
                            // case because StoreIncomingBlob has mailbox ID of
                            // MAILBOX_ID_ALL.
                            allowRedo = true;
                        } else {
                            for (Map.Entry<Integer, Integer> entry : mboxIDsMap.entrySet()) {
                                if (opMailboxId == entry.getKey().intValue()) {
                                    if (entry.getValue() != null) {
                                        // restore to a different mailbox
                                        prepareOp.setMailboxId(entry.getValue().intValue());
                                    }
                                    allowRedo = true;
                                    break;
                                }
                            }
                        }
                    }
                    if (allowRedo) {
                        if (mSkipDeleteOps && prepareOp.isDeleteOp()) {
                            ZimbraLog.redolog.info("Skipping delete op: " + prepareOp.toString());
                        } else {
                            try {
                                if (ZimbraLog.redolog.isDebugEnabled())
                                    ZimbraLog.redolog.debug("Redoing: " + prepareOp.toString());
                                prepareOp.setUnloggedReplay(mUnloggedReplay);
                                playOp(prepareOp);
                            } catch (Exception e) {
                                if (!ignoreReplayErrors())
                                    throw ServiceException.FAILURE("Error executing redoOp", e);
                                else
                                    ZimbraLog.redolog.warn("Ignoring error during redo log replay: " + e.getMessage(), e);
                            }
                        }
                    }
                }
            }
        }
    }
}
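The mboxIDsMap parameter controls selective replay. Based on the branch above that calls prepareOp.setMailboxId(), a null map replays committed ops for every mailbox, a key selects a mailbox whose committed ops should be replayed, and a non-null value redirects those ops into a different target mailbox. A small hedged illustration (the IDs are made up):

    // Hypothetical selection map for replay/restore.
    Map<Integer, Integer> mboxIDsMap = new HashMap<Integer, Integer>();
    mboxIDsMap.put(5, null); // replay mailbox 5's committed ops into mailbox 5 itself
    mboxIDsMap.put(7, 42);   // restore mailbox 7's committed ops into mailbox 42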
Use of com.zimbra.cs.redolog.op.CommitTxn in project zm-mailbox by Zimbra.
The class FileLogWriter, method log:
/**
 * Log the supplied bytes. Depending on the value of the synchronous argument
 * and the setting of the fsync interval, this method can do one of 3 things:
 *
 * case 1: !synchronous
 * action: write() only; no flush/fsync
 *
 * case 2: synchronous && fsyncInterval > 0
 * action: write(), then wait() until notified by fsync thread
 * The current thread only calls write() on the RandomAccessFile, and blocks to
 * wait for the next scheduled fsync. The fsync thread periodically flushes
 * the output stream and fsync's the file. After each fsync, all waiting
 * logger threads are notified to continue. This method batches multiple
 * log items before each fsync, and results in greater throughput than
 * calling fsync after each log item because fsync to physical disk is
 * a high-latency operation.
 *
 * case 3: synchronous && fsyncInterval <= 0
 * action: write(), then fsync() in the current thread
 * Fsync is required, but the sleep interval for the fsync thread is 0. We
 * special case this condition to mean fsync should be done by the calling
 * thread.
 */
@Override
public void log(RedoableOp op, InputStream data, boolean synchronous) throws IOException {
    int seq;
    boolean sameMboxAsLastOp = false;
    synchronized (mLock) {
        if (mRAF == null)
            throw new IOException("Redolog file closed");
        // Record first transaction in header.
        long tstamp = op.getTimestamp();
        mLastOpTstamp = Math.max(tstamp, mLastOpTstamp);
        if (mFirstOpTstamp == 0) {
            mFirstOpTstamp = tstamp;
            mHeader.setFirstOpTstamp(mFirstOpTstamp);
            mHeader.setLastOpTstamp(mLastOpTstamp);
            long pos = mRAF.getFilePointer();
            mHeader.write(mRAF);
            mRAF.seek(pos);
        }
        mLogSeq++;
        mLogCount++;
        seq = mLogSeq;
        int numRead;
        byte[] buf = new byte[1024];
        while ((numRead = data.read(buf)) >= 0) {
            mRAF.write(buf, 0, numRead);
            mFileSize += numRead;
        }
        data.close();
        // If this is a commit record with a callback, queue the notification while
        // still holding mLock, so that commits and the callbacks made on their
        // behalf are truly in the correct order.
        if (op instanceof CommitTxn) {
            CommitTxn cmt = (CommitTxn) op;
            RedoCommitCallback cb = cmt.getCallback();
            if (cb != null) {
                long redoSeq = mRedoLogMgr.getRolloverManager().getCurrentSequence();
                CommitId cid = new CommitId(redoSeq, (CommitTxn) op);
                Notif notif = new Notif(cb, cid);
                // We queue it instead of making the callback right away.
                // Call it only after the commit record has been fsynced.
                mCommitNotifyQueue.push(notif);
            }
        }
        mLastLogTime = System.currentTimeMillis();
        sameMboxAsLastOp = mLastOpMboxId == op.getMailboxId();
        mLastOpMboxId = op.getMailboxId();
    }
    // case 1 above
    if (!synchronous)
        return;
    if (mFsyncIntervalMS > 0) {
        if (!sameMboxAsLastOp) {
            // case 2
            try {
                // wait for fsync thread to write this entry to disk
                synchronized (mFsyncCond) {
                    mFsyncCond.wait(10000);
                }
            } catch (InterruptedException e) {
                ZimbraLog.redolog.info("Thread interrupted during fsync");
            }
            synchronized (mLock) {
                // timed out, so fsync in this thread
                if (seq > mFsyncSeq)
                    fsync();
            }
        } else {
            // If this op is on same mailbox as last op, let's assume there's a thread issuing
            // many updates on a single mailbox in a loop, such as when importing a large ics file.
            // We don't want to pause for mFsyncIntervalMS between every op (because all writes
            // to a single mailbox are synchronized), so fsync inline and return immediately.
            fsync();
        }
    } else {
        // case 3
        fsync();
    }
}
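To make case 2 concrete, here is a small self-contained sketch of the same batched-fsync ("group commit") pattern using only the JDK; it illustrates the idea rather than Zimbra's FileLogWriter, which additionally falls back to fsyncing in the calling thread when its wait times out, as shown above:

    import java.io.FileOutputStream;
    import java.io.IOException;

    /** Toy illustration of batching many appends behind one fsync. */
    class GroupCommitSketch {
        private final Object lock = new Object();      // guards the file and the sequence counters
        private final Object fsyncCond = new Object(); // writers park here until their record is durable
        private final FileOutputStream out;
        private long writeSeq = 0;                     // sequence of the last buffered write
        private long fsyncSeq = 0;                     // highest sequence known to be on disk

        GroupCommitSketch(FileOutputStream out) {
            this.out = out;
        }

        /** Called by many logger threads: cheap write, then wait for the shared fsync. */
        void append(byte[] record) throws IOException, InterruptedException {
            long mySeq;
            synchronized (lock) {
                out.write(record);
                mySeq = ++writeSeq;
            }
            synchronized (fsyncCond) {
                while (fsyncSeq < mySeq)   // loop guards against spurious wakeups
                    fsyncCond.wait(100);
            }
        }

        /** Called periodically by a single background fsync thread. */
        void fsyncPass() throws IOException {
            long upTo;
            synchronized (lock) {
                upTo = writeSeq;
                out.flush();
                out.getFD().sync();        // the expensive, high-latency step, amortized over many appends
            }
            synchronized (fsyncCond) {
                if (upTo > fsyncSeq) {
                    fsyncSeq = upTo;
                    fsyncCond.notifyAll(); // release every writer whose record is now on disk
                }
            }
        }
    }

One fsync now covers every record appended since the previous pass, which is why throughput improves even though each logger thread still observes a durable write before returning.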
Use of com.zimbra.cs.redolog.op.CommitTxn in project zm-mailbox by Zimbra.
The class RedoLogManager, method commit:
/**
 * Logs the COMMIT record for an operation.
 * @param op
 */
public void commit(RedoableOp op) {
    if (mEnabled) {
        long redoSeq = mRolloverMgr.getCurrentSequence();
        CommitTxn commit = new CommitTxn(op);
        // Commit records are written without fsync. It's okay to
        // allow fsync to happen by itself or wait for one during
        // logging of next redo item.
        log(commit, false);
        commit.setSerializedByteArray(null);
    }
}
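For context, a hedged sketch of the lifecycle this commit() call completes; the try/finally shape, the visibility of log(), and the abort() counterpart are assumptions for illustration, not copied from the mailbox transaction code:

    // Hypothetical caller: log the change record, apply it, then seal the transaction.
    redoLogMgr.log(op, true);      // durably record the change before applying it
    boolean success = false;
    try {
        // ... apply the change to the mailbox store ...
        success = true;
    } finally {
        if (success)
            redoLogMgr.commit(op); // writes the CommitTxn record, as shown above
        else
            redoLogMgr.abort(op);  // writes an abort record instead
    }

During crash recovery (see runCrashRecovery above), operations without a commit or abort record are replayed and then explicitly committed or aborted, which is what makes the CommitTxn marker the authoritative sign that a change took effect.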