Use of alluxio.exception.JournalClosedException.IOJournalClosedException in project alluxio by Alluxio.
From the class UfsJournalLogWriter, method flush.
/**
 * Flushes the entries written so far to the UFS and decides whether the current log must be
 * rotated before the next write.
 *
 * <p>A pending UFS-failure recovery is attempted first. If there is no open journal output
 * stream, or nothing has been written to it, the method returns without flushing. After a
 * successful flush the retry queue {@code mEntriesToFlush} is cleared. The log is marked for
 * rotation on the next write when it has reached {@code mMaxLogSize} or when the UFS does not
 * support flush.
 *
 * @throws IOException if recovery fails, or if the flush fails; a flush failure additionally
 *         sets {@code mNeedsRecovery} and discards the current output stream
 * @throws JournalClosedException if an {@link IOJournalClosedException} is raised while
 *         recovering or flushing
 */
public synchronized void flush() throws IOException, JournalClosedException {
  checkIsWritable();
  try {
    maybeRecoverFromUfsFailures();
  } catch (IOJournalClosedException e) {
    // Translate exactly as write() does, so a closed journal surfaces as JournalClosedException
    // regardless of which public method triggered the recovery.
    throw e.toJournalClosedException();
  }
  if (mJournalOutputStream == null || mJournalOutputStream.bytesWritten() == 0) {
    // There is nothing to flush.
    return;
  }
  try {
    mJournalOutputStream.flush();
    // Since flush has succeeded, it's safe to clear the mEntriesToFlush queue
    // because they are considered "persisted" in UFS.
    mEntriesToFlush.clear();
  } catch (IOJournalClosedException e) {
    throw e.toJournalClosedException();
  } catch (IOException e) {
    // On next operation, attempt to recover from a UFS failure.
    mNeedsRecovery = true;
    // Capture the log reference before dropping the stream; it is needed for the message.
    UfsJournalFile currentLog = mJournalOutputStream.currentLog();
    mJournalOutputStream = null;
    throw new IOException(ExceptionMessage.JOURNAL_FLUSH_FAILURE.getMessageWithUrl(
        RuntimeConstants.ALLUXIO_DEBUG_DOCS_URL, currentLog, e.getMessage()), e);
  }
  boolean overSize = mJournalOutputStream.bytesWritten() >= mMaxLogSize;
  if (overSize || !mUfs.supportsFlush()) {
    // When the UFS cannot flush, entries sit in a local temporary file; the log has to be
    // closed and completed to sync the journal entries to S3/OSS, so rotate on the next write.
    if (overSize) {
      LOG.info("Rotating log file {}. size: {} maxSize: {}", currentLogName(),
          mJournalOutputStream.bytesWritten(), mMaxLogSize);
    }
    mRotateLogForNextWrite = true;
  }
}
Use of alluxio.exception.JournalClosedException.IOJournalClosedException in project alluxio by Alluxio.
From the class UfsJournalLogWriter, method write.
/**
 * Writes a single journal entry to the current log.
 *
 * <p>Before writing, a pending UFS-failure recovery and a pending log rotation are carried out.
 * The entry is then stamped with {@code mNextSequenceNumber}, written to the journal output
 * stream, queued in {@code mEntriesToFlush}, and the sequence counter is advanced.
 *
 * @param entry the journal entry to write; its sequence number is overwritten
 * @throws IOException if recovery or rotation fails, or if the write fails; a write failure
 *         additionally sets {@code mNeedsRecovery}
 * @throws JournalClosedException if an {@link IOJournalClosedException} is raised while
 *         preparing the log or writing the entry
 */
public synchronized void write(JournalEntry entry) throws IOException, JournalClosedException {
  checkIsWritable();

  // Step 1: bring the writer into a usable state (recover, then rotate if requested).
  try {
    maybeRecoverFromUfsFailures();
    maybeRotateLog();
  } catch (IOJournalClosedException closed) {
    throw closed.toJournalClosedException();
  }

  // Step 2: stamp the entry and append it to the log.
  try {
    final JournalEntry stamped =
        entry.toBuilder().setSequenceNumber(mNextSequenceNumber).build();
    stamped.writeDelimitedTo(mJournalOutputStream);
    LOG.debug("Adding journal entry (seq={}) to retryList with {} entries. currentLog: {}",
        stamped.getSequenceNumber(), mEntriesToFlush.size(), currentLogName());
    // Only entries that reached the stream are queued for a possible retry.
    mEntriesToFlush.add(stamped);
    mNextSequenceNumber += 1;
  } catch (IOJournalClosedException closed) {
    throw closed.toJournalClosedException();
  } catch (IOException failure) {
    // Flag the failure so that maybeRecoverFromUfsFailures runs on the next operation.
    mNeedsRecovery = true;
    String message = ExceptionMessage.JOURNAL_WRITE_FAILURE.getMessageWithUrl(
        RuntimeConstants.ALLUXIO_DEBUG_DOCS_URL, mJournalOutputStream.currentLog(),
        failure.getMessage());
    throw new IOException(message, failure);
  }
}
Use of alluxio.exception.JournalClosedException.IOJournalClosedException in project alluxio by Alluxio.
From the class UfsJournalLogWriter, method maybeRecoverFromUfsFailures.
/**
 * Core logic of UFS journal recovery from UFS failures.
 *
 * If Alluxio stores its journals in UFS, then Alluxio needs to handle UFS failures.
 * When UFS is dead, there is nothing Alluxio can do because Alluxio relies on UFS to
 * persist journal entries. Consequently any metadata operation will block because Alluxio
 * cannot flush their journal entries.
 * Once UFS comes back online, Alluxio needs to perform the following operations:
 * 1. Find out the sequence number of the last persisted journal entry, say X. Then the first
 *    non-persisted entry has sequence number Y = X + 1. A new log file starting at Y is created.
 * 2. Check whether there is any missing journal entry between Y (inclusive) and the oldest
 *    entry in mEntriesToFlush, say Z. If Z > Y, then it means journal entries in [Y, Z) are
 *    missing, and Alluxio cannot recover. Otherwise, for each journal entry in
 *    {@link #mEntriesToFlush}, if its sequence number is larger than or equal to Y, retry
 *    writing it directly to the output stream of the new log file.
 * 3. Verify that the last retried sequence number equals {@code mNextSequenceNumber - 1}.
 *
 * This method is a no-op when {@code mNeedsRecovery} is false. The flag is only cleared after
 * all of the steps above succeed, so a failed recovery is attempted again on the next call.
 *
 * @throws IOException if locating the last persisted entry, creating the new log file, or
 *         re-writing an entry fails
 * @throws JournalClosedException if an {@link IOJournalClosedException} is raised while
 *         re-writing an entry
 * @throws RuntimeException if no persisted entry can be found, if entries are missing between
 *         the persisted journal and the retry queue, or if not all queued entries were retried
 */
private void maybeRecoverFromUfsFailures() throws IOException, JournalClosedException {
  checkIsWritable();
  if (!mNeedsRecovery) {
    return;
  }
  // The timer context is closed by try-with-resources, recording how long recovery took.
  try (Timer.Context ctx = MetricsSystem
      .timer(MetricKey.MASTER_UFS_JOURNAL_FAILURE_RECOVER_TIMER.getName()).time()) {
    long lastPersistSeq = recoverLastPersistedJournalEntry();
    if (lastPersistSeq == -1) {
      throw new RuntimeException(
          "Cannot find any journal entry to recover. location: " + mJournal.getLocation());
    }
    long firstUnpersistedSeq = lastPersistSeq + 1;
    createNewLogFile(firstUnpersistedSeq);
    if (!mEntriesToFlush.isEmpty()) {
      JournalEntry firstEntryToFlush = mEntriesToFlush.peek();
      if (firstEntryToFlush.getSequenceNumber() > firstUnpersistedSeq) {
        // There is a gap between what UFS holds and what is still buffered: unrecoverable.
        throw new RuntimeException(ExceptionMessage.JOURNAL_ENTRY_MISSING.getMessageWithUrl(
            RuntimeConstants.ALLUXIO_DEBUG_DOCS_URL, firstUnpersistedSeq,
            firstEntryToFlush.getSequenceNumber()));
      }
      long retryEndSeq = lastPersistSeq;
      LOG.info("Retry writing unwritten journal entries from seq {} to currentLog {}",
          firstUnpersistedSeq, currentLogName());
      for (JournalEntry entry : mEntriesToFlush) {
        // Entries at or below lastPersistSeq are already in UFS; skip them.
        if (entry.getSequenceNumber() > lastPersistSeq) {
          try {
            // Queued entries are already-built messages, so they are written as-is.
            entry.writeDelimitedTo(mJournalOutputStream);
            retryEndSeq = entry.getSequenceNumber();
          } catch (IOJournalClosedException e) {
            throw e.toJournalClosedException();
          } catch (IOException e) {
            throw new IOException(ExceptionMessage.JOURNAL_WRITE_FAILURE.getMessageWithUrl(
                RuntimeConstants.ALLUXIO_DEBUG_DOCS_URL, mJournalOutputStream.currentLog(),
                e.getMessage()), e);
          }
        }
      }
      LOG.info("Finished writing unwritten journal entries from {} to {}. currentLog: {}",
          firstUnpersistedSeq, retryEndSeq, currentLogName());
      if (retryEndSeq != mNextSequenceNumber - 1) {
        throw new RuntimeException("Failed to recover all entries to flush, expecting "
            + (mNextSequenceNumber - 1) + " but only found entry " + retryEndSeq
            + " currentLog: " + currentLogName());
      }
    }
  }
  mNeedsRecovery = false;
}
Aggregations