Use of alluxio.master.journal.CatchupFuture in project alluxio by Alluxio.
From the class BackupWorkerRole, method handleRequestMessage.
/**
 * Handles a backup request message; used on a standby master.
 *
 * <p>The backup tracker is reset and seeded with the requested backup id, progress heartbeats are
 * started, and the pending backup timeout task is cancelled. When that cancellation succeeds, a
 * task is submitted that first catches the journals up to the requested sequences and then takes
 * the backup. Journals are resumed when that task finishes, whether it succeeded or failed.
 *
 * @param requestMsg the backup request message
 * @return an already-completed future that only acts as a receipt for the message
 */
private CompletableFuture<Void> handleRequestMessage(BackupRequestMessage requestMsg) {
  LOG.info("Received backup message: {}", requestMsg);
  Preconditions.checkState(!mBackupTracker.inProgress(), "Backup in progress");

  // Start from a clean tracker that carries the requested backup id and the local host name.
  mBackupTracker.reset();
  BackupStatus initialStatus = new BackupStatus(requestMsg.getBackupId(), BackupState.Initiating);
  mBackupTracker.update(initialStatus);
  int hostResolutionTimeoutMs =
      (int) ServerConfiguration.global().getMs(PropertyKey.NETWORK_HOST_RESOLUTION_TIMEOUT_MS);
  mBackupTracker.updateHostname(NetworkAddressUtils.getLocalHostName(hostResolutionTimeoutMs));

  // Begin reporting backup progress to the leader.
  startHeartbeatThread();

  // The timeout task was created by the suspend message handler. If it can no longer be
  // cancelled, the journal is reported as already resumed and no backup is attempted.
  boolean timeoutCancelled = mBackupTimeoutTask.cancel(true);
  if (!timeoutCancelled) {
    String timeoutMessage = "Journal has been resumed due to a time-out";
    LOG.warn(timeoutMessage);
    mBackupTracker.updateError(new BackupException(timeoutMessage));
    return CompletableFuture.completedFuture(null);
  }

  // Task that advances the journals to the target sequences and then takes the backup.
  Runnable backupTask = () -> {
    // Mark state as transitioning while the journals are being advanced.
    mBackupTracker.updateState(BackupState.Transitioning);
    try {
      LOG.info("Initiating catching up of journals to consistent sequences before starting backup. {}", requestMsg.getJournalSequences());
      CatchupFuture journalCatchup = mJournalSystem.catchup(requestMsg.getJournalSequences());
      // Wait for the catch-up on a separate async task so the wait can be bounded by a timeout.
      CompletableFuture<Void> catchupWaiter =
          CompletableFuture.runAsync(() -> journalCatchup.waitTermination());
      catchupWaiter.get(BACKUP_ABORT_AFTER_TRANSITION_TIMEOUT_MS, TimeUnit.MILLISECONDS);

      LOG.info("Journal transition completed. Taking a backup.");
      mBackupTracker.updateState(BackupState.Running);
      AlluxioURI backupLocation =
          takeBackup(requestMsg.getBackupRequest(), mBackupTracker.getEntryCounter());
      mBackupTracker.updateBackupUri(backupLocation);
      mBackupTracker.updateState(BackupState.Completed);

      // Give the heartbeats a chance to finish; a failure here is only logged.
      try {
        mBackupProgressFuture.get();
      } catch (Exception heartbeatFailure) {
        LOG.warn("Failed to wait for backup heartbeat completion. ", heartbeatFailure);
      }
    } catch (InterruptedException interrupted) {
      LOG.error("Backup interrupted at worker", interrupted);
      mBackupTracker.updateError(new BackupException("Backup interrupted at worker", interrupted));
    } catch (Exception failure) {
      LOG.error("Backup failed at worker", failure);
      String failureMessage = String.format("Backup failed at worker: %s", failure.getMessage());
      mBackupTracker.updateError(new BackupException(failureMessage, failure));
    } finally {
      enforceResumeJournals();
    }
  };
  mBackupFuture = mExecutorService.submit(backupTask);

  return CompletableFuture.completedFuture(null);
}
Use of alluxio.master.journal.CatchupFuture in project alluxio by Alluxio.
From the class RaftJournalTest, method gainPrimacyAfterCatchup.
/**
 * Verifies that a suspended follower which caught up to a target sequence applies the remaining
 * entries and leaves suspended mode once it is promoted to primary.
 */
@Test
public void gainPrimacyAfterCatchup() throws Exception {
  // Register a master on the follower that counts every journal entry applied to it.
  CountingDummyFileSystemMaster applyCounter = new CountingDummyFileSystemMaster();
  mFollowerJournalSystem.createJournal(applyCounter);

  // Suspend the follower journal system.
  mFollowerJournalSystem.suspend(null);

  // Request a catch-up of the follower to target index 5.
  final long targetSequence = 5;
  Map<String, Long> targetSequences = new HashMap<>();
  targetSequences.put("FileSystemMaster", targetSequence);
  CatchupFuture pendingCatchup = mFollowerJournalSystem.catchup(targetSequences);

  // Write entries through the leader journal context; they get replicated to the follower.
  final int totalEntries = 10;
  try (JournalContext leaderContext =
      mLeaderJournalSystem.createJournal(new NoopMaster()).createJournalContext()) {
    for (int id = 0; id < totalEntries; id++) {
      File.InodeLastModificationTimeEntry modificationTime =
          File.InodeLastModificationTimeEntry.newBuilder().setId(id).build();
      leaderContext.append(alluxio.proto.journal.Journal.JournalEntry.newBuilder()
          .setInodeLastModificationTime(modificationTime).build());
    }
  }

  // Once the catch-up terminates, entries 0..targetSequence must have been applied.
  pendingCatchup.waitTermination();
  final long appliedAfterCatchup = targetSequence + 1;
  Assert.assertEquals(appliedAfterCatchup, applyCounter.getApplyCount());

  // Promote the follower and check that it applies everything that was written.
  promoteFollower();
  CommonUtils.waitFor("full state acquired after resume",
      () -> applyCounter.getApplyCount() == totalEntries, mWaitOptions);

  // Becoming primary must have taken the follower out of suspended mode.
  Assert.assertFalse(mFollowerJournalSystem.isSuspended());
}
Use of alluxio.master.journal.CatchupFuture in project alluxio by Alluxio.
From the class BufferedJournalApplier, method catchup.
/**
 * Initiates catching up of the applier to a target sequence.
 * This method leaves the applier in suspended state.
 *
 * <p>If the applier is already at the target sequence, a completed future is returned and no
 * catch-up thread is started.
 *
 * @param sequence target sequence; must be non-negative and not behind the last applied sequence
 * @return the future to track when applier reaches the target sequence
 * @throws IllegalStateException if the applier is not suspended, a resume or another catch-up
 *         task is in progress, or the requested sequence is negative or in the past
 */
public CatchupFuture catchup(long sequence) {
  // All state checks and the thread hand-off happen while holding the state lock.
  try (LockResource stateLock = new LockResource(mStateLock)) {
    Preconditions.checkState(mSuspended, "Not suspended");
    Preconditions.checkState(!mResumeInProgress, "Resume in progress");
    Preconditions.checkState(mCatchupThread == null || !mCatchupThread.isAlive(),
        "Catch-up task in progress.");
    // Preconditions message templates only substitute "%s". Other specifiers such as "%d" are
    // left verbatim and the arguments get appended in brackets, so "%s" is used here.
    Preconditions.checkState(sequence >= 0, "Invalid negative sequence: %s", sequence);
    Preconditions.checkState(mLastAppliedSequence <= sequence,
        "Can't catchup to past. Current: %s, Requested: %s", mLastAppliedSequence, sequence);
    LOG.info("Catching up state machine to sequence: {}", sequence);
    // Complete the request if already at target sequence.
    if (mLastAppliedSequence == sequence) {
      return CatchupFuture.completed();
    }
    // Create an async task for catching up to target sequence.
    mCatchupThread = new RaftJournalCatchupThread(sequence);
    mCatchupThread.start();
    return new CatchupFuture(mCatchupThread);
  }
}
Use of alluxio.master.journal.CatchupFuture in project alluxio by Alluxio.
From the class RaftJournalTest, method catchUpInSteps.
// The Raft journal learns the leader's state in chunks, so a catch-up must cope with having
// seen only part of the entries when it is requested.
@Test
public void catchUpInSteps() throws Exception {
  // Register a master on the follower that counts every journal entry applied to it.
  CountingDummyFileSystemMaster applyCounter = new CountingDummyFileSystemMaster();
  mFollowerJournalSystem.createJournal(applyCounter);

  // Suspend the follower journal system.
  mFollowerJournalSystem.suspend(null);

  final int entryBatchCount = 5;
  // Two batches are written in total; the catch-up targets the last index of the second one.
  final long targetSequence = (long) (entryBatchCount * 2) - 1;
  final int expectedApplyCount = entryBatchCount * 2;

  // First batch of entries, written before the catch-up is requested.
  try (JournalContext leaderContext =
      mLeaderJournalSystem.createJournal(new NoopMaster()).createJournalContext()) {
    for (int id = 0; id < entryBatchCount; id++) {
      File.InodeLastModificationTimeEntry modificationTime =
          File.InodeLastModificationTimeEntry.newBuilder().setId(id).build();
      leaderContext.append(alluxio.proto.journal.Journal.JournalEntry.newBuilder()
          .setInodeLastModificationTime(modificationTime).build());
    }
  }

  // Request a catch-up of the follower to target index (entryBatchCount * 2) - 1.
  Map<String, Long> targetSequences = new HashMap<>();
  targetSequences.put("FileSystemMaster", targetSequence);
  CatchupFuture pendingCatchup = mFollowerJournalSystem.catchup(targetSequences);

  // Second batch of entries, written while the catch-up is pending.
  try (JournalContext leaderContext =
      mLeaderJournalSystem.createJournal(new NoopMaster()).createJournalContext()) {
    for (int id = 0; id < entryBatchCount; id++) {
      File.InodeLastModificationTimeEntry modificationTime =
          File.InodeLastModificationTimeEntry.newBuilder().setId(id).build();
      leaderContext.append(alluxio.proto.journal.Journal.JournalEntry.newBuilder()
          .setInodeLastModificationTime(modificationTime).build());
    }
  }

  // After the catch-up terminates, both batches must have been applied.
  pendingCatchup.waitTermination();
  Assert.assertEquals(expectedApplyCount, applyCounter.getApplyCount());

  // Requesting the sequence that is already met must not apply anything further.
  mFollowerJournalSystem.catchup(targetSequences);
  Assert.assertEquals(expectedApplyCount, applyCounter.getApplyCount());
}
Use of alluxio.master.journal.CatchupFuture in project alluxio by Alluxio.
From the class RaftJournalTest, method suspendCatchupResume.
/**
 * Walks a follower journal through suspend, catch-up to a target sequence and resume, checking
 * the number of applied entries after each step.
 */
@Test
public void suspendCatchupResume() throws Exception {
  // Register a master on the follower that counts every journal entry applied to it.
  CountingDummyFileSystemMaster applyCounter = new CountingDummyFileSystemMaster();
  mFollowerJournalSystem.createJournal(applyCounter);

  // Suspend the follower journal system.
  mFollowerJournalSystem.suspend(null);

  // Suspending an already suspended journal is expected to be rejected with an exception.
  boolean secondSuspendRejected = false;
  try {
    mFollowerJournalSystem.suspend(null);
  } catch (Exception expected) {
    secondSuspendRejected = true;
  }
  if (!secondSuspendRejected) {
    Assert.fail("Suspend succeeded for already suspended journal.");
  }

  // Request a catch-up of the follower to target index 5.
  final long targetSequence = 5;
  Map<String, Long> targetSequences = new HashMap<>();
  targetSequences.put("FileSystemMaster", targetSequence);
  CatchupFuture pendingCatchup = mFollowerJournalSystem.catchup(targetSequences);

  // Write entries through the leader journal context; they get replicated to the follower.
  final int totalEntries = 10;
  try (JournalContext leaderContext =
      mLeaderJournalSystem.createJournal(new NoopMaster()).createJournalContext()) {
    for (int id = 0; id < totalEntries; id++) {
      File.InodeLastModificationTimeEntry modificationTime =
          File.InodeLastModificationTimeEntry.newBuilder().setId(id).build();
      leaderContext.append(alluxio.proto.journal.Journal.JournalEntry.newBuilder()
          .setInodeLastModificationTime(modificationTime).build());
    }
  }

  // Once the catch-up terminates, entries 0..targetSequence must have been applied.
  pendingCatchup.waitTermination();
  final long appliedAfterCatchup = targetSequence + 1;
  Assert.assertEquals(appliedAfterCatchup, applyCounter.getApplyCount());

  // Sleep for the max election timeout and verify the follower master state has not moved.
  long electionTimeoutMs =
      ServerConfiguration.getMs(PropertyKey.MASTER_EMBEDDED_JOURNAL_MAX_ELECTION_TIMEOUT);
  Thread.sleep(electionTimeoutMs);
  Assert.assertEquals(appliedAfterCatchup, applyCounter.getApplyCount());

  // Resume the journal and wait until the follower master has applied every written entry.
  mFollowerJournalSystem.resume();
  CommonUtils.waitFor("full state acquired",
      () -> applyCounter.getApplyCount() == totalEntries, mWaitOptions);

  // Write one more entry and validate that it is replicated to the follower as well.
  try (JournalContext leaderContext =
      mLeaderJournalSystem.createJournal(new NoopMaster()).createJournalContext()) {
    File.InodeLastModificationTimeEntry modificationTime =
        File.InodeLastModificationTimeEntry.newBuilder().setId(totalEntries).build();
    leaderContext.append(alluxio.proto.journal.Journal.JournalEntry.newBuilder()
        .setInodeLastModificationTime(modificationTime).build());
  }
  CommonUtils.waitFor("full state acquired after resume",
      () -> applyCounter.getApplyCount() == totalEntries + 1, mWaitOptions);
}
Aggregations