use of alluxio.exception.BackupAbortedException in project alluxio by Alluxio.
the class JournalBackupIntegrationTest method backupDelegationFailoverProtocol.
/**
 * Tests various protocols and configurations for backup delegation during fail-overs.
 *
 * <p>The scenario runs against a two-master cluster with backup delegation enabled and walks
 * through the following steps:
 * <ol>
 *   <li>a delegated backup succeeds,</li>
 *   <li>an async backup is scheduled and the follower is stopped, after which the backup status
 *       is expected to report a {@link BackupAbortedException},</li>
 *   <li>the follower is restarted and a delegated backup succeeds again,</li>
 *   <li>an async backup is scheduled and the primary is killed, after which the former follower
 *       is expected to become primary and accept a backup request that allows the leader.</li>
 * </ol>
 *
 * @throws Exception if any cluster operation or backup request fails unexpectedly
 */
@Test
public void backupDelegationFailoverProtocol() throws Exception {
  mCluster = MultiProcessCluster.newBuilder(PortCoordination.BACKUP_DELEGATION_FAILOVER_PROTOCOL)
      .setClusterName("backupDelegationFailoverProtocol")
      .setNumMasters(2)
      .addProperty(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.UFS.toString())
      .addProperty(PropertyKey.ZOOKEEPER_SESSION_TIMEOUT, "1sec")
      .addProperty(PropertyKey.MASTER_BACKUP_CONNECT_INTERVAL_MIN, "100ms")
      .addProperty(PropertyKey.MASTER_BACKUP_CONNECT_INTERVAL_MAX, "100ms")
      .addProperty(PropertyKey.MASTER_BACKUP_DELEGATION_ENABLED, "true")
      .addProperty(PropertyKey.MASTER_BACKUP_ABANDON_TIMEOUT, "3sec")
      .build();
  File backups = AlluxioTestDirectory.createTemporaryDirectory("backups");
  mCluster.start();

  // The same two requests are issued several times below, so build each of them once.
  BackupPRequest delegatedBackupRequest = BackupPRequest.newBuilder()
      .setTargetDirectory(backups.getAbsolutePath())
      .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false))
      .build();
  BackupPRequest asyncBackupRequest = BackupPRequest.newBuilder()
      .setTargetDirectory(backups.getAbsolutePath())
      .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false).setRunAsync(true))
      .build();

  // Validate backup works with delegation.
  waitForBackup(delegatedBackupRequest);

  // Find standby master index.
  int primaryIdx = mCluster.getPrimaryMasterIndex(GET_PRIMARY_INDEX_TIMEOUT_MS);
  int followerIdx = (primaryIdx + 1) % 2;

  // Schedule async backup.
  UUID backupId = mCluster.getMetaMasterClient().backup(asyncBackupRequest).getBackupId();
  // Kill follower immediately before it sends the next heartbeat to leader.
  mCluster.stopMaster(followerIdx);
  // Wait until backup is abandoned.
  CommonUtils.waitForResult("Backup abandoned.", () -> {
    try {
      return mCluster.getMetaMasterClient().getBackupStatus(backupId);
    } catch (Exception e) {
      // Keep the original exception as the cause so its stack trace is not lost.
      throw new RuntimeException(
          String.format("Unexpected error while getting backup status: %s", e), e);
    }
  }, (backupStatus) -> backupStatus.getError() instanceof BackupAbortedException);

  // Restart follower to restore HA.
  mCluster.startMaster(followerIdx);
  // Validate delegated backup works again.
  waitForBackup(delegatedBackupRequest);

  // Schedule async backup.
  mCluster.getMetaMasterClient().backup(asyncBackupRequest);
  // Kill leader immediately before it receives the next heartbeat from backup-worker.
  mCluster.waitForAndKillPrimaryMaster(PRIMARY_KILL_TIMEOUT_MS);
  // Wait until follower steps up (expected value first, so failure messages read correctly).
  assertEquals(followerIdx, mCluster.getPrimaryMasterIndex(GET_PRIMARY_INDEX_TIMEOUT_MS));

  // Follower should step-up without problem and accept backup requests.
  mCluster.getMetaMasterClient().backup(BackupPRequest.newBuilder()
      .setTargetDirectory(backups.getAbsolutePath())
      .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false).setAllowLeader(true))
      .build());
  mCluster.notifySuccess();
}
use of alluxio.exception.BackupAbortedException in project alluxio by Alluxio.
the class BackupLeaderRole method activateWorkerConnection.
/**
 * Wires up a newly connected backup-worker (follower) connection.
 *
 * <p>Registers the handshake and heartbeat message handlers together with the error and close
 * listeners on the connection, then records the connection in {@code mBackupWorkerConnections}.
 * When the connection closes, it is removed from the tracked connections and host names, and the
 * in-progress backup is failed with a {@link BackupAbortedException} if that backup was being
 * driven through the closed connection.
 *
 * @param workerConnection the backup-worker connection to activate
 */
private void activateWorkerConnection(GrpcMessagingConnection workerConnection) {
  LOG.info("Backup-leader connected with backup-worker: {}", workerConnection);

  // Handshake messages carry their originating connection into the handler.
  workerConnection.handler(BackupHandshakeMessage.class, (handshake) -> {
    handshake.setConnection(workerConnection);
    return handleHandshakeMessage(handshake);
  });

  // Heartbeat messages are forwarded as-is.
  workerConnection.handler(BackupHeartbeatMessage.class, this::handleHeartbeatMessage);

  // Connection errors are only logged.
  workerConnection.onException((failure) ->
      LOG.warn(String.format("Backup-worker connection failed for %s.", workerConnection),
          failure));

  // On close: forget the connection and abandon the backup it was driving, if any.
  workerConnection.onClose((closedConnection) -> {
    LOG.info("Backup-worker connection closed for {}.", workerConnection);
    mBackupWorkerConnections.remove(closedConnection);
    String lostWorkerHost = mBackupWorkerHostNames.remove(closedConnection);

    boolean drivesActiveBackup = mBackupTracker.inProgress()
        && mRemoteBackupConnection != null
        && mRemoteBackupConnection.equals(closedConnection);
    if (!drivesActiveBackup) {
      return;
    }
    LOG.warn("Abandoning current backup as backup-worker: {} is lost.", lostWorkerHost);
    mBackupTracker.updateError(new BackupAbortedException("Backup-worker is lost."));
    mRemoteBackupConnection = null;
  });

  // Track the follower connection; mBackupWorkerHostNames will be updated by handshake message.
  mBackupWorkerConnections.add(workerConnection);
}
Aggregations