Search in sources :

Example 1 with BackupAbortedException

use of alluxio.exception.BackupAbortedException in project alluxio by Alluxio.

In the class JournalBackupIntegrationTest, method backupDelegationFailoverProtocol.

// Tests various protocols and configurations for backup delegation during fail-overs.
// Scenario covered, in order:
//   1. A delegated backup completes on a healthy two-master cluster.
//   2. An async backup is reported as aborted after the follower master is stopped.
//   3. Delegated backup works again once the follower is restarted.
//   4. After the primary is killed, the former follower becomes primary and accepts a backup
//      request that allows the leader to take the backup.
@Test
public void backupDelegationFailoverProtocol() throws Exception {
    mCluster = MultiProcessCluster.newBuilder(PortCoordination.BACKUP_DELEGATION_FAILOVER_PROTOCOL)
            .setClusterName("backupDelegationFailoverProtocol")
            .setNumMasters(2)
            .addProperty(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.UFS.toString())
            .addProperty(PropertyKey.ZOOKEEPER_SESSION_TIMEOUT, "1sec")
            .addProperty(PropertyKey.MASTER_BACKUP_CONNECT_INTERVAL_MIN, "100ms")
            .addProperty(PropertyKey.MASTER_BACKUP_CONNECT_INTERVAL_MAX, "100ms")
            .addProperty(PropertyKey.MASTER_BACKUP_DELEGATION_ENABLED, "true")
            .addProperty(PropertyKey.MASTER_BACKUP_ABANDON_TIMEOUT, "3sec")
            .build();
    File backups = AlluxioTestDirectory.createTemporaryDirectory("backups");
    mCluster.start();

    // The same two request shapes are used repeatedly below; build each of them once.
    String backupDir = backups.getAbsolutePath();
    BackupPRequest delegatedRequest = BackupPRequest.newBuilder()
            .setTargetDirectory(backupDir)
            .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false))
            .build();
    BackupPRequest asyncRequest = BackupPRequest.newBuilder()
            .setTargetDirectory(backupDir)
            .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false).setRunAsync(true))
            .build();

    // Validate backup works with delegation.
    waitForBackup(delegatedRequest);
    // Find standby master index. With exactly two masters, the follower is "the other one".
    int primaryIdx = mCluster.getPrimaryMasterIndex(GET_PRIMARY_INDEX_TIMEOUT_MS);
    int followerIdx = (primaryIdx + 1) % 2;
    // Schedule async backup.
    UUID backupId = mCluster.getMetaMasterClient().backup(asyncRequest).getBackupId();
    // Kill follower immediately before it sends the next heartbeat to leader.
    mCluster.stopMaster(followerIdx);
    // Wait until backup is abandoned.
    CommonUtils.waitForResult("Backup abandoned.", () -> {
        try {
            return mCluster.getMetaMasterClient().getBackupStatus(backupId);
        } catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new RuntimeException(
                    String.format("Unexpected error while getting backup status: %s", e), e);
        }
    }, (backupStatus) -> backupStatus.getError() instanceof BackupAbortedException);
    // Restart follower to restore HA.
    mCluster.startMaster(followerIdx);
    // Validate delegated backup works again.
    waitForBackup(delegatedRequest);
    // Schedule async backup. Its status is intentionally not tracked.
    mCluster.getMetaMasterClient().backup(asyncRequest);
    // Kill leader immediately before it receives the next heartbeat from backup-worker.
    mCluster.waitForAndKillPrimaryMaster(PRIMARY_KILL_TIMEOUT_MS);
    // Wait until follower steps up. (JUnit argument order is expected first, then actual.)
    assertEquals(followerIdx, mCluster.getPrimaryMasterIndex(GET_PRIMARY_INDEX_TIMEOUT_MS));
    // Follower should step-up without problem and accept backup requests.
    mCluster.getMetaMasterClient().backup(BackupPRequest.newBuilder()
            .setTargetDirectory(backupDir)
            .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false).setAllowLeader(true))
            .build());
    mCluster.notifySuccess();
}
Also used : BackupAbortedException(alluxio.exception.BackupAbortedException) UUID(java.util.UUID) File(java.io.File) FailedPreconditionException(alluxio.exception.status.FailedPreconditionException) BackupAbortedException(alluxio.exception.BackupAbortedException) IOException(java.io.IOException) BaseIntegrationTest(alluxio.testutils.BaseIntegrationTest) Test(org.junit.Test)

Example 2 with BackupAbortedException

use of alluxio.exception.BackupAbortedException in project alluxio by Alluxio.

In the class BackupLeaderRole, method activateWorkerConnection.

/**
 * Wires up a newly connected backup-worker: installs the handshake and heartbeat message
 * handlers, registers error and close listeners, and finally records the connection.
 *
 * @param workerConnection the connection to the backup-worker that has just been established
 */
private void activateWorkerConnection(GrpcMessagingConnection workerConnection) {
    LOG.info("Backup-leader connected with backup-worker: {}", workerConnection);

    // Handshake messages: attach the originating connection before delegating.
    workerConnection.handler(BackupHandshakeMessage.class, (handshake) -> {
        handshake.setConnection(workerConnection);
        return handleHandshakeMessage(handshake);
    });

    // Heartbeat messages are forwarded as-is.
    workerConnection.handler(BackupHeartbeatMessage.class, this::handleHeartbeatMessage);

    // Connection errors are only logged.
    workerConnection.onException((failure) -> {
        String failureMessage =
                String.format("Backup-worker connection failed for %s.", workerConnection);
        LOG.warn(failureMessage, failure);
    });

    // On close: forget the connection and, if it was driving the active backup, fail that backup.
    workerConnection.onClose((closedConnection) -> {
        LOG.info("Backup-worker connection closed for {}.", workerConnection);
        // Drop the bookkeeping for the closed connection.
        mBackupWorkerConnections.remove(closedConnection);
        String lostWorkerHostname = mBackupWorkerHostNames.remove(closedConnection);

        boolean closedConnectionDrivesBackup = mBackupTracker.inProgress()
                && mRemoteBackupConnection != null
                && mRemoteBackupConnection.equals(closedConnection);
        if (!closedConnectionDrivesBackup) {
            return;
        }
        // The in-progress backup depended on this connection; mark it as aborted.
        LOG.warn("Abandoning current backup as backup-worker: {} is lost.", lostWorkerHostname);
        mBackupTracker.updateError(new BackupAbortedException("Backup-worker is lost."));
        mRemoteBackupConnection = null;
    });

    // Track the connection; mBackupWorkerHostNames is filled in later by the handshake message.
    mBackupWorkerConnections.add(workerConnection);
}
Also used : BackupAbortedException(alluxio.exception.BackupAbortedException)

Aggregations

BackupAbortedException (alluxio.exception.BackupAbortedException): 2, FailedPreconditionException (alluxio.exception.status.FailedPreconditionException): 1, BaseIntegrationTest (alluxio.testutils.BaseIntegrationTest): 1, File (java.io.File): 1, IOException (java.io.IOException): 1, UUID (java.util.UUID): 1, Test (org.junit.Test): 1