Search in sources :

Example 1 with AlluxioOperationThread

use of alluxio.testutils.AlluxioOperationThread in project alluxio by Alluxio.

the class ZookeeperFailureIntegrationTest method zkFailure.

/**
 * Starts Alluxio in HA mode, kills Zookeeper, waits for Alluxio operations to fail, then
 * restarts Zookeeper. Alluxio should recover once Zookeeper is running again.
 *
 * <p>A background {@link AlluxioOperationThread} issues operations for the whole test; it is
 * always interrupted before this method returns, whether the test passes or fails.
 *
 * @throws Exception propagated from the cluster, wait, or thread calls made by the test
 */
@Test
public void zkFailure() throws Exception {
    mCluster = MultiProcessCluster.newBuilder(PortCoordination.ZOOKEEPER_FAILURE)
            .setClusterName("ZookeeperFailure")
            .setNumMasters(2)
            .setNumWorkers(1)
            .addProperty(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.UFS.toString())
            .build();
    mCluster.start();
    AlluxioOperationThread thread = new AlluxioOperationThread(mCluster.getFileSystemClient());
    thread.start();
    try {
        CommonUtils.waitFor("a successful operation to be performed", () -> thread.successes() > 0);
        mCluster.stopZk();
        long zkStopTime = System.currentTimeMillis();
        // Wait until 3 different failures are encountered on the thread.
        // PS: First failures could be related to worker capacity depending on process shutdown
        // order, thus still leaving RPC server reachable.
        AtomicInteger failureCounter = new AtomicInteger(3);
        AtomicReference<Throwable> lastFailure = new AtomicReference<>(null);
        CommonUtils.waitFor("operations to start failing", () -> {
            // Record the thread's latest failure and check (by identity) whether it differs
            // from the one seen on the previous poll.
            Throwable previous = lastFailure.getAndSet(thread.getLatestFailure());
            boolean sawNewFailure = previous != lastFailure.get();
            // getAndAdd returns the value *before* the update, so the condition holds on the
            // first poll after the counter has been decremented to zero.
            return failureCounter.getAndAdd(sawNewFailure ? -1 : 0) <= 0;
        });
        assertFalse(rpcServiceAvailable());
        LOG.info("First operation failed {}ms after stopping the Zookeeper cluster", System.currentTimeMillis() - zkStopTime);
        final long successes = thread.successes();
        mCluster.restartZk();
        long zkStartTime = System.currentTimeMillis();
        CommonUtils.waitFor("another successful operation to be performed", () -> thread.successes() > successes);
        thread.interrupt();
        thread.join();
        LOG.info("Recovered after {}ms", System.currentTimeMillis() - zkStartTime);
        mCluster.notifySuccess();
    } finally {
        // Stop the background thread even when a wait or assertion above fails. On the success
        // path the thread has already been interrupted and joined, so this has no effect.
        thread.interrupt();
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AlluxioOperationThread(alluxio.testutils.AlluxioOperationThread) AtomicReference(java.util.concurrent.atomic.AtomicReference) BaseIntegrationTest(alluxio.testutils.BaseIntegrationTest) Test(org.junit.Test)

Example 2 with AlluxioOperationThread

use of alluxio.testutils.AlluxioOperationThread in project alluxio by Alluxio.

the class JournalBackupIntegrationTest method backupRestoreTest.

/**
 * Takes two journal backups (one after creating {@code /dir1}, one after also creating
 * {@code /dir2}), restarts the masters from each backup in turn, and checks which directories
 * exist afterwards. Background operation threads run for the duration of the test.
 *
 * @param testFailover whether to additionally kill the primary master and re-check the state
 * @throws Exception propagated from the cluster, client, or file system calls made by the test
 */
private void backupRestoreTest(boolean testFailover) throws Exception {
    File backupDir = AlluxioTestDirectory.createTemporaryDirectory("backups");
    mCluster.start();
    // Keep background operation threads running while the backups and restores are happening.
    List<Thread> workers = new ArrayList<>();
    for (int n = 0; n < 10; n++) {
        AlluxioOperationThread worker = new AlluxioOperationThread(mCluster.getFileSystemClient());
        worker.start();
        workers.add(worker);
    }
    try {
        FileSystem fileSystem = mCluster.getFileSystemClient();
        MetaMasterClient metaMasterClient = getMetaClient(mCluster);

        // State captured by the first backup: only /dir1 exists.
        AlluxioURI firstDir = new AlluxioURI("/dir1");
        fileSystem.createDirectory(firstDir,
                CreateDirectoryPOptions.newBuilder().setWriteType(WritePType.MUST_CACHE).build());
        BackupPRequest firstRequest = BackupPRequest.newBuilder()
                .setTargetDirectory(backupDir.getAbsolutePath())
                .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false))
                .build();
        AlluxioURI firstBackup = metaMasterClient.backup(firstRequest).getBackupUri();

        // State captured by the second backup: both /dir1 and /dir2 exist.
        AlluxioURI secondDir = new AlluxioURI("/dir2");
        fileSystem.createDirectory(secondDir,
                CreateDirectoryPOptions.newBuilder().setWriteType(WritePType.MUST_CACHE).build());
        BackupPRequest secondRequest = BackupPRequest.newBuilder()
                .setTargetDirectory(backupDir.getAbsolutePath())
                .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false))
                .build();
        AlluxioURI secondBackup = metaMasterClient.backup(secondRequest).getBackupUri();

        // Restoring the later backup should bring back both directories.
        restartMastersFromBackup(secondBackup);
        assertTrue(fileSystem.exists(firstDir));
        assertTrue(fileSystem.exists(secondDir));

        // Restoring the earlier backup should leave only the first directory.
        restartMastersFromBackup(firstBackup);
        assertTrue(fileSystem.exists(firstDir));
        assertFalse(fileSystem.exists(secondDir));

        // Restart normally and make sure we remember the state from backup 1.
        mCluster.stopMasters();
        mCluster.startMasters();
        assertTrue(fileSystem.exists(firstDir));
        assertFalse(fileSystem.exists(secondDir));

        if (testFailover) {
            // Verify that failover works correctly.
            mCluster.waitForAndKillPrimaryMaster(30 * Constants.SECOND_MS);
            assertTrue(fileSystem.exists(firstDir));
            assertFalse(fileSystem.exists(secondDir));
        }
        mCluster.notifySuccess();
    } finally {
        // Signal every background thread to stop, regardless of the test outcome.
        for (Thread worker : workers) {
            worker.interrupt();
        }
    }
}
Also used : RetryHandlingMetaMasterClient(alluxio.client.meta.RetryHandlingMetaMasterClient) MetaMasterClient(alluxio.client.meta.MetaMasterClient) FileSystem(alluxio.client.file.FileSystem) ArrayList(java.util.ArrayList) AlluxioOperationThread(alluxio.testutils.AlluxioOperationThread) File(java.io.File) AlluxioOperationThread(alluxio.testutils.AlluxioOperationThread) AlluxioURI(alluxio.AlluxioURI)

Aggregations

AlluxioOperationThread (alluxio.testutils.AlluxioOperationThread)2 AlluxioURI (alluxio.AlluxioURI)1 FileSystem (alluxio.client.file.FileSystem)1 MetaMasterClient (alluxio.client.meta.MetaMasterClient)1 RetryHandlingMetaMasterClient (alluxio.client.meta.RetryHandlingMetaMasterClient)1 BaseIntegrationTest (alluxio.testutils.BaseIntegrationTest)1 File (java.io.File)1 ArrayList (java.util.ArrayList)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Test (org.junit.Test)1