Use of alluxio.testutils.AlluxioOperationThread in project alluxio by Alluxio.
From the class ZookeeperFailureIntegrationTest, method zkFailure.
/**
 * Starts Alluxio in HA mode, kills Zookeeper, waits for Alluxio operations to fail, then restarts
 * Zookeeper. Alluxio should recover when Zookeeper is restarted.
 *
 * <p>A background {@link AlluxioOperationThread} is used as the probe: its success counter and
 * latest failure are polled to detect when operations stop and resume working. The thread is
 * always interrupted before this method exits, including when a wait or assertion fails, so a
 * failed run does not leave it running.
 *
 * @throws Exception if any cluster operation, wait or assertion in the scenario fails
 */
@Test
public void zkFailure() throws Exception {
  mCluster = MultiProcessCluster.newBuilder(PortCoordination.ZOOKEEPER_FAILURE)
      .setClusterName("ZookeeperFailure")
      .setNumMasters(2)
      .setNumWorkers(1)
      .addProperty(PropertyKey.MASTER_JOURNAL_TYPE, JournalType.UFS.toString())
      .build();
  mCluster.start();

  AlluxioOperationThread thread = new AlluxioOperationThread(mCluster.getFileSystemClient());
  thread.start();
  long zkStartTime;
  try {
    CommonUtils.waitFor("a successful operation to be performed", () -> thread.successes() > 0);
    mCluster.stopZk();
    long zkStopTime = System.currentTimeMillis();
    // Wait until 3 different failures are encountered on the thread.
    // PS: First failures could be related to worker capacity depending on process shutdown order,
    // thus still leaving RPC server reachable.
    AtomicInteger failureCounter = new AtomicInteger(3);
    AtomicReference<Throwable> lastFailure = new AtomicReference<>(null);
    // Each poll stores the thread's latest failure and compares (by identity) the previously
    // stored one with the one just stored; the counter is decremented only when they differ.
    // getAndAdd returns the value from before the addition, so the condition first holds on the
    // poll that follows the third decrement.
    CommonUtils.waitFor("operations to start failing", () -> failureCounter.getAndAdd((lastFailure.getAndSet(thread.getLatestFailure()) != lastFailure.get()) ? -1 : 0) <= 0);
    assertFalse(rpcServiceAvailable());
    LOG.info("First operation failed {}ms after stopping the Zookeeper cluster", System.currentTimeMillis() - zkStopTime);
    // Snapshot the success count so that recovery is detected by a strictly newer success.
    final long successes = thread.successes();
    mCluster.restartZk();
    zkStartTime = System.currentTimeMillis();
    CommonUtils.waitFor("another successful operation to be performed", () -> thread.successes() > successes);
  } finally {
    // Stop the background thread on both the success and the failure path.
    thread.interrupt();
  }
  thread.join();
  LOG.info("Recovered after {}ms", System.currentTimeMillis() - zkStartTime);
  mCluster.notifySuccess();
}
Use of alluxio.testutils.AlluxioOperationThread in project alluxio by Alluxio.
From the class JournalBackupIntegrationTest, method backupRestoreTest.
/**
 * Takes two journal backups while background operation threads are running, one after creating
 * {@code /dir1} and one after also creating {@code /dir2}, then checks which directories exist
 * after the masters are restarted from each backup.
 *
 * @param testFailover whether to additionally kill the primary master at the end and re-check
 *        the directory state
 * @throws Exception if any cluster, client or file system operation fails
 */
private void backupRestoreTest(boolean testFailover) throws Exception {
  File backupDir = AlluxioTestDirectory.createTemporaryDirectory("backups");
  mCluster.start();

  // Keep background operation threads running while the journal backups and restores
  // are happening.
  List<Thread> backgroundOps = new ArrayList<>();
  for (int started = 0; started < 10; started++) {
    AlluxioOperationThread opThread = new AlluxioOperationThread(mCluster.getFileSystemClient());
    opThread.start();
    backgroundOps.add(opThread);
  }

  try {
    FileSystem fileSystem = mCluster.getFileSystemClient();
    MetaMasterClient metaMasterClient = getMetaClient(mCluster);

    // Both directories are created with the same options and both backups use the same request.
    CreateDirectoryPOptions mustCache =
        CreateDirectoryPOptions.newBuilder().setWriteType(WritePType.MUST_CACHE).build();
    BackupPRequest backupRequest = BackupPRequest.newBuilder()
        .setTargetDirectory(backupDir.getAbsolutePath())
        .setOptions(BackupPOptions.newBuilder().setLocalFileSystem(false))
        .build();

    AlluxioURI firstDir = new AlluxioURI("/dir1");
    fileSystem.createDirectory(firstDir, mustCache);
    AlluxioURI firstBackup = metaMasterClient.backup(backupRequest).getBackupUri();

    AlluxioURI secondDir = new AlluxioURI("/dir2");
    fileSystem.createDirectory(secondDir, mustCache);
    AlluxioURI secondBackup = metaMasterClient.backup(backupRequest).getBackupUri();

    // The later backup is expected to contain both directories.
    restartMastersFromBackup(secondBackup);
    assertTrue(fileSystem.exists(firstDir));
    assertTrue(fileSystem.exists(secondDir));

    // The earlier backup is expected to contain only the first directory.
    restartMastersFromBackup(firstBackup);
    assertTrue(fileSystem.exists(firstDir));
    assertFalse(fileSystem.exists(secondDir));

    // Restart normally and make sure we remember the state from backup 1.
    mCluster.stopMasters();
    mCluster.startMasters();
    assertTrue(fileSystem.exists(firstDir));
    assertFalse(fileSystem.exists(secondDir));

    if (testFailover) {
      // Verify that failover works correctly.
      mCluster.waitForAndKillPrimaryMaster(30 * Constants.SECOND_MS);
      assertTrue(fileSystem.exists(firstDir));
      assertFalse(fileSystem.exists(secondDir));
    }
    mCluster.notifySuccess();
  } finally {
    // Stop the background threads whether or not the checks passed.
    for (Thread op : backgroundOps) {
      op.interrupt();
    }
  }
}
Aggregations