use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestDomainSocketWatcher method testDeliverNotifications.
/**
* Test that we can get notifications out a DomainSocketWatcher.
*/
@Test(timeout = 180000)
public void testDeliverNotifications() throws Exception {
DomainSocketWatcher watcher = newDomainSocketWatcher(10000000);
DomainSocket[] pair = DomainSocket.socketpair();
final CountDownLatch latch = new CountDownLatch(1);
watcher.add(pair[1], new DomainSocketWatcher.Handler() {
@Override
public boolean handle(DomainSocket sock) {
latch.countDown();
return true;
}
});
pair[0].close();
latch.await();
watcher.close();
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestDataNodeHotSwapVolumes method testAddVolumesConcurrently.
@Test(timeout = 180000)
public void testAddVolumesConcurrently() throws IOException, InterruptedException, TimeoutException, ReconfigurationException {
startDFSCluster(1, 1, 10);
int numVolumes = cluster.getStoragesPerDatanode();
String blockPoolId = cluster.getNamesystem().getBlockPoolId();
Path testFile = new Path("/test");
// Each volume has 2 blocks
int initialBlockCount = numVolumes * 2;
createFile(testFile, initialBlockCount);
DataNode dn = cluster.getDataNodes().get(0);
final FsDatasetSpi<? extends FsVolumeSpi> data = dn.data;
dn.data = Mockito.spy(data);
final int newVolumeCount = 40;
List<Thread> addVolumeDelayedThreads = new ArrayList<>();
AtomicBoolean addVolumeError = new AtomicBoolean(false);
AtomicBoolean listStorageError = new AtomicBoolean(false);
CountDownLatch addVolumeCompletionLatch = new CountDownLatch(newVolumeCount);
// Thread to list all storage available at DataNode,
// when the volumes are being added in parallel.
final Thread listStorageThread = new Thread(new Runnable() {
@Override
public void run() {
while (addVolumeCompletionLatch.getCount() != newVolumeCount) {
int i = 0;
while (i++ < 1000) {
try {
dn.getStorage().listStorageDirectories();
} catch (Exception e) {
listStorageError.set(true);
LOG.error("Error listing storage: " + e);
}
}
}
}
});
listStorageThread.start();
// FsDatasetImpl addVolume mocked to perform the operation asynchronously
doAnswer(new Answer<Object>() {
@Override
public Object answer(InvocationOnMock invocationOnMock) throws Throwable {
final Random r = new Random();
Thread addVolThread = new Thread(new Runnable() {
@Override
public void run() {
try {
r.setSeed(Time.now());
// start after an initial delay.
if (r.nextInt(10) > 4) {
int s = r.nextInt(10) + 1;
Thread.sleep(s * 100);
}
invocationOnMock.callRealMethod();
} catch (Throwable throwable) {
addVolumeError.set(true);
LOG.error("Error adding volume: " + throwable);
} finally {
addVolumeCompletionLatch.countDown();
}
}
});
addVolumeDelayedThreads.add(addVolThread);
addVolThread.start();
return null;
}
}).when(dn.data).addVolume(any(StorageLocation.class), any(List.class));
addVolumes(newVolumeCount, addVolumeCompletionLatch);
numVolumes += newVolumeCount;
// Wait for all addVolume and listStorage Threads to complete
for (Thread t : addVolumeDelayedThreads) {
t.join();
}
listStorageThread.join();
// Verify errors while adding volumes and listing storage directories
Assert.assertEquals("Error adding volumes!", false, addVolumeError.get());
Assert.assertEquals("Error listing storage!", false, listStorageError.get());
int additionalBlockCount = 9;
int totalBlockCount = initialBlockCount + additionalBlockCount;
// Continue to write the same file, thus the new volumes will have blocks.
DFSTestUtil.appendFile(cluster.getFileSystem(), testFile, BLOCK_SIZE * additionalBlockCount);
verifyFileLength(cluster.getFileSystem(), testFile, totalBlockCount);
List<Map<DatanodeStorage, BlockListAsLongs>> blockReports = cluster.getAllBlockReports(blockPoolId);
assertEquals(1, blockReports.size());
assertEquals(numVolumes, blockReports.get(0).size());
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestDataNodeLifeline method testSendLifelineIfHeartbeatBlocked.
@Test
public void testSendLifelineIfHeartbeatBlocked() throws Exception {
// Run the test for the duration of sending 10 lifeline RPC messages.
int numLifelines = 10;
CountDownLatch lifelinesSent = new CountDownLatch(numLifelines);
// Intercept heartbeat to inject an artificial delay, until all expected
// lifeline RPC messages have been sent.
doAnswer(new LatchAwaitingAnswer<HeartbeatResponse>(lifelinesSent)).when(namenode).sendHeartbeat(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class), anyBoolean(), any(SlowPeerReports.class));
// Intercept lifeline to trigger latch count-down on each call.
doAnswer(new LatchCountingAnswer<Void>(lifelinesSent)).when(lifelineNamenode).sendLifeline(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class));
// that the DataNode always stays alive, and never goes stale or dead.
while (!lifelinesSent.await(1, SECONDS)) {
assertEquals("Expect DataNode to be kept alive by lifeline.", 1, namesystem.getNumLiveDataNodes());
assertEquals("Expect DataNode not marked dead due to lifeline.", 0, namesystem.getNumDeadDataNodes());
assertEquals("Expect DataNode not marked stale due to lifeline.", 0, namesystem.getNumStaleDataNodes());
}
// Verify that we did in fact call the lifeline RPC.
verify(lifelineNamenode, atLeastOnce()).sendLifeline(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class));
// Also verify lifeline call through metrics. We expect at least
// numLifelines, guaranteed by waiting on the latch. There is a small
// possibility of extra lifeline calls depending on timing, so we allow
// slack in the assertion.
assertTrue("Expect metrics to count at least " + numLifelines + " calls.", getLongCounter("LifelinesNumOps", getMetrics(metrics.name())) >= numLifelines);
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestDataNodeLifeline method testNoLifelineSentIfHeartbeatsOnTime.
@Test
public void testNoLifelineSentIfHeartbeatsOnTime() throws Exception {
// Run the test for the duration of sending 10 heartbeat RPC messages.
int numHeartbeats = 10;
CountDownLatch heartbeatsSent = new CountDownLatch(numHeartbeats);
// Intercept heartbeat to trigger latch count-down on each call.
doAnswer(new LatchCountingAnswer<HeartbeatResponse>(heartbeatsSent)).when(namenode).sendHeartbeat(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class), anyBoolean(), any(SlowPeerReports.class));
// stays alive, and never goes stale or dead.
while (!heartbeatsSent.await(1, SECONDS)) {
assertEquals("Expect DataNode to be kept alive by lifeline.", 1, namesystem.getNumLiveDataNodes());
assertEquals("Expect DataNode not marked dead due to lifeline.", 0, namesystem.getNumDeadDataNodes());
assertEquals("Expect DataNode not marked stale due to lifeline.", 0, namesystem.getNumStaleDataNodes());
}
// Verify that we did not call the lifeline RPC.
verify(lifelineNamenode, never()).sendLifeline(any(DatanodeRegistration.class), any(StorageReport[].class), anyLong(), anyLong(), anyInt(), anyInt(), anyInt(), any(VolumeFailureSummary.class));
// Also verify no lifeline calls through metrics.
assertEquals("Expect metrics to count no lifeline calls.", 0, getLongCounter("LifelinesNumOps", getMetrics(metrics.name())));
}
use of java.util.concurrent.CountDownLatch in project hadoop by apache.
the class TestKill method testKillJob.
@Test
public void testKillJob() throws Exception {
final CountDownLatch latch = new CountDownLatch(1);
MRApp app = new BlockingMRApp(1, 0, latch);
//this will start the job but job won't complete as task is
//blocked
Job job = app.submit(new Configuration());
//wait and vailidate for Job to become RUNNING
app.waitForInternalState((JobImpl) job, JobStateInternal.RUNNING);
//send the kill signal to Job
app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_KILL));
//unblock Task
latch.countDown();
//wait and validate for Job to be KILLED
app.waitForState(job, JobState.KILLED);
// make sure all events are processed. The AM is stopped
// only when all tasks and task attempts have been killed
app.waitForState(Service.STATE.STOPPED);
Map<TaskId, Task> tasks = job.getTasks();
Assert.assertEquals("No of tasks is not correct", 1, tasks.size());
Task task = tasks.values().iterator().next();
Assert.assertEquals("Task state not correct", TaskState.KILLED, task.getReport().getTaskState());
Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts();
Assert.assertEquals("No of attempts is not correct", 1, attempts.size());
Iterator<TaskAttempt> it = attempts.values().iterator();
Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED, it.next().getReport().getTaskAttemptState());
}
Aggregations