Use of java.util.concurrent.CountDownLatch in project hadoop by apache.
The class TestKill, method testKillTaskWaitKillJobAfterTA_DONE.
@Test
public void testKillTaskWaitKillJobAfterTA_DONE() throws Exception {
  CountDownLatch latch = new CountDownLatch(1);
  final Dispatcher dispatcher = new MyAsyncDispatch(latch, TaskAttemptEventType.TA_DONE);
  MRApp app = new MRApp(1, 1, false, this.getClass().getName(), true) {
    @Override
    public Dispatcher createDispatcher() {
      return dispatcher;
    }
  };
  Job job = app.submit(new Configuration());
  JobId jobId = app.getJobId();
  app.waitForState(job, JobState.RUNNING);
  Assert.assertEquals("Num tasks not correct", 2, job.getTasks().size());
  Iterator<Task> it = job.getTasks().values().iterator();
  Task mapTask = it.next();
  Task reduceTask = it.next();
  app.waitForState(mapTask, TaskState.RUNNING);
  app.waitForState(reduceTask, TaskState.RUNNING);
  TaskAttempt mapAttempt = mapTask.getAttempts().values().iterator().next();
  app.waitForState(mapAttempt, TaskAttemptState.RUNNING);
  TaskAttempt reduceAttempt = reduceTask.getAttempts().values().iterator().next();
  app.waitForState(reduceAttempt, TaskAttemptState.RUNNING);
  // The order in the dispatch event queue, from first to last:
  //   TA_DONE
  //   JobEventType.JOB_KILL
  //   TaskAttemptEventType.TA_CONTAINER_COMPLETED (from TA_DONE handling)
  //   TaskEventType.T_KILL (from JobEventType.JOB_KILL handling)
  //   TaskEventType.T_ATTEMPT_SUCCEEDED (from TA_CONTAINER_COMPLETED handling)
  // Finish the map
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(mapAttempt.getID(), TaskAttemptEventType.TA_DONE));
  // Now kill the job
  app.getContext().getEventHandler().handle(
      new JobEvent(jobId, JobEventType.JOB_KILL));
  // Unblock the dispatcher
  latch.countDown();
  app.waitForInternalState((JobImpl) job, JobStateInternal.KILLED);
}
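The MyAsyncDispatch helper that makes this ordering deterministic is referenced but not shown in this excerpt. Below is a minimal sketch of such a latch-gated dispatcher, assuming it extends Hadoop's AsyncDispatcher and stalls the dispatch thread on the first event of the configured type until the test counts the latch down; the field names are illustrative, not necessarily those of the actual test class.

class MyAsyncDispatch extends AsyncDispatcher {
  private final CountDownLatch latch;
  private final TaskAttemptEventType attemptEventTypeToWait;

  MyAsyncDispatch(CountDownLatch latch, TaskAttemptEventType attemptEventTypeToWait) {
    super();
    this.latch = latch;
    this.attemptEventTypeToWait = attemptEventTypeToWait;
  }

  @Override
  protected void dispatch(Event event) {
    if (event instanceof TaskAttemptEvent
        && ((TaskAttemptEvent) event).getType() == attemptEventTypeToWait) {
      try {
        // Park the dispatcher thread here: events handled afterwards
        // (e.g. JOB_KILL) queue up behind this one in a known order.
        latch.await();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    }
    super.dispatch(event);
  }
}

Because dispatching is single-threaded, blocking inside dispatch() freezes the whole queue, which is what lets the test enqueue TA_DONE and JOB_KILL in a controlled order before any of them is processed.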
Use of java.util.concurrent.CountDownLatch in project hadoop by apache.
The class TestKill, method testKillTaskAttempt.
@Test
public void testKillTaskAttempt() throws Exception {
  final CountDownLatch latch = new CountDownLatch(1);
  MRApp app = new BlockingMRApp(2, 0, latch);
  // This will start the job, but the job won't complete while a task is blocked
  Job job = app.submit(new Configuration());
  // Wait for and validate the job becoming RUNNING
  app.waitForState(job, JobState.RUNNING);
  Map<TaskId, Task> tasks = job.getTasks();
  Assert.assertEquals("No of tasks is not correct", 2, tasks.size());
  Iterator<Task> it = tasks.values().iterator();
  Task task1 = it.next();
  Task task2 = it.next();
  // Wait for the tasks to be scheduled
  app.waitForState(task1, TaskState.SCHEDULED);
  app.waitForState(task2, TaskState.SCHEDULED);
  // Send the kill signal to the first task's attempt
  TaskAttempt attempt = task1.getAttempts().values().iterator().next();
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_KILL));
  // Unblock
  latch.countDown();
  // Wait for and validate the job becoming SUCCEEDED;
  // the job still succeeds despite the killed attempt
  app.waitForState(job, JobState.SUCCEEDED);
  // The first task has two attempts: the 1st is killed, the 2nd succeeds,
  // so both tasks and the job succeed
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED,
      task1.getReport().getTaskState());
  Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED,
      task2.getReport().getTaskState());
  Map<TaskAttemptId, TaskAttempt> attempts = task1.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 2, attempts.size());
  Iterator<TaskAttempt> iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.KILLED,
      iter.next().getReport().getTaskAttemptState());
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED,
      iter.next().getReport().getTaskAttemptState());
  attempts = task2.getAttempts();
  Assert.assertEquals("No of attempts is not correct", 1, attempts.size());
  iter = attempts.values().iterator();
  Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED,
      iter.next().getReport().getTaskAttemptState());
}
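BlockingMRApp is likewise not shown here. A plausible minimal sketch, assuming it extends MRApp and parks only the first task's first attempt on the latch while letting every other attempt complete immediately; the constructor arguments and hook override are inferred from how the test uses the class.

class BlockingMRApp extends MRApp {
  private final CountDownLatch latch;

  BlockingMRApp(int maps, int reduces, CountDownLatch latch) {
    super(maps, reduces, true, "testKill", true);
    this.latch = latch;
  }

  @Override
  protected void attemptLaunched(TaskAttemptId attemptID) {
    if (attemptID.getTaskId().getId() == 0 && attemptID.getId() == 0) {
      // Block only the first attempt of the first task; the test kills it
      // via TA_KILL and then counts the latch down, so the retry attempt
      // falls through to the else branch and succeeds.
      try {
        latch.await();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
    } else {
      // Let all other attempts complete right away.
      getContext().getEventHandler().handle(
          new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_DONE));
    }
  }
}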
Use of java.util.concurrent.CountDownLatch in project hadoop by apache.
The class TestKill, method testKillTaskWaitKillJobBeforeTA_DONE.
@Test
public void testKillTaskWaitKillJobBeforeTA_DONE() throws Exception {
  CountDownLatch latch = new CountDownLatch(1);
  final Dispatcher dispatcher = new MyAsyncDispatch(latch, JobEventType.JOB_KILL);
  MRApp app = new MRApp(1, 1, false, this.getClass().getName(), true) {
    @Override
    public Dispatcher createDispatcher() {
      return dispatcher;
    }
  };
  Job job = app.submit(new Configuration());
  JobId jobId = app.getJobId();
  app.waitForState(job, JobState.RUNNING);
  Assert.assertEquals("Num tasks not correct", 2, job.getTasks().size());
  Iterator<Task> it = job.getTasks().values().iterator();
  Task mapTask = it.next();
  Task reduceTask = it.next();
  app.waitForState(mapTask, TaskState.RUNNING);
  app.waitForState(reduceTask, TaskState.RUNNING);
  TaskAttempt mapAttempt = mapTask.getAttempts().values().iterator().next();
  app.waitForState(mapAttempt, TaskAttemptState.RUNNING);
  TaskAttempt reduceAttempt = reduceTask.getAttempts().values().iterator().next();
  app.waitForState(reduceAttempt, TaskAttemptState.RUNNING);
  // The order in the dispatch event queue, from first to last:
  //   JobEventType.JOB_KILL
  //   TA_DONE
  //   TaskEventType.T_KILL (from JobEventType.JOB_KILL handling)
  //   TaskAttemptEventType.TA_CONTAINER_COMPLETED (from TA_DONE handling)
  //   TaskAttemptEventType.TA_KILL (from TaskEventType.T_KILL handling)
  //   TaskEventType.T_ATTEMPT_SUCCEEDED (from TA_CONTAINER_COMPLETED handling)
  //   TaskEventType.T_ATTEMPT_KILLED (from TA_KILL handling)
  // Now kill the job
  app.getContext().getEventHandler().handle(
      new JobEvent(jobId, JobEventType.JOB_KILL));
  // Finish the map
  app.getContext().getEventHandler().handle(
      new TaskAttemptEvent(mapAttempt.getID(), TaskAttemptEventType.TA_DONE));
  // Unblock the dispatcher
  latch.countDown();
  app.waitForInternalState((JobImpl) job, JobStateInternal.KILLED);
}
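The only difference from the TA_DONE variant is which event type gates the dispatcher: here JOB_KILL is blocked at the head of the queue, so the kill is processed before the map's completion. Extending the MyAsyncDispatch sketch shown after the first test, the corresponding overload might look like this (again an assumption about the helper, not the verified source):

  private final JobEventType jobEventTypeToWait;  // assumed second gate field

  MyAsyncDispatch(CountDownLatch latch, JobEventType jobEventTypeToWait) {
    super();
    this.latch = latch;
    this.jobEventTypeToWait = jobEventTypeToWait;
    this.attemptEventTypeToWait = null;  // gate on job events instead
  }

  // Inside dispatch(Event event), the matching branch would be:
  //   if (event instanceof JobEvent
  //       && ((JobEvent) event).getType() == jobEventTypeToWait) {
  //     latch.await();  // same parking behavior as the TA_DONE gate
  //   }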
Use of java.util.concurrent.CountDownLatch in project hadoop by apache.
The class TestNativeAzureFileSystemLive, method testDeleteThrowsExceptionWithLeaseExistsErrorMessage.
/**
 * Tests the fs.delete() function for deleting a blob while another client
 * holds a lease on it. A delete called without the lease should fail with
 * an appropriate exception while the other process still holds the lease.
 * This is the scenario that occurs during HMaster startup, when it tries
 * to clean up the temp dirs while a previously killed HMaster process
 * still holds the lease it took on the blob for a DDL operation.
 */
@Test
public void testDeleteThrowsExceptionWithLeaseExistsErrorMessage() throws Exception {
  LOG.info("Starting test");
  final String FILE_KEY = "fileWithLease";
  // Create the file
  Path path = new Path(FILE_KEY);
  fs.create(path);
  assertTrue(fs.exists(path));
  NativeAzureFileSystem nfs = (NativeAzureFileSystem) fs;
  final String fullKey = nfs.pathToKey(nfs.makeAbsolute(path));
  final AzureNativeFileSystemStore store = nfs.getStore();
  // Acquire the lease on the file in a background thread
  final CountDownLatch leaseAttemptComplete = new CountDownLatch(1);
  final CountDownLatch beginningDeleteAttempt = new CountDownLatch(1);
  Thread t = new Thread() {
    @Override
    public void run() {
      // Acquire the lease and then signal the main test thread.
      SelfRenewingLease lease = null;
      try {
        lease = store.acquireLease(fullKey);
        LOG.info("Lease acquired: " + lease.getLeaseID());
      } catch (AzureException e) {
        LOG.warn("Lease acquisition thread unable to acquire lease", e);
      } finally {
        leaseAttemptComplete.countDown();
      }
      // Wait for the main test thread to signal it will attempt the delete.
      try {
        beginningDeleteAttempt.await();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
      }
      // Keep holding the lease for a while, so the test covers the case
      // of delete retrying to acquire the lease.
      try {
        Thread.sleep(SelfRenewingLease.LEASE_ACQUIRE_RETRY_INTERVAL * 3);
      } catch (InterruptedException ex) {
        Thread.currentThread().interrupt();
      }
      try {
        if (lease != null) {
          LOG.info("Freeing lease");
          lease.free();
        }
      } catch (StorageException se) {
        LOG.warn("Unable to free lease.", se);
      }
    }
  };
  // Start the background thread and wait for it to signal the lease is held.
  t.start();
  try {
    leaseAttemptComplete.await();
  } catch (InterruptedException ex) {
    Thread.currentThread().interrupt();
  }
  // Try to delete the same file
  beginningDeleteAttempt.countDown();
  store.delete(fullKey);
  // At this point the file SHOULD BE DELETED
  assertFalse(fs.exists(path));
}
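The two latches above form a common handshake: the worker signals when its setup is done, and the main thread signals when the worker may proceed. A distilled, self-contained version of the same pattern using only the JDK (no Hadoop or Azure dependencies) looks like this:

import java.util.concurrent.CountDownLatch;

public class TwoLatchHandshake {
  public static void main(String[] args) throws InterruptedException {
    final CountDownLatch setupDone = new CountDownLatch(1);
    final CountDownLatch mayProceed = new CountDownLatch(1);

    Thread worker = new Thread(() -> {
      // ... acquire a contended resource, e.g. a lease ...
      setupDone.countDown();    // tell main: setup finished
      try {
        mayProceed.await();     // wait for main to begin its step
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      }
      // ... hold the resource a while longer, then release it ...
    });

    worker.start();
    setupDone.await();          // block until the worker's setup is done
    mayProceed.countDown();     // let the worker continue
    // ... main thread performs its contended operation here ...
    worker.join();
  }
}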
Use of java.util.concurrent.CountDownLatch in project hadoop by apache.
The class Gridmix, method start.
/**
 * @param conf gridmix configuration
 * @param traceIn trace file path (if it is '-', the trace comes from stdin)
 * @param ioPath working directory for gridmix; the GenerateData job
 *     writes input data under <ioPath>/input/, and distributed cache
 *     data is generated under <ioPath>/distributedCache/ if the
 *     -generate option is specified
 * @param genbytes size of the input data to be generated under
 *     <ioPath>/input/
 * @param userResolver gridmix user resolver
 * @return exit code
 * @throws IOException
 * @throws InterruptedException
 */
int start(Configuration conf, String traceIn, Path ioPath, long genbytes,
    UserResolver userResolver) throws IOException, InterruptedException {
  DataStatistics stats = null;
  InputStream trace = null;
  int exitCode = 0;
  try {
    Path scratchDir = new Path(ioPath, conf.get(GRIDMIX_OUT_DIR, "gridmix"));
    // Add shutdown hook for SIGINT, etc.
    Runtime.getRuntime().addShutdownHook(sdh);
    CountDownLatch startFlag = new CountDownLatch(1);
    try {
      // Create and start the job submission threads
      startThreads(conf, traceIn, ioPath, scratchDir, startFlag, userResolver);
      Path inputDir = getGridmixInputDataPath(ioPath);
      // Write input data if specified
      exitCode = writeInputData(genbytes, inputDir);
      if (exitCode != 0) {
        return exitCode;
      }
      // Publish the data statistics
      stats = GenerateData.publishDataStatistics(inputDir, genbytes, conf);
      // Scan the input dir contents
      submitter.refreshFilePool();
      boolean shouldGenerate = (genbytes > 0);
      // Set up what is needed for emulation of the various loads
      exitCode = setupEmulation(conf, traceIn, scratchDir, ioPath, shouldGenerate);
      if (exitCode != 0) {
        return exitCode;
      }
      // Start the summarizer
      summarizer.start(conf);
      factory.start();
      statistics.start();
    } catch (Throwable e) {
      LOG.error("Startup failed. " + e.toString() + "\n");
      if (LOG.isDebugEnabled()) {
        e.printStackTrace();
      }
      // Abort the pipeline
      if (factory != null)
        factory.abort();
      exitCode = STARTUP_FAILED_ERROR;
    } finally {
      // Signal the factory to start; this also sets the start time
      startFlag.countDown();
    }
    if (factory != null) {
      // Wait for input exhaustion
      factory.join(Long.MAX_VALUE);
      final Throwable badTraceException = factory.error();
      if (null != badTraceException) {
        LOG.error("Error in trace", badTraceException);
        throw new IOException("Error in trace", badTraceException);
      }
      // Wait for pending tasks to be submitted
      submitter.shutdown();
      submitter.join(Long.MAX_VALUE);
      // Wait for running tasks to complete
      monitor.shutdown();
      monitor.join(Long.MAX_VALUE);
      statistics.shutdown();
      statistics.join(Long.MAX_VALUE);
    }
  } finally {
    if (factory != null) {
      summarizer.finalize(factory, traceIn, genbytes, userResolver, stats, conf);
    }
    IOUtils.cleanup(LOG, trace);
  }
  return exitCode;
}
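Here the latch runs in the opposite direction from the test cases above: start() counts startFlag down in its finally block once setup (data generation, emulation setup) is complete, releasing the submission threads created by startThreads(). Those threads are not shown; a hypothetical sketch of how one could gate on the flag (the class and method names here are illustrative, not Gridmix's actual API):

class GatedSubmitter extends Thread {  // hypothetical name
  private final CountDownLatch startFlag;

  GatedSubmitter(CountDownLatch startFlag) {
    this.startFlag = startFlag;
  }

  @Override
  public void run() {
    try {
      // Parked until Gridmix.start() calls startFlag.countDown() in its
      // finally block, i.e. until startup either succeeded or was aborted.
      startFlag.await();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return;
    }
    // ... begin submitting synthetic jobs from the trace ...
  }
}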