use of org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob in project hadoop by apache.
the class TestJobControl method doJobControlTest.
/**
 * This is a main function for testing the JobControl class.
 * It first cleans all the dirs it will use. Then it generates some random text
 * data in TestJobControlData/indir. Then it creates 4 jobs:
 *   Job 1: copy data from indir to outdir_1
 *   Job 2: copy data from indir to outdir_2
 *   Job 3: copy data from outdir_1 and outdir_2 to outdir_3
 *   Job 4: copy data from outdir_3 to outdir_4
 * Jobs 1 and 2 have no dependencies. Job 3 depends on jobs 1 and 2.
 * Job 4 depends on job 3.
 *
 * It then creates a JobControl object, adds the 4 jobs to it, and finally
 * spawns a thread to run the JobControl object while monitoring/reporting
 * the job states.
 */
public static void doJobControlTest() throws Exception {
  Configuration defaults = new Configuration();
  FileSystem fs = FileSystem.get(defaults);
  Path rootDataDir = new Path(System.getProperty("test.build.data", "."),
      "TestJobControlData");
  Path indir = new Path(rootDataDir, "indir");
  Path outdir_1 = new Path(rootDataDir, "outdir_1");
  Path outdir_2 = new Path(rootDataDir, "outdir_2");
  Path outdir_3 = new Path(rootDataDir, "outdir_3");
  Path outdir_4 = new Path(rootDataDir, "outdir_4");
  JobControlTestUtils.cleanData(fs, indir);
  JobControlTestUtils.generateData(fs, indir);
  JobControlTestUtils.cleanData(fs, outdir_1);
  JobControlTestUtils.cleanData(fs, outdir_2);
  JobControlTestUtils.cleanData(fs, outdir_3);
  JobControlTestUtils.cleanData(fs, outdir_4);
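  // Jobs 1 and 2 are independent, so they carry no depending-job list.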
  ArrayList<Job> dependingJobs = null;
  ArrayList<Path> inPaths_1 = new ArrayList<Path>();
  inPaths_1.add(indir);
  JobConf jobConf_1 = JobControlTestUtils.createCopyJob(inPaths_1, outdir_1);
  Job job_1 = new Job(jobConf_1, dependingJobs);
  ArrayList<Path> inPaths_2 = new ArrayList<Path>();
  inPaths_2.add(indir);
  JobConf jobConf_2 = JobControlTestUtils.createCopyJob(inPaths_2, outdir_2);
  Job job_2 = new Job(jobConf_2, dependingJobs);
  ArrayList<Path> inPaths_3 = new ArrayList<Path>();
  inPaths_3.add(outdir_1);
  inPaths_3.add(outdir_2);
  JobConf jobConf_3 = JobControlTestUtils.createCopyJob(inPaths_3, outdir_3);
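  // Job 3 must wait for jobs 1 and 2; job 4 must wait for job 3.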
  dependingJobs = new ArrayList<Job>();
  dependingJobs.add(job_1);
  dependingJobs.add(job_2);
  Job job_3 = new Job(jobConf_3, dependingJobs);
  ArrayList<Path> inPaths_4 = new ArrayList<Path>();
  inPaths_4.add(outdir_3);
  JobConf jobConf_4 = JobControlTestUtils.createCopyJob(inPaths_4, outdir_4);
  dependingJobs = new ArrayList<Job>();
  dependingJobs.add(job_3);
  Job job_4 = new Job(jobConf_4, dependingJobs);
  JobControl theControl = new JobControl("Test");
  theControl.addJob((ControlledJob) job_1);
  theControl.addJob((ControlledJob) job_2);
  theControl.addJob(job_3);
  theControl.addJob(job_4);
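  // JobControl implements Runnable: drive it from its own thread and poll
  // until every job in the graph has reached a terminal state.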
  Thread theController = new Thread(theControl);
  theController.start();
  while (!theControl.allFinished()) {
    System.out.println("Jobs in waiting state: " + theControl.getWaitingJobs().size());
    System.out.println("Jobs in ready state: " + theControl.getReadyJobs().size());
    System.out.println("Jobs in running state: " + theControl.getRunningJobs().size());
    System.out.println("Jobs in success state: " + theControl.getSuccessfulJobs().size());
    System.out.println("Jobs in failed state: " + theControl.getFailedJobs().size());
    System.out.println("\n");
    try {
      Thread.sleep(5000);
    } catch (InterruptedException e) {
      // ignore and poll again
    }
  }
System.out.println("Jobs are all done???");
System.out.println("Jobs in waiting state: " + theControl.getWaitingJobs().size());
System.out.println("Jobs in ready state: " + theControl.getReadyJobs().size());
System.out.println("Jobs in running state: " + theControl.getRunningJobs().size());
System.out.println("Jobs in success state: " + theControl.getSuccessfulJobs().size());
System.out.println("Jobs in failed state: " + theControl.getFailedJobs().size());
System.out.println("\n");
  if (job_1.getState() != Job.FAILED && job_1.getState() != Job.DEPENDENT_FAILED
      && job_1.getState() != Job.SUCCESS) {
    String states = "job_1: " + job_1.getState() + "\n";
    throw new Exception("The state of job_1 is not in a complete state\n" + states);
  }
  if (job_2.getState() != Job.FAILED && job_2.getState() != Job.DEPENDENT_FAILED
      && job_2.getState() != Job.SUCCESS) {
    String states = "job_2: " + job_2.getState() + "\n";
    throw new Exception("The state of job_2 is not in a complete state\n" + states);
  }
  if (job_3.getState() != Job.FAILED && job_3.getState() != Job.DEPENDENT_FAILED
      && job_3.getState() != Job.SUCCESS) {
    String states = "job_3: " + job_3.getState() + "\n";
    throw new Exception("The state of job_3 is not in a complete state\n" + states);
  }
  if (job_4.getState() != Job.FAILED && job_4.getState() != Job.DEPENDENT_FAILED
      && job_4.getState() != Job.SUCCESS) {
    String states = "job_4: " + job_4.getState() + "\n";
    throw new Exception("The state of job_4 is not in a complete state\n" + states);
  }
  if (job_1.getState() == Job.FAILED || job_2.getState() == Job.FAILED
      || job_1.getState() == Job.DEPENDENT_FAILED
      || job_2.getState() == Job.DEPENDENT_FAILED) {
    if (job_3.getState() != Job.DEPENDENT_FAILED) {
      String states = "job_1: " + job_1.getState() + "\n";
      states += "job_2: " + job_2.getState() + "\n";
      states += "job_3: " + job_3.getState() + "\n";
      states += "job_4: " + job_4.getState() + "\n";
      throw new Exception("The states of jobs 1, 2, 3, 4 are not consistent\n" + states);
    }
  }
  if (job_3.getState() == Job.FAILED || job_3.getState() == Job.DEPENDENT_FAILED) {
    if (job_4.getState() != Job.DEPENDENT_FAILED) {
      String states = "job_3: " + job_3.getState() + "\n";
      states += "job_4: " + job_4.getState() + "\n";
      throw new Exception("The states of jobs 3, 4 are not consistent\n" + states);
    }
  }
  theControl.stop();
}
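For comparison, the new-API org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob can express the same dependency graph without subclassing. The following is a minimal sketch, not taken from the test above: it assumes job1 through job4 are already-configured org.apache.hadoop.mapreduce.Job instances, and the class name DependencyGraphSketch is hypothetical.

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class DependencyGraphSketch {
  // job1..job4 are assumed to be already-configured mapreduce Jobs.
  static JobControl buildGraph(Job job1, Job job2, Job job3, Job job4) throws IOException {
    ControlledJob cjob1 = new ControlledJob(job1, null);
    ControlledJob cjob2 = new ControlledJob(job2, null);
    ControlledJob cjob3 = new ControlledJob(job3, null);
    // Job 3 may start only after jobs 1 and 2 have completed.
    cjob3.addDependingJob(cjob1);
    cjob3.addDependingJob(cjob2);
    ControlledJob cjob4 = new ControlledJob(job4, null);
    // Job 4 may start only after job 3 has completed.
    cjob4.addDependingJob(cjob3);
    JobControl control = new JobControl("Test");
    control.addJob(cjob1);
    control.addJob(cjob2);
    control.addJob(cjob3);
    control.addJob(cjob4);
    return control;
  }
}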
use of org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob in project hadoop by apache.
the class ValueAggregatorJob method createValueAggregatorJobs.
public static JobControl createValueAggregatorJobs(String[] args, Class<? extends ValueAggregatorDescriptor>[] descriptors) throws IOException {
  JobControl theControl = new JobControl("ValueAggregatorJobs");
  ArrayList<ControlledJob> dependingJobs = new ArrayList<ControlledJob>();
  Configuration conf = new Configuration();
  if (descriptors != null) {
    conf = setAggregatorDescriptors(descriptors);
  }
  Job job = createValueAggregatorJob(conf, args);
  ControlledJob cjob = new ControlledJob(job, dependingJobs);
  theControl.addJob(cjob);
  return theControl;
}
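The returned JobControl has not been started yet. Below is a minimal driver sketch following the same thread-and-poll pattern as the TestJobControl loop above; the class name AggregatorDriverSketch is hypothetical, and passing null for the descriptor array simply keeps whatever the default Configuration carries (per the null check in the method above).

import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;

public class AggregatorDriverSketch {
  public static void main(String[] args) throws Exception {
    // null descriptors: use the aggregator descriptors already in the conf
    JobControl control = ValueAggregatorJob.createValueAggregatorJobs(args, null);
    Thread runner = new Thread(control);
    runner.start();
    while (!control.allFinished()) {
      Thread.sleep(5000); // poll until the aggregator job reaches a terminal state
    }
    System.out.println("Failed jobs: " + control.getFailedJobList().size());
    control.stop();
  }
}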
use of org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob in project hadoop by apache.
the class TestJob method testUnexpectedJobStatus.
@Test
public void testUnexpectedJobStatus() throws Exception {
  Cluster cluster = mock(Cluster.class);
  JobID jobid = new JobID("1014873536921", 6);
  ClientProtocol clientProtocol = mock(ClientProtocol.class);
  when(cluster.getClient()).thenReturn(clientProtocol);
  JobStatus status = new JobStatus(jobid, 0f, 0f, 0f, 0f, State.RUNNING,
      JobPriority.DEFAULT, "root", "testUnexpectedJobStatus", "job file",
      "tracking URL");
  when(clientProtocol.getJobStatus(jobid)).thenReturn(status);
  Job job = Job.getInstance(cluster, status, new JobConf());
  // ensure the job status is RUNNING
  Assert.assertNotNull(job.getStatus());
  Assert.assertTrue(job.getStatus().getState() == State.RUNNING);
  // when updating the job status, the job client cannot retrieve the
  // status from the cluster, so it is reset to null
  when(clientProtocol.getJobStatus(jobid)).thenReturn(null);
  try {
    job.updateStatus();
  } catch (IOException e) {
    Assert.assertTrue(e != null && e.getMessage().contains("Job status not available"));
  }
  // ControlledJob.toString() must not NPE even though the underlying
  // job status is no longer available
  try {
    ControlledJob cj = new ControlledJob(job, null);
    Assert.assertNotNull(cj.toString());
  } catch (NullPointerException e) {
    Assert.fail("job API fails with NPE");
  }
}
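The property the test above guards is that a ControlledJob's string form stays usable even when no live status is available from the cluster. A minimal sketch of the same property for a never-submitted job, assuming only a default Configuration (the job name used is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;

public class ToStringSafetySketch {
  public static void main(String[] args) throws Exception {
    ControlledJob cj = new ControlledJob(new Configuration());
    cj.setJobName("tostring-safety-check"); // hypothetical name
    System.out.println(cj.getJobState());   // WAITING: nothing submitted yet
    System.out.println(cj);                 // expected not to throw NPE
  }
}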