Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestDatamerge, method testNestedJoin.
@Test
public void testNestedJoin() throws Exception {
  // outer(inner(S1,...,Sn),outer(S1,...,Sn))
  final int SOURCES = 3;
  final int ITEMS = (SOURCES + 1) * (SOURCES + 1);
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/nested"));
  int[][] source = new int[SOURCES][];
  for (int i = 0; i < SOURCES; ++i) {
    source[i] = new int[ITEMS];
    for (int j = 0; j < ITEMS; ++j) {
      source[i][j] = (i + 2) * (j + 1);
    }
  }
  Path[] src = new Path[SOURCES];
  SequenceFile.Writer[] out = createWriters(base, job, SOURCES, src);
  IntWritable k = new IntWritable();
  for (int i = 0; i < SOURCES; ++i) {
    IntWritable v = new IntWritable();
    v.set(i);
    for (int j = 0; j < ITEMS; ++j) {
      k.set(source[i][j]);
      out[i].append(k, v);
    }
    out[i].close();
  }
  out = null;
  StringBuilder sb = new StringBuilder();
  sb.append("outer(inner(");
  for (int i = 0; i < SOURCES; ++i) {
    sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class,
        src[i].toString()));
    if (i + 1 != SOURCES) {
      sb.append(",");
    }
  }
  sb.append("),outer(");
  sb.append(CompositeInputFormat.compose(Fake_IF.class, "foobar"));
  sb.append(",");
  for (int i = 0; i < SOURCES; ++i) {
    sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class,
        src[i].toString()));
    sb.append(",");
  }
  sb.append(CompositeInputFormat.compose(Fake_IF.class, "raboof") + "))");
  job.set("mapreduce.join.expr", sb.toString());
  job.setInputFormat(CompositeInputFormat.class);
  Path outf = new Path(base, "out");
  FileOutputFormat.setOutputPath(job, outf);
  Fake_IF.setKeyClass(job, IntWritable.class);
  Fake_IF.setValClass(job, IntWritable.class);
  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(TupleWritable.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  JobClient.runJob(job);
  FileStatus[] outlist = cluster.getFileSystem().listStatus(outf,
      new Utils.OutputFileUtils.OutputFilesFilter());
  assertEquals(1, outlist.length);
  assertTrue(0 < outlist[0].getLen());
  SequenceFile.Reader r = new SequenceFile.Reader(cluster.getFileSystem(),
      outlist[0].getPath(), job);
  TupleWritable v = new TupleWritable();
  while (r.next(k, v)) {
    assertFalse(((TupleWritable) v.get(1)).has(0));
    assertFalse(((TupleWritable) v.get(1)).has(SOURCES + 1));
    boolean chk = true;
    int ki = k.get();
    for (int i = 2; i < SOURCES + 2; ++i) {
      if ((ki % i) == 0 && ki <= i * ITEMS) {
        assertEquals(i - 2, ((IntWritable) ((TupleWritable) v.get(1)).get(i - 1)).get());
      } else {
        chk = false;
      }
    }
    if (chk) {
      // present in all sources; check the inner join tuple
      assertTrue(v.has(0));
      for (int i = 0; i < SOURCES; ++i) {
        assertTrue(((TupleWritable) v.get(0)).has(i));
      }
    } else {
      // should not be present in the inner join
      assertFalse(v.has(0));
    }
  }
  r.close();
  base.getFileSystem(job).delete(base, true);
}
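The expression assembled by the StringBuilder above follows the join-expression grammar that CompositeInputFormat parses from mapreduce.join.expr. For simpler cases, the compose overload that takes an operation name can emit the whole expression in one call. A minimal sketch, assuming two hypothetical SequenceFile paths /joins/left and /joins/right that share a key space:

// Hedged sketch: compose("inner", ...) expands to inner(tbl(...),tbl(...)),
// the same grammar the test builds by hand. The paths are hypothetical.
JobConf conf = new JobConf();
conf.set("mapreduce.join.expr",
    CompositeInputFormat.compose("inner", SequenceFileInputFormat.class,
        "/joins/left", "/joins/right"));
conf.setInputFormat(CompositeInputFormat.class);
// Each map-side value then arrives as a TupleWritable with one slot per source.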
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestWrappedRecordReaderClassloader, method testClassLoader.
/**
 * Tests that the class loader set by {@link JobConf#setClassLoader(ClassLoader)}
 * is inherited by any {@link WrappedRecordReader}s created by
 * {@link CompositeRecordReader}.
 */
@Test
public void testClassLoader() throws Exception {
  JobConf job = new JobConf();
  Fake_ClassLoader classLoader = new Fake_ClassLoader();
  job.setClassLoader(classLoader);
  assertTrue(job.getClassLoader() instanceof Fake_ClassLoader);
  FileSystem fs = FileSystem.get(job);
  Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
  Path base = new Path(testdir, "/empty");
  Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr",
      CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));
  CompositeInputFormat<NullWritable> inputFormat = new CompositeInputFormat<NullWritable>();
  inputFormat.getRecordReader(inputFormat.getSplits(job, 1)[0], job, Reporter.NULL);
}
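The contract the test relies on can be seen in isolation. A minimal sketch, assuming only the setClassLoader/getClassLoader pair on Configuration; Fake_ClassLoader and IF_ClassLoaderChecker are helpers defined inside the test class, so an anonymous loader stands in for them here:

// Hedged sketch: whatever loader is installed on a JobConf is what any
// record reader created from that same conf will observe. The anonymous
// subclass below is a stand-in for the test's Fake_ClassLoader.
JobConf conf = new JobConf();
ClassLoader loader = new ClassLoader(Thread.currentThread().getContextClassLoader()) { };
conf.setClassLoader(loader);
assertTrue(conf.getClassLoader() == loader);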
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestChain, method testSetReducerWithReducerByValueAsTrue.
@Test
public void testSetReducerWithReducerByValueAsTrue() throws Exception {
  JobConf jobConf = new JobConf();
  JobConf reducerConf = new JobConf();
  Chain.setReducer(jobConf, MyReducer.class, Object.class, Object.class,
      Object.class, Object.class, true, reducerConf);
  boolean reduceByValue = reducerConf.getBoolean("chain.reducer.byValue", false);
  Assert.assertEquals("It should set chain.reducer.byValue as true in reducerConf "
      + "when we give value as true", true, reduceByValue);
}
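Chain.setReducer is the internal hook behind the public ChainReducer helper, which is how user code would normally write this configuration. A minimal sketch, where MyReduce is a hypothetical Reducer implementation:

// Hedged sketch of the public chain API; it records the reducer class and
// the byValue flag (the chain.reducer.byValue key checked above) in the JobConf.
JobConf job = new JobConf();
JobConf reducerConf = new JobConf(false);
ChainReducer.setReducer(job, MyReduce.class,
    LongWritable.class, Text.class,   // reducer input key/value types
    Text.class, Text.class,           // reducer output key/value types
    true,                             // byValue: pass records by value
    reducerConf);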
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestJobControl, method testGetAssignedJobId.
@Test(timeout = 30000)
public void testGetAssignedJobId() throws Exception {
  JobConf jc = new JobConf();
  Job j = new Job(jc);
  // Just make sure no exception is thrown
  assertNull(j.getAssignedJobID());
  org.apache.hadoop.mapreduce.Job mockjob = mock(org.apache.hadoop.mapreduce.Job.class);
  org.apache.hadoop.mapreduce.JobID jid = new org.apache.hadoop.mapreduce.JobID("test", 0);
  when(mockjob.getJobID()).thenReturn(jid);
  j.setJob(mockjob);
  JobID expected = new JobID("test", 0);
  assertEquals(expected, j.getAssignedJobID());
  verify(mockjob).getJobID();
}
Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.
From the class TestJobControl, method doJobControlTest.
/**
 * This is a main function for testing the JobControl class.
 * It first cleans all the dirs it will use. Then it generates some random text
 * data in TestJobControlData/indir. Then it creates 4 jobs:
 *   Job 1: copy data from indir to outdir_1
 *   Job 2: copy data from indir to outdir_2
 *   Job 3: copy data from outdir_1 and outdir_2 to outdir_3
 *   Job 4: copy data from outdir_3 to outdir_4
 * Jobs 1 and 2 have no dependencies. Job 3 depends on jobs 1 and 2.
 * Job 4 depends on job 3.
 *
 * It then creates a JobControl object, adds the 4 jobs to it, and finally
 * starts a thread to run the JobControl object while monitoring/reporting
 * the job states.
 */
public static void doJobControlTest() throws Exception {
  Configuration defaults = new Configuration();
  FileSystem fs = FileSystem.get(defaults);
  Path rootDataDir = new Path(System.getProperty("test.build.data", "."), "TestJobControlData");
  Path indir = new Path(rootDataDir, "indir");
  Path outdir_1 = new Path(rootDataDir, "outdir_1");
  Path outdir_2 = new Path(rootDataDir, "outdir_2");
  Path outdir_3 = new Path(rootDataDir, "outdir_3");
  Path outdir_4 = new Path(rootDataDir, "outdir_4");
  JobControlTestUtils.cleanData(fs, indir);
  JobControlTestUtils.generateData(fs, indir);
  JobControlTestUtils.cleanData(fs, outdir_1);
  JobControlTestUtils.cleanData(fs, outdir_2);
  JobControlTestUtils.cleanData(fs, outdir_3);
  JobControlTestUtils.cleanData(fs, outdir_4);
  ArrayList<Job> dependingJobs = null;
  ArrayList<Path> inPaths_1 = new ArrayList<Path>();
  inPaths_1.add(indir);
  JobConf jobConf_1 = JobControlTestUtils.createCopyJob(inPaths_1, outdir_1);
  Job job_1 = new Job(jobConf_1, dependingJobs);
  ArrayList<Path> inPaths_2 = new ArrayList<Path>();
  inPaths_2.add(indir);
  JobConf jobConf_2 = JobControlTestUtils.createCopyJob(inPaths_2, outdir_2);
  Job job_2 = new Job(jobConf_2, dependingJobs);
  ArrayList<Path> inPaths_3 = new ArrayList<Path>();
  inPaths_3.add(outdir_1);
  inPaths_3.add(outdir_2);
  JobConf jobConf_3 = JobControlTestUtils.createCopyJob(inPaths_3, outdir_3);
  dependingJobs = new ArrayList<Job>();
  dependingJobs.add(job_1);
  dependingJobs.add(job_2);
  Job job_3 = new Job(jobConf_3, dependingJobs);
  ArrayList<Path> inPaths_4 = new ArrayList<Path>();
  inPaths_4.add(outdir_3);
  JobConf jobConf_4 = JobControlTestUtils.createCopyJob(inPaths_4, outdir_4);
  dependingJobs = new ArrayList<Job>();
  dependingJobs.add(job_3);
  Job job_4 = new Job(jobConf_4, dependingJobs);
  JobControl theControl = new JobControl("Test");
  theControl.addJob((ControlledJob) job_1);
  theControl.addJob((ControlledJob) job_2);
  theControl.addJob(job_3);
  theControl.addJob(job_4);
  Thread theController = new Thread(theControl);
  theController.start();
  while (!theControl.allFinished()) {
    System.out.println("Jobs in waiting state: " + theControl.getWaitingJobs().size());
    System.out.println("Jobs in ready state: " + theControl.getReadyJobs().size());
    System.out.println("Jobs in running state: " + theControl.getRunningJobs().size());
    System.out.println("Jobs in success state: " + theControl.getSuccessfulJobs().size());
    System.out.println("Jobs in failed state: " + theControl.getFailedJobs().size());
    System.out.println("\n");
    try {
      Thread.sleep(5000);
    } catch (Exception e) {
      // ignore and poll again
    }
  }
  System.out.println("Jobs are all done???");
  System.out.println("Jobs in waiting state: " + theControl.getWaitingJobs().size());
  System.out.println("Jobs in ready state: " + theControl.getReadyJobs().size());
  System.out.println("Jobs in running state: " + theControl.getRunningJobs().size());
  System.out.println("Jobs in success state: " + theControl.getSuccessfulJobs().size());
  System.out.println("Jobs in failed state: " + theControl.getFailedJobs().size());
  System.out.println("\n");
  if (job_1.getState() != Job.FAILED && job_1.getState() != Job.DEPENDENT_FAILED
      && job_1.getState() != Job.SUCCESS) {
    String states = "job_1: " + job_1.getState() + "\n";
    throw new Exception("The state of job_1 is not in a complete state\n" + states);
  }
  if (job_2.getState() != Job.FAILED && job_2.getState() != Job.DEPENDENT_FAILED
      && job_2.getState() != Job.SUCCESS) {
    String states = "job_2: " + job_2.getState() + "\n";
    throw new Exception("The state of job_2 is not in a complete state\n" + states);
  }
  if (job_3.getState() != Job.FAILED && job_3.getState() != Job.DEPENDENT_FAILED
      && job_3.getState() != Job.SUCCESS) {
    String states = "job_3: " + job_3.getState() + "\n";
    throw new Exception("The state of job_3 is not in a complete state\n" + states);
  }
  if (job_4.getState() != Job.FAILED && job_4.getState() != Job.DEPENDENT_FAILED
      && job_4.getState() != Job.SUCCESS) {
    String states = "job_4: " + job_4.getState() + "\n";
    throw new Exception("The state of job_4 is not in a complete state\n" + states);
  }
  if (job_1.getState() == Job.FAILED || job_2.getState() == Job.FAILED
      || job_1.getState() == Job.DEPENDENT_FAILED || job_2.getState() == Job.DEPENDENT_FAILED) {
    if (job_3.getState() != Job.DEPENDENT_FAILED) {
      // accumulate (not overwrite) the states for the error message
      String states = "job_1: " + job_1.getState() + "\n";
      states += "job_2: " + job_2.getState() + "\n";
      states += "job_3: " + job_3.getState() + "\n";
      states += "job_4: " + job_4.getState() + "\n";
      throw new Exception("The states of jobs 1, 2, 3, 4 are not consistent\n" + states);
    }
  }
  if (job_3.getState() == Job.FAILED || job_3.getState() == Job.DEPENDENT_FAILED) {
    if (job_4.getState() != Job.DEPENDENT_FAILED) {
      String states = "job_3: " + job_3.getState() + "\n";
      states += "job_4: " + job_4.getState() + "\n";
      throw new Exception("The states of jobs 3, 4 are not consistent\n" + states);
    }
  }
  theControl.stop();
}
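The wiring above reduces to a small skeleton. A minimal sketch, assuming confA, confB, and confC are already-prepared copy-job JobConfs, of the dependency pattern the test verifies (c must end up DEPENDENT_FAILED if a or b fails):

// Hedged sketch: job c lists a and b as depending jobs, so JobControl only
// schedules c after both succeed. confA/confB/confC are hypothetical.
Job a = new Job(confA, null);
Job b = new Job(confB, null);
ArrayList<Job> deps = new ArrayList<Job>();
deps.add(a);
deps.add(b);
Job c = new Job(confC, deps);
JobControl control = new JobControl("sketch");
control.addJob(a);
control.addJob(b);
control.addJob(c);
new Thread(control).start();
while (!control.allFinished()) {
  try {
    Thread.sleep(1000);  // poll until every job reaches a terminal state
  } catch (InterruptedException ignored) {
  }
}
control.stop();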