
Example 61 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

From the class TestDatamerge, method testNestedJoin:

@Test
public void testNestedJoin() throws Exception {
    // outer(inner(S1,...,Sn),outer(S1,...Sn))
    final int SOURCES = 3;
    final int ITEMS = (SOURCES + 1) * (SOURCES + 1);
    JobConf job = new JobConf();
    Path base = cluster.getFileSystem().makeQualified(new Path("/nested"));
    int[][] source = new int[SOURCES][];
    for (int i = 0; i < SOURCES; ++i) {
        source[i] = new int[ITEMS];
        for (int j = 0; j < ITEMS; ++j) {
            source[i][j] = (i + 2) * (j + 1);
        }
    }
    Path[] src = new Path[SOURCES];
    SequenceFile.Writer[] out = createWriters(base, job, SOURCES, src);
    IntWritable k = new IntWritable();
    for (int i = 0; i < SOURCES; ++i) {
        IntWritable v = new IntWritable();
        v.set(i);
        for (int j = 0; j < ITEMS; ++j) {
            k.set(source[i][j]);
            out[i].append(k, v);
        }
        out[i].close();
    }
    out = null;
    StringBuilder sb = new StringBuilder();
    sb.append("outer(inner(");
    for (int i = 0; i < SOURCES; ++i) {
        sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
        if (i + 1 != SOURCES)
            sb.append(",");
    }
    sb.append("),outer(");
    sb.append(CompositeInputFormat.compose(Fake_IF.class, "foobar"));
    sb.append(",");
    for (int i = 0; i < SOURCES; ++i) {
        sb.append(CompositeInputFormat.compose(SequenceFileInputFormat.class, src[i].toString()));
        sb.append(",");
    }
    sb.append(CompositeInputFormat.compose(Fake_IF.class, "raboof") + "))");
    job.set("mapreduce.join.expr", sb.toString());
    job.setInputFormat(CompositeInputFormat.class);
    Path outf = new Path(base, "out");
    FileOutputFormat.setOutputPath(job, outf);
    Fake_IF.setKeyClass(job, IntWritable.class);
    Fake_IF.setValClass(job, IntWritable.class);
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(TupleWritable.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    JobClient.runJob(job);
    FileStatus[] outlist = cluster.getFileSystem().listStatus(outf, new Utils.OutputFileUtils.OutputFilesFilter());
    assertEquals(1, outlist.length);
    assertTrue(0 < outlist[0].getLen());
    SequenceFile.Reader r = new SequenceFile.Reader(cluster.getFileSystem(), outlist[0].getPath(), job);
    TupleWritable v = new TupleWritable();
    while (r.next(k, v)) {
        assertFalse(((TupleWritable) v.get(1)).has(0));
        assertFalse(((TupleWritable) v.get(1)).has(SOURCES + 1));
        boolean chk = true;
        int ki = k.get();
        for (int i = 2; i < SOURCES + 2; ++i) {
            if ((ki % i) == 0 && ki <= i * ITEMS) {
                assertEquals(i - 2, ((IntWritable) ((TupleWritable) v.get(1)).get((i - 1))).get());
            } else
                chk = false;
        }
        if (chk) {
            // present in all sources; chk inner
            assertTrue(v.has(0));
            for (int i = 0; i < SOURCES; ++i) assertTrue(((TupleWritable) v.get(0)).has(i));
        } else {
            // should not be present in inner join
            assertFalse(v.has(0));
        }
    }
    r.close();
    base.getFileSystem(job).delete(base, true);
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), SequenceFileInputFormat (org.apache.hadoop.mapred.SequenceFileInputFormat), RecordReader (org.apache.hadoop.mapred.RecordReader), SequenceFile (org.apache.hadoop.io.SequenceFile), JobConf (org.apache.hadoop.mapred.JobConf), IntWritable (org.apache.hadoop.io.IntWritable), Test (org.junit.Test)
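
The expression string above is assembled by hand; for a simpler two-way join, the multi-path overload of CompositeInputFormat.compose builds it directly. A minimal, hedged sketch (the /data/a and /data/b paths are hypothetical; the wiring mirrors the test above):

JobConf job = new JobConf();
// Compose an inner join of two SequenceFile sources into a single expression string.
String expr = CompositeInputFormat.compose("inner", SequenceFileInputFormat.class,
        new Path("/data/a").toString(), new Path("/data/b").toString());
job.set("mapreduce.join.expr", expr);
job.setInputFormat(CompositeInputFormat.class);
// Each map-side value is then a TupleWritable with one slot per joined source.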

Example 62 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

From the class TestWrappedRecordReaderClassloader, method testClassLoader:

/**
   * Tests that the class loader set by {@link JobConf#setClassLoader(ClassLoader)}
   * is inherited by any {@link WrappedRecordReader}s created by
   * {@link CompositeRecordReader}.
   */
@Test
public void testClassLoader() throws Exception {
    JobConf job = new JobConf();
    Fake_ClassLoader classLoader = new Fake_ClassLoader();
    job.setClassLoader(classLoader);
    assertTrue(job.getClassLoader() instanceof Fake_ClassLoader);
    FileSystem fs = FileSystem.get(job);
    Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
    Path base = new Path(testdir, "/empty");
    Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
    job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));
    CompositeInputFormat<NullWritable> inputFormat = new CompositeInputFormat<NullWritable>();
    inputFormat.getRecordReader(inputFormat.getSplits(job, 1)[0], job, Reporter.NULL);
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), JobConf (org.apache.hadoop.mapred.JobConf), NullWritable (org.apache.hadoop.io.NullWritable), Test (org.junit.Test)
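
For context, a JobConf resolves class names from the configuration through whatever loader is set here, so code that ships extra classes can install its own loader before any record readers are created. A minimal sketch, assuming a hypothetical user-lib.jar on local disk:

JobConf job = new JobConf();
// Hypothetical jar containing user classes referenced from the configuration.
ClassLoader loader = new URLClassLoader(
        new URL[] { new File("/path/to/user-lib.jar").toURI().toURL() },
        JobConf.class.getClassLoader());
job.setClassLoader(loader);
// Class lookups via the configuration (e.g. job.getClassByName(...)) now go through
// 'loader', and, as the test verifies, WrappedRecordReaders created by
// CompositeRecordReader inherit the same loader.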

Example 63 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

From the class TestChain, method testSetReducerWithReducerByValueAsTrue:

@Test
public void testSetReducerWithReducerByValueAsTrue() throws Exception {
    JobConf jobConf = new JobConf();
    JobConf reducerConf = new JobConf();
    Chain.setReducer(jobConf, MyReducer.class, Object.class, Object.class, Object.class, Object.class, true, reducerConf);
    boolean reduceByValue = reducerConf.getBoolean("chain.reducer.byValue", false);
    Assert.assertEquals("It should set chain.reducer.byValue as true " + "in reducerConf when we give value as true", true, reduceByValue);
}
Also used: JobConf (org.apache.hadoop.mapred.JobConf), Test (org.junit.Test)
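
Chain.setReducer is the internal helper behind the public ChainReducer API. A minimal sketch of how a chained job is normally configured; AMap and AReduce are hypothetical placeholder classes:

JobConf job = new JobConf();
job.setJobName("chain-sketch");
// Each link gets its own private JobConf; 'true' passes records by value between
// links, which is what the chain.reducer.byValue flag checked above records.
JobConf mapConf = new JobConf(false);
ChainMapper.addMapper(job, AMap.class,
        LongWritable.class, Text.class, Text.class, Text.class, true, mapConf);
JobConf reduceConf = new JobConf(false);
ChainReducer.setReducer(job, AReduce.class,
        Text.class, Text.class, Text.class, Text.class, true, reduceConf);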

Example 64 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

From the class TestJobControl, method testGetAssignedJobId:

@Test(timeout = 30000)
public void testGetAssignedJobId() throws Exception {
    JobConf jc = new JobConf();
    Job j = new Job(jc);
    //Just make sure no exception is thrown
    assertNull(j.getAssignedJobID());
    org.apache.hadoop.mapreduce.Job mockjob = mock(org.apache.hadoop.mapreduce.Job.class);
    org.apache.hadoop.mapreduce.JobID jid = new org.apache.hadoop.mapreduce.JobID("test", 0);
    when(mockjob.getJobID()).thenReturn(jid);
    j.setJob(mockjob);
    JobID expected = new JobID("test", 0);
    assertEquals(expected, j.getAssignedJobID());
    verify(mockjob).getJobID();
}
Also used: ControlledJob (org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob), JobConf (org.apache.hadoop.mapred.JobConf), JobID (org.apache.hadoop.mapred.JobID), Test (org.junit.Test)

Example 65 with JobConf

Use of org.apache.hadoop.mapred.JobConf in project hadoop by apache.

From the class TestJobControl, method doJobControlTest:

/**
   * This is the main function for testing the JobControl class.
   * It first cleans all the dirs it will use. Then it generates some random text
   * data in TestJobControlData/indir. Then it creates 4 jobs:
   *      Job 1: copy data from indir to outdir_1
   *      Job 2: copy data from indir to outdir_2
   *      Job 3: copy data from outdir_1 and outdir_2 to outdir_3
   *      Job 4: copy data from outdir_3 to outdir_4
   * Jobs 1 and 2 have no dependencies. Job 3 depends on jobs 1 and 2.
   * Job 4 depends on job 3.
   *
   * Then it creates a JobControl object, adds the 4 jobs to it, and finally
   * creates a thread to run the JobControl object and monitor/report the job states.
   */
public static void doJobControlTest() throws Exception {
    Configuration defaults = new Configuration();
    FileSystem fs = FileSystem.get(defaults);
    Path rootDataDir = new Path(System.getProperty("test.build.data", "."), "TestJobControlData");
    Path indir = new Path(rootDataDir, "indir");
    Path outdir_1 = new Path(rootDataDir, "outdir_1");
    Path outdir_2 = new Path(rootDataDir, "outdir_2");
    Path outdir_3 = new Path(rootDataDir, "outdir_3");
    Path outdir_4 = new Path(rootDataDir, "outdir_4");
    JobControlTestUtils.cleanData(fs, indir);
    JobControlTestUtils.generateData(fs, indir);
    JobControlTestUtils.cleanData(fs, outdir_1);
    JobControlTestUtils.cleanData(fs, outdir_2);
    JobControlTestUtils.cleanData(fs, outdir_3);
    JobControlTestUtils.cleanData(fs, outdir_4);
    ArrayList<Job> dependingJobs = null;
    ArrayList<Path> inPaths_1 = new ArrayList<Path>();
    inPaths_1.add(indir);
    JobConf jobConf_1 = JobControlTestUtils.createCopyJob(inPaths_1, outdir_1);
    Job job_1 = new Job(jobConf_1, dependingJobs);
    ArrayList<Path> inPaths_2 = new ArrayList<Path>();
    inPaths_2.add(indir);
    JobConf jobConf_2 = JobControlTestUtils.createCopyJob(inPaths_2, outdir_2);
    Job job_2 = new Job(jobConf_2, dependingJobs);
    ArrayList<Path> inPaths_3 = new ArrayList<Path>();
    inPaths_3.add(outdir_1);
    inPaths_3.add(outdir_2);
    JobConf jobConf_3 = JobControlTestUtils.createCopyJob(inPaths_3, outdir_3);
    dependingJobs = new ArrayList<Job>();
    dependingJobs.add(job_1);
    dependingJobs.add(job_2);
    Job job_3 = new Job(jobConf_3, dependingJobs);
    ArrayList<Path> inPaths_4 = new ArrayList<Path>();
    inPaths_4.add(outdir_3);
    JobConf jobConf_4 = JobControlTestUtils.createCopyJob(inPaths_4, outdir_4);
    dependingJobs = new ArrayList<Job>();
    dependingJobs.add(job_3);
    Job job_4 = new Job(jobConf_4, dependingJobs);
    JobControl theControl = new JobControl("Test");
    theControl.addJob((ControlledJob) job_1);
    theControl.addJob((ControlledJob) job_2);
    theControl.addJob(job_3);
    theControl.addJob(job_4);
    Thread theController = new Thread(theControl);
    theController.start();
    while (!theControl.allFinished()) {
        System.out.println("Jobs in waiting state: " + theControl.getWaitingJobs().size());
        System.out.println("Jobs in ready state: " + theControl.getReadyJobs().size());
        System.out.println("Jobs in running state: " + theControl.getRunningJobs().size());
        System.out.println("Jobs in success state: " + theControl.getSuccessfulJobs().size());
        System.out.println("Jobs in failed state: " + theControl.getFailedJobs().size());
        System.out.println("\n");
        try {
            Thread.sleep(5000);
        } catch (Exception e) {
            // ignore interruptions and keep polling
        }
    }
    System.out.println("Jobs are all done???");
    System.out.println("Jobs in waiting state: " + theControl.getWaitingJobs().size());
    System.out.println("Jobs in ready state: " + theControl.getReadyJobs().size());
    System.out.println("Jobs in running state: " + theControl.getRunningJobs().size());
    System.out.println("Jobs in success state: " + theControl.getSuccessfulJobs().size());
    System.out.println("Jobs in failed state: " + theControl.getFailedJobs().size());
    System.out.println("\n");
    if (job_1.getState() != Job.FAILED && job_1.getState() != Job.DEPENDENT_FAILED && job_1.getState() != Job.SUCCESS) {
        String states = "job_1:  " + job_1.getState() + "\n";
        throw new Exception("The state of job_1 is not in a complete state\n" + states);
    }
    if (job_2.getState() != Job.FAILED && job_2.getState() != Job.DEPENDENT_FAILED && job_2.getState() != Job.SUCCESS) {
        String states = "job_2:  " + job_2.getState() + "\n";
        throw new Exception("The state of job_2 is not in a complete state\n" + states);
    }
    if (job_3.getState() != Job.FAILED && job_3.getState() != Job.DEPENDENT_FAILED && job_3.getState() != Job.SUCCESS) {
        String states = "job_3:  " + job_3.getState() + "\n";
        throw new Exception("The state of job_3 is not in a complete state\n" + states);
    }
    if (job_4.getState() != Job.FAILED && job_4.getState() != Job.DEPENDENT_FAILED && job_4.getState() != Job.SUCCESS) {
        String states = "job_4:  " + job_4.getState() + "\n";
        throw new Exception("The state of job_4 is not in a complete state\n" + states);
    }
    if (job_1.getState() == Job.FAILED || job_2.getState() == Job.FAILED || job_1.getState() == Job.DEPENDENT_FAILED || job_2.getState() == Job.DEPENDENT_FAILED) {
        if (job_3.getState() != Job.DEPENDENT_FAILED) {
            String states = "job_1:  " + job_1.getState() + "\n";
            states = "job_2:  " + job_2.getState() + "\n";
            states = "job_3:  " + job_3.getState() + "\n";
            states = "job_4:  " + job_4.getState() + "\n";
            throw new Exception("The states of jobs 1, 2, 3, 4 are not consistent\n" + states);
        }
    }
    if (job_3.getState() == Job.FAILED || job_3.getState() == Job.DEPENDENT_FAILED) {
        if (job_4.getState() != Job.DEPENDENT_FAILED) {
            String states = "job_3:  " + job_3.getState() + "\n";
            states = "job_4:  " + job_4.getState() + "\n";
            throw new Exception("The states of jobs 3, 4 are not consistent\n" + states);
        }
    }
    theControl.stop();
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), FileSystem (org.apache.hadoop.fs.FileSystem), ArrayList (java.util.ArrayList), ControlledJob (org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob), JobConf (org.apache.hadoop.mapred.JobConf)
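
The pattern the test exercises reduces to a few lines. A minimal sketch, assuming confA, confB, and confC are already-built JobConfs (for example from JobControlTestUtils.createCopyJob) and that the enclosing method declares throws Exception, as the test does:

Job a = new Job(confA, null);            // no dependencies
Job b = new Job(confB, null);
ArrayList<Job> deps = new ArrayList<Job>();
deps.add(a);
deps.add(b);
Job c = new Job(confC, deps);            // runs only after a and b complete
JobControl control = new JobControl("sketch");
control.addJob(a);
control.addJob(b);
control.addJob(c);
// Run the control loop on its own thread and poll it from the caller.
Thread runner = new Thread(control);
runner.setDaemon(true);
runner.start();
while (!control.allFinished()) {
    Thread.sleep(500);
}
control.stop();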

Aggregations

JobConf (org.apache.hadoop.mapred.JobConf) 1037
Path (org.apache.hadoop.fs.Path) 510
Test (org.junit.Test) 317
FileSystem (org.apache.hadoop.fs.FileSystem) 264
IOException (java.io.IOException) 204
Configuration (org.apache.hadoop.conf.Configuration) 163
InputSplit (org.apache.hadoop.mapred.InputSplit) 110
ArrayList (java.util.ArrayList) 89
Text (org.apache.hadoop.io.Text) 82
File (java.io.File) 81
RunningJob (org.apache.hadoop.mapred.RunningJob) 67
Properties (java.util.Properties) 58
List (java.util.List) 49
HashMap (java.util.HashMap) 47
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 47
SequenceFile (org.apache.hadoop.io.SequenceFile) 45
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat) 44
Map (java.util.Map) 42
Job (org.apache.hadoop.mapreduce.Job) 42
LongWritable (org.apache.hadoop.io.LongWritable) 41