
Example 41 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestChainErrors, method testChainSubmission.

/**
   * Tests errors during submission.
   * 
   * @throws Exception
   */
@Test
public void testChainSubmission() throws Exception {
    Configuration conf = createJobConf();
    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 0, 0, input);
    job.setJobName("chain");
    Throwable th = null;
    // output key,value classes of first map are not same as that of second map
    try {
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class, IntWritable.class, Text.class, null);
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class, LongWritable.class, Text.class, null);
    } catch (IllegalArgumentException iae) {
        th = iae;
    }
    assertTrue(th != null);
    th = null;
    // output key,value classes of the reducer are not the same as those of
    // the mapper added after it in the chain
    try {
        ChainReducer.setReducer(job, Reducer.class, LongWritable.class, Text.class, IntWritable.class, Text.class, null);
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class, LongWritable.class, Text.class, null);
    } catch (IllegalArgumentException iae) {
        th = iae;
    }
    assertTrue(th != null);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
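
The error both try blocks provoke is the chain's type check: each stage's declared output key/value classes must match the next stage's declared input classes, otherwise addMapper/setReducer throws IllegalArgumentException. For contrast, here is a minimal driver sketch of a chain that passes the check. It uses the identity Mapper and Reducer just like the test; the class name ValidChainJob and the argument-based paths are hypothetical, not part of the test above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ValidChainJob {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "valid-chain");
        job.setJarByClass(ValidChainJob.class);
        // First mapper reads what TextInputFormat produces: LongWritable offset, Text line.
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class, LongWritable.class, Text.class, null);
        // Second mapper's input classes match the first mapper's output classes, so it is accepted.
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class, LongWritable.class, Text.class, null);
        // Reducer's input classes likewise match the previous stage's output classes.
        ChainReducer.setReducer(job, Reducer.class, LongWritable.class, Text.class, LongWritable.class, Text.class, null);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}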

Example 42 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestChainErrors, method testChainMapNoOuptut.

/**
   * Tests a chain in which one of the maps consumes its input and emits no output.
   * 
   * @throws Exception
   */
@Test
public void testChainMapNoOuptut() throws Exception {
    Configuration conf = createJobConf();
    String expectedOutput = "";
    Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 0, input);
    job.setJobName("chain");
    ChainMapper.addMapper(job, ConsumeMap.class, IntWritable.class, Text.class, LongWritable.class, Text.class, null);
    ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class, LongWritable.class, Text.class, null);
    job.waitForCompletion(true);
    assertTrue("Job failed", job.isSuccessful());
    assertEquals("Outputs doesn't match", expectedOutput, MapReduceTestUtil.readOutput(outDir, conf));
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
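
ConsumeMap is defined elsewhere in TestChainErrors and is not shown in this snippet; conceptually it is a mapper that swallows every record, which is why the expected job output is the empty string. A minimal sketch of such a mapper (the name ConsumeAllMapper is hypothetical; the generic types mirror the classes declared in the addMapper call above) could look like this:

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// A mapper that never calls context.write(): every input record is consumed,
// nothing reaches the next stage of the chain, and the job output stays empty.
public class ConsumeAllMapper extends Mapper<IntWritable, Text, LongWritable, Text> {

    @Override
    protected void map(IntWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Intentionally empty: the record is consumed and nothing is emitted.
    }
}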

Example 43 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestDelegatingInputFormat, method testSplitting.

@SuppressWarnings("unchecked")
public void testSplitting() throws Exception {
    Job job = Job.getInstance();
    MiniDFSCluster dfs = null;
    try {
        dfs = new MiniDFSCluster.Builder(job.getConfiguration())
            .numDataNodes(4)
            .racks(new String[] { "/rack0", "/rack0", "/rack1", "/rack1" })
            .hosts(new String[] { "host0", "host1", "host2", "host3" })
            .build();
        FileSystem fs = dfs.getFileSystem();
        Path path = getPath("/foo/bar", fs);
        Path path2 = getPath("/foo/baz", fs);
        Path path3 = getPath("/bar/bar", fs);
        Path path4 = getPath("/bar/baz", fs);
        final int numSplits = 100;
        FileInputFormat.setMaxInputSplitSize(job, fs.getFileStatus(path).getLen() / numSplits);
        MultipleInputs.addInputPath(job, path, TextInputFormat.class, MapClass.class);
        MultipleInputs.addInputPath(job, path2, TextInputFormat.class, MapClass2.class);
        MultipleInputs.addInputPath(job, path3, KeyValueTextInputFormat.class, MapClass.class);
        MultipleInputs.addInputPath(job, path4, TextInputFormat.class, MapClass2.class);
        DelegatingInputFormat inFormat = new DelegatingInputFormat();
        int[] bins = new int[3];
        for (InputSplit split : (List<InputSplit>) inFormat.getSplits(job)) {
            assertTrue(split instanceof TaggedInputSplit);
            final TaggedInputSplit tis = (TaggedInputSplit) split;
            int index = -1;
            if (tis.getInputFormatClass().equals(KeyValueTextInputFormat.class)) {
                // path3
                index = 0;
            } else if (tis.getMapperClass().equals(MapClass.class)) {
                // path
                index = 1;
            } else {
                // path2 and path4
                index = 2;
            }
            bins[index]++;
        }
        assertEquals("count is not equal to num splits", numSplits, bins[0]);
        assertEquals("count is not equal to num splits", numSplits, bins[1]);
        assertEquals("count is not equal to 2 * num splits", numSplits * 2, bins[2]);
    } finally {
        if (dfs != null) {
            dfs.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) FileSystem(org.apache.hadoop.fs.FileSystem) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
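
MultipleInputs.addInputPath is the public entry point for what this test exercises: when a per-path mapper is supplied, the job is switched to DelegatingInputFormat, and every split comes back as a TaggedInputSplit that remembers which InputFormat and Mapper classes apply to it. A minimal driver sketch assuming two differently formatted inputs follows; the class name MultipleInputsJob and the nested mappers are hypothetical stand-ins, not the MapClass/MapClass2 used by the test.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MultipleInputsJob {

    // Handles plain text lines (TextInputFormat: LongWritable offset, Text line).
    public static class LineMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(new Text("line"), value);
        }
    }

    // Handles tab-separated records (KeyValueTextInputFormat: Text key, Text value).
    public static class KvMapper extends Mapper<Text, Text, Text, Text> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multiple-inputs");
        job.setJarByClass(MultipleInputsJob.class);
        // One InputFormat/Mapper pair per path; MultipleInputs installs
        // DelegatingInputFormat (and DelegatingMapper) under the hood.
        MultipleInputs.addInputPath(job, new Path(args[0]),
            TextInputFormat.class, LineMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]),
            KeyValueTextInputFormat.class, KvMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}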

Example 44 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestFixedLengthInputFormat, method testGzipWithTwoInputs.

/**
   * Test using the gzip codec with two input files.
   */
@Test(timeout = 5000)
public void testGzipWithTwoInputs() throws Exception {
    CompressionCodec gzip = new GzipCodec();
    localFs.delete(workDir, true);
    Job job = Job.getInstance(defaultConf);
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    format.setRecordLength(job.getConfiguration(), 5);
    ReflectionUtils.setConf(gzip, job.getConfiguration());
    FileInputFormat.setInputPaths(job, workDir);
    // Create two gzip-compressed files of fixed-length records, 5 bytes per record.
    writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "one  two  threefour five six  seveneightnine ten  ");
    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "ten  nine eightsevensix  five four threetwo  one  ");
    List<InputSplit> splits = format.getSplits(job);
    assertEquals("compressed splits == 2", 2, splits.size());
    FileSplit tmp = (FileSplit) splits.get(0);
    if (tmp.getPath().getName().equals("part2.txt.gz")) {
        splits.set(0, splits.get(1));
        splits.set(1, tmp);
    }
    List<String> results = readSplit(format, splits.get(0), job);
    assertEquals("splits[0] length", 10, results.size());
    assertEquals("splits[0][5]", "six  ", results.get(5));
    results = readSplit(format, splits.get(1), job);
    assertEquals("splits[1] length", 10, results.size());
    assertEquals("splits[1][0]", "ten  ", results.get(0));
    assertEquals("splits[1][1]", "nine ", results.get(1));
}
Also used : Path(org.apache.hadoop.fs.Path) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
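
FixedLengthInputFormat splits its input into records of a fixed byte length, and, as this test shows, works with compressed inputs as well (each gzip file becomes one unsplittable split, hence the two splits). A minimal driver sketch of the same configuration outside a test; the class name FixedLengthJob and the argument-based paths are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FixedLengthJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Every record is exactly 5 bytes; the reader hands each record to the
        // mapper as a BytesWritable keyed by its byte offset (LongWritable).
        FixedLengthInputFormat.setRecordLength(conf, 5);

        Job job = Job.getInstance(conf, "fixed-length");
        job.setJarByClass(FixedLengthJob.class);
        job.setInputFormatClass(FixedLengthInputFormat.class);
        job.setMapperClass(Mapper.class); // identity mapper, map-only sketch
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(BytesWritable.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}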

Example 45 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

The class TestFixedLengthInputFormat, method testZeroRecordLength.

/**
   * Test with record length set to 0
   */
@Test(timeout = 5000)
public void testZeroRecordLength() throws Exception {
    localFs.delete(workDir, true);
    Path file = new Path(workDir, "testFormat.txt");
    createFile(file, null, 10, 10);
    Job job = Job.getInstance(defaultConf);
    // Set the fixed length record length config property 
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    format.setRecordLength(job.getConfiguration(), 0);
    FileInputFormat.setInputPaths(job, workDir);
    List<InputSplit> splits = format.getSplits(job);
    boolean exceptionThrown = false;
    for (InputSplit split : splits) {
        try {
            TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
            RecordReader<LongWritable, BytesWritable> reader = format.createRecordReader(split, context);
            MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable> mcontext =
                new MapContextImpl<LongWritable, BytesWritable, LongWritable, BytesWritable>(
                    job.getConfiguration(), context.getTaskAttemptID(), reader,
                    null, null, MapReduceTestUtil.createDummyReporter(), split);
            reader.initialize(split, mcontext);
        } catch (IOException ioe) {
            exceptionThrown = true;
            LOG.info("Exception message:" + ioe.getMessage());
        }
    }
    assertTrue("Exception for zero record length:", exceptionThrown);
}
Also used : Path(org.apache.hadoop.fs.Path) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) LongWritable(org.apache.hadoop.io.LongWritable) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
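
The test relies on the record reader rejecting a non-positive record length with an IOException when initialize() is called. A small defensive check at job-setup time can surface the mistake before the job is submitted. This is a hypothetical helper, not part of the test; it assumes the length was (or should have been) set via FixedLengthInputFormat.setRecordLength and reads it back through FixedLengthInputFormat.getRecordLength, which returns 0 when the property is unset.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public final class RecordLengthCheck {
    private RecordLengthCheck() {}

    // Fail fast at submit time instead of waiting for the record reader to throw.
    public static void validate(Configuration conf) {
        int recordLength = FixedLengthInputFormat.getRecordLength(conf);
        if (recordLength <= 0) {
            throw new IllegalArgumentException(
                "Fixed record length must be > 0, but was " + recordLength);
        }
    }
}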

Aggregations

Job (org.apache.hadoop.mapreduce.Job) 886
Path (org.apache.hadoop.fs.Path) 498
Configuration (org.apache.hadoop.conf.Configuration) 434
Test (org.junit.Test) 259
IOException (java.io.IOException) 135
FileSystem (org.apache.hadoop.fs.FileSystem) 128
File (java.io.File) 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit) 58
ArrayList (java.util.ArrayList) 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext) 55
Scan (org.apache.hadoop.hbase.client.Scan) 45
FileStatus (org.apache.hadoop.fs.FileStatus) 44
NutchJob (org.apache.nutch.util.NutchJob) 43
JobConf (org.apache.hadoop.mapred.JobConf) 42
Text (org.apache.hadoop.io.Text) 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration) 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration) 35
JobContext (org.apache.hadoop.mapreduce.JobContext) 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser) 35
CommandLine (org.apache.commons.cli.CommandLine) 33