Example 31 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestMRCJCFileOutputCommitter, method testAbort.

@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());
    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
Also used: JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job)
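
For contrast with the abort path tested above, here is a minimal sketch of the same committer's commit path. The output directory and attempt-ID string are illustrative assumptions, not values taken from the test.

public void commitLifecycleSketch() throws IOException {
    Configuration conf = new Configuration();
    // Hypothetical output path and attempt ID, for illustration only.
    Path outDir = new Path("/tmp/out");
    TaskAttemptID taskID = TaskAttemptID.forName("attempt_200707121733_0001_m_000005_0");
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskID.toString());
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // ... a RecordWriter would produce files under committer.getWorkPath() here ...
    if (committer.needsTaskCommit(tContext)) {
        // promote the task's files out of the temporary attempt directory
        committer.commitTask(tContext);
    }
    // finalize the job output and write the _SUCCESS marker (enabled by default)
    committer.commitJob(jContext);
}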

Example 32 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestMRMultipleOutputs, method _testMultipleOutputs.

protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    String input = "a\nb\nc\nd\ne\nc\nd\ne";
    Configuration conf = createJobConf();
    Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);
    job.setJobName("mo");
    MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, withCounters);
    job.setMapperClass(MOMap.class);
    job.setReducerClass(MOReduce.class);
    job.waitForCompletion(true);
    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OUT_DIR.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(OUT_DIR);
    for (FileStatus status : statuses) {
        String fileName = status.getPath().getName();
        if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001") || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000") || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000") || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000") || fileName.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000") || fileName.equals("c-r-00000") || fileName.equals("d-r-00000") || fileName.equals("e-r-00000")) {
            valueBasedOutputCount++;
        }
    }
    assertEquals(9, namedOutputCount);
    assertEquals(5, valueBasedOutputCount);
    // assert TextOutputFormat files correctness
    Path textOutput = new Path(FileOutputFormat.getOutputPath(job), "text-r-00000");
    BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(textOutput)));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith(TEXT));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);
    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);
    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());
    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals(SEQUENCE, value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);
    if (withCounters) {
        CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
        assertEquals(9, counters.size());
        assertEquals(4, counters.findCounter(TEXT).getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
        assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
        assertEquals(2, counters.findCounter("a").getValue());
        assertEquals(2, counters.findCounter("b").getValue());
        assertEquals(4, counters.findCounter("c").getValue());
        assertEquals(4, counters.findCounter("d").getValue());
        assertEquals(4, counters.findCounter("e").getValue());
    }
}
Also used: Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) InputStreamReader(java.io.InputStreamReader) CounterGroup(org.apache.hadoop.mapreduce.CounterGroup) BufferedReader(java.io.BufferedReader) Text(org.apache.hadoop.io.Text) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) IntWritable(org.apache.hadoop.io.IntWritable)
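
MOMap and MOReduce are referenced above but not shown. The following is a sketch of what a reducer using MultipleOutputs typically looks like; the real test class may differ in which named outputs it writes.

public static class MOReduceSketch extends Reducer<LongWritable, Text, LongWritable, Text> {

    private MultipleOutputs<LongWritable, Text> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<LongWritable, Text>(context);
    }

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text value : values) {
            // write to the "text" named output registered via addNamedOutput
            mos.write("text", key, value);
            // value-based output: the base path names the file, e.g. a-r-00000
            mos.write(key, value, value.toString());
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // flush and close every named-output writer
        mos.close();
    }
}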

Example 33 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestMRSequenceFileAsBinaryOutputFormat, method testBinary.

@Test
public void testBinary() throws IOException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "outseq");
    Random r = new Random();
    long seed = r.nextLong();
    r.setSeed(seed);
    FileOutputFormat.setOutputPath(job, outdir);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    BytesWritable bkey = new BytesWritable();
    BytesWritable bval = new BytesWritable();
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, BytesWritable> outputFormat = new SequenceFileAsBinaryOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, BytesWritable> writer = outputFormat.getRecordWriter(context);
    IntWritable iwritable = new IntWritable();
    DoubleWritable dwritable = new DoubleWritable();
    DataOutputBuffer outbuf = new DataOutputBuffer();
    LOG.info("Creating data by SequenceFileAsBinaryOutputFormat");
    try {
        for (int i = 0; i < RECORDS; ++i) {
            iwritable = new IntWritable(r.nextInt());
            iwritable.write(outbuf);
            bkey.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            dwritable = new DoubleWritable(r.nextDouble());
            dwritable.write(outbuf);
            bval.set(outbuf.getData(), 0, outbuf.getLength());
            outbuf.reset();
            writer.write(bkey, bval);
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);
    InputFormat<IntWritable, DoubleWritable> iformat = new SequenceFileInputFormat<IntWritable, DoubleWritable>();
    int count = 0;
    r.setSeed(seed);
    SequenceFileInputFormat.setInputPaths(job, outdir);
    LOG.info("Reading data by SequenceFileInputFormat");
    for (InputSplit split : iformat.getSplits(job)) {
        RecordReader<IntWritable, DoubleWritable> reader = iformat.createRecordReader(split, context);
        MapContext<IntWritable, DoubleWritable, BytesWritable, BytesWritable> mcontext =
                new MapContextImpl<IntWritable, DoubleWritable, BytesWritable, BytesWritable>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            int sourceInt;
            double sourceDouble;
            while (reader.nextKeyValue()) {
                sourceInt = r.nextInt();
                sourceDouble = r.nextDouble();
                iwritable = reader.getCurrentKey();
                dwritable = reader.getCurrentValue();
                assertEquals("Keys don't match: " + "*" + iwritable.get() + ":" + sourceInt + "*", sourceInt, iwritable.get());
                assertTrue("Vals don't match: " + "*" + dwritable.get() + ":" + sourceDouble + "*", Double.compare(dwritable.get(), sourceDouble) == 0);
                ++count;
            }
        } finally {
            reader.close();
        }
    }
    assertEquals("Some records not found", RECORDS, count);
}
Also used: Path(org.apache.hadoop.fs.Path) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) Configuration(org.apache.hadoop.conf.Configuration) MapContextImpl(org.apache.hadoop.mapreduce.task.MapContextImpl) SequenceFileInputFormat(org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat) BytesWritable(org.apache.hadoop.io.BytesWritable) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Random(java.util.Random) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
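
The DataOutputBuffer steps in the write loop above are a general pattern for turning any Writable into raw bytes. A small helper distilling it (the method name is ours, not part of the Hadoop API):

static BytesWritable toBytesWritable(Writable w) throws IOException {
    // The Writable serializes itself into the buffer ...
    DataOutputBuffer outbuf = new DataOutputBuffer();
    w.write(outbuf);
    // ... and only the valid prefix of the backing array is copied out.
    BytesWritable bytes = new BytesWritable();
    bytes.set(outbuf.getData(), 0, outbuf.getLength());
    return bytes;
}

With such a helper, the body of the write loop reduces to something like writer.write(toBytesWritable(iwritable), toBytesWritable(dwritable)).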

Example 34 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestMRSequenceFileAsBinaryOutputFormat, method testcheckOutputSpecsForbidRecordCompression.

@Test
public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
    Job job = Job.getInstance();
    FileSystem fs = FileSystem.getLocal(job.getConfiguration());
    Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
    fs.delete(outputdir, true);
    // Without an output path, FileOutputFormat.checkOutputSpecs would throw
    // an InvalidJobConfException.
    FileOutputFormat.setOutputPath(job, outputdir);
    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    try {
        new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    } catch (Exception e) {
        fail("Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat:Caught " + e.getClass().getName());
    }
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
    try {
        new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
        fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    } catch (InvalidJobConfException ie) {
        // expected
    } catch (Exception e) {
        fail("Expected " + InvalidJobConfException.class.getName() + " but caught " + e.getClass().getName());
    }
}
Also used: Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) InvalidJobConfException(org.apache.hadoop.mapred.InvalidJobConfException) Job(org.apache.hadoop.mapreduce.Job) IOException(java.io.IOException) Test(org.junit.Test)
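
On JUnit 4.13 or later, the try/fail pattern above can be written more compactly with assertThrows. A sketch, assuming a static import of org.junit.Assert.assertThrows; this is not the idiom the Hadoop test itself uses:

// checkOutputSpecs must reject record compression with InvalidJobConfException
SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
assertThrows(InvalidJobConfException.class,
        () -> new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job));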

Example 35 with Job

Use of org.apache.hadoop.mapreduce.Job in project hadoop by apache.

From the class TestDataDrivenDBInputFormat, method testDateSplits.

@Test
public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        // Drop the table if it already exists.
        s.executeUpdate("DROP TABLE " + DATE_TABLE);
    } catch (SQLException e) {
        // Ignored: the table may not exist yet.
    }
    // Create the table.
    s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " DATE)");
    s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
    s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
    s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
    s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");
    // commit this tx.
    connection.commit();
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///");
    FileSystem fs = FileSystem.getLocal(conf);
    fs.delete(new Path(OUT_DIR), true);
    // now do a dd import
    Job job = Job.getInstance(conf);
    job.setMapperClass(ValMapper.class);
    job.setReducerClass(Reducer.class);
    job.setMapOutputKeyClass(DateCol.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(DateCol.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(1);
    job.getConfiguration().setInt("mapreduce.map.tasks", 2);
    FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
    DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, null, null);
    DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);
    boolean ret = job.waitForCompletion(true);
    assertTrue("job failed", ret);
    // Check to see that we imported as much as we thought we did.
    assertEquals("Did not get all the records", 4, job.getCounters().findCounter(TaskCounter.REDUCE_OUTPUT_RECORDS).getValue());
}
Also used: Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SQLException(java.sql.SQLException) PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) FileSystem(org.apache.hadoop.fs.FileSystem) Job(org.apache.hadoop.mapreduce.Job) Test(org.junit.Test)
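
DateCol is referenced above but not defined. A sketch of the DBWritable/WritableComparable shape it would need for DataDrivenDBInputFormat.setInput and the shuffle to work; the field and method bodies are illustrative, not the actual test class (assumed imports: java.sql.Date, java.sql.ResultSet, java.io.DataInput, java.io.DataOutput, org.apache.hadoop.mapreduce.lib.db.DBWritable, org.apache.hadoop.io.WritableComparable).

public static class DateCol implements DBWritable, WritableComparable<DateCol> {

    private Date d;

    // DBWritable: populated from the JDBC ResultSet during the import
    public void readFields(ResultSet rs) throws SQLException {
        d = rs.getDate(1);
    }

    // DBWritable: only needed when writing back to the database
    public void write(PreparedStatement ps) throws SQLException {
        ps.setDate(1, d);
    }

    // Writable: serialization across the shuffle
    public void readFields(DataInput in) throws IOException {
        d = new Date(in.readLong());
    }

    public void write(DataOutput out) throws IOException {
        out.writeLong(d.getTime());
    }

    // Ordering plus consistent hashing, required for a map output key
    public int compareTo(DateCol other) {
        return d.compareTo(other.d);
    }

    @Override
    public int hashCode() {
        return d.hashCode();
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof DateCol && d.equals(((DateCol) o).d);
    }
}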

Aggregations

Job (org.apache.hadoop.mapreduce.Job): 886 usages
Path (org.apache.hadoop.fs.Path): 498
Configuration (org.apache.hadoop.conf.Configuration): 434
Test (org.junit.Test): 259
IOException (java.io.IOException): 135
FileSystem (org.apache.hadoop.fs.FileSystem): 128
File (java.io.File): 77
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 58
ArrayList (java.util.ArrayList): 55
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 55
Scan (org.apache.hadoop.hbase.client.Scan): 45
FileStatus (org.apache.hadoop.fs.FileStatus): 44
NutchJob (org.apache.nutch.util.NutchJob): 43
JobConf (org.apache.hadoop.mapred.JobConf): 42
Text (org.apache.hadoop.io.Text): 39
NutchConfiguration (org.apache.nutch.util.NutchConfiguration): 36
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 35
JobContext (org.apache.hadoop.mapreduce.JobContext): 35
GenericOptionsParser (org.apache.hadoop.util.GenericOptionsParser): 35
CommandLine (org.apache.commons.cli.CommandLine): 33