Examples with RecordWriter - org.apache.hadoop.mapreduce.RecordWriter

Example 1 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class TestRecovery method writeOutput.

private void writeOutput(TaskAttempt attempt, Configuration conf) throws Exception {
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID()));
    TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    NullWritable nullWritable = NullWritable.get();
    try {
        theRecordWriter.write(key1, val1);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val1);
        theRecordWriter.write(nullWritable, val2);
        theRecordWriter.write(key2, nullWritable);
        theRecordWriter.write(key1, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key2, val2);
    } finally {
        theRecordWriter.close(tContext);
    }
    OutputFormat outputFormat = ReflectionUtils.newInstance(tContext.getOutputFormatClass(), conf);
    OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
    committer.commitTask(tContext);
}

Also used : OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TextOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) OutputFormat(org.apache.hadoop.mapreduce.OutputFormat) TextOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat) FileOutputFormat(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) NullWritable(org.apache.hadoop.io.NullWritable)

Example 2 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class MapFileOutputFormat method getRecordWriter.

public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    CompressionCodec codec = null;
    CompressionType compressionType = CompressionType.NONE;
    if (getCompressOutput(context)) {
        // find the kind of compression to do
        compressionType = SequenceFileOutputFormat.getOutputCompressionType(context);
        // find the right codec
        Class<?> codecClass = getOutputCompressorClass(context, DefaultCodec.class);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }
    Path file = getDefaultWorkFile(context, "");
    FileSystem fs = file.getFileSystem(conf);
    // ignore the progress parameter, since MapFile is local
    final MapFile.Writer out = new MapFile.Writer(conf, fs, file.toString(), context.getOutputKeyClass().asSubclass(WritableComparable.class), context.getOutputValueClass().asSubclass(Writable.class), compressionType, codec, context);
    return new RecordWriter<WritableComparable<?>, Writable>() {

        public void write(WritableComparable<?> key, Writable value) throws IOException {
            out.append(key, value);
        }

        public void close(TaskAttemptContext context) throws IOException {
            out.close();
        }
    };
}

Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Writable(org.apache.hadoop.io.Writable) MapFile(org.apache.hadoop.io.MapFile) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) WritableComparable(org.apache.hadoop.io.WritableComparable) FileSystem(org.apache.hadoop.fs.FileSystem) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) CompressionType(org.apache.hadoop.io.SequenceFile.CompressionType) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter)

Example 3 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class TestMRCJCFileOutputCommitter method testCommitter.

@SuppressWarnings("unchecked")
public void testCommitter() throws Exception {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do commit
    committer.commitTask(tContext);
    committer.commitJob(jContext);
    // validate output
    File expectedFile = new File(new Path(outDir, partFile).toString());
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append(key1).append('\t').append(val1).append("\n");
    expectedOutput.append(val1).append("\n");
    expectedOutput.append(val2).append("\n");
    expectedOutput.append(key2).append("\n");
    expectedOutput.append(key1).append("\n");
    expectedOutput.append(key2).append('\t').append(val2).append("\n");
    String output = UtilsForTests.slurp(expectedFile);
    assertEquals(output, expectedOutput.toString());
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job)

Example 4 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class TestMRCJCFileOutputCommitter method testAbort.

@SuppressWarnings("unchecked")
public void testAbort() throws IOException, InterruptedException {
    Job job = Job.getInstance();
    FileOutputFormat.setOutputPath(job, outDir);
    Configuration conf = job.getConfiguration();
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
    // do setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);
    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    writeOutput(theRecordWriter, tContext);
    // do abort
    committer.abortTask(tContext);
    File expectedFile = new File(new Path(committer.getWorkPath(), partFile).toString());
    assertFalse("task temp dir still exists", expectedFile.exists());
    committer.abortJob(jContext, JobStatus.State.FAILED);
    expectedFile = new File(new Path(outDir, FileOutputCommitter.PENDING_DIR_NAME).toString());
    assertFalse("job temp dir still exists", expectedFile.exists());
    assertEquals("Output directory not empty", 0, new File(outDir.toString()).listFiles().length);
    FileUtil.fullyDelete(new File(outDir.toString()));
}

Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JobContext(org.apache.hadoop.mapreduce.JobContext) Job(org.apache.hadoop.mapreduce.Job)

Example 5 with RecordWriter

use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.

the class Chain method addReducer.

/**
   * Add reducer that reads from context and writes to a queue
   */
@SuppressWarnings("unchecked")
void addReducer(TaskInputOutputContext inputContext, ChainBlockingQueue<KeyValuePair<?, ?>> outputQueue) throws IOException, InterruptedException {
    Class<?> keyOutClass = rConf.getClass(REDUCER_OUTPUT_KEY_CLASS, Object.class);
    Class<?> valueOutClass = rConf.getClass(REDUCER_OUTPUT_VALUE_CLASS, Object.class);
    RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, outputQueue, rConf);
    Reducer.Context reducerContext = createReduceContext(rw, (ReduceContext) inputContext, rConf);
    ReduceRunner runner = new ReduceRunner(reducerContext, reducer, rw);
    threads.add(runner);
}

Also used : RecordWriter(org.apache.hadoop.mapreduce.RecordWriter) WrappedReducer(org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) Reducer(org.apache.hadoop.mapreduce.Reducer)

Aggregations

RecordWriter (org.apache.hadoop.mapreduce.RecordWriter)26 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)19 Configuration (org.apache.hadoop.conf.Configuration)16 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)12 Job (org.apache.hadoop.mapreduce.Job)11 JobContext (org.apache.hadoop.mapreduce.JobContext)11 IOException (java.io.IOException)10 Path (org.apache.hadoop.fs.Path)10 JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl)10 MapFile (org.apache.hadoop.io.MapFile)9 File (java.io.File)8 FileSystem (org.apache.hadoop.fs.FileSystem)4 NullWritable (org.apache.hadoop.io.NullWritable)4 OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter)4 FileOutputCommitter (org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter)4 WritableComparable (org.apache.hadoop.io.WritableComparable)3 OutputFormat (org.apache.hadoop.mapreduce.OutputFormat)3 FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat)3 TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat)3 FileNotFoundException (java.io.FileNotFoundException)2