Example 21 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project cdap by caskdata.

Class DynamicPartitionerWriterWrapper, method getTaskAttemptContext.

// returns a TaskAttemptContext whose configuration will reflect the specified newOutputName as the output path
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName) throws IOException {
    Job job = new Job(context.getConfiguration());
    DynamicPartitioningOutputFormat.setOutputName(job, newOutputName);
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }
    Path jobOutputPath = DynamicPartitioningOutputFormat.createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    DynamicPartitioningOutputFormat.setOutputPath(job, jobOutputPath);
    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
Also used: Path (org.apache.hadoop.fs.Path), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), Job (org.apache.hadoop.mapreduce.Job)
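
The key move above is cloning the configuration through a fresh Job so the overrides (output name, Avro named output, job-specific path) never leak back into the caller's context. A minimal standalone sketch of that clone-and-rewrap step; the helper name and property key are hypothetical, not from CDAP:

import java.io.IOException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hypothetical helper: rewraps a context around a copied configuration with
// one property overridden, leaving the original context untouched.
private static TaskAttemptContext withOverride(TaskAttemptContext context, String key, String value) throws IOException {
    // Job.getInstance copies the configuration, so the caller's context is not mutated.
    Job job = Job.getInstance(context.getConfiguration());
    job.getConfiguration().set(key, value);
    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}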

Example 22 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project druid by druid-io.

Class DruidParquetInputTest, method getFirstRecord.

private GenericRecord getFirstRecord(Job job, String parquetPath) throws IOException, InterruptedException {
    File testFile = new File(parquetPath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);
    DruidParquetInputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    reader.nextKeyValue();
    GenericRecord data = (GenericRecord) reader.getCurrentValue();
    reader.close();
    return data;
}
Also used: Path (org.apache.hadoop.fs.Path), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), RecordReader (org.apache.hadoop.mapreduce.RecordReader), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit), GenericRecord (org.apache.avro.generic.GenericRecord), File (java.io.File)
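
A hedged usage sketch for this helper; the resource path and the "timestamp" field name below are illustrative assumptions, not part of the Druid test:

// Illustrative caller; the Parquet path and field name are assumptions.
private void printFirstTimestamp() throws Exception {
    Job job = Job.getInstance(new Configuration());
    GenericRecord first = getFirstRecord(job, "src/test/resources/example.parquet");
    System.out.println(first.get("timestamp"));
}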

Example 23 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project druid by druid-io.

Class DruidOrcInputFormatTest, method testRead.

@Test
public void testRead() throws IOException, InterruptedException {
    InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    RecordReader reader = inputFormat.createRecordReader(split, context);
    OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();
    reader.initialize(split, context);
    reader.nextKeyValue();
    OrcStruct data = (OrcStruct) reader.getCurrentValue();
    MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);
    Assert.assertEquals(4, row.getEvent().keySet().size());
    Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
    Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
    Assert.assertEquals(col1, row.getEvent().get("col1"));
    Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
    reader.close();
}
Also used: OrcStruct (org.apache.hadoop.hive.ql.io.orc.OrcStruct), OrcNewInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat), InputFormat (org.apache.hadoop.mapreduce.InputFormat), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), RecordReader (org.apache.hadoop.mapreduce.RecordReader), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), DateTime (org.joda.time.DateTime), Test (org.junit.Test)
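
This test leans on fields (job, split, config, timestamp, col1, col2) prepared in a setup method elsewhere in the class. A minimal sketch of how the split and job could be built, following the same pattern as Example 22; the ORC resource path is an assumption:

// Hypothetical setup; "example.orc" is an assumed test resource, and the
// split covers the whole file, as in the Parquet example above.
private void setUpSplit() throws IOException {
    File orcFile = new File("src/test/resources/example.orc");
    Path path = new Path(orcFile.getAbsoluteFile().toURI());
    split = new FileSplit(path, 0, orcFile.length(), null);
    job = Job.getInstance(new Configuration());
}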

Example 24 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

Class TestRecovery, method writeBadOutput.

private void writeBadOutput(TaskAttempt attempt, Configuration conf) throws Exception {
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID()));
    TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
    NullWritable nullWritable = NullWritable.get();
    try {
        theRecordWriter.write(key2, val2);
        theRecordWriter.write(null, nullWritable);
        theRecordWriter.write(null, val2);
        theRecordWriter.write(nullWritable, val1);
        theRecordWriter.write(key1, nullWritable);
        theRecordWriter.write(key2, null);
        theRecordWriter.write(null, null);
        theRecordWriter.write(key1, val1);
    } finally {
        theRecordWriter.close(tContext);
    }
    OutputFormat outputFormat = ReflectionUtils.newInstance(tContext.getOutputFormatClass(), conf);
    OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
    committer.commitTask(tContext);
}
Also used: OutputCommitter (org.apache.hadoop.mapreduce.OutputCommitter), RecordWriter (org.apache.hadoop.mapreduce.RecordWriter), TextOutputFormat (org.apache.hadoop.mapreduce.lib.output.TextOutputFormat), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), OutputFormat (org.apache.hadoop.mapreduce.OutputFormat), FileOutputFormat (org.apache.hadoop.mapreduce.lib.output.FileOutputFormat), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), NullWritable (org.apache.hadoop.io.NullWritable)
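
TextOutputFormat's line writer skips any key or value that is null or a NullWritable, so the eight writes above emit lines with just a key, just a value, both, or neither. A standalone sketch of driving the writer outside a real job; the output directory and key/value strings are assumptions:

// Minimal sketch: drive a TextOutputFormat writer with a bare TaskAttemptID.
private void demoTextOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    // Standard property naming the file output directory; the path is an assumption.
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp/text-output-demo");
    TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    TextOutputFormat<Text, Text> format = new TextOutputFormat<>();
    RecordWriter<Text, Text> writer = format.getRecordWriter(ctx);
    try {
        writer.write(new Text("k"), new Text("v")); // line: k<TAB>v
        writer.write(null, new Text("v-only"));     // null key skipped; line: v-only
        writer.write(new Text("k-only"), null);     // null value skipped; line: k-only
    } finally {
        writer.close(ctx);
    }
}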

Example 25 with TaskAttemptContextImpl

Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.

Class MultipleOutputs, method getContext.

// Create a TaskAttemptContext for the named output, with the output format
// and output key/value types set in the context.
private TaskAttemptContext getContext(String nameOutput) throws IOException {
    TaskAttemptContext taskContext = taskContexts.get(nameOutput);
    if (taskContext != null) {
        return taskContext;
    }
    // The following trick leverages the instantiation of a record writer via
    // the job, thus supporting arbitrary output formats.
    Job job = Job.getInstance(context.getConfiguration());
    job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
    job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
    job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));
    taskContext = new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID(), new WrappedStatusReporter(context));
    taskContexts.put(nameOutput, taskContext);
    return taskContext;
}
Also used: TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)
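
For completeness, a hedged sketch of how MultipleOutputs is typically driven from a reducer, which is what ultimately exercises getContext; the named output "stats" and the key/value types are assumptions:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public static class DemoReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    private MultipleOutputs<Text, LongWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable v : values) {
            sum += v.get();
        }
        // The first write to "stats" builds and caches a TaskAttemptContextImpl
        // via getContext; subsequent writes reuse the cached context.
        mos.write("stats", key, new LongWritable(sum));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}

The driver would also need to register the named output, e.g. MultipleOutputs.addNamedOutput(job, "stats", TextOutputFormat.class, Text.class, LongWritable.class), so that getNamedOutputFormatClass and the key/value lookups in getContext can resolve.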

Aggregations

TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 47
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 38
Configuration (org.apache.hadoop.conf.Configuration): 35
File (java.io.File): 25
Job (org.apache.hadoop.mapreduce.Job): 23
Path (org.apache.hadoop.fs.Path): 22
TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID): 22
JobContext (org.apache.hadoop.mapreduce.JobContext): 21
JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl): 21
Test (org.junit.Test): 17
RecordWriter (org.apache.hadoop.mapreduce.RecordWriter): 12
LongWritable (org.apache.hadoop.io.LongWritable): 11
IOException (java.io.IOException): 10
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 10
MapFile (org.apache.hadoop.io.MapFile): 9
NullWritable (org.apache.hadoop.io.NullWritable): 6
ArrayList (java.util.ArrayList): 5
HashSet (java.util.HashSet): 5
FileAttribute (java.nio.file.attribute.FileAttribute): 4
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 4