Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project cdap by caskdata.
The class DynamicPartitionerWriterWrapper, method getTaskAttemptContext.
// Returns a TaskAttemptContext whose configuration will reflect the specified newOutputName as the output path.
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName) throws IOException {
  Job job = new Job(context.getConfiguration());
  DynamicPartitioningOutputFormat.setOutputName(job, newOutputName);
  // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
  if (isAvroOutputFormat(getFileOutputFormat(context))) {
    job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
  }
  Path jobOutputPath = DynamicPartitioningOutputFormat.createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
  DynamicPartitioningOutputFormat.setOutputPath(job, jobOutputPath);
  return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
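All of these examples share the same core pattern: wrap a Configuration and a TaskAttemptID in a TaskAttemptContextImpl. A minimal, self-contained sketch of that pattern, assuming nothing from the CDAP code (the class name and output directory are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Any job settings placed in the Configuration are visible through the context.
    conf.set("mapreduce.output.fileoutputformat.outputdir", "file:///tmp/example-output");
    // A default TaskAttemptID is enough for local experiments; real tasks receive one from the framework.
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    System.out.println(context.getTaskAttemptID());
    System.out.println(context.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir"));
  }
}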
Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project druid by druid-io.
The class DruidParquetInputTest, method getFirstRecord.
private GenericRecord getFirstRecord(Job job, String parquetPath) throws IOException, InterruptedException {
  File testFile = new File(parquetPath);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);
  DruidParquetInputFormat inputFormat = ReflectionUtils.newInstance(DruidParquetInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  reader.initialize(split, context);
  reader.nextKeyValue();
  GenericRecord data = (GenericRecord) reader.getCurrentValue();
  reader.close();
  return data;
}
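A test could call this helper roughly as follows; this is a hedged sketch, and the Parquet file path is illustrative rather than the one used in the Druid test:

Job job = Job.getInstance(new Configuration());
// Read the first record of a local Parquet file and check that something came back.
GenericRecord record = getFirstRecord(job, "example/path/to/data.parquet");
Assert.assertNotNull(record);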
Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project druid by druid-io.
The class DruidOrcInputFormatTest, method testRead.
@Test
public void testRead() throws IOException, InterruptedException {
  InputFormat inputFormat = ReflectionUtils.newInstance(OrcNewInputFormat.class, job.getConfiguration());
  TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  RecordReader reader = inputFormat.createRecordReader(split, context);
  OrcHadoopInputRowParser parser = (OrcHadoopInputRowParser) config.getParser();
  reader.initialize(split, context);
  reader.nextKeyValue();
  OrcStruct data = (OrcStruct) reader.getCurrentValue();
  MapBasedInputRow row = (MapBasedInputRow) parser.parse(data);
  Assert.assertTrue(row.getEvent().keySet().size() == 4);
  Assert.assertEquals(new DateTime(timestamp), row.getTimestamp());
  Assert.assertEquals(parser.getParseSpec().getDimensionsSpec().getDimensionNames(), row.getDimensions());
  Assert.assertEquals(col1, row.getEvent().get("col1"));
  Assert.assertEquals(Arrays.asList(col2), row.getDimension("col2"));
  reader.close();
}
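The job, split, and config fields come from the test's setup. A hedged sketch of how the FileSplit for a local ORC file could be built, mirroring the Parquet example above (the file path is illustrative, not taken from the Druid test):

File testFile = new File("example/path/to/data.orc");
Path path = new Path(testFile.getAbsoluteFile().toURI());
// A single split covering the whole file; hosts are not needed for a local read.
FileSplit split = new FileSplit(path, 0, testFile.length(), null);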
Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.
The class TestRecovery, method writeBadOutput.
private void writeBadOutput(TaskAttempt attempt, Configuration conf) throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, TypeConverter.fromYarn(attempt.getID()));
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key2, val2);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val2);
    theRecordWriter.write(nullWritable, val1);
    theRecordWriter.write(key1, nullWritable);
    theRecordWriter.write(key2, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key1, val1);
  } finally {
    theRecordWriter.close(tContext);
  }
  OutputFormat outputFormat = ReflectionUtils.newInstance(tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
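The mix of null and NullWritable arguments works because TextOutputFormat's line writer skips null keys and values instead of failing. A minimal, hedged sketch of that behavior outside the test (the output path, keys, and values are illustrative, not from TestRecovery):

Configuration conf = new Configuration();
conf.set("mapreduce.output.fileoutputformat.outputdir", "file:///tmp/text-output-example");
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
TextOutputFormat<Text, IntWritable> format = new TextOutputFormat<>();
RecordWriter<Text, IntWritable> writer = format.getRecordWriter(context);
try {
  writer.write(new Text("key"), new IntWritable(1)); // written as "key<TAB>1"
  writer.write(null, new IntWritable(2));            // null key: only the value "2" is written
  writer.write(new Text("k"), null);                 // null value: only the key "k" is written
} finally {
  writer.close(context);
}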
Use of org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl in project hadoop by apache.
The class MultipleOutputs, method getContext.
// Create a taskAttemptContext for the named output with
// output format and output key/value types put in the context.
private TaskAttemptContext getContext(String nameOutput) throws IOException {
  TaskAttemptContext taskContext = taskContexts.get(nameOutput);
  if (taskContext != null) {
    return taskContext;
  }
  // The following trick leverages the instantiation of a record writer via
  // the job, thus supporting arbitrary output formats.
  Job job = Job.getInstance(context.getConfiguration());
  job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
  job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
  job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));
  taskContext = new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID(), new WrappedStatusReporter(context));
  taskContexts.put(nameOutput, taskContext);
  return taskContext;
}
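getContext is invoked lazily from MultipleOutputs.write the first time a named output is used, and the result is cached in taskContexts. From a job author's point of view, the surrounding usage looks roughly like this hedged sketch (the named output "text" and the key/value types are illustrative):

// Driver: declare the named output before submitting the job.
MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, Text.class, LongWritable.class);

// Reducer: the first mos.write("text", ...) call creates and caches the task context shown above.
public static class ExampleReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
  private MultipleOutputs<Text, LongWritable> mos;

  @Override
  protected void setup(Context context) {
    mos = new MultipleOutputs<>(context);
  }

  @Override
  protected void reduce(Text key, Iterable<LongWritable> values, Context context)
      throws IOException, InterruptedException {
    long sum = 0;
    for (LongWritable value : values) {
      sum += value.get();
    }
    mos.write("text", key, new LongWritable(sum));
  }

  @Override
  protected void cleanup(Context context) throws IOException, InterruptedException {
    mos.close();
  }
}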