Use of org.apache.hadoop.mapreduce.RecordWriter in project hadoop by apache.
The class TestRecovery, method writeOutput.
private void writeOutput(TaskAttempt attempt, Configuration conf) throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf,
      TypeConverter.fromYarn(attempt.getID()));
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(tContext);
  NullWritable nullWritable = NullWritable.get();
  try {
    // Exercise every combination of real, null, and NullWritable keys and values.
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}
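
The eight write calls above exercise every combination of real, null, and NullWritable keys and values. TextOutputFormat's LineRecordWriter treats null and NullWritable identically: a missing side is simply omitted, the tab separator appears only when both sides are present, and a fully-null pair produces no line at all. A minimal sketch of that dispatch, assuming the standard LineRecordWriter semantics (the class and field names here are illustrative stand-ins, not Hadoop's verbatim source):

  import java.io.DataOutputStream;
  import java.io.IOException;
  import org.apache.hadoop.io.NullWritable;

  // Sketch of the null-handling rules in TextOutputFormat's LineRecordWriter.
  class LineWriteSketch {
    private static final byte[] NEWLINE = "\n".getBytes();
    private static final byte[] SEPARATOR = "\t".getBytes();
    private final DataOutputStream out;

    LineWriteSketch(DataOutputStream out) {
      this.out = out;
    }

    void write(Object key, Object value) throws IOException {
      boolean nullKey = key == null || key instanceof NullWritable;
      boolean nullValue = value == null || value instanceof NullWritable;
      if (nullKey && nullValue) {
        return;                       // write(null, null) emits nothing
      }
      if (!nullKey) {
        out.write(key.toString().getBytes());
      }
      if (!(nullKey || nullValue)) {
        out.write(SEPARATOR);         // tab only when both sides are present
      }
      if (!nullValue) {
        out.write(value.toString().getBytes());
      }
      out.write(NEWLINE);
    }
  }

Under these rules the eight writes in the test yield six lines: the two fully-null writes emit nothing, four writes emit a bare key or value, and the two complete pairs emit key, tab, value.
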
Use of org.apache.hadoop.mapreduce.RecordWriter in project crunch by cloudera.
The class CrunchMultipleOutputs, method getRecordWriter.
// By being synchronized, MultipleOutputTask can be used with a
// MultithreadedMapper.
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext,
    String baseFileName) throws IOException, InterruptedException {
  // Look for the record writer in the cache.
  RecordWriter writer = recordWriters.get(baseFileName);
  // If not in the cache, create a new one.
  if (writer == null) {
    // Get the record writer from the context's output format.
    taskContext.getConfiguration().set(BASE_OUTPUT_NAME, baseFileName);
    try {
      writer = ((OutputFormat) ReflectionUtils.newInstance(
          taskContext.getOutputFormatClass(), taskContext.getConfiguration()))
          .getRecordWriter(taskContext);
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
    // Wrap the writer to increment counters, if enabled.
    if (countersEnabled) {
      writer = new RecordWriterWithCounter(writer, baseFileName, context);
    }
    // Add the record writer to the cache.
    recordWriters.put(baseFileName, writer);
  }
  return writer;
}
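
Each distinct base output name ends up with its own cached writer, so the companion teardown has to drain the cache. A hedged sketch of what that close path looks like, reusing the recordWriters and context fields from the snippet above (the loop itself is an assumption about the rest of the class, not Crunch's verbatim code):

  // Hypothetical close path for the cache above: every writer handed out by
  // getRecordWriter() is closed exactly once when the task finishes.
  public void close() throws IOException, InterruptedException {
    for (RecordWriter writer : recordWriters.values()) {
      writer.close(context);
    }
    recordWriters.clear();
  }
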
Use of org.apache.hadoop.mapreduce.RecordWriter in project crunch by cloudera.
The class AvroOutputFormat, method getRecordWriter.
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Schema schema = null;
  String outputName = conf.get("crunch.namedoutput");
  if (outputName != null && !outputName.isEmpty()) {
    schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
  } else {
    schema = AvroJob.getOutputSchema(context.getConfiguration());
  }
  ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
  final DataFileWriter<T> WRITER = new DataFileWriter<T>(factory.<T>getWriter());
  Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
  WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}
Use of org.apache.hadoop.mapreduce.RecordWriter in project cdap by caskdata.
The class MultiWriter, method close.
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
  try {
    Map<PartitionKey, RecordWriter<?, ?>> recordWriters = new HashMap<>();
    recordWriters.putAll(this.recordWriters);
    MultipleOutputs.closeRecordWriters(recordWriters, contexts);
    taskContext.flushOperations();
  } catch (Exception e) {
    throw new IOException(e);
  } finally {
    dynamicPartitioner.destroy();
  }
}
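
A helper like closeRecordWriters has to close every cached writer even when one of them fails, otherwise a single bad writer would leak the rest. A simplified sketch of that pattern with a single shared context (CDAP's real helper also takes a per-writer context map; the names below are illustrative, not CDAP's exact code):

  import java.io.IOException;
  import java.util.Map;
  import org.apache.hadoop.mapreduce.RecordWriter;
  import org.apache.hadoop.mapreduce.TaskAttemptContext;

  // Sketch: close every writer, remember the first failure, rethrow at the
  // end so the remaining writers are still closed.
  final class CloseAll {
    static <K> void closeRecordWriters(Map<K, RecordWriter<?, ?>> writers,
        TaskAttemptContext context) throws IOException, InterruptedException {
      IOException first = null;
      for (RecordWriter<?, ?> writer : writers.values()) {
        try {
          writer.close(context);
        } catch (IOException e) {
          if (first == null) {
            first = e;            // keep closing the remaining writers
          }
        }
      }
      if (first != null) {
        throw first;
      }
    }
  }
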
Use of org.apache.hadoop.mapreduce.RecordWriter in project ignite by apache.
The class HadoopV2Task, method prepareWriter.
/**
 * Puts the writer into the Hadoop context and returns the associated output format instance.
 *
 * @param jobCtx Job context.
 * @return Output format.
 * @throws IgniteCheckedException In case of Grid exception.
 * @throws InterruptedException In case of interrupt.
 */
protected OutputFormat prepareWriter(JobContext jobCtx)
    throws IgniteCheckedException, InterruptedException {
  try {
    OutputFormat outputFormat = getOutputFormat(jobCtx);
    assert outputFormat != null;
    OutputCommitter outCommitter = outputFormat.getOutputCommitter(hadoopCtx);
    if (outCommitter != null)
      outCommitter.setupTask(hadoopCtx);
    RecordWriter writer = outputFormat.getRecordWriter(hadoopCtx);
    hadoopCtx.writer(writer);
    return outputFormat;
  } catch (IOException | ClassNotFoundException e) {
    throw new IgniteCheckedException(e);
  }
}
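
prepareWriter leaves the record writer in the Hadoop context; the natural counterpart closes that writer and commits the attempt through the same committer. A hedged sketch of that teardown (method names such as the no-argument hadoopCtx.writer() getter are assumptions for illustration, not Ignite's verified API):

  // Hypothetical counterpart to prepareWriter(): close the writer stored in
  // the context, then commit the attempt if the committer requires it.
  protected void commitWriter(OutputFormat outputFormat)
      throws IgniteCheckedException, InterruptedException {
    try {
      RecordWriter writer = hadoopCtx.writer();
      if (writer != null)
        writer.close(hadoopCtx);
      OutputCommitter committer = outputFormat.getOutputCommitter(hadoopCtx);
      if (committer.needsTaskCommit(hadoopCtx))
        committer.commitTask(hadoopCtx);
    } catch (IOException e) {
      throw new IgniteCheckedException(e);
    }
  }
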