Example usage of org.apache.orc.mapreduce.OrcMapreduceRecordWriter in the Apache incubator-gobblin project.
Taken from the writeOrcRecordsInFile method of the OrcCompactionTaskTest class.
/**
 * Writes the given ORC records to a file at {@code path}, using {@code schema} as the
 * writer schema.
 *
 * <p>Fix over the original: the record writer is now closed in a {@code finally} block,
 * so the underlying ORC {@link Writer} is released (and the ORC footer flushed) even when
 * one of the {@code write} calls throws. The original leaked the writer on failure.
 *
 * @param path destination file path for the ORC output
 * @param schema ORC schema applied via {@link OrcFile.WriterOptions#setSchema}
 * @param orcStructs records to write, in order; each is written with a {@link NullWritable} key
 * @throws Exception if writer creation, record writing, or close fails
 */
private void writeOrcRecordsInFile(Path path, TypeDescription schema, List<OrcStruct> orcStructs) throws Exception {
  Configuration configuration = new Configuration();
  OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).setSchema(schema);
  Writer writer = OrcFile.createWriter(path, options);
  OrcMapreduceRecordWriter recordWriter = new OrcMapreduceRecordWriter(writer);
  try {
    for (OrcStruct orcRecord : orcStructs) {
      recordWriter.write(NullWritable.get(), orcRecord);
    }
  } finally {
    // close() flushes and releases the wrapped ORC Writer; a fresh TaskAttemptContext is
    // sufficient here because OrcMapreduceRecordWriter does not read task state on close.
    recordWriter.close(new TaskAttemptContextImpl(configuration, new TaskAttemptID()));
  }
}
Example usage of org.apache.orc.mapreduce.OrcMapreduceRecordWriter in the Apache incubator-gobblin project.
Taken from the getRecordWriter method of the OrcKeyCompactorOutputFormat class.
/**
 * Creates the {@link RecordWriter} used to emit ORC output for this task attempt.
 *
 * <p>Overridden because the superclass hard-codes the {@code ".orc"} file extension; here
 * the extension is read from configuration (key {@code COMPACTION_OUTPUT_EXTENSION},
 * defaulting to {@code "orc"}) so callers can customize it.
 *
 * @param taskAttemptContext supplies the configuration that determines the file extension,
 *        the writer options, and the row batch size
 * @return a {@link RecordWriter} that writes ORC objects to the task's default work file
 * @throws IOException if the underlying ORC writer cannot be created
 */
@Override
public RecordWriter getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException {
  Configuration jobConf = taskAttemptContext.getConfiguration();

  // Configuration-driven extension instead of the superclass's hard-coded ".orc".
  String fileExtension = "." + jobConf.get(COMPACTION_OUTPUT_EXTENSION, "orc");
  Path outputPath = getDefaultWorkFile(taskAttemptContext, fileExtension);

  // Writer options come from the ORC mapred output format, with Gobblin's memory manager
  // swapped in for ORC's default.
  OrcFile.WriterOptions writerOptions =
      org.apache.orc.mapred.OrcOutputFormat.buildOptions(jobConf).memory(new GobblinOrcMemoryManager(jobConf));
  Writer orcWriter = OrcFile.createWriter(outputPath, writerOptions);

  int batchSize =
      jobConf.getInt(GobblinOrcWriter.ORC_WRITER_BATCH_SIZE, GobblinOrcWriter.DEFAULT_ORC_WRITER_BATCH_SIZE);
  log.info("Creating OrcMapreduceRecordWriter with row batch size = {}", batchSize);
  return new OrcMapreduceRecordWriter(orcWriter, batchSize);
}
Aggregations