Use of org.apache.avro.mapred.AvroWrapper in project trevni (by cutting).
Class AvroTrevniInputFormat, method getRecordReader:
@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(InputSplit split, final JobConf job, Reporter reporter) throws IOException {
  final FileSplit file = (FileSplit) split;
  reporter.setStatus(file.toString());
  final AvroColumnReader.Params params = new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
  params.setModel(ReflectData.get());
  if (job.get(AvroJob.INPUT_SCHEMA) != null) // otherwise fall back to the schema stored in the file
    params.setSchema(AvroJob.getInputSchema(job));
  return new RecordReader<AvroWrapper<T>, NullWritable>() {

    private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);
    private float rows = reader.getRowCount();
    private long row;

    public AvroWrapper<T> createKey() {
      return new AvroWrapper<T>(null);
    }

    public NullWritable createValue() {
      return NullWritable.get();
    }

    public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      if (!reader.hasNext())
        return false;
      wrapper.datum(reader.next());
      row++;
      return true;
    }

    public float getProgress() throws IOException {
      return row / rows; // rows is a float, so this yields fractional progress
    }

    public long getPos() throws IOException {
      return row;
    }

    public void close() throws IOException {
      reader.close();
    }
  };
}
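For context, a minimal driver sketch (not from the project) showing how a job might be wired to this input format. The input path, the WeatherRecord class, and the org.apache.trevni.avro import path are assumptions for illustration; the AvroJob and JobConf calls are the standard old-API helpers. As the record reader above shows, setting the input schema is optional: without it, each file's own schema is used.

import org.apache.avro.mapred.AvroJob;
import org.apache.avro.reflect.ReflectData;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.trevni.avro.AvroTrevniInputFormat; // assumed package

public class TrevniReadDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(TrevniReadDriver.class);
    FileInputFormat.addInputPath(job, new Path(args[0])); // hypothetical input path
    // Optional: without this the reader falls back to each file's schema.
    AvroJob.setInputSchema(job, ReflectData.get().getSchema(WeatherRecord.class)); // hypothetical class
    // Set last so it overrides any default input format AvroJob may configure.
    job.setInputFormat(AvroTrevniInputFormat.class);
  }
}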
Use of org.apache.avro.mapred.AvroWrapper in project trevni (by cutting).
Class AvroTrevniOutputFormat, method getRecordWriter:
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job, final String name, Progressable prog) throws IOException {
  boolean isMapOnly = job.getNumReduceTasks() == 0;
  final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  // Copy configured metadata entries into the column file, stripping the
  // prefix; the stripped length must match the prefix filtered on, so it
  // is META_PREFIX here (not AvroJob.TEXT_PREFIX as in the original).
  for (Map.Entry<String, String> e : job)
    if (e.getKey().startsWith(META_PREFIX))
      meta.put(e.getKey().substring(META_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));
  final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
  final FileSystem fs = dir.getFileSystem(job);
  if (!fs.mkdirs(dir))
    throw new IOException("Failed to create directory: " + dir);
  final long blockSize = fs.getDefaultBlockSize();
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {

    private int part = 0;
    private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

    private void flush() throws IOException {
      OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
      try {
        writer.writeTo(out);
      } finally {
        out.close();
      }
      writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
    }

    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.write(wrapper.datum());
      if (writer.sizeEstimate() >= blockSize) // block full
        flush();
    }

    public void close(Reporter reporter) throws IOException {
      flush();
    }
  };
}
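A matching output-side driver sketch, again not from the project. It assumes the standard AvroJob helpers, a setMeta(JobConf, String, String) helper on AvroTrevniOutputFormat that stores META_PREFIX-keyed entries into the JobConf (picked up by the loop above), and a hypothetical path and record class.

import org.apache.avro.mapred.AvroJob;
import org.apache.avro.reflect.ReflectData;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.trevni.avro.AvroTrevniOutputFormat; // assumed package

public class TrevniWriteDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(TrevniWriteDriver.class);
    FileOutputFormat.setOutputPath(job, new Path(args[0])); // hypothetical output path
    AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WeatherRecord.class)); // hypothetical class
    // Entries stored this way end up in each column file's metadata via the
    // loop in getRecordWriter above (setMeta is assumed, see lead-in).
    AvroTrevniOutputFormat.setMeta(job, "created.by", "TrevniWriteDriver");
    // Set last so it overrides any default output format AvroJob may configure.
    job.setOutputFormat(AvroTrevniOutputFormat.class);
  }
}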
Use of org.apache.avro.mapred.AvroWrapper in project crunch (by cloudera).
Class AvroOutputFormat, method getRecordWriter:
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Schema schema = null;
  String outputName = conf.get("crunch.namedoutput");
  if (outputName != null && !outputName.isEmpty()) {
    schema = (new Schema.Parser()).parse(conf.get("avro.output.schema." + outputName));
  } else {
    schema = AvroJob.getOutputSchema(context.getConfiguration());
  }
  ReflectDataFactory factory = Avros.getReflectDataFactory(conf);
  final DataFileWriter<T> WRITER = new DataFileWriter<T>(factory.<T>getWriter());
  Path path = getDefaultWorkFile(context, org.apache.avro.mapred.AvroOutputFormat.EXT);
  WRITER.create(schema, path.getFileSystem(context.getConfiguration()).create(path));
  return new RecordWriter<AvroWrapper<T>, NullWritable>() {

    @Override
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      WRITER.append(wrapper.datum());
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      WRITER.close();
    }
  };
}
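Finally, a small caller-side sketch (not from Crunch) of how records reach a writer like the one above: AvroWrapper's only job is to box the Avro datum as the Hadoop key, and the writer appends wrapper.datum(). The emit helper and the use of GenericRecord are illustrative choices, not Crunch API.

import java.io.IOException;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;

public class AvroWrapperEmit {
  // Hands one datum to a new-API RecordWriter such as the one returned above.
  static void emit(RecordWriter<AvroWrapper<GenericRecord>, NullWritable> writer,
                   GenericRecord record) throws IOException, InterruptedException {
    writer.write(new AvroWrapper<GenericRecord>(record), NullWritable.get());
  }
}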