Use of org.apache.trevni.ColumnFileMetaData in project trevni by cutting.
The class AvroTrevniOutputFormat, method getRecordWriter:
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, final JobConf job, final String name, Progressable prog) throws IOException {
  boolean isMapOnly = job.getNumReduceTasks() == 0;
  final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);

  // Copy Trevni metadata properties from the job configuration,
  // stripping the "trevni.meta." prefix from each matching key.
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  for (Map.Entry<String, String> e : job)
    if (e.getKey().startsWith(META_PREFIX))
      meta.put(e.getKey().substring(META_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));

  final Path dir = FileOutputFormat.getTaskOutputPath(job, name);
  final FileSystem fs = dir.getFileSystem(job);
  if (!fs.mkdirs(dir))
    throw new IOException("Failed to create directory: " + dir);
  final long blockSize = fs.getDefaultBlockSize();

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    private int part = 0;
    private AvroColumnWriter<T> writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());

    // Write the buffered rows to a new part file, then start a fresh writer.
    private void flush() throws IOException {
      OutputStream out = fs.create(new Path(dir, "part-" + (part++) + EXT));
      try {
        writer.writeTo(out);
      } finally {
        out.close();
      }
      writer = new AvroColumnWriter<T>(schema, meta, ReflectData.get());
    }

    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.write(wrapper.datum());
      if (writer.sizeEstimate() >= blockSize) // block full
        flush();
    }

    public void close(Reporter reporter) throws IOException {
      flush();
    }
  };
}
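For context, a driver wires this output format up through the JobConf before the record writer above runs. Here is a minimal sketch, assuming the setMeta helper AvroTrevniOutputFormat provides for keys under the trevni.meta. prefix; the class name TrevniJobSetup, the output schema, and the metadata value are illustrative placeholders, not code from the project:

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.JobConf;
import org.apache.trevni.avro.AvroTrevniOutputFormat;

public class TrevniJobSetup { // hypothetical driver class
  public static JobConf configure() {
    JobConf job = new JobConf();
    // Declare the schema of the records the job emits.
    AvroJob.setOutputSchema(job, Schema.create(Schema.Type.STRING));
    job.setOutputFormat(AvroTrevniOutputFormat.class);
    // Stored as "trevni.meta.created.by" in the configuration and copied
    // into each part file's ColumnFileMetaData by getRecordWriter above.
    AvroTrevniOutputFormat.setMeta(job, "created.by", "example-driver");
    return job;
  }
}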
Use of org.apache.trevni.ColumnFileMetaData in project trevni by cutting.
The class CreateRandomTool, method run:
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err, List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));
  Schema schema = Schema.parse(schemaFile);
  // Generate `count` random instances of the schema and shred them into columns.
  AvroColumnWriter<Object> writer = new AvroColumnWriter<Object>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count))
    writer.write(datum);
  writer.writeTo(outputFile);
  return 0;
}
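To sanity-check the tool's output, the file can be read back with AvroColumnReader. A minimal sketch, assuming the reader's Params builder; the file names input.avsc and random.trv stand in for whatever arguments were passed to the tool above:

import java.io.File;
import org.apache.avro.Schema;
import org.apache.trevni.avro.AvroColumnReader;

public class ReadBack { // hypothetical verification utility
  public static void main(String[] argv) throws Exception {
    // Placeholder paths: the schema file and output file given to the tool above.
    Schema schema = Schema.parse(new File("input.avsc"));
    AvroColumnReader<Object> reader = new AvroColumnReader<Object>(
        new AvroColumnReader.Params(new File("random.trv")).setSchema(schema));
    try {
      while (reader.hasNext())
        System.out.println(reader.next());
    } finally {
      reader.close();
    }
  }
}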
Use of org.apache.trevni.ColumnFileMetaData in project trevni by cutting.
The class TestShredder, method checkWrite:
private void checkWrite(Schema schema) throws IOException {
  // Shred COUNT random instances of the schema into a column file.
  AvroColumnWriter<Object> writer = new AvroColumnWriter<Object>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, COUNT))
    writer.write(datum);
  writer.writeTo(FILE);
}
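The FILE and COUNT constants and the calling test are outside this excerpt. A plausible sketch of how the test class could declare them and invoke checkWrite; the constant values and the two-field record schema are illustrative assumptions, not the project's verbatim code:

// Hypothetical values for the constants referenced in checkWrite above.
private static final File FILE = new File(System.getProperty("test.dir", "target"), "test.trv");
private static final int COUNT = 100;

@Test
public void testRecordWrite() throws Exception {
  // An illustrative two-field record schema.
  Schema schema = Schema.parse("{\"type\":\"record\",\"name\":\"Test\",\"fields\":["
      + "{\"name\":\"x\",\"type\":\"int\"},"
      + "{\"name\":\"y\",\"type\":\"string\"}]}");
  checkWrite(schema);
}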
Use of org.apache.trevni.ColumnFileMetaData in project trevni by cutting.
The class TestCases, method runCase:
private void runCase(File dir) throws Exception {
  Schema schema = Schema.parse(new File(dir, "input.avsc"));
  List<Object> data = fromJson(schema, new File(dir, "input.json"));

  // write full data
  AvroColumnWriter<Object> writer = new AvroColumnWriter<Object>(schema, new ColumnFileMetaData());
  for (Object datum : data)
    writer.write(datum);
  writer.writeTo(FILE);

  // test that the full schema reads correctly
  checkRead(schema, data);

  // test that sub-schemas read correctly
  for (File f : dir.listFiles())
    if (f.isDirectory()) {
      Schema s = Schema.parse(new File(f, "sub.avsc"));
      checkRead(s, fromJson(s, new File(f, "sub.json")));
    }
}
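The checkRead helper called twice above is elided from this snippet. A minimal sketch of its likely shape, assuming the same FILE constant, JUnit's assertEquals/assertFalse, and AvroColumnReader's ability to project the file through a narrower sub-schema; the body is an illustration, not the project's verbatim code:

private void checkRead(Schema schema, List<Object> expected) throws IOException {
  // Open the column file, projecting it through the given (possibly sub-) schema.
  AvroColumnReader<Object> reader = new AvroColumnReader<Object>(
      new AvroColumnReader.Params(FILE).setSchema(schema));
  try {
    for (Object datum : expected)
      assertEquals(datum, reader.next());
    assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}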