Example use of org.apache.avro.file.DataFileWriter in project beam by apache: class AvroPipelineTest, method populateGenericFile.
/**
 * Writes the given records to {@code this.inputFile} as an Avro container file.
 *
 * @param genericRecords records to write, all conforming to {@code schema}
 * @param schema the Avro schema used for the file header and the datum writer
 * @throws IOException if the file cannot be created or a record cannot be written
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
  GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
  // Both resources in one try-with-resources: the original closed outputStream
  // outside any try block, leaking the stream whenever DataFileWriter creation
  // or a write threw. DataFileWriter.close() also closes the underlying stream,
  // and FileOutputStream.close() is idempotent, so the double close is safe.
  try (FileOutputStream outputStream = new FileOutputStream(this.inputFile);
      DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
    dataFileWriter.create(schema, outputStream);
    for (GenericRecord record : genericRecords) {
      dataFileWriter.append(record);
    }
  }
}
Example use of org.apache.avro.file.DataFileWriter in project drill by apache: class AvroTestUtil, method generateLinkedList.
/**
 * Generates a temporary Avro file containing {@code RECORD_COUNT} recursively
 * linked "LongList" records (each record's "next" field holds the following record).
 *
 * @return absolute path of the generated file (deleted on JVM exit)
 * @throws Exception if the file cannot be created or written
 */
public static String generateLinkedList() throws Exception {
  final File file = File.createTempFile("avro-linkedlist", ".avro");
  file.deleteOnExit();
  // Self-referential schema: "next" optionally points at another LongList record.
  final Schema schema = SchemaBuilder.record("LongList")
      .namespace("org.apache.drill.exec.store.avro")
      .aliases("LinkedLongs")
      .fields()
      .name("value").type().optional().longType()
      .name("next").type().optional().type("LongList")
      .endRecord();
  // try-with-resources: the original called writer.create() before entering the
  // try block, so a failure in create() leaked the writer.
  try (DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema))) {
    writer.create(schema, file);
    GenericRecord previousRecord = null;
    for (int i = 0; i < RECORD_COUNT; i++) {
      // Reuse the placeholder created as the previous record's "next" field so the
      // chain stays linked; the first iteration starts a fresh record.
      GenericRecord record = (GenericRecord)
          (previousRecord == null ? new GenericData.Record(schema) : previousRecord.get("next"));
      record.put("value", (long) i);
      // A record is only appended once its "next" placeholder has been attached,
      // i.e. one iteration after it was filled in.
      if (previousRecord != null) {
        writer.append(previousRecord);
      }
      GenericRecord nextRecord = new GenericData.Record(record.getSchema());
      record.put("next", nextRecord);
      previousRecord = record;
    }
    // Guard added: the original appended unconditionally and would NPE inside
    // Avro if RECORD_COUNT were 0.
    if (previousRecord != null) {
      writer.append(previousRecord);
    }
  }
  return file.getAbsolutePath();
}
Example use of org.apache.avro.file.DataFileWriter in project cdap by caskdata: class FileWriterHelper, method generateAvroFile.
/**
* Generate an Avro file of schema (key String, value String) containing the records ("<prefix>i", "#i")
* for start <= i < end. The file is written using the passed-in output stream.
*/
/**
 * Generates an Avro file of schema (key String, value String) containing the records
 * ("&lt;prefix&gt;i", "#i") for start &lt;= i &lt; end, written to the passed-in output stream.
 * The stream is closed before this method returns.
 *
 * @param out destination stream; closed by this method
 * @param prefix prepended to each index to form the "key" field
 * @param start first index (inclusive)
 * @param end last index (exclusive); if end &lt;= start an empty file is written
 * @throws IOException if the Avro header or a record cannot be written
 */
public static void generateAvroFile(OutputStream out, String prefix, int start, int end) throws IOException {
  Schema schema = Schema.createRecord("kv", null, null, false);
  schema.setFields(ImmutableList.of(
      new Schema.Field("key", Schema.create(Schema.Type.STRING), null, null),
      new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null)));
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
  try {
    // create() moved inside the try: the original called it before the
    // try/finally, so a failure writing the header skipped both closes.
    dataFileWriter.create(schema, out);
    for (int i = start; i < end; i++) {
      GenericRecord kv = new GenericData.Record(schema);
      kv.put("key", prefix + i);
      kv.put("value", "#" + i);
      dataFileWriter.append(kv);
    }
  } finally {
    // NOTE(review): closeQuietly swallows flush/close failures, which can hide
    // truncated output; kept for behavioral compatibility with existing callers.
    Closeables.closeQuietly(dataFileWriter);
    Closeables.closeQuietly(out);
  }
}
Example use of org.apache.avro.file.DataFileWriter in project cdap by caskdata: class AvroStreamBodyConsumerTest, method generateAvroFile.
/**
 * Writes {@code recordCount} Snappy-compressed Record entries to the given file
 * using Avro reflection, and returns the same file for convenience.
 *
 * @param file destination file, overwritten with the Avro container data
 * @param recordCount number of records to generate
 * @return the {@code file} argument
 * @throws IOException if the file cannot be created or written
 */
private File generateAvroFile(File file, int recordCount) throws IOException {
  // Schema: { id: int, name: union(null, string) }.
  Schema schema = Schema.createRecord("Record", null, null, false);
  Schema nullableString = Schema.createUnion(ImmutableList.of(
      Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)));
  schema.setFields(ImmutableList.of(
      new Schema.Field("id", Schema.create(Schema.Type.INT), null, null),
      new Schema.Field("name", nullableString, null, null)));
  // try-with-resources replaces the explicit try/finally close.
  try (DataFileWriter<Record> writer =
      new DataFileWriter<>(new ReflectDatumWriter<>(Record.class))) {
    writer.setCodec(CodecFactory.snappyCodec());
    writer.create(schema, file);
    for (int index = 0; index < recordCount; index++) {
      writer.append(new Record(index, "Record number " + index));
    }
  }
  return file;
}
Example use of org.apache.avro.file.DataFileWriter in project spark-dataflow by cloudera: class AvroPipelineTest, method populateGenericFile.
/**
 * Writes the given records to {@code this.inputFile} as an Avro container file.
 *
 * @param genericRecords records to write, all conforming to {@code schema}
 * @param schema the Avro schema used for the file header and the datum writer
 * @throws IOException if the file cannot be created or a record cannot be written
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
  GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
  // Both resources in one try-with-resources: the original closed outputStream
  // outside any try block, leaking the stream whenever DataFileWriter creation
  // or a write threw. DataFileWriter.close() also closes the underlying stream,
  // and FileOutputStream.close() is idempotent, so the double close is safe.
  try (FileOutputStream outputStream = new FileOutputStream(this.inputFile);
      DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
    dataFileWriter.create(schema, outputStream);
    for (GenericRecord record : genericRecords) {
      dataFileWriter.append(record);
    }
  }
}
Aggregations