Search in sources :

Example 16 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project beam by apache.

the class AvroPipelineTest method populateGenericFile.

/**
 * Writes the given records to {@code this.inputFile} as an Avro data file.
 *
 * @param genericRecords records to append, in iteration order
 * @param schema schema passed to the datum writer and used to create the data file
 * @throws IOException if the file cannot be opened or any write fails
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    // Both resources are declared here so the stream is released even when create() or
    // append() throws; resources are closed in reverse order (writer first, then stream).
    try (FileOutputStream outputStream = new FileOutputStream(this.inputFile);
         DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
        dataFileWriter.create(schema, outputStream);
        for (GenericRecord record : genericRecords) {
            dataFileWriter.append(record);
        }
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 17 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project drill by apache.

the class AvroTestUtil method generateLinkedList.

/**
 * Writes a temporary Avro file of "LongList" records, where each record carries a long
 * {@code value} and a nested {@code next} record of the same schema, and returns the file's
 * absolute path. The temp file is registered for deletion on JVM exit.
 *
 * <p>Each appended record has {@code value == i} and a {@code next} record whose value is
 * {@code i + 1}; the last appended record's {@code next} is a record with no fields set.
 *
 * @return absolute path of the generated file
 * @throws Exception if the temp file cannot be created or written
 */
public static String generateLinkedList() throws Exception {
    final File file = File.createTempFile("avro-linkedlist", ".avro");
    file.deleteOnExit();
    final Schema schema = SchemaBuilder.record("LongList")
        .namespace("org.apache.drill.exec.store.avro")
        .aliases("LinkedLongs")
        .fields()
        .name("value").type().optional().longType()
        .name("next").type().optional().type("LongList")
        .endRecord();
    final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema));
    try {
        // create() is inside the try so the writer (and the file it opened) is closed
        // even when writing the file header fails.
        writer.create(schema, file);
        GenericRecord previousRecord = null;
        for (int i = 0; i < RECORD_COUNT; i++) {
            // The first record is fresh; every later one is the "next" record that was
            // attached to its predecessor on the previous iteration.
            GenericRecord record = (GenericRecord) (previousRecord == null ? new GenericData.Record(schema) : previousRecord.get("next"));
            record.put("value", (long) i);
            // The predecessor is only appended now, after its "next" has received a value.
            if (previousRecord != null) {
                writer.append(previousRecord);
            }
            GenericRecord nextRecord = new GenericData.Record(record.getSchema());
            record.put("next", nextRecord);
            previousRecord = record;
        }
        // Flush the trailing record; skipped when the loop never ran (RECORD_COUNT <= 0)
        // so that null is never handed to append().
        if (previousRecord != null) {
            writer.append(previousRecord);
        }
    } finally {
        writer.close();
    }
    return file.getAbsolutePath();
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) GenericData(org.apache.avro.generic.GenericData)

Example 18 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project cdap by caskdata.

the class FileWriterHelper method generateAvroFile.

/**
 * Generate an Avro file of schema (key String, value String) containing the records
 * ({@code "<prefix>i"}, {@code "#i"}) for {@code start <= i < end}. The file is written using the
 * passed-in output stream, which is closed before this method returns, whether or not writing
 * succeeded.
 *
 * @param out stream that receives the Avro container data; closed by this method
 * @param prefix string prepended to {@code i} to form each record's key
 * @param start first index to write (inclusive)
 * @param end index at which to stop (exclusive)
 * @throws IOException if writing, flushing or closing fails
 */
public static void generateAvroFile(OutputStream out, String prefix, int start, int end) throws IOException {
    Schema schema = Schema.createRecord("kv", null, null, false);
    schema.setFields(ImmutableList.of(new Schema.Field("key", Schema.create(Schema.Type.STRING), null, null), new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null)));
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    // try-with-resources closes the writer first and then the stream, also when create()
    // fails. Close errors are propagated rather than swallowed: a failed flush on close
    // means the file is incomplete and the caller must know.
    try (OutputStream stream = out;
         DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, stream);
        for (int i = start; i < end; i++) {
            GenericRecord kv = new GenericData.Record(schema);
            kv.put("key", prefix + i);
            kv.put("value", "#" + i);
            dataFileWriter.append(kv);
        }
    }
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 19 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project cdap by caskdata.

the class AvroStreamBodyConsumerTest method generateAvroFile.

/**
 * Writes {@code recordCount} {@code Record} objects to the given file as a Snappy-compressed
 * Avro data file. Record {@code i} is built as {@code new Record(i, "Record number " + i)}.
 *
 * @param file destination file
 * @param recordCount number of records to write; ids run from 0 to {@code recordCount - 1}
 * @return the same {@code file} that was passed in
 * @throws IOException if the file cannot be created or written
 */
private File generateAvroFile(File file, int recordCount) throws IOException {
    // Schema: int "id" plus a "name" that is a union of null and string.
    Schema schema = Schema.createRecord("Record", null, null, false);
    schema.setFields(ImmutableList.of(new Schema.Field("id", Schema.create(Schema.Type.INT), null, null), new Schema.Field("name", Schema.createUnion(ImmutableList.of(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, null)));
    // try-with-resources keeps a failure from close() from hiding the original write error,
    // which a close() call in a finally block would do.
    try (DataFileWriter<Record> writer = new DataFileWriter<>(new ReflectDatumWriter<>(Record.class))) {
        // The codec has to be chosen before create() writes the file header.
        writer.setCodec(CodecFactory.snappyCodec());
        writer.create(schema, file);
        for (int i = 0; i < recordCount; i++) {
            writer.append(new Record(i, "Record number " + i));
        }
    }
    return file;
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter)

Example 20 with DataFileWriter

use of org.apache.avro.file.DataFileWriter in project spark-dataflow by cloudera.

the class AvroPipelineTest method populateGenericFile.

/**
 * Serializes the supplied records into {@code this.inputFile} as an Avro data file.
 *
 * @param genericRecords records to write, appended in iteration order
 * @param schema schema handed to the datum writer and used when creating the data file
 * @throws IOException if opening the file or writing to it fails
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    // The stream is a managed resource too, so a failure in create() or append() can no
    // longer leave the file handle open; the writer is closed before the stream.
    try (FileOutputStream outputStream = new FileOutputStream(this.inputFile);
         DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
        dataFileWriter.create(schema, outputStream);
        for (GenericRecord record : genericRecords) {
            dataFileWriter.append(record);
        }
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Aggregations

DataFileWriter (org.apache.avro.file.DataFileWriter): 34, Schema (org.apache.avro.Schema): 21, GenericRecord (org.apache.avro.generic.GenericRecord): 21, GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 17, File (java.io.File): 14, FileOutputStream (java.io.FileOutputStream): 7, SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 7, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5, IOException (java.io.IOException): 4, GenericData (org.apache.avro.generic.GenericData): 4, GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 4, ArrayList (java.util.ArrayList): 3, HashMap (java.util.HashMap): 3, DataFileStream (org.apache.avro.file.DataFileStream): 3, Person (org.apache.crunch.test.Person): 3, Test (org.junit.Test): 3, ByteArrayInputStream (java.io.ByteArrayInputStream): 2, Random (java.util.Random): 2, ThreadLocalRandom (java.util.concurrent.ThreadLocalRandom): 2, CodecFactory (org.apache.avro.file.CodecFactory): 2