Search in sources :

Example 21 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project crunch by cloudera.

the class AvroFileReaderFactoryTest method testRead_ReflectReader.

/**
 * Writes one generic record using the schema that reflection derives from
 * {@code PojoPerson}, then checks that a reader factory built with the reflect
 * type reads it back as exactly one {@code PojoPerson} with the saved name.
 */
@Test
public void testRead_ReflectReader() throws IOException {
    // Arrange: persist a single record described by the reflected schema.
    Schema pojoSchema = ReflectData.get().getSchema(PojoPerson.class);
    GenericRecord written = new GenericData.Record(pojoSchema);
    written.put("name", "John Doe");
    populateGenericFile(Lists.newArrayList(written), pojoSchema);

    // Act: open the file through a factory configured with the reflect type.
    AvroFileReaderFactory<PojoPerson> readerFactory =
        new AvroFileReaderFactory<PojoPerson>(Avros.reflects(PojoPerson.class), new Configuration());
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path avroPath = new Path(this.avroFile.getAbsolutePath());
    Iterator<PojoPerson> people = readerFactory.read(localFs, avroPath);

    // Assert: one record comes back and it carries the saved name.
    PojoPerson firstPerson = people.next();
    assertEquals("John Doe", firstPerson.getName());
    assertFalse(people.hasNext());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 22 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project crunch by cloudera.

the class AvroFileReaderFactoryTest method testRead_SpecificReader.

/**
 * Writes one generic record using the {@code Person} schema, then checks that a
 * reader factory built with the specific {@code Person} type reads it back as a
 * single record equal to a {@code Person} populated with the same values.
 */
@Test
public void testRead_SpecificReader() throws IOException {
    // Arrange: persist a single generic record that follows the Person schema.
    GenericRecord written = new GenericData.Record(Person.SCHEMA$);
    written.put("name", "John Doe");
    written.put("age", 42);
    written.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(written), Person.SCHEMA$);

    // Act: open the file through a factory configured with the specific type.
    AvroFileReaderFactory<Person> readerFactory =
        new AvroFileReaderFactory<Person>(Avros.records(Person.class), new Configuration());
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    Path avroPath = new Path(this.avroFile.getAbsolutePath());
    Iterator<Person> people = readerFactory.read(localFs, avroPath);

    // Build the Person the reader is expected to produce.
    Person expected = new Person();
    expected.setAge(42);
    expected.setName("John Doe");
    List<CharSequence> expectedSiblings = Lists.<CharSequence>newArrayList("Jimmy", "Jane");
    expected.setSiblingnames(expectedSiblings);

    // Assert: one record comes back and it equals the expected Person.
    Person actual = people.next();
    assertEquals(expected, actual);
    assertFalse(people.hasNext());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Person(org.apache.crunch.test.Person) Test(org.junit.Test)

Example 23 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project crunch by cloudera.

the class AvroFileSourceTargetTest method testGeneric.

/**
 * Writes one generic record under a schema derived from {@code Person} (with
 * every occurrence of "Person" in the schema JSON replaced by "GenericPerson"),
 * reads the file back through a pipeline as generic records, and checks that
 * the materialized contents equal the single saved record.
 */
@Test
public void testGeneric() throws IOException {
    // Arrange: derive the renamed schema and persist one record that follows it.
    String renamedSchemaJson = Person.SCHEMA$.toString().replace("Person", "GenericPerson");
    Schema schema = new Schema.Parser().parse(renamedSchemaJson);
    GenericRecord written = new GenericData.Record(schema);
    written.put("name", "John Doe");
    written.put("age", 42);
    written.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
    populateGenericFile(Lists.newArrayList(written), schema);

    // Act: read the Avro file as a collection of generic records.
    Pipeline mrPipeline = new MRPipeline(AvroFileSourceTargetTest.class);
    String avroPath = avroFile.getAbsolutePath();
    PCollection<Record> records = mrPipeline.read(At.avroFile(avroPath, Avros.generics(schema)));
    List<Record> materialized = Lists.newArrayList(records.materialize());

    // Assert: the file yields exactly the record that was written.
    assertEquals(Lists.newArrayList(written), Lists.newArrayList(materialized));
}
Also used : Schema(org.apache.avro.Schema) MRPipeline(org.apache.crunch.impl.mr.MRPipeline) GenericRecord(org.apache.avro.generic.GenericRecord) Record(org.apache.avro.generic.GenericData.Record) GenericRecord(org.apache.avro.generic.GenericRecord) Pipeline(org.apache.crunch.Pipeline) MRPipeline(org.apache.crunch.impl.mr.MRPipeline) Test(org.junit.Test)

Example 24 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project crunch by cloudera.

the class AvroFileSourceTargetTest method populateGenericFile.

/**
 * Writes the given records to {@code this.avroFile} as an Avro data file.
 *
 * @param genericRecords records to append, in iteration order
 * @param schema schema used both for the datum writer and for the file header
 * @throws IOException if the file cannot be opened or written
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(this.avroFile);
    try {
        GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(schema);
        DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
        dataFileWriter.create(schema, outputStream);
        for (GenericRecord record : genericRecords) {
            dataFileWriter.append(record);
        }
        // Close the writer on the success path so buffered records reach the stream.
        dataFileWriter.close();
    } finally {
        // Release the file handle even when create/append throws; previously it leaked.
        outputStream.close();
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 25 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project druid by druid-io.

the class InlineSchemaAvroBytesDecoder method parse.

/**
 * Decodes an Avro binary payload into a {@code GenericRecord}, reading it with
 * a {@code GenericDatumReader} built from {@code schemaObj}.
 *
 * @param bytes buffer holding the binary-encoded Avro message
 * @return the decoded record
 * @throws ParseException if any exception is raised while decoding; the
 *         original exception is passed along as the cause
 */
@Override
public GenericRecord parse(ByteBuffer bytes) {
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schemaObj);
    final ByteBufferInputStream payload = new ByteBufferInputStream(Collections.singletonList(bytes));
    try {
        DecoderFactory decoderFactory = DecoderFactory.get();
        // No record or decoder instance is reused: both reuse arguments are null.
        GenericRecord decoded = datumReader.read(null, decoderFactory.binaryDecoder(payload, null));
        return decoded;
    } catch (Exception decodeFailure) {
        throw new ParseException(decodeFailure, "Fail to decode avro message!");
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) ByteBufferInputStream(org.apache.avro.util.ByteBufferInputStream) ParseException(io.druid.java.util.common.parsers.ParseException) GenericRecord(org.apache.avro.generic.GenericRecord) ParseException(io.druid.java.util.common.parsers.ParseException)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord) 262, Schema (org.apache.avro.Schema) 101, Test (org.junit.Test) 80, GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter) 46, File (java.io.File) 35, IOException (java.io.IOException) 34, GenericData (org.apache.avro.generic.GenericData) 30, GenericDatumReader (org.apache.avro.generic.GenericDatumReader) 30, ArrayList (java.util.ArrayList) 29, ByteArrayOutputStream (java.io.ByteArrayOutputStream) 27, DataFileWriter (org.apache.avro.file.DataFileWriter) 20, HashMap (java.util.HashMap) 19, ByteBuffer (java.nio.ByteBuffer) 18, BinaryEncoder (org.apache.avro.io.BinaryEncoder) 17, Field (org.apache.avro.Schema.Field) 14, DataFileStream (org.apache.avro.file.DataFileStream) 14, GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder) 14, Utf8 (org.apache.avro.util.Utf8) 14, Encoder (org.apache.avro.io.Encoder) 12, DatasetRepository (com.cloudera.cdk.data.DatasetRepository) 11