Search in sources :

Example 31 with DataFileReader

use of org.apache.avro.file.DataFileReader in project sling by apache.

From the class AvroContentSerializer, method readAvroResources.

/**
 * Deserializes the given bytes (an Avro data-file container, schema included)
 * into a collection of {@link AvroShallowResource} instances.
 *
 * @param bytes serialized Avro data-file content
 * @return the decoded resources, in file order
 * @throws IOException if the bytes are not a valid Avro data file
 */
private Collection<AvroShallowResource> readAvroResources(byte[] bytes) throws IOException {
    DatumReader<AvroShallowResource> datumReader = new SpecificDatumReader<AvroShallowResource>(AvroShallowResource.class);
    Collection<AvroShallowResource> avroResources = new LinkedList<AvroShallowResource>();
    // try-with-resources replaces the manual try/finally close of the reader
    try (DataFileReader<AvroShallowResource> dataFileReader =
            new DataFileReader<AvroShallowResource>(new SeekableByteArrayInput(bytes), datumReader)) {
        for (AvroShallowResource avroResource : dataFileReader) {
            avroResources.add(avroResource);
        }
    }
    return avroResources;
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput) LinkedList(java.util.LinkedList)

Example 32 with DataFileReader

use of org.apache.avro.file.DataFileReader in project spark-dataflow by cloudera.

From the class AvroPipelineTest, method readGenericFile.

/**
 * Reads back every record from the single Avro output shard written by the pipeline.
 *
 * @return all records from the shard, in file order
 * @throws IOException if the shard cannot be opened or is not a valid Avro data file
 */
private List<GenericRecord> readGenericFile() throws IOException {
    List<GenericRecord> result = Lists.newArrayList();
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
    File shard = new File(outputDir + "-00000-of-00001");
    // reader is closed automatically by try-with-resources
    try (DataFileReader<GenericRecord> fileReader = new DataFileReader<>(shard, reader)) {
        while (fileReader.hasNext()) {
            result.add(fileReader.next());
        }
    }
    return result;
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 33 with DataFileReader

use of org.apache.avro.file.DataFileReader in project nifi by apache.

From the class TestMergeContent, method getGenericRecordMap.

/**
 * Reads all records from merged Avro content and indexes them by one field.
 *
 * @param data   bytes of an Avro data file (the merged content)
 * @param schema reader schema used to decode the records
 * @param key    name of the record field whose string value becomes the map key
 * @return records keyed by the string form of their {@code key} field;
 *         records with duplicate key values overwrite earlier ones
 * @throws IOException if the data is not a valid Avro data file
 */
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException {
    // create a reader for the merged content
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    SeekableByteArrayInput input = new SeekableByteArrayInput(data);
    // read all the records into a map to verify all the records are there
    Map<String, GenericRecord> records = new HashMap<>();
    // try-with-resources: the original leaked the DataFileReader
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader)) {
        while (dataFileReader.hasNext()) {
            GenericRecord user = dataFileReader.next();
            records.put(user.get(key).toString(), user);
        }
    }
    return records;
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) HashMap(java.util.HashMap) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) SeekableByteArrayInput(org.apache.avro.file.SeekableByteArrayInput)

Example 34 with DataFileReader

use of org.apache.avro.file.DataFileReader in project avro-kafka-storm by ransilberman.

From the class MainTest, method testDataFile.

/**
 * Round-trip test: writes three copies of a generic record (including a union
 * "subrecord" field) to an Avro data file, then reads them back and checks
 * the embedded schema and the field values.
 *
 * @throws IOException if writing or reading the data file fails
 */
@Test
public void testDataFile() throws IOException {
    // a single File suffices; the original used two identical fileOut/fileIn vars
    File dataFile = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    // index the union branches of the "subrecord" field by name
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    // write the file; try-with-resources guarantees close even if append throws
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer)) {
        dataFileWriter.create(schema, dataFile);
        dataFileWriter.append(datum);
        dataFileWriter.append(datum);
        dataFileWriter.append(datum);
    }
    // read the file; the original never closed this reader
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(dataFile, reader)) {
        // fixed message typo: "Scema" -> "Schema"
        assertThat("Schema is the same", schema, is(dataFileReader.getSchema()));
        for (GenericRecord record : dataFileReader) {
            assertThat(record.get("siteId").toString(), is("28280110"));
            assertThat(record.get("eventType").toString(), is("PLine"));
        }
    }
}
Also used : HashMap(java.util.HashMap) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)

Example 35 with DataFileReader

use of org.apache.avro.file.DataFileReader in project storm by apache.

From the class AvroGenericRecordBoltTest, method fileIsGoodAvro.

/**
 * Verifies that the file at the given HDFS path is a readable Avro data file:
 * copies it to a local temp file, then iterates every record (which throws if
 * the container or records are malformed).
 *
 * @param path HDFS path of the file to check
 * @throws IOException if copying fails or the content is not valid Avro
 */
private void fileIsGoodAvro(Path path) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (FSDataInputStream in = fs.open(path, 0);
        FileOutputStream out = new FileOutputStream("target/FOO.avro")) {
        byte[] buffer = new byte[100];
        int bytesRead;
        // -1 is the canonical end-of-stream sentinel for InputStream.read
        while ((bytesRead = in.read(buffer)) != -1) {
            out.write(buffer, 0, bytesRead);
        }
    }
    java.io.File file = new File("target/FOO.avro");
    try {
        try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
            GenericRecord user = null;
            while (dataFileReader.hasNext()) {
                user = dataFileReader.next(user);
            }
        }
    } finally {
        // delete the temp copy even if reading throws; the original skipped
        // cleanup on failure
        file.delete();
    }
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) FileOutputStream(java.io.FileOutputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) File(java.io.File)

Aggregations

DataFileReader (org.apache.avro.file.DataFileReader)46 GenericRecord (org.apache.avro.generic.GenericRecord)28 File (java.io.File)26 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)21 Schema (org.apache.avro.Schema)20 Test (org.junit.Test)10 ArrayList (java.util.ArrayList)9 IOException (java.io.IOException)8 Test (org.testng.annotations.Test)7 SeekableInput (org.apache.avro.file.SeekableInput)6 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)6 Configuration (org.apache.hadoop.conf.Configuration)6 ReflectDatumReader (org.apache.avro.reflect.ReflectDatumReader)5 SeekableByteArrayInput (org.apache.avro.file.SeekableByteArrayInput)4 FsInput (org.apache.avro.mapred.FsInput)4 SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader)4 Utf8 (org.apache.avro.util.Utf8)4 JsonObject (com.google.gson.JsonObject)2 AvroDag (edu.snu.mist.formats.avro.AvroDag)2 Date (java.sql.Date)2