Search in sources:

Example 6 with DataFileReader

use of org.apache.avro.file.DataFileReader in project beam by apache.

the class AvroIOTest method assertTestOutputs.

/**
 * Asserts that the sharded Avro output files exist and together contain exactly the expected
 * elements. Element order is not checked.
 *
 * <p>One file name per shard index in {@code [0, numShards)} is built with
 * {@code DefaultFilenamePolicy.constructName} using an empty suffix. Every such file must exist;
 * each is read as an Avro data file of strings and the contents of all files are pooled before
 * being compared with {@code expectedElements}.
 *
 * @param expectedElements the elements the output files must contain, in any order
 * @param numShards the number of shard files to look for
 * @param outputFilePrefix the prefix passed to the filename policy
 * @param shardNameTemplate the shard template passed to the filename policy
 * @throws IOException if an output file cannot be opened or read
 */
public static void assertTestOutputs(String[] expectedElements, int numShards, String outputFilePrefix, String shardNameTemplate) throws IOException {
    // Validate that the data written matches the expected elements (in any order)
    List<File> expectedFiles = new ArrayList<>();
    for (int i = 0; i < numShards; i++) {
        expectedFiles.add(new File(DefaultFilenamePolicy.constructName(outputFilePrefix, shardNameTemplate, "", /* no suffix */
        i, numShards)));
    }
    List<String> actualElements = new ArrayList<>();
    for (File outputFile : expectedFiles) {
        assertTrue("Expected output file " + outputFile.getName(), outputFile.exists());
        // Parameterize the datum reader explicitly; the raw type forced an unchecked conversion.
        ReflectDatumReader<String> datumReader = new ReflectDatumReader<String>(ReflectData.get().getSchema(String.class));
        try (DataFileReader<String> reader = new DataFileReader<>(outputFile, datumReader)) {
            Iterators.addAll(actualElements, reader);
        }
    }
    assertThat(actualElements, containsInAnyOrder(expectedElements));
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) ArrayList(java.util.ArrayList) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader) File(java.io.File)

Example 7 with DataFileReader

use of org.apache.avro.file.DataFileReader in project beam by apache.

the class AvroPipelineTest method readGenericFile.

/**
 * Loads every record stored in the Avro file named {@code outputDir + "-00000-of-00001"}.
 *
 * @return the records in the order the file reader yields them
 * @throws IOException if the file cannot be opened or read
 */
private List<GenericRecord> readGenericFile() throws IOException {
    File shard = new File(outputDir + "-00000-of-00001");
    List<GenericRecord> collected = Lists.newArrayList();
    // The reader is closed automatically once all records have been drained.
    try (DataFileReader<GenericRecord> in = new DataFileReader<>(shard, new GenericDatumReader<GenericRecord>())) {
        while (in.hasNext()) {
            collected.add(in.next());
        }
    }
    return collected;
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 8 with DataFileReader

use of org.apache.avro.file.DataFileReader in project flink by apache.

the class AvroOutputFormatITCase method postSubmit.

/**
 * Verifies the Avro output written for the specific ({@code User}) and reflect
 * ({@code ReflectiveUser}) record types.
 *
 * <p>For each output path, the path is treated either as a directory of files or as a single
 * file. Every file is read as an Avro data file, each record is rendered as
 * {@code name|favoriteNumber|favoriteColor}, and every line of {@code userData} must appear among
 * the rendered records. For the first output only, files found in a directory must also end in
 * {@code .avro}.
 *
 * @throws Exception if an output file cannot be read
 */
@Override
protected void postSubmit() throws Exception {
    //compare result for specific user type
    File[] output1;
    File file1 = asFile(outputPath1);
    if (file1.isDirectory()) {
        output1 = file1.listFiles();
        // check for avro ext in dir.
        for (File avroOutput : output1) {
            Assert.assertTrue("Expect extension '.avro'", avroOutput.toString().endsWith(".avro"));
        }
    } else {
        output1 = new File[] { file1 };
    }
    List<String> result1 = new ArrayList<String>();
    DatumReader<User> userDatumReader1 = new SpecificDatumReader<User>(User.class);
    for (File avroOutput : output1) {
        DataFileReader<User> dataFileReader1 = new DataFileReader<User>(avroOutput, userDatumReader1);
        try {
            while (dataFileReader1.hasNext()) {
                User user = dataFileReader1.next();
                result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
            }
        } finally {
            // Release the file handle even when reading fails part-way through.
            dataFileReader1.close();
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result1.contains(expectedResult));
    }
    //compare result for reflect user type
    File[] output2;
    File file2 = asFile(outputPath2);
    if (file2.isDirectory()) {
        output2 = file2.listFiles();
    } else {
        output2 = new File[] { file2 };
    }
    List<String> result2 = new ArrayList<String>();
    DatumReader<ReflectiveUser> userDatumReader2 = new ReflectDatumReader<ReflectiveUser>(ReflectiveUser.class);
    for (File avroOutput : output2) {
        DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<ReflectiveUser>(avroOutput, userDatumReader2);
        try {
            while (dataFileReader2.hasNext()) {
                ReflectiveUser user = dataFileReader2.next();
                result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
            }
        } finally {
            // Release the file handle even when reading fails part-way through.
            dataFileReader2.close();
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result2.contains(expectedResult));
    }
}
Also used : User(org.apache.flink.api.io.avro.example.User) ArrayList(java.util.ArrayList) DataFileReader(org.apache.avro.file.DataFileReader) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader) File(java.io.File)

Example 9 with DataFileReader

use of org.apache.avro.file.DataFileReader in project storm by apache.

the class AvroGenericRecordBoltTest method fileIsGoodAvro.

/**
 * Checks that the file at {@code path} can be read end-to-end as an Avro data file.
 *
 * <p>The file is first copied from {@code fs} to the local scratch file
 * {@code target/FOO.avro}, then every record is read from that copy with a generic datum reader.
 * The method succeeds if no exception is thrown. The scratch file is deleted afterwards,
 * whether or not the check succeeded.
 *
 * @param path location of the file to validate on {@code fs}
 * @throws IOException if copying the file or decoding any record fails
 */
private void fileIsGoodAvro(Path path) throws IOException {
    File file = new File("target/FOO.avro");
    try {
        // Copy to local disk; both streams are closed even if the copy fails.
        try (FSDataInputStream in = fs.open(path, 0);
             FileOutputStream out = new FileOutputStream(file)) {
            byte[] buffer = new byte[100];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
        }
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
        try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
            // Reuse one record instance; only successful decoding matters here.
            GenericRecord user = null;
            while (dataFileReader.hasNext()) {
                user = dataFileReader.next(user);
            }
        }
    } finally {
        // Best-effort cleanup so a failed check does not leave the scratch file behind.
        file.delete();
    }
}
Also used : DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) FileOutputStream(java.io.FileOutputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 10 with DataFileReader

use of org.apache.avro.file.DataFileReader in project avro-kafka-storm by ransilberman.

the class MainTest method testDataFile.

/**
 * Round-trips a generic record through an Avro data file.
 *
 * <p>Builds one record from the {@code LPEvent.avsc} schema (including a {@code pline}
 * sub-record taken from the {@code subrecord} union), writes it three times to
 * {@code data.avro}, then reads the file back and checks the embedded schema, the number of
 * records, and the {@code siteId} and {@code eventType} fields of each record.
 *
 * @throws IOException if the schema cannot be parsed or the data file cannot be written or read
 */
@Test
public void testDataFile() throws IOException {
    File fileOut = new File("data.avro");
    File fileIn = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    // Index the branches of the "subrecord" union by name so the "pline" schema can be picked.
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    //write the file
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
    try {
        dataFileWriter.create(schema, fileOut);
        dataFileWriter.append(datum);
        dataFileWriter.append(datum);
        dataFileWriter.append(datum);
    } finally {
        // Close the writer even if create/append fails so the file handle is not leaked.
        dataFileWriter.close();
    }
    //read the file
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(fileIn, reader);
    try {
        assertThat("Scema is the same", schema, is(dataFileReader.getSchema()));
        int recordCount = 0;
        for (GenericRecord record : dataFileReader) {
            assertThat(record.get("siteId").toString(), is("28280110"));
            assertThat(record.get("eventType").toString(), is("PLine"));
            recordCount++;
        }
        // Without this check the per-record assertions pass vacuously on an empty file.
        assertThat("Number of records read back", recordCount, is(3));
    } finally {
        dataFileReader.close();
    }
}
Also used : HashMap(java.util.HashMap) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)

Aggregations

DataFileReader (org.apache.avro.file.DataFileReader)13 File (java.io.File)9 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)6 GenericRecord (org.apache.avro.generic.GenericRecord)6 ArrayList (java.util.ArrayList)4 Schema (org.apache.avro.Schema)3 ReflectDatumReader (org.apache.avro.reflect.ReflectDatumReader)3 SpecificDatumReader (org.apache.avro.specific.SpecificDatumReader)3 Test (org.junit.Test)3 IOException (java.io.IOException)2 DataFileWriter (org.apache.avro.file.DataFileWriter)2 GenericData (org.apache.avro.generic.GenericData)2 JSONArray (org.json.JSONArray)2 JSONObject (org.json.JSONObject)2 UnmodifiableIterator (com.google.common.collect.UnmodifiableIterator)1 BufferedInputStream (java.io.BufferedInputStream)1 FileInputStream (java.io.FileInputStream)1 FileOutputStream (java.io.FileOutputStream)1 Path (java.nio.file.Path)1 PreparedStatement (java.sql.PreparedStatement)1