Example use of org.apache.avro.file.DataFileReader from the Apache Beam project, in the class AvroIOTest, method assertTestOutputs.
/**
 * Asserts that the sharded Avro output files contain exactly the expected elements.
 *
 * <p>The expected shard filenames are reconstructed with
 * {@code DefaultFilenamePolicy.constructName} using the same prefix and template
 * the sink was configured with; each file must exist and its records, combined
 * across all shards, must match {@code expectedElements} in any order.
 *
 * @param expectedElements the elements expected across all shards (order-insensitive)
 * @param numShards the number of shard files the sink should have produced
 * @param outputFilePrefix the filename prefix the files were written with
 * @param shardNameTemplate the shard-name template passed to the filename policy
 * @throws IOException if an output file cannot be opened or read
 */
public static void assertTestOutputs(String[] expectedElements, int numShards, String outputFilePrefix, String shardNameTemplate) throws IOException {
    // Validate that the data written matches the expected elements in the expected order
    List<File> expectedFiles = new ArrayList<>();
    for (int i = 0; i < numShards; i++) {
        expectedFiles.add(new File(DefaultFilenamePolicy.constructName(outputFilePrefix, shardNameTemplate, "", /* no suffix */
                i, numShards)));
    }
    List<String> actualElements = new ArrayList<>();
    for (File outputFile : expectedFiles) {
        assertTrue("Expected output file " + outputFile.getName(), outputFile.exists());
        // Parameterize the datum reader; the original used a raw ReflectDatumReader,
        // which compiles with an unchecked warning.
        try (DataFileReader<String> reader = new DataFileReader<>(outputFile, new ReflectDatumReader<String>(ReflectData.get().getSchema(String.class)))) {
            Iterators.addAll(actualElements, reader);
        }
    }
    assertThat(actualElements, containsInAnyOrder(expectedElements));
}
Example use of org.apache.avro.file.DataFileReader from the Apache Beam project, in the class AvroPipelineTest, method readGenericFile.
/**
 * Reads back every record from the single Avro output shard written by the pipeline.
 *
 * @return all records found in the {@code outputDir + "-00000-of-00001"} file
 * @throws IOException if the Avro container file cannot be opened or read
 */
private List<GenericRecord> readGenericFile() throws IOException {
    List<GenericRecord> results = Lists.newArrayList();
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    File shard = new File(outputDir + "-00000-of-00001");
    // try-with-resources guarantees the reader is closed even if iteration throws.
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(shard, datumReader)) {
        while (reader.hasNext()) {
            results.add(reader.next());
        }
    }
    return results;
}
Example use of org.apache.avro.file.DataFileReader from the Apache Flink project, in the class AvroOutputFormatITCase, method postSubmit.
/**
 * Verifies the job output: reads back the Avro files written for both the specific
 * ({@code User}) and reflect ({@code ReflectiveUser}) record types and checks that
 * every expected user row from {@code userData} is present in each.
 *
 * @throws Exception if an output file cannot be read
 */
@Override
protected void postSubmit() throws Exception {
    //compare result for specific user type
    File[] output1;
    File file1 = asFile(outputPath1);
    if (file1.isDirectory()) {
        output1 = file1.listFiles();
        // check for avro ext in dir.
        for (File avroOutput : output1) {
            Assert.assertTrue("Expect extension '.avro'", avroOutput.toString().endsWith(".avro"));
        }
    } else {
        output1 = new File[] { file1 };
    }
    List<String> result1 = new ArrayList<String>();
    DatumReader<User> userDatumReader1 = new SpecificDatumReader<User>(User.class);
    for (File avroOutput : output1) {
        // try-with-resources: the original never closed the DataFileReader (resource leak).
        try (DataFileReader<User> dataFileReader1 = new DataFileReader<User>(avroOutput, userDatumReader1)) {
            while (dataFileReader1.hasNext()) {
                User user = dataFileReader1.next();
                result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
            }
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result1.contains(expectedResult));
    }
    //compare result for reflect user type
    File[] output2;
    File file2 = asFile(outputPath2);
    if (file2.isDirectory()) {
        output2 = file2.listFiles();
    } else {
        output2 = new File[] { file2 };
    }
    List<String> result2 = new ArrayList<String>();
    DatumReader<ReflectiveUser> userDatumReader2 = new ReflectDatumReader<ReflectiveUser>(ReflectiveUser.class);
    for (File avroOutput : output2) {
        // Close this reader as well; the original leaked it.
        try (DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<ReflectiveUser>(avroOutput, userDatumReader2)) {
            while (dataFileReader2.hasNext()) {
                ReflectiveUser user = dataFileReader2.next();
                result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
            }
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result2.contains(expectedResult));
    }
}
Example use of org.apache.avro.file.DataFileReader from the Apache Storm project, in the class AvroGenericRecordBoltTest, method fileIsGoodAvro.
/**
 * Copies the file at the given filesystem {@code path} to a local temp file and
 * verifies it is a readable Avro container by iterating over every record in it.
 *
 * <p>The local copy is always deleted afterwards, even when reading fails.
 *
 * @param path filesystem path of the Avro file to validate
 * @throws IOException if the copy or the Avro read fails
 */
private void fileIsGoodAvro(Path path) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    java.io.File file = new File("target/FOO.avro");
    // Copy to the local filesystem so DataFileReader can seek in the file.
    // try-with-resources: the original never closed `in`, and leaked `out`
    // if a read/write threw before the explicit close().
    try (FSDataInputStream in = fs.open(path, 0);
         FileOutputStream out = new FileOutputStream(file)) {
        byte[] buffer = new byte[100];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }
    }
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
        GenericRecord user = null;
        while (dataFileReader.hasNext()) {
            // Reuse the record instance to avoid per-row allocation.
            user = dataFileReader.next(user);
        }
    } finally {
        // The original skipped the delete when reading threw; always clean up.
        file.delete();
    }
}
Example use of org.apache.avro.file.DataFileReader from the avro-kafka-storm project by ransilberman, in the class MainTest, method testDataFile.
/**
 * Round-trip test: writes three identical generic records (with a union-typed
 * {@code subrecord} field) to an Avro data file, then reads the file back and
 * verifies the schema and selected field values.
 *
 * @throws IOException if the file cannot be written or read
 */
@Test
public void testDataFile() throws IOException {
    File fileOut = new File("data.avro");
    File fileIn = new File("data.avro");
    Schema.Parser parser = new Schema.Parser();
    Schema schema = parser.parse(getClass().getResourceAsStream("LPEvent.avsc"));
    GenericRecord datum = new GenericData.Record(schema);
    datum.put("revision", 1L);
    datum.put("siteId", "28280110");
    datum.put("eventType", "PLine");
    datum.put("timeStamp", System.currentTimeMillis());
    datum.put("sessionId", "123456II");
    // Index the union branches of the "subrecord" field by name so we can pick "pline".
    Map<String, Schema> unions = new HashMap<String, Schema>();
    List<Schema> typeList = schema.getField("subrecord").schema().getTypes();
    for (Schema sch : typeList) {
        unions.put(sch.getName(), sch);
    }
    GenericRecord plineDatum = new GenericData.Record(unions.get("pline"));
    plineDatum.put("text", "How can I help you?");
    plineDatum.put("lineType", 1);
    plineDatum.put("repId", "REPID12345");
    datum.put("subrecord", plineDatum);
    //write the file
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
    // try-with-resources: the original leaked the writer if an append threw.
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer)) {
        dataFileWriter.create(schema, fileOut);
        dataFileWriter.append(datum);
        dataFileWriter.append(datum);
        dataFileWriter.append(datum);
    }
    //read the file
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    // try-with-resources: the original never closed the DataFileReader at all.
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(fileIn, reader)) {
        // Fixed assertion-message typo: "Scema" -> "Schema".
        assertThat("Schema is the same", schema, is(dataFileReader.getSchema()));
        for (GenericRecord record : dataFileReader) {
            assertThat(record.get("siteId").toString(), is("28280110"));
            assertThat(record.get("eventType").toString(), is("PLine"));
        }
    }
}
Aggregations