Example usage of org.apache.avro.file.DataFileReader in project flink (by Apache):
class AvroOutputFormatITCase, method postSubmit.
/**
 * Verifies the job output after submission. Reads back the Avro files written for the
 * specific-record user type (outputPath1) and the reflect-based user type (outputPath2),
 * and asserts that every expected user line from {@code userData} is present in each.
 *
 * @throws Exception if the output files cannot be read
 */
@Override
protected void postSubmit() throws Exception {
    // --- compare result for specific user type ---
    File[] output1;
    File file1 = asFile(outputPath1);
    if (file1.isDirectory()) {
        output1 = file1.listFiles();
        // every file produced in the output directory must carry the '.avro' extension
        for (File avroOutput : Objects.requireNonNull(output1)) {
            Assert.assertTrue("Expect extension '.avro'", avroOutput.toString().endsWith(".avro"));
        }
    } else {
        output1 = new File[] { file1 };
    }
    List<String> result1 = new ArrayList<>();
    DatumReader<User> userDatumReader1 = new SpecificDatumReader<>(User.class);
    for (File avroOutput : output1) {
        // try-with-resources: the original never closed the reader, leaking a file handle per file
        try (DataFileReader<User> dataFileReader1 = new DataFileReader<>(avroOutput, userDatumReader1)) {
            while (dataFileReader1.hasNext()) {
                User user = dataFileReader1.next();
                result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
            }
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result1.contains(expectedResult));
    }
    // --- compare result for reflect user type ---
    File[] output2;
    File file2 = asFile(outputPath2);
    if (file2.isDirectory()) {
        output2 = file2.listFiles();
    } else {
        output2 = new File[] { file2 };
    }
    List<String> result2 = new ArrayList<>();
    DatumReader<ReflectiveUser> userDatumReader2 = new ReflectDatumReader<>(ReflectiveUser.class);
    for (File avroOutput : Objects.requireNonNull(output2)) {
        // try-with-resources: same leak as above for the reflect-based output files
        try (DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<>(avroOutput, userDatumReader2)) {
            while (dataFileReader2.hasNext()) {
                ReflectiveUser user = dataFileReader2.next();
                result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
            }
        }
    }
    for (String expectedResult : userData.split("\n")) {
        Assert.assertTrue("expected user " + expectedResult + " not found.", result2.contains(expectedResult));
    }
}
Example usage of org.apache.avro.file.DataFileReader in project flink (by Apache):
class AvroInputFormat, method initReader.
/**
 * Opens a {@link DataFileReader} for the given split. The datum reader is chosen from the
 * configured {@code avroValueType}: GenericRecord uses a GenericDatumReader, subclasses of
 * SpecificRecordBase use a SpecificDatumReader, and all other types fall back to a
 * ReflectDatumReader. Also records the split's end offset and resets the per-sync record count.
 *
 * @param split the input split whose backing file should be opened
 * @return a DataFileReader positioned at the start of the file
 * @throws IOException if the file system or the file's status cannot be accessed
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;
    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
                ? new SpecificDatumReader<E>(avroValueType)
                : new ReflectDatumReader<E>(avroValueType);
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }
    // wrap the (already opened) stream so Avro can seek within it; length comes from the file status
    SeekableInput in = new FSDataInputStreamWrapper(
            stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
    // typed cast instead of the original raw-type cast, avoiding a raw-type warning
    DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }
    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
Example usage of org.apache.avro.file.DataFileReader in project incubator-gobblin (by Apache):
class AvroHdfsDataWriterTest, method testWrite.
/**
 * Writes the JSON test records through the writer, commits, and then reads the resulting Avro
 * file back to verify the schema property, the three user records, and the emitted writer
 * metrics.
 *
 * @throws IOException if writing or reading the Avro file fails
 */
@Test
public void testWrite() throws IOException {
    // Write all test records
    for (String record : TestConstants.JSON_RECORDS) {
        this.writer.write(convertRecord(record));
    }
    Assert.assertEquals(this.writer.recordsWritten(), 3);
    this.writer.close();
    this.writer.commit();
    File outputFile = new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath, TestConstants.TEST_FILE_NAME);
    // try-with-resources: the original closed the reader manually, leaking it if any assertion failed
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(outputFile, new GenericDatumReader<GenericRecord>())) {
        Schema fileSchema = reader.getSchema();
        Assert.assertEquals(fileSchema.getProp(TEST_PROPERTY_KEY), TEST_PROPERTY_VALUE);
        // Read the records back and assert they are identical to the ones written
        GenericRecord user1 = reader.next();
        // Strings are in UTF8, so we have to call toString() here and below
        Assert.assertEquals(user1.get("name").toString(), "Alyssa");
        Assert.assertEquals(user1.get("favorite_number"), 256);
        Assert.assertEquals(user1.get("favorite_color").toString(), "yellow");
        GenericRecord user2 = reader.next();
        Assert.assertEquals(user2.get("name").toString(), "Ben");
        Assert.assertEquals(user2.get("favorite_number"), 7);
        Assert.assertEquals(user2.get("favorite_color").toString(), "red");
        GenericRecord user3 = reader.next();
        Assert.assertEquals(user3.get("name").toString(), "Charlie");
        Assert.assertEquals(user3.get("favorite_number"), 68);
        Assert.assertEquals(user3.get("favorite_color").toString(), "blue");
    }
    // verify the writer reported correct per-file metrics for the single output file
    FsWriterMetrics metrics = FsWriterMetrics.fromJson(properties.getProp(FsDataWriter.FS_WRITER_METRICS_KEY));
    Assert.assertEquals(metrics.fileInfos.size(), 1);
    FsWriterMetrics.FileInfo fileInfo = metrics.fileInfos.iterator().next();
    Assert.assertEquals(fileInfo.fileName, TestConstants.TEST_FILE_NAME);
    Assert.assertEquals(fileInfo.numRecords, 3);
    Assert.assertNull(metrics.partitionInfo.partitionKey);
    Assert.assertEquals(metrics.partitionInfo.branchId, 0);
}
Example usage of org.apache.avro.file.DataFileReader in project incubator-gobblin (by Apache):
class AvroStringFieldEncryptorConverterTest, method getRecordFromFile.
/**
 * Reads the first record from the Avro data file at the given path.
 *
 * @param path filesystem path of the Avro data file
 * @return the first record in the file, or {@code null} if the file contains no records
 * @throws IOException if the file cannot be opened or read
 */
private GenericRecord getRecordFromFile(String path) throws IOException {
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    // try-with-resources: the original never closed the reader, leaking the file handle
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(path), reader)) {
        if (dataFileReader.hasNext()) {
            return dataFileReader.next();
        }
        return null;
    }
}
Example usage of org.apache.avro.file.DataFileReader in project incubator-gobblin (by Apache):
class AvroRecursionEliminatingConverterTest, method testConversion.
/**
* Test schema and record conversion using a recursive schema
*/
/**
 * Tests schema and record conversion using a recursive schema: the converter must drop the
 * inner recursive field ({@code address.previous_address}) from both the schema and the
 * converted record while leaving all non-recursive fields intact.
 *
 * @throws IOException if the generated input record cannot be read
 */
@Test
public void testConversion() throws IOException {
    File inputFile = generateRecord();
    WorkUnitState workUnitState = new WorkUnitState();
    Schema inputSchema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/recursive.avsc"));
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(inputSchema);
    // try-with-resources: the original never closed the reader, leaking the file handle
    GenericRecord inputRecord;
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(inputFile, datumReader)) {
        inputRecord = dataFileReader.next();
    }
    AvroRecursionEliminatingConverter converter = new AvroRecursionEliminatingConverter();
    Schema outputSchema = null;
    String recursiveFieldPath = "address.previous_address";
    // test that the inner recursive field is present in input schema
    Assert.assertTrue(AvroUtils.getFieldSchema(inputSchema, recursiveFieldPath).isPresent());
    try {
        outputSchema = converter.convertSchema(inputSchema, workUnitState);
        // test that the inner recursive field is no longer in the schema
        Assert.assertTrue(!AvroUtils.getFieldSchema(outputSchema, recursiveFieldPath).isPresent(), "Inner recursive field " + recursiveFieldPath + " should not be in output schema");
    } catch (SchemaConversionException e) {
        Assert.fail(e.getMessage());
    }
    GenericRecord outputRecord = null;
    try {
        outputRecord = converter.convertRecord(outputSchema, inputRecord, workUnitState).iterator().next();
    } catch (DataConversionException e) {
        Assert.fail(e.getMessage());
    }
    checkEquality("address.street_number", inputRecord, 1234, "Different value in input");
    checkEquality("address.street_number", outputRecord, 1234, "Different value in output");
    checkEquality("name", inputRecord, new Utf8("John"), "Different value in input");
    checkEquality("name", outputRecord, new Utf8("John"), "Different value in output");
    // check that inner address record exists in input record
    checkEquality("address.previous_address.city", inputRecord, new Utf8("San Francisco"), "Different value in input");
    checkEquality("address.previous_address", outputRecord, null, "Failed to remove recursive field");
}
Aggregations