Use of org.apache.parquet.avro.AvroParquetReader in project nifi by apache.
The class PutParquetTest, method verifyAvroParquetUsers.
/**
 * Reads the Avro Parquet file at the given path (configured with {@code testConf}) and
 * asserts that it holds exactly {@code numExpectedUsers} records, where the record at
 * zero-based position {@code i} has name {@code "name" + i}, favorite_number {@code i}
 * and favorite_color {@code "blue" + i}.
 *
 * @param avroParquetUsers path of the Parquet file to verify
 * @param numExpectedUsers number of records the file is expected to contain
 * @throws IOException if the reader reports an I/O failure
 */
private void verifyAvroParquetUsers(final Path avroParquetUsers, final int numExpectedUsers) throws IOException {
    int usersSeen = 0;
    try (final ParquetReader<GenericRecord> parquetReader =
            AvroParquetReader.<GenericRecord>builder(avroParquetUsers).withConf(testConf).build()) {
        // Iteration stops at the first null returned by read().
        for (GenericRecord user = parquetReader.read(); user != null; user = parquetReader.read()) {
            Assert.assertNotNull(user);
            final String actualName = user.get("name").toString();
            Assert.assertEquals("name" + usersSeen, actualName);
            Assert.assertEquals(usersSeen, user.get("favorite_number"));
            final String actualColor = user.get("favorite_color").toString();
            Assert.assertEquals("blue" + usersSeen, actualColor);
            usersSeen++;
        }
    }
    // The file must contain exactly the expected number of records.
    Assert.assertEquals(numExpectedUsers, usersSeen);
}
Use of org.apache.parquet.avro.AvroParquetReader in project components by Talend.
The class MiniDfsResource, method assertReadParquetFile.
/**
 * Asserts that the Parquet file, or the files under the directory, at the given path
 * contain the expected Avro records.
 * <p>
 * Every record read that equals an entry of {@code expected} causes that entry to be
 * removed, so the caller's set is mutated. Records read from the file that have no
 * counterpart in {@code expected} are ignored.
 *
 * @param fs the file system to read from
 * @param path the file or directory to read
 * @param expected the Avro records expected to be found; matched entries are removed
 * @param part if {@code true} and {@code path} is a file, the check that every expected
 *            record was found is skipped (the directory branch passes {@code true} for
 *            each listed file and performs the check itself once all files are read)
 * @throws IOException if accessing the file system or reading a file fails
 */
public static void assertReadParquetFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part) throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(fs.getConf(), p)) {
            IndexedRecord record;
            while (null != (record = reader.read())) {
                // Find the first expected entry that considers itself equal to the record read.
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                // Only remove on a match: remove(null) is rejected by null-hostile sets.
                if (eqRecord != null) {
                    expected.remove(eqRecord);
                }
            }
        }
        // Check before asserting for the message.
        if (!part && !expected.isEmpty()) {
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
        }
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            // Each file may hold only some of the records, so defer the completeness check.
            assertReadParquetFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (!expected.isEmpty()) {
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
        }
    } else {
        fail("No such path: " + path);
    }
}
Aggregations