Use of org.apache.avro.generic.GenericRecord in project beam by apache.
The class AvroSourceTest, method testSchemaStringIsInterned: verifies that AvroSource interns schema strings, so sources built from equal schema strings, and serialized clones of them, share a single Schema instance.
@Test
public void testSchemaStringIsInterned() throws Exception {
  List<Bird> birds = createRandomRecords(100);
  String filename =
      generateTestFile(
          "tmp.avro",
          birds,
          SyncBehavior.SYNC_DEFAULT,
          0,
          AvroCoder.of(Bird.class),
          DataFileConstants.NULL_CODEC);
  Metadata fileMetadata = FileSystems.matchSingleFileSpec(filename);
  String schemaA = AvroSource.readMetadataFromFile(fileMetadata.resourceId()).getSchemaString();
  String schemaB = AvroSource.readMetadataFromFile(fileMetadata.resourceId()).getSchemaString();
  assertNotSame(schemaA, schemaB);
  AvroSource<GenericRecord> sourceA = AvroSource.from(filename).withSchema(schemaA);
  AvroSource<GenericRecord> sourceB = AvroSource.from(filename).withSchema(schemaB);
  assertSame(sourceA.getSchema(), sourceB.getSchema());
  // Ensure that deserialization still goes through interning.
  AvroSource<GenericRecord> sourceC = SerializableUtils.clone(sourceB);
  assertSame(sourceA.getSchema(), sourceC.getSchema());
}
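For context, the sketch below shows the user-facing side of the same API: building an AvroSource from a schema string and reading it into GenericRecords in a pipeline. This is a minimal illustration, not code from the Beam test suite; the file path "birds.avro" and the Bird-like schema are placeholders, and in recent Beam releases AvroSource lives under org.apache.beam.sdk.extensions.avro.io rather than org.apache.beam.sdk.io.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.AvroSource;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.values.PCollection;

public class AvroSourceSketch {
  public static void main(String[] args) {
    // Illustrative schema; the test above instead reads the schema string back out of the file.
    Schema schema =
        SchemaBuilder.record("Bird").fields()
            .requiredLong("number")
            .optionalString("species")
            .endRecord();

    Pipeline pipeline = Pipeline.create();
    // AvroSource built from a schema string; the path is a placeholder.
    AvroSource<GenericRecord> source =
        AvroSource.from("birds.avro").withSchema(schema.toString());
    PCollection<GenericRecord> records = pipeline.apply(Read.from(source));
    pipeline.run().waitUntilFinish();
  }
}

Because the schema string is interned inside AvroSource, many sources built from the same schema text end up sharing one Schema object, which is exactly what the assertSame checks in the test assert.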
Use of org.apache.avro.generic.GenericRecord in project beam by apache.
The class FakeJobService, method writeRowsHelper: writes table rows to one shard of an Avro destination by building a GenericRecord per row with GenericRecordBuilder and appending it through a DataFileWriter.
private void writeRowsHelper(
    List<TableRow> rows, Schema avroSchema, String destinationPattern, int shard)
    throws IOException {
  String filename = destinationPattern.replace("*", String.format("%012d", shard));
  try (WritableByteChannel channel =
          FileSystems.create(FileSystems.matchNewResource(filename, false), MimeTypes.BINARY);
      DataFileWriter<GenericRecord> tableRowWriter =
          new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema))
              .create(avroSchema, Channels.newOutputStream(channel))) {
    for (Map<String, Object> record : rows) {
      GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(avroSchema);
      for (Map.Entry<String, Object> field : record.entrySet()) {
        genericRecordBuilder.set(field.getKey(), field.getValue());
      }
      tableRowWriter.append(genericRecordBuilder.build());
    }
  } catch (IOException e) {
    throw new IllegalStateException(
        String.format("Could not create destination for extract job %s", filename), e);
  }
}
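The core pattern here, building GenericRecords from Map entries with GenericRecordBuilder and appending them through a DataFileWriter, also works outside Beam's FileSystems abstraction. A minimal, self-contained sketch, assuming a local output file "rows.avro" and an illustrative two-field schema:

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class GenericRecordWriteSketch {
  public static void main(String[] args) throws IOException {
    // Placeholder schema standing in for the extract job's avroSchema.
    Schema schema =
        SchemaBuilder.record("Row").fields()
            .requiredString("name")
            .requiredLong("count")
            .endRecord();

    List<Map<String, Object>> rows =
        List.of(
            Map.of("name", "a", "count", 1L),
            Map.of("name", "b", "count", 2L));

    // Same write loop as writeRowsHelper, but targeting a plain local file.
    try (DataFileWriter<GenericRecord> writer =
        new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))
            .create(schema, new File("rows.avro"))) {
      for (Map<String, Object> row : rows) {
        GenericRecordBuilder builder = new GenericRecordBuilder(schema);
        for (Map.Entry<String, Object> field : row.entrySet()) {
          builder.set(field.getKey(), field.getValue());
        }
        writer.append(builder.build()); // build() validates that required fields are set
      }
    }
  }
}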
Use of org.apache.avro.generic.GenericRecord in project beam by apache.
The class BigQueryAvroUtilsTest, method testConvertGenericRecordToTableRow: checks the GenericRecord-to-TableRow conversion for nullable fields, for typed fields (INTEGER, FLOAT, TIMESTAMP, BOOLEAN, BYTES, DATE, DATETIME, TIME), and for repeated nested records.
@Test
public void testConvertGenericRecordToTableRow() throws Exception {
  TableSchema tableSchema = new TableSchema();
  tableSchema.setFields(fields);
  Schema avroSchema = AvroCoder.of(Bird.class).getSchema();
  {
    // Test nullable fields.
    GenericRecord record = new GenericData.Record(avroSchema);
    record.put("number", 5L);
    TableRow convertedRow = BigQueryAvroUtils.convertGenericRecordToTableRow(record, tableSchema);
    TableRow row = new TableRow().set("number", "5").set("associates", new ArrayList<TableRow>());
    assertEquals(row, convertedRow);
  }
  {
    // Test type conversion for:
    // INTEGER, FLOAT, TIMESTAMP, BOOLEAN, BYTES, DATE, DATETIME, TIME.
    GenericRecord record = new GenericData.Record(avroSchema);
    byte[] soundBytes = "chirp,chirp".getBytes();
    ByteBuffer soundByteBuffer = ByteBuffer.wrap(soundBytes);
    soundByteBuffer.rewind();
    record.put("number", 5L);
    record.put("quality", 5.0);
    record.put("birthday", 5L);
    record.put("flighted", Boolean.TRUE);
    record.put("sound", soundByteBuffer);
    record.put("anniversaryDate", new Utf8("2000-01-01"));
    record.put("anniversaryDatetime", new String("2000-01-01 00:00:00.000005"));
    record.put("anniversaryTime", new Utf8("00:00:00.000005"));
    TableRow convertedRow = BigQueryAvroUtils.convertGenericRecordToTableRow(record, tableSchema);
    TableRow row =
        new TableRow()
            .set("number", "5")
            .set("birthday", "1970-01-01 00:00:00.000005 UTC")
            .set("quality", 5.0)
            .set("associates", new ArrayList<TableRow>())
            .set("flighted", Boolean.TRUE)
            .set("sound", BaseEncoding.base64().encode(soundBytes))
            .set("anniversaryDate", "2000-01-01")
            .set("anniversaryDatetime", "2000-01-01 00:00:00.000005")
            .set("anniversaryTime", "00:00:00.000005");
    assertEquals(row, convertedRow);
  }
  {
    // Test repeated fields.
    Schema subBirdSchema = AvroCoder.of(Bird.SubBird.class).getSchema();
    GenericRecord nestedRecord = new GenericData.Record(subBirdSchema);
    nestedRecord.put("species", "other");
    GenericRecord record = new GenericData.Record(avroSchema);
    record.put("number", 5L);
    record.put("associates", Lists.<GenericRecord>newArrayList(nestedRecord));
    TableRow convertedRow = BigQueryAvroUtils.convertGenericRecordToTableRow(record, tableSchema);
    TableRow row =
        new TableRow()
            .set("associates", Lists.<TableRow>newArrayList(new TableRow().set("species", "other")))
            .set("number", "5");
    assertEquals(row, convertedRow);
  }
}
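convertGenericRecordToTableRow is internal to the BigQuery connector, but the same GenericRecord-plus-TableSchema pairing reaches user code through BigQueryIO's Avro export path, where each exported row is handed to a parse function as a SchemaAndRecord. A hedged sketch of that usage, assuming a placeholder table "project:dataset.birds" with a string field named species and leaving runner configuration aside:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.values.PCollection;

public class BigQueryGenericRecordSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create();
    // The parse function receives a SchemaAndRecord: the row's Avro GenericRecord plus
    // the TableSchema it was exported with.
    PCollection<String> species =
        pipeline.apply(
            BigQueryIO.read(
                    schemaAndRecord ->
                        String.valueOf(schemaAndRecord.getRecord().get("species")))
                .from("project:dataset.birds")
                .withCoder(StringUtf8Coder.of()));
    pipeline.run().waitUntilFinish();
  }
}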
Use of org.apache.avro.generic.GenericRecord in project beam by apache.
The class AvroPipelineTest, method readGenericFile: reads every GenericRecord back from the single output shard with a DataFileReader.
private List<GenericRecord> readGenericFile() throws IOException {
  List<GenericRecord> records = Lists.newArrayList();
  GenericDatumReader<GenericRecord> genericDatumReader = new GenericDatumReader<>();
  try (DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<>(new File(outputDir + "-00000-of-00001"), genericDatumReader)) {
    for (GenericRecord record : dataFileReader) {
      records.add(record);
    }
  }
  return records;
}
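The same DataFileReader loop can avoid allocating a fresh record per element by passing the previous record back into next(). A small standalone sketch, assuming a local file "records.avro":

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordReadSketch {
  public static void main(String[] args) throws IOException {
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    try (DataFileReader<GenericRecord> fileReader =
        new DataFileReader<>(new File("records.avro"), datumReader)) {
      // The writer schema is stored in the file header and is available after opening.
      System.out.println("Writer schema: " + fileReader.getSchema());
      GenericRecord record = null;
      while (fileReader.hasNext()) {
        record = fileReader.next(record); // reuses the record instance where possible
        System.out.println(record);
      }
    }
  }
}

Note that record reuse would not suit readGenericFile above, which collects each record into a list and therefore needs distinct instances.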
Use of org.apache.avro.generic.GenericRecord in project beam by apache.
The class AvroPipelineTest, method populateGenericFile: writes the given GenericRecords to the test input file with a DataFileWriter.
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema)
    throws IOException {
  FileOutputStream outputStream = new FileOutputStream(this.inputFile);
  GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
    dataFileWriter.create(schema, outputStream);
    for (GenericRecord record : genericRecords) {
      dataFileWriter.append(record);
    }
  }
  outputStream.close();
}
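Helpers like these two typically bracket an AvroIO pipeline that reads the populated input file and writes generic records back out. A hedged sketch of that shape, with placeholder paths "input.avro" and "output" and an illustrative schema; as with AvroSource, AvroIO has moved to org.apache.beam.sdk.extensions.avro.io in newer Beam versions:

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericRecord;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.AvroIO;
import org.apache.beam.sdk.values.PCollection;

public class AvroPipelineSketch {
  public static void main(String[] args) {
    // Placeholder schema for the generic records in "input.avro".
    Schema schema =
        SchemaBuilder.record("Bird").fields()
            .requiredLong("number")
            .optionalString("species")
            .endRecord();

    Pipeline pipeline = Pipeline.create();
    PCollection<GenericRecord> records =
        pipeline.apply(AvroIO.readGenericRecords(schema).from("input.avro"));
    records.apply(AvroIO.writeGenericRecords(schema).to("output").withoutSharding());
    pipeline.run().waitUntilFinish();
  }
}

With withoutSharding(), the write produces a single file named output-00000-of-00001, which matches the "-00000-of-00001" suffix that readGenericFile expects.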