Use of org.apache.avro.generic.GenericDatumWriter in project beam (by apache):
class AvroPipelineTest, method populateGenericFile.
/**
 * Writes the given records to {@code this.inputFile} as an Avro container file.
 *
 * @param genericRecords records to serialize; all must conform to {@code schema}
 * @param schema Avro schema used both for the writer and the file header
 * @throws IOException if the file cannot be created or a record cannot be written
 */
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
// Both resources in try-with-resources: the original closed the stream only on
// the success path, leaking the file handle if writing threw.
try (FileOutputStream outputStream = new FileOutputStream(this.inputFile);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
dataFileWriter.create(schema, outputStream);
for (GenericRecord record : genericRecords) {
dataFileWriter.append(record);
}
}
}
Use of org.apache.avro.generic.GenericDatumWriter in project beam (by apache):
class BigQueryIOWriteTest, method testWriteAvroWithCustomWriter.
@Test
public void testWriteAvroWithCustomWriter() throws Exception {
// This scenario applies only to batch file loads; skip otherwise.
if (useStorageApi || useStreaming) {
return;
}
// Maps an InputRecord onto a GenericRecord matching the request's Avro schema.
SerializableFunction<AvroWriteRequest<InputRecord>, GenericRecord> formatFunction = request -> {
InputRecord element = request.getElement();
GenericRecord avroRecord = new GenericData.Record(request.getSchema());
avroRecord.put("strVal", element.strVal());
avroRecord.put("longVal", element.longVal());
avroRecord.put("doubleVal", element.doubleVal());
// millis * 1000 — the timestamp field is written at microsecond granularity.
avroRecord.put("instantVal", element.instantVal().getMillis() * 1000);
return avroRecord;
};
// Custom writer that tags every string value so we can observe it was used.
SerializableFunction<org.apache.avro.Schema, DatumWriter<GenericRecord>> customWriterFactory = unusedSchema ->
new GenericDatumWriter<GenericRecord>() {
@Override
protected void writeString(org.apache.avro.Schema schema, Object datum, Encoder out) throws IOException {
super.writeString(schema, datum.toString() + "_custom", out);
}
};
TableSchema tableSchema = new TableSchema().setFields(ImmutableList.of(
new TableFieldSchema().setName("strVal").setType("STRING"),
new TableFieldSchema().setName("longVal").setType("INTEGER"),
new TableFieldSchema().setName("doubleVal").setType("FLOAT"),
new TableFieldSchema().setName("instantVal").setType("TIMESTAMP")));
p.apply(
Create.of(
InputRecord.create("test", 1, 1.0, Instant.parse("2019-01-01T00:00:00Z")),
InputRecord.create("test2", 2, 2.0, Instant.parse("2019-02-01T00:00:00Z")))
.withCoder(INPUT_RECORD_CODER))
.apply(
BigQueryIO.<InputRecord>write()
.to("dataset-id.table-id")
.withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
.withSchema(tableSchema)
.withTestServices(fakeBqServices)
.withAvroWriter(formatFunction, customWriterFactory)
.withoutValidation());
p.run();
// Every string value must carry the "_custom" suffix added by the writer.
assertThat(
fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
containsInAnyOrder(
new TableRow()
.set("strVal", "test_custom")
.set("longVal", "1")
.set("doubleVal", 1.0D)
.set("instantVal", "2019-01-01 00:00:00 UTC"),
new TableRow()
.set("strVal", "test2_custom")
.set("longVal", "2")
.set("doubleVal", 2.0D)
.set("instantVal", "2019-02-01 00:00:00 UTC")));
}
Use of org.apache.avro.generic.GenericDatumWriter in project beam (by apache):
class BigQueryIOStorageReadTest, method createResponse.
/**
 * Serializes the given records with Avro binary encoding and wraps them in a
 * {@link ReadRowsResponse} carrying the supplied progress bounds.
 */
private static ReadRowsResponse createResponse(Schema schema, Collection<GenericRecord> genericRecords, double progressAtResponseStart, double progressAtResponseEnd) throws Exception {
ByteArrayOutputStream serializedRows = new ByteArrayOutputStream();
Encoder encoder = ENCODER_FACTORY.binaryEncoder(serializedRows, null);
GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
for (GenericRecord row : genericRecords) {
datumWriter.write(row, encoder);
}
// Flush the encoder's internal buffer into the byte stream before reading it.
encoder.flush();
AvroRows.Builder avroRows =
AvroRows.newBuilder()
.setSerializedBinaryRows(ByteString.copyFrom(serializedRows.toByteArray()))
.setRowCount(genericRecords.size());
Progress.Builder progress =
Progress.newBuilder()
.setAtResponseStart(progressAtResponseStart)
.setAtResponseEnd(progressAtResponseEnd);
return ReadRowsResponse.newBuilder()
.setAvroRows(avroRows)
.setRowCount(genericRecords.size())
.setStats(StreamStats.newBuilder().setProgress(progress))
.build();
}
Use of org.apache.avro.generic.GenericDatumWriter in project beam (by apache):
class BigQueryIOStorageQueryTest, method createResponse.
/**
 * Builds a {@link ReadRowsResponse} whose payload is the Avro binary encoding of
 * {@code genericRecords}, annotated with the given start/end progress fractions.
 */
private static ReadRowsResponse createResponse(Schema schema, Collection<GenericRecord> genericRecords, double progressAtResponseStart, double progressAtResponseEnd) throws Exception {
ByteArrayOutputStream rowBytes = new ByteArrayOutputStream();
GenericDatumWriter<GenericRecord> recordWriter = new GenericDatumWriter<>(schema);
Encoder out = ENCODER_FACTORY.binaryEncoder(rowBytes, null);
for (GenericRecord record : genericRecords) {
recordWriter.write(record, out);
}
out.flush(); // push buffered bytes into rowBytes before snapshotting
int rowCount = genericRecords.size();
return ReadRowsResponse.newBuilder()
.setAvroRows(
AvroRows.newBuilder()
.setSerializedBinaryRows(ByteString.copyFrom(rowBytes.toByteArray()))
.setRowCount(rowCount))
.setRowCount(rowCount)
.setStats(
StreamStats.newBuilder()
.setProgress(
Progress.newBuilder()
.setAtResponseStart(progressAtResponseStart)
.setAtResponseEnd(progressAtResponseEnd)))
.build();
}
Use of org.apache.avro.generic.GenericDatumWriter in project haivvreo (by jghoman):
class AvroContainerOutputFormat, method getHiveRecordWriter.
/**
 * Opens an Avro container file at {@code path} and returns a Hive record writer over it.
 *
 * <p>The Avro schema is resolved from the job configuration / table properties; if that
 * fails, the {@code HaivvreoException} is rethrown as an {@code IOException} with its
 * cause preserved. Optional deflate (or other) compression is configured from the job.
 *
 * @throws IOException if the schema cannot be determined or the file cannot be created
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
Schema schema;
try {
schema = HaivvreoUtils.determineSchemaOrThrowException(jobConf, properties);
} catch (HaivvreoException e) {
// Wrap rather than swallow so schema-resolution failures stay diagnosable.
throw new IOException(e);
}
GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
if (isCompressed) {
int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
CodecFactory factory = codecName.equals(DEFLATE_CODEC)
? CodecFactory.deflateCodec(level)
: CodecFactory.fromString(codecName);
dfw.setCodec(factory);
}
// Open the destination stream explicitly so it can be closed if create() fails;
// the original opened it inline and leaked the handle on failure.
java.io.OutputStream fileOut = path.getFileSystem(jobConf).create(path);
try {
dfw.create(schema, fileOut);
} catch (IOException e) {
fileOut.close();
throw e;
}
return new AvroGenericRecordWriter(dfw);
}
Aggregations