Use of org.apache.avro.io.DatumWriter in project beam by apache.
In the class BigQueryIOWriteTest, the method testWriteAvroWithCustomWriter:
@Test
public void testWriteAvroWithCustomWriter() throws Exception {
  if (useStorageApi || useStreaming) {
    return;
  }
  SerializableFunction<AvroWriteRequest<InputRecord>, GenericRecord> formatFunction =
      r -> {
        GenericRecord rec = new GenericData.Record(r.getSchema());
        InputRecord i = r.getElement();
        rec.put("strVal", i.strVal());
        rec.put("longVal", i.longVal());
        rec.put("doubleVal", i.doubleVal());
        // BigQuery TIMESTAMP values are expressed in microseconds since the epoch.
        rec.put("instantVal", i.instantVal().getMillis() * 1000);
        return rec;
      };
  // Wrap GenericDatumWriter so every string is suffixed before it is encoded.
  SerializableFunction<org.apache.avro.Schema, DatumWriter<GenericRecord>> customWriterFactory =
      s -> new GenericDatumWriter<GenericRecord>() {
        @Override
        protected void writeString(org.apache.avro.Schema schema, Object datum, Encoder out)
            throws IOException {
          super.writeString(schema, datum.toString() + "_custom", out);
        }
      };
  p.apply(
          Create.of(
                  InputRecord.create("test", 1, 1.0, Instant.parse("2019-01-01T00:00:00Z")),
                  InputRecord.create("test2", 2, 2.0, Instant.parse("2019-02-01T00:00:00Z")))
              .withCoder(INPUT_RECORD_CODER))
      .apply(
          BigQueryIO.<InputRecord>write()
              .to("dataset-id.table-id")
              .withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED)
              .withSchema(new TableSchema().setFields(ImmutableList.of(
                  new TableFieldSchema().setName("strVal").setType("STRING"),
                  new TableFieldSchema().setName("longVal").setType("INTEGER"),
                  new TableFieldSchema().setName("doubleVal").setType("FLOAT"),
                  new TableFieldSchema().setName("instantVal").setType("TIMESTAMP"))))
              .withTestServices(fakeBqServices)
              .withAvroWriter(formatFunction, customWriterFactory)
              .withoutValidation());
  p.run();
  assertThat(
      fakeDatasetService.getAllRows("project-id", "dataset-id", "table-id"),
      containsInAnyOrder(
          new TableRow().set("strVal", "test_custom").set("longVal", "1")
              .set("doubleVal", 1.0D).set("instantVal", "2019-01-01 00:00:00 UTC"),
          new TableRow().set("strVal", "test2_custom").set("longVal", "2")
              .set("doubleVal", 2.0D).set("instantVal", "2019-02-01 00:00:00 UTC")));
}
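Here the factory receives the destination table's Avro schema and returns the DatumWriter used to encode each row. The same override can be exercised outside Beam; a minimal sketch, using an illustrative one-field schema rather than the test's schema:

// Minimal sketch: encode one GenericRecord with the writeString override from the test.
// The one-field schema here is illustrative, not the schema used above.
import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;

public class CustomWriterSketch {
  public static void main(String[] args) throws Exception {
    Schema schema = SchemaBuilder.record("Rec").fields().requiredString("strVal").endRecord();
    GenericRecord rec = new GenericData.Record(schema);
    rec.put("strVal", "test");
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema) {
      @Override
      protected void writeString(Schema s, Object datum, Encoder out) throws java.io.IOException {
        super.writeString(s, datum.toString() + "_custom", out); // suffix every string field
      }
    };
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
    writer.write(rec, encoder);
    encoder.flush();
    // baos now holds the Avro binary encoding of {"strVal": "test_custom"}.
  }
}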
Use of org.apache.avro.io.DatumWriter in project registry by hortonworks.
In the class DefaultAvroSerDesHandler, the method handlePayloadSerialization:
@Override
public void handlePayloadSerialization(OutputStream outputStream, Object input) {
  try {
    Schema schema = AvroUtils.computeSchema(input);
    Schema.Type schemaType = schema.getType();
    if (Schema.Type.BYTES.equals(schemaType)) {
      // In case of byte arrays there is not much for Avro to optimize, and Avro expects
      // the payload as a ByteBuffer rather than a byte array, so write the bytes directly.
      outputStream.write((byte[]) input);
    } else if (Schema.Type.STRING.equals(schemaType)) {
      // Get the UTF-8 bytes and send those directly instead of going through Avro.
      outputStream.write(input.toString().getBytes("UTF-8"));
    } else {
      BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
      DatumWriter<Object> writer;
      boolean isSpecificRecord = input instanceof SpecificRecord;
      if (isSpecificRecord) {
        writer = new SpecificDatumWriter<>(schema);
      } else {
        writer = new GenericDatumWriter<>(schema);
      }
      writer.write(input, encoder);
      encoder.flush();
    }
  } catch (IOException e) {
    throw new AvroRetryableException(e);
  } catch (RuntimeException e) {
    throw new AvroException(e);
  }
}
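The handler above covers only the write side. For context, a read path would mirror its record branch with a DatumReader; a minimal sketch, assuming the writer's schema is available to the reader (the class and method names here are illustrative, not part of the registry project):

// Minimal sketch of the inverse of the record branch above; names are illustrative.
import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;

public class PayloadReaderSketch {
  // Decodes a single datum that was written by GenericDatumWriter or SpecificDatumWriter.
  public static Object readPayload(InputStream in, Schema writerSchema) throws IOException {
    DatumReader<Object> reader = new GenericDatumReader<>(writerSchema);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(in, null);
    return reader.read(null, decoder);
  }
}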
Use of org.apache.avro.io.DatumWriter in project hive by apache.
In the class QTestMiniClusters, the method getAvroRows:
private static List<byte[]> getAvroRows() {
  int numRows = 10;
  List<byte[]> events;
  final DatumWriter<GenericRecord> writer = new SpecificDatumWriter<>(Wikipedia.getClassSchema());
  // Note: rangeClosed(0, numRows) emits numRows + 1 records (indices 0 through 10 inclusive).
  events = IntStream.rangeClosed(0, numRows)
      .mapToObj(i -> Wikipedia.newBuilder()
          .setTimestamp(formatter.format(new Timestamp(1534736225090L + 1000 * 3600 * i)))
          .setAdded(i * 300).setDeleted(-i).setIsrobot(i % 2 == 0)
          .setChannel("chanel number " + i).setComment("comment number " + i)
          .setCommentlength(i).setDiffurl(String.format("url %s", i)).setFlags("flag")
          .setIsminor(i % 2 > 0).setIsanonymous(i % 3 != 0).setNamespace("namespace")
          .setIsunpatrolled(i % 3 == 0).setIsnew(i % 2 > 0)
          .setPage(String.format("page is %s", i * 100))
          .setDelta(i).setDeltabucket(i * 100.4).setUser("test-user-" + i)
          .build())
      .map(genericRecord -> {
        // Binary-encode each generated record into its own standalone byte array.
        java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        try {
          writer.write(genericRecord, encoder);
          encoder.flush();
          out.close();
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
        return out.toByteArray();
      })
      .collect(Collectors.toList());
  return events;
}
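Because each element is a bare binary datum with no embedded schema, decoding requires the same Wikipedia schema that produced it. A minimal sketch of reading one event back, assuming the Avro-generated Wikipedia class from the test is on the classpath (the class and method names are illustrative):

// Minimal sketch: decode one byte array produced by getAvroRows(); names are illustrative.
import java.io.IOException;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificDatumReader;

public class AvroRowDecodeSketch {
  public static Wikipedia decode(byte[] event) throws IOException {
    DatumReader<Wikipedia> reader = new SpecificDatumReader<>(Wikipedia.getClassSchema());
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(event, null);
    return reader.read(null, decoder);
  }
}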