Use of org.apache.avro.generic.GenericDatumWriter in project databus by LinkedIn.
The class RelayEventGenerator, method populateEvents.
int populateEvents(String source, short id, GenericRecord record, DbusEventKey key, byte[] schemaId, DbusEventsStatisticsCollector statsCollector, DbusEventBufferAppendable buffer) {
    if (record != null && key != null) {
        try {
            // Serialize the row with Avro's binary encoding.
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            // Legacy (pre-Avro-1.5) constructor; it writes straight through, so no flush is needed.
            Encoder encoder = new BinaryEncoder(bos);
            GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(record.getSchema());
            writer.write(record, encoder);
            byte[] serializedValue = bos.toByteArray();
            short pPartitionId = RngUtils.randomPositiveShort();
            short lPartitionId = RngUtils.randomPositiveShort();
            // Databus event timestamps are in nanoseconds.
            long timeStamp = System.currentTimeMillis() * 1000000;
            buffer.appendEvent(key, pPartitionId, lPartitionId, timeStamp, id, schemaId, serializedValue, false, statsCollector);
            return 1;
        } catch (IOException io) {
            LOG.error("Cannot create byte stream payload: " + source, io);
        }
    }
    return 0;
}
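The new BinaryEncoder(bos) call above relies on a public constructor that Avro removed in 1.5; from then on, encoders come from EncoderFactory and buffer their output. A minimal sketch of the same row serialization against the modern Avro API (the helper name serializeRecord is hypothetical):
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

// Hypothetical helper: serialize a GenericRecord with the Avro 1.5+ encoder API.
static byte[] serializeRecord(GenericRecord record) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bos, null);
    new GenericDatumWriter<GenericRecord>(record.getSchema()).write(record, encoder);
    // Unlike the legacy constructor, factory encoders buffer and must be flushed.
    encoder.flush();
    return bos.toByteArray();
}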
Use of org.apache.avro.generic.GenericDatumWriter in project flink by Apache.
The class AvroOutputFormat, method open.
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    super.open(taskNumber, numTasks);
    DatumWriter<E> datumWriter;
    Schema schema;
    if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
        datumWriter = new SpecificDatumWriter<E>(avroValueType);
        try {
            schema = ((org.apache.avro.specific.SpecificRecordBase) avroValueType.newInstance()).getSchema();
        } catch (InstantiationException | IllegalAccessException e) {
            // Wrap the cause rather than just its message, so the stack trace survives.
            throw new RuntimeException(e);
        }
    } else if (org.apache.avro.generic.GenericRecord.class.isAssignableFrom(avroValueType)) {
        if (userDefinedSchema == null) {
            throw new IllegalStateException("Schema must be set when using Generic Record");
        }
        datumWriter = new GenericDatumWriter<E>(userDefinedSchema);
        schema = userDefinedSchema;
    } else {
        datumWriter = new ReflectDatumWriter<E>(avroValueType);
        schema = ReflectData.get().getSchema(avroValueType);
    }
    dataFileWriter = new DataFileWriter<E>(datumWriter);
    if (codec != null) {
        dataFileWriter.setCodec(codec.getCodecFactory());
    }
    // A user-defined schema, when present, wins over the derived one.
    if (userDefinedSchema == null) {
        dataFileWriter.create(schema, stream);
    } else {
        dataFileWriter.create(userDefinedSchema, stream);
    }
}
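For context, a hedged usage sketch of the generic-record branch above, assuming Flink's flink-avro AvroOutputFormat API; the output path is a placeholder and records stands for an existing DataSet<GenericRecord>:
// Placeholder wiring for the GenericRecord case of open().
AvroOutputFormat<GenericRecord> format =
    new AvroOutputFormat<>(new Path("file:///tmp/out.avro"), GenericRecord.class);
format.setSchema(schema); // mandatory for GenericRecord, per the check in open()
format.setCodec(AvroOutputFormat.Codec.SNAPPY); // optional compression
records.output(format);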
Use of org.apache.avro.generic.GenericDatumWriter in project flink by Apache.
The class AvroRowDataDeSerializationSchemaTest, method testSerializeDeserialize.
@Test
public void testSerializeDeserialize() throws Exception {
    final DataType dataType = ROW(
        FIELD("bool", BOOLEAN()),
        FIELD("tinyint", TINYINT()),
        FIELD("smallint", SMALLINT()),
        FIELD("int", INT()),
        FIELD("bigint", BIGINT()),
        FIELD("float", FLOAT()),
        FIELD("double", DOUBLE()),
        FIELD("name", STRING()),
        FIELD("bytes", BYTES()),
        FIELD("decimal", DECIMAL(19, 6)),
        FIELD("doubles", ARRAY(DOUBLE())),
        FIELD("time", TIME(0)),
        FIELD("date", DATE()),
        FIELD("timestamp3", TIMESTAMP(3)),
        FIELD("timestamp3_2", TIMESTAMP(3)),
        FIELD("map", MAP(STRING(), BIGINT())),
        FIELD("map2map", MAP(STRING(), MAP(STRING(), INT()))),
        FIELD("map2array", MAP(STRING(), ARRAY(INT()))),
        FIELD("nullEntryMap", MAP(STRING(), STRING()))).notNull();
    final RowType rowType = (RowType) dataType.getLogicalType();
    final Schema schema = AvroSchemaConverter.convertToSchema(rowType);
    final GenericRecord record = new GenericData.Record(schema);
    record.put(0, true);
    record.put(1, (int) Byte.MAX_VALUE);
    record.put(2, (int) Short.MAX_VALUE);
    record.put(3, 33);
    record.put(4, 44L);
    record.put(5, 12.34F);
    record.put(6, 23.45);
    record.put(7, "hello avro");
    record.put(8, ByteBuffer.wrap(new byte[] { 1, 2, 4, 5, 6, 7, 8, 12 }));
    record.put(9, ByteBuffer.wrap(BigDecimal.valueOf(123456789, 6).unscaledValue().toByteArray()));
    List<Double> doubles = new ArrayList<>();
    doubles.add(1.2);
    doubles.add(3.4);
    doubles.add(567.8901);
    record.put(10, doubles);
    // time/date/timestamp fields use Avro's int/long encodings
    // (millis of day, days since epoch, epoch millis).
    record.put(11, 18397);
    record.put(12, 10087);
    record.put(13, 1589530213123L);
    record.put(14, 1589530213122L);
    Map<String, Long> map = new HashMap<>();
    map.put("flink", 12L);
    map.put("avro", 23L);
    record.put(15, map);
    Map<String, Map<String, Integer>> map2map = new HashMap<>();
    Map<String, Integer> innerMap = new HashMap<>();
    innerMap.put("inner_key1", 123);
    innerMap.put("inner_key2", 234);
    map2map.put("outer_key", innerMap);
    record.put(16, map2map);
    List<Integer> list1 = Arrays.asList(1, 2, 3, 4, 5, 6);
    List<Integer> list2 = Arrays.asList(11, 22, 33, 44, 55);
    Map<String, List<Integer>> map2list = new HashMap<>();
    map2list.put("list1", list1);
    map2list.put("list2", list2);
    record.put(17, map2list);
    Map<String, String> map2 = new HashMap<>();
    map2.put("key1", null);
    record.put(18, map2);
    AvroRowDataSerializationSchema serializationSchema = createSerializationSchema(dataType);
    AvroRowDataDeserializationSchema deserializationSchema = createDeserializationSchema(dataType);
    // Encode the record with a plain GenericDatumWriter ...
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    GenericDatumWriter<IndexedRecord> datumWriter = new GenericDatumWriter<>(schema);
    Encoder encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
    datumWriter.write(record, encoder);
    encoder.flush();
    byte[] input = byteArrayOutputStream.toByteArray();
    // ... then check that deserialize -> serialize round-trips to identical bytes.
    RowData rowData = deserializationSchema.deserialize(input);
    byte[] output = serializationSchema.serialize(rowData);
    assertArrayEquals(input, output);
}
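A quick way to sanity-check those bytes outside of Flink is to decode them with the matching generic reader. A minimal sketch reusing the schema, record, and input from the test above (plain Avro API):
// Decode the serialized bytes back into a GenericRecord.
GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
Decoder decoder = DecoderFactory.get().binaryDecoder(input, null);
GenericRecord decoded = datumReader.read(null, decoder);
// Avro decodes strings as org.apache.avro.util.Utf8, so compare via toString().
assertEquals(record.get(7).toString(), decoded.get(7).toString());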
Use of org.apache.avro.generic.GenericDatumWriter in project beam by Apache.
The class AvroByteReaderTest, method initInputFile.
/** Write input elements to a file and return information about the Avro-encoded file. */
private <T> AvroFileInfo<T> initInputFile(List<List<T>> elemsList, Coder<T> coder) throws Exception {
    File tmpFile = tmpFolder.newFile("file.avro");
    AvroFileInfo<T> fileInfo = new AvroFileInfo<>();
    fileInfo.filename = tmpFile.getPath();
    // Write the data.
    OutputStream outStream = Channels.newOutputStream(
        FileSystems.create(FileSystems.matchNewResource(fileInfo.filename, false), MimeTypes.BINARY));
    Schema schema = Schema.create(Schema.Type.BYTES);
    DatumWriter<ByteBuffer> datumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<ByteBuffer> fileWriter = new DataFileWriter<>(datumWriter)) {
        fileWriter.create(schema, outStream);
        boolean first = true;
        for (List<T> elems : elemsList) {
            if (first) {
                first = false;
            } else {
                // Ensure a block boundary here.
                long syncPoint = fileWriter.sync();
                fileInfo.syncPoints.add(syncPoint);
            }
            for (T elem : elems) {
                byte[] encodedElement = CoderUtils.encodeToByteArray(coder, elem);
                fileWriter.append(ByteBuffer.wrap(encodedElement));
                fileInfo.elementSizes.add(encodedElement.length);
                fileInfo.totalElementEncodedSize += encodedElement.length;
            }
        }
    }
    return fileInfo;
}
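The sync points recorded above mark block boundaries a reader can jump to directly. A sketch of that reader side, assuming plain Avro's DataFileReader over the same file (not part of the Beam test itself):
// Seek straight to the first block boundary recorded by fileWriter.sync().
try (DataFileReader<ByteBuffer> fileReader =
        new DataFileReader<>(new File(fileInfo.filename), new GenericDatumReader<ByteBuffer>(schema))) {
    fileReader.seek(fileInfo.syncPoints.get(0));
    while (fileReader.hasNext()) {
        ByteBuffer encodedElement = fileReader.next();
        // decode with the same Coder that encoded the element
    }
}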
Use of org.apache.avro.generic.GenericDatumWriter in project beam by Apache.
The class FakeJobService, method writeRowsHelper.
private void writeRowsHelper(List<TableRow> rows, Schema avroSchema, String destinationPattern, int shard) {
    String filename = destinationPattern.replace("*", String.format("%012d", shard));
    try (WritableByteChannel channel =
            FileSystems.create(FileSystems.matchNewResource(filename, false), MimeTypes.BINARY);
        DataFileWriter<GenericRecord> tableRowWriter =
            new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema))
                .create(avroSchema, Channels.newOutputStream(channel))) {
        for (Map<String, Object> record : rows) {
            GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(avroSchema);
            for (Map.Entry<String, Object> field : record.entrySet()) {
                genericRecordBuilder.set(field.getKey(), field.getValue());
            }
            tableRowWriter.append(genericRecordBuilder.build());
        }
    } catch (IOException e) {
        throw new IllegalStateException(
            String.format("Could not create destination for extract job %s", filename), e);
    }
}
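Reading a shard back is symmetric. A minimal sketch, assuming an InputStream named in over one shard file (plain Avro API, not part of FakeJobService):
// Stream the shard back as GenericRecords.
try (DataFileStream<GenericRecord> reader =
        new DataFileStream<>(in, new GenericDatumReader<GenericRecord>(avroSchema))) {
    for (GenericRecord rec : reader) {
        // map each record back to a TableRow field by field
    }
}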