use of org.apache.avro.generic.GenericDatumWriter in project cdap by caskdata.
the class StoreHandler method encodeRollbackDetail.
/**
 * Encodes the {@link RollbackDetail} object as an Avro record based on the {@link Schemas.V1.PublishResponse#SCHEMA}.
 */
private ChannelBuffer encodeRollbackDetail(RollbackDetail rollbackDetail) throws IOException {
    Schema schema = Schemas.V1.PublishResponse.SCHEMA;
    // Constructs the response object as GenericRecord
    GenericRecord response = new GenericData.Record(schema);
    response.put("transactionWritePointer", rollbackDetail.getTransactionWritePointer());
    GenericRecord rollbackRange = new GenericData.Record(schema.getField("rollbackRange").schema());
    rollbackRange.put("startTimestamp", rollbackDetail.getStartTimestamp());
    rollbackRange.put("startSequenceId", rollbackDetail.getStartSequenceId());
    rollbackRange.put("endTimestamp", rollbackDetail.getEndTimestamp());
    rollbackRange.put("endSequenceId", rollbackDetail.getEndSequenceId());
    response.put("rollbackRange", rollbackRange);
    // The V1 PublishResponse record contains a union(long, null) followed by 2 longs and 2 ints, so the
    // maximum encoded size is 38 bytes: in Avro binary encoding a union tag takes 1 byte, a long takes
    // at most 9 bytes, and an int takes at most 5 bytes, giving 1 + 9 + 9 + 9 + 5 + 5 = 38.
    ChannelBuffer buffer = ChannelBuffers.dynamicBuffer(38);
    Encoder encoder = EncoderFactory.get().directBinaryEncoder(new ChannelBufferOutputStream(buffer), null);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    datumWriter.write(response, encoder);
    return buffer;
}
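For reference, the payload written above can be decoded with the matching org.apache.avro.generic.GenericDatumReader against the same schema. The following is a minimal sketch, not CDAP code: the helper name decodeRollbackDetail and the use of a plain byte[] payload instead of the ChannelBuffer are assumptions for illustration.
private GenericRecord decodeRollbackDetail(byte[] payload) throws IOException {
    // Hypothetical inverse of encodeRollbackDetail: decode the Avro binary payload with the same schema.
    Schema schema = Schemas.V1.PublishResponse.SCHEMA;
    Decoder decoder = DecoderFactory.get().binaryDecoder(payload, null);
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
    return datumReader.read(null, decoder);
}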
use of org.apache.avro.generic.GenericDatumWriter in project spark-dataflow by cloudera.
the class AvroPipelineTest method populateGenericFile.
private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(this.inputFile);
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
        dataFileWriter.create(schema, outputStream);
        for (GenericRecord record : genericRecords) {
            dataFileWriter.append(record);
        }
    }
    outputStream.close();
}
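A minimal sketch of the corresponding read path, assuming inputFile is a java.io.File as in the snippet above; the helper name readGenericFile is hypothetical and not part of AvroPipelineTest. It uses the standard Avro container-file read path with DataFileReader and GenericDatumReader.
private List<GenericRecord> readGenericFile() throws IOException {
    // Read every record back from the Avro container file written by populateGenericFile.
    List<GenericRecord> records = new ArrayList<>();
    GenericDatumReader<GenericRecord> genericDatumReader = new GenericDatumReader<>();
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(this.inputFile, genericDatumReader)) {
        for (GenericRecord record : dataFileReader) {
            records.add(record);
        }
    }
    return records;
}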
use of org.apache.avro.generic.GenericDatumWriter in project hive by apache.
the class TestThatEvolvedSchemasActAsWeWant method resolvedSchemasShouldReturnReaderSchema.
@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
    // Need to verify that, when reading a datum with an updated reader schema,
    // the datum then returns the reader schema as its own, since we depend on
    // this behavior in order to avoid re-encoding the datum in the serde.
String v0 = "{\n" + " \"namespace\": \"org.apache.hadoop.hive\",\n" + " \"name\": \"SomeStuff\",\n" + " \"type\": \"record\",\n" + " \"fields\": [\n" + " {\n" + " \"name\":\"v0\",\n" + " \"type\":\"string\"\n" + " }\n" + " ]\n" + "}";
String v1 = "{\n" + " \"namespace\": \"org.apache.hadoop.hive\",\n" + " \"name\": \"SomeStuff\",\n" + " \"type\": \"record\",\n" + " \"fields\": [\n" + " {\n" + " \"name\":\"v0\",\n" + " \"type\":\"string\"\n" + " },\n" + " {\n" + " \"name\":\"v1\",\n" + " \"type\":\"string\",\n" + " \"default\":\"v1_default\"" + " }\n" + " ]\n" + "}";
Schema[] schemas = { AvroSerdeUtils.getSchemaFor(v0), AvroSerdeUtils.getSchemaFor(v1) };
    // Encode a record with the v0 schema and write it out.
    GenericRecord record = new GenericData.Record(schemas[0]);
    record.put("v0", "v0 value");
    assertTrue(GenericData.get().validate(schemas[0], record));
    // Write datum out to a stream
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    dfw.create(schemas[0], baos);
    dfw.append(record);
    dfw.close();
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(schemas[1]);
    DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
    assertTrue(dfs.hasNext());
    GenericRecord next = dfs.next();
    assertEquals("v0 value", next.get("v0").toString());
    assertEquals("v1_default", next.get("v1").toString());
    // Now the most important check - when we query this record for its schema,
    // we should get back the latest, reader schema:
    assertEquals(schemas[1], next.getSchema());
}
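The same writer-to-reader resolution can be set up with the two-argument GenericDatumReader constructor instead of calling setExpected. The fragment below is a minimal sketch under the same test data (it is not part of the Hive test) and would sit inside the same test body, since it reuses the local baos and schemas variables.
    // Equivalent reader setup: supply the writer and reader schemas up front.
    GenericDatumReader<GenericRecord> resolvingReader = new GenericDatumReader<GenericRecord>(schemas[0], schemas[1]);
    DataFileStream<GenericRecord> resolvedStream = new DataFileStream<GenericRecord>(new ByteArrayInputStream(baos.toByteArray()), resolvingReader);
    GenericRecord resolved = resolvedStream.next();
    // As asserted above, the resolved datum reports the reader schema, schemas[1], as its own.
    resolvedStream.close();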
use of org.apache.avro.generic.GenericDatumWriter in project hive by apache.
the class AvroGenericRecordWritable method write.
@Override
public void write(DataOutput out) throws IOException {
    // Write schema since we need it to pull the data out. (see point #1 above)
    String schemaString = record.getSchema().toString(false);
    out.writeUTF(schemaString);
    schemaString = fileSchema.toString(false);
    out.writeUTF(schemaString);
    recordReaderID.write(out);
    // Write record to byte buffer
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>();
    BinaryEncoder be = EncoderFactory.get().directBinaryEncoder((DataOutputStream) out, null);
    gdw.setSchema(record.getSchema());
    gdw.write(record, be);
}
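The serialized form is consumed by a symmetric readFields. The following is only a simplified sketch of that counterpart, not Hive's actual implementation: it assumes recordReaderID is a java.rmi.server.UID (suggested by recordReaderID.write(out) above), and the (DataInputStream) cast mirrors the (DataOutputStream) cast in write.
public void readFields(DataInput in) throws IOException {
    // Simplified counterpart of write(DataOutput): read both schemas, the reader ID, then the record bytes.
    Schema writerSchema = AvroSerdeUtils.getSchemaFor(in.readUTF());
    fileSchema = AvroSerdeUtils.getSchemaFor(in.readUTF());
    recordReaderID = UID.read(in);  // assumed to be a java.rmi.server.UID
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(writerSchema);
    BinaryDecoder bd = DecoderFactory.get().directBinaryDecoder((DataInputStream) in, null);
    record = gdr.read(null, bd);
}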
use of org.apache.avro.generic.GenericDatumWriter in project storm by apache.
the class AvroSerializer method write.
@Override
public ByteBuffer write(List<Object> data, ByteBuffer buffer) {
    Preconditions.checkArgument(data != null && data.size() == fieldNames.size(), "Invalid schemas");
    try {
        Schema schema = schemas.getSchema(schemaString);
        GenericRecord record = new GenericData.Record(schema);
        for (int i = 0; i < fieldNames.size(); i++) {
            record.put(fieldNames.get(i), data.get(i));
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
        Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
        writer.write(record, encoder);
        encoder.flush();
        byte[] bytes = out.toByteArray();
        out.close();
        return ByteBuffer.wrap(bytes);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
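A minimal sketch of the matching deserialization, assuming the same schemas registry, schemaString, and fieldNames list as above; the method name and signature are illustrative rather than the Storm AvroSerializer API.
public List<Object> read(ByteBuffer buffer) {
    try {
        // Decode the Avro binary payload back into a GenericRecord, then unpack it field by field.
        Schema schema = schemas.getSchema(schemaString);
        byte[] bytes = new byte[buffer.remaining()];
        buffer.get(bytes);
        DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
        Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
        GenericRecord record = reader.read(null, decoder);
        List<Object> data = new ArrayList<>(fieldNames.size());
        for (int i = 0; i < fieldNames.size(); i++) {
            data.add(record.get(fieldNames.get(i)));
        }
        return data;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}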