Search in sources :

Example 21 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project cdap by caskdata.

the class StoreHandler method encodeRollbackDetail.

/**
   * Encodes the {@link RollbackDetail} object as avro record based on the {@link Schemas.V1.PublishResponse#SCHEMA}.
   */
private ChannelBuffer encodeRollbackDetail(RollbackDetail rollbackDetail) throws IOException {
    Schema schema = Schemas.V1.PublishResponse.SCHEMA;
    // Constructs the response object as GenericRecord
    GenericRecord response = new GenericData.Record(schema);
    response.put("transactionWritePointer", rollbackDetail.getTransactionWritePointer());
    GenericRecord rollbackRange = new GenericData.Record(schema.getField("rollbackRange").schema());
    rollbackRange.put("startTimestamp", rollbackDetail.getStartTimestamp());
    rollbackRange.put("startSequenceId", rollbackDetail.getStartSequenceId());
    rollbackRange.put("endTimestamp", rollbackDetail.getEndTimestamp());
    rollbackRange.put("endSequenceId", rollbackDetail.getEndSequenceId());
    response.put("rollbackRange", rollbackRange);
    // For V1 PublishResponse, it contains an union(long, null) and then 2 longs and 2 integers,
    // hence the max size is 38
    // (union use 1 byte, long max size is 9 bytes, integer max size is 5 bytes in avro binary encoding)
    ChannelBuffer buffer = ChannelBuffers.dynamicBuffer(38);
    Encoder encoder = EncoderFactory.get().directBinaryEncoder(new ChannelBufferOutputStream(buffer), null);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    datumWriter.write(response, encoder);
    return buffer;
}
Also used : ChannelBufferOutputStream(org.jboss.netty.buffer.ChannelBufferOutputStream) Encoder(org.apache.avro.io.Encoder) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord) ChannelBuffer(org.jboss.netty.buffer.ChannelBuffer)

Example 22 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project spark-dataflow by cloudera.

the class AvroPipelineTest method populateGenericFile.

private void populateGenericFile(List<GenericRecord> genericRecords, Schema schema) throws IOException {
    FileOutputStream outputStream = new FileOutputStream(this.inputFile);
    GenericDatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(genericDatumWriter)) {
        dataFileWriter.create(schema, outputStream);
        for (GenericRecord record : genericRecords) {
            dataFileWriter.append(record);
        }
    }
    outputStream.close();
}
Also used : FileOutputStream(java.io.FileOutputStream) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 23 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project hive by apache.

the class TestThatEvolvedSchemasActAsWeWant method resolvedSchemasShouldReturnReaderSchema.

@Test
public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
    // Need to verify that when reading a datum with an updated reader schema
    // that the datum then returns the reader schema as its own, since we
    // depend on this behavior in order to avoid re-encoding the datum
    // in the serde.
    String v0 = "{\n" + "    \"namespace\": \"org.apache.hadoop.hive\",\n" + "    \"name\": \"SomeStuff\",\n" + "    \"type\": \"record\",\n" + "    \"fields\": [\n" + "        {\n" + "            \"name\":\"v0\",\n" + "            \"type\":\"string\"\n" + "        }\n" + "    ]\n" + "}";
    String v1 = "{\n" + "    \"namespace\": \"org.apache.hadoop.hive\",\n" + "    \"name\": \"SomeStuff\",\n" + "    \"type\": \"record\",\n" + "    \"fields\": [\n" + "        {\n" + "            \"name\":\"v0\",\n" + "            \"type\":\"string\"\n" + "        },\n" + "        {\n" + "            \"name\":\"v1\",\n" + "            \"type\":\"string\",\n" + "            \"default\":\"v1_default\"" + "        }\n" + "    ]\n" + "}";
    Schema[] schemas = { AvroSerdeUtils.getSchemaFor(v0), AvroSerdeUtils.getSchemaFor(v1) };
    // Encode a schema with v0, write out.
    GenericRecord record = new GenericData.Record(schemas[0]);
    record.put("v0", "v0 value");
    assertTrue(GenericData.get().validate(schemas[0], record));
    // Write datum out to a stream
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schemas[0]);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    dfw.create(schemas[0], baos);
    dfw.append(record);
    dfw.close();
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(schemas[1]);
    DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, gdr);
    assertTrue(dfs.hasNext());
    GenericRecord next = dfs.next();
    assertEquals("v0 value", next.get("v0").toString());
    assertEquals("v1_default", next.get("v1").toString());
    // Now the most important check - when we query this record for its schema,
    // we should get back the latest, reader schema:
    assertEquals(schemas[1], next.getSchema());
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) ByteArrayInputStream(java.io.ByteArrayInputStream) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 24 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project hive by apache.

the class AvroGenericRecordWritable method write.

@Override
public void write(DataOutput out) throws IOException {
    // Write schema since we need it to pull the data out. (see point #1 above)
    String schemaString = record.getSchema().toString(false);
    out.writeUTF(schemaString);
    schemaString = fileSchema.toString(false);
    out.writeUTF(schemaString);
    recordReaderID.write(out);
    // Write record to byte buffer
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>();
    BinaryEncoder be = EncoderFactory.get().directBinaryEncoder((DataOutputStream) out, null);
    gdw.setSchema(record.getSchema());
    gdw.write(record, be);
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 25 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project storm by apache.

the class AvroSerializer method write.

@Override
public ByteBuffer write(List<Object> data, ByteBuffer buffer) {
    Preconditions.checkArgument(data != null && data.size() == fieldNames.size(), "Invalid schemas");
    try {
        Schema schema = schemas.getSchema(schemaString);
        GenericRecord record = new GenericData.Record(schema);
        for (int i = 0; i < fieldNames.size(); i++) {
            record.put(fieldNames.get(i), data.get(i));
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
        Encoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
        writer.write(record, encoder);
        encoder.flush();
        byte[] bytes = out.toByteArray();
        out.close();
        return ByteBuffer.wrap(bytes);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : Encoder(org.apache.avro.io.Encoder) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord)

Aggregations

GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)49 GenericRecord (org.apache.avro.generic.GenericRecord)46 ByteArrayOutputStream (java.io.ByteArrayOutputStream)24 Schema (org.apache.avro.Schema)23 DataFileWriter (org.apache.avro.file.DataFileWriter)17 BinaryEncoder (org.apache.avro.io.BinaryEncoder)17 IOException (java.io.IOException)13 Encoder (org.apache.avro.io.Encoder)12 File (java.io.File)9 Test (org.junit.Test)6 FileOutputStream (java.io.FileOutputStream)4 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)4 ArrayList (java.util.ArrayList)3 Properties (java.util.Properties)3 Producer (kafka.javaapi.producer.Producer)3 ProducerConfig (kafka.producer.ProducerConfig)3 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)3 JsonEncoder (org.apache.avro.io.JsonEncoder)3 AvroAdapter (com.linkedin.data.avro.AvroAdapter)2 DbusEventInfo (com.linkedin.databus.core.DbusEventInfo)2