Search in sources :

Example 66 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project rest.li by linkedin.

the class AvroUtil method jsonFromGenericRecord.

/**
 * Serializes a {@link GenericRecord} to JSON text using the record's own schema.
 *
 * @param record the record to serialize; {@code record.getSchema()} is used both for the
 *               datum writer and for the JSON encoder
 * @return the JSON produced by the encoder, decoded as UTF-8
 * @throws IOException if writing the record or flushing the encoder fails
 */
public static String jsonFromGenericRecord(GenericRecord record) throws IOException {
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(record.getSchema());
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    Encoder jsonEncoder = AvroCompatibilityHelper.newJsonEncoder(record.getSchema(), outputStream, true);
    writer.write(record, jsonEncoder);
    // The encoder buffers internally; flush so every byte reaches outputStream before reading it.
    jsonEncoder.flush();
    // Decode explicitly instead of relying on the platform default charset, which would corrupt
    // non-ASCII characters on non-UTF-8 platforms. NOTE(review): assumes the JSON encoder emits
    // UTF-8 bytes (Avro's JsonEncoder does) - confirm for the compatibility helper in use.
    return outputStream.toString("UTF-8");
}
Also used : Encoder(org.apache.avro.io.Encoder) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 67 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project hive by apache.

the class TestHBaseSerDe method getTestAvroBytesFromSchema.

/**
 * Builds an Avro data-file payload containing a single test record for the given schema.
 *
 * <p>The top-level record has one field, {@code aRecord}, holding a nested record with
 * {@code int1}, {@code boolean1} and {@code long1}; {@code string1} is additionally populated
 * when the schema is {@code RECORD_SCHEMA_EVOLVED}.
 *
 * @param schemaToUse the Avro schema as JSON text
 * @return the bytes written by the {@link DataFileWriter} for the single record
 * @throws IOException if writing the data file fails
 */
private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
    // Schema.parse(String) is deprecated; Schema.Parser is its documented replacement.
    Schema s = new Schema.Parser().parse(schemaToUse);
    GenericData.Record record = new GenericData.Record(s);
    GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
    innerRecord.put("int1", 42);
    innerRecord.put("boolean1", true);
    innerRecord.put("long1", 42432234234L);
    if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
        innerRecord.put("string1", "new value");
    }
    record.put("aRecord", innerRecord);
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // try-with-resources closes the writer even if create/append throws.
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter)) {
        dataFileWriter.create(s, out);
        dataFileWriter.append(record);
    }
    // Read the bytes only after the writer is closed so all buffered data has been flushed.
    return out.toByteArray();
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericData(org.apache.avro.generic.GenericData)

Example 68 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project hive by apache.

the class AvroContainerOutputFormat method getHiveRecordWriter.

/**
 * Creates a record writer that appends {@link GenericRecord}s to an Avro data file at
 * {@code path}.
 *
 * <p>The schema is resolved from {@code jobConf} and {@code properties}. When
 * {@code isCompressed} is set, a codec is chosen from the job configuration. Writer time zone,
 * proleptic-Gregorian and legacy-conversion settings are stored as file metadata before the
 * file is created. {@code valueClass} and {@code progressable} are not used here.
 *
 * @throws IOException if the schema cannot be determined or the file cannot be created
 */
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema avroSchema;
    try {
        avroSchema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException cause) {
        throw new IOException(cause);
    }

    DataFileWriter<GenericRecord> fileWriter =
        new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(avroSchema));

    if (isCompressed) {
        int deflateLevel = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory codec;
        if (codecName.equals(DEFLATE_CODEC)) {
            codec = CodecFactory.deflateCodec(deflateLevel);
        } else {
            codec = CodecFactory.fromString(codecName);
        }
        fileWriter.setCodec(codec);
    }

    // Codec and metadata are configured before create(), as in the original ordering.
    // add writer.time.zone property to file metadata
    String writerZone = TimeZone.getDefault().toZoneId().toString();
    fileWriter.setMeta(AvroSerDe.WRITER_TIME_ZONE, writerZone);

    boolean proleptic = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN);
    fileWriter.setMeta(AvroSerDe.WRITER_PROLEPTIC, String.valueOf(proleptic));

    boolean legacyConversion = HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_WRITE_LEGACY_CONVERSION_ENABLED);
    fileWriter.setMeta(AvroSerDe.WRITER_ZONE_CONVERSION_LEGACY, String.valueOf(legacyConversion));

    fileWriter.create(avroSchema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(fileWriter);
}
Also used : AvroSerdeException(org.apache.hadoop.hive.serde2.avro.AvroSerdeException) Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) IOException(java.io.IOException) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) CodecFactory(org.apache.avro.file.CodecFactory) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 69 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project incubator-gobblin by apache.

the class AvroRecursionEliminatingConverterTest method generateRecord.

/**
 * Writes a single test record, built from the {@code /converter/recursive.avsc} schema, to a
 * temporary Avro data file.
 *
 * <p>The record carries an {@code address} whose {@code previous_address} is another record of
 * the same address schema.
 *
 * @return the temporary file containing the record; it is registered for deletion on JVM exit
 * @throws IOException if the schema cannot be read or the data file cannot be written
 */
public File generateRecord() throws IOException {
    Schema inputSchema;
    // Close the classpath resource stream once the schema has been parsed.
    try (java.io.InputStream schemaStream = getClass().getResourceAsStream("/converter/recursive.avsc")) {
        inputSchema = new Schema.Parser().parse(schemaStream);
    }
    GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(inputSchema);
    GenericRecord record = new GenericData.Record(inputSchema);
    record.put("name", "John");
    record.put("date_of_birth", 1234L);
    record.put("last_modified", 4567L);
    record.put("created", 6789L);
    GenericRecord addressRecord = new GenericData.Record(inputSchema.getField("address").schema());
    addressRecord.put("city", "Los Angeles");
    addressRecord.put("street_number", 1234);
    GenericRecord innerAddressRecord = new GenericData.Record(inputSchema.getField("address").schema());
    innerAddressRecord.put("city", "San Francisco");
    innerAddressRecord.put("street_number", 3456);
    addressRecord.put("previous_address", innerAddressRecord);
    record.put("address", addressRecord);
    // NOTE(review): the suffix has no leading dot, so the file name ends in "avsc" rather than ".avsc".
    File recordFile = File.createTempFile(this.getClass().getSimpleName(), "avsc");
    // Register cleanup immediately so the temp file is removed even if writing fails below.
    recordFile.deleteOnExit();
    // try-with-resources closes the writer (and its file handle) even if create/append throws.
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter)) {
        dataFileWriter.create(inputSchema, recordFile);
        dataFileWriter.append(record);
    }
    return recordFile;
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) GenericRecord(org.apache.avro.generic.GenericRecord) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) File(java.io.File)

Example 70 with GenericDatumWriter

use of org.apache.avro.generic.GenericDatumWriter in project incubator-gobblin by apache.

the class AvroGenericRecordAccessorTest method serializeRecord.

/**
 * Post-test hook: after a test that succeeded without a throwable, binary-encodes
 * {@code record} against {@code recordSchema} and asserts that some bytes were produced.
 *
 * @param result the outcome of the test method that just ran
 * @throws IOException if encoding the record fails
 */
@AfterMethod
public void serializeRecord(ITestResult result) throws IOException {
    // Only verify serialization for tests that passed cleanly.
    if (!result.isSuccess() || result.getThrowable() != null) {
        return;
    }

    // Serializing the GenericRecord can surface problems introduced by set() that the
    // underlying GenericRecord would not report until serialize time.
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(sink, null);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(recordSchema);
    writer.write(record, binaryEncoder);
    binaryEncoder.flush();
    sink.flush();

    byte[] encoded = sink.toByteArray();
    Assert.assertTrue(encoded.length > 0);
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericRecord(org.apache.avro.generic.GenericRecord) AfterMethod(org.testng.annotations.AfterMethod)

Aggregations

GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)127 GenericRecord (org.apache.avro.generic.GenericRecord)105 Schema (org.apache.avro.Schema)69 ByteArrayOutputStream (java.io.ByteArrayOutputStream)57 DataFileWriter (org.apache.avro.file.DataFileWriter)47 File (java.io.File)40 Test (org.junit.Test)37 IOException (java.io.IOException)29 BinaryEncoder (org.apache.avro.io.BinaryEncoder)29 MockFlowFile (org.apache.nifi.util.MockFlowFile)25 Encoder (org.apache.avro.io.Encoder)23 TestRunner (org.apache.nifi.util.TestRunner)20 HashMap (java.util.HashMap)14 ByteArrayOutputStream (org.apache.nifi.stream.io.ByteArrayOutputStream)14 GenericData (org.apache.avro.generic.GenericData)12 ByteArrayInputStream (java.io.ByteArrayInputStream)11 FileOutputStream (java.io.FileOutputStream)10 InputStream (java.io.InputStream)9 ArrayList (java.util.ArrayList)8 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)8