Search in sources:

Example 6 with SpecificDatumWriter

use of org.apache.avro.specific.SpecificDatumWriter in project gora by apache.

the class HBaseByteInterface method toBytes.

/**
   * Serializes an object following the given schema.
   * Does not handle <code>array/map</code> if it is not inside a <code>record</code>.
   * <p>
   * Primitive values are encoded with {@code Bytes.toBytes}, booleans and enums
   * as a single byte, and unions/records with an Avro binary encoder.
   * For <code>bytes</code>, only the readable window of the buffer (from its
   * position up to its limit) is returned, as a fresh copy; the buffer's own
   * position is left untouched.
   *
   * @param o Utf8|ByteBuffer|Integer|Long|Float|Double|Boolean|Enum|Persistent
   * @param schema The schema describing the object (or a compatible description)
   * @return array of bytes of the serialized object
   * @throws IOException if the Avro writer or encoder fails
   * @throws RuntimeException if the schema type is not one of the supported types
   */
@SuppressWarnings({ "rawtypes", "unchecked" })
public static byte[] toBytes(Object o, Schema schema) throws IOException {
    Type type = schema.getType();
    switch(type) {
        // TODO: maybe ((Utf8)o).getBytes(); ?
        case STRING:
            return Bytes.toBytes(((CharSequence) o).toString());
        case BYTES:
            // ByteBuffer.array() would expose the whole backing array (ignoring
            // position, limit and array offset) and fails for direct or read-only
            // buffers. Read through a duplicate so the caller's buffer state is
            // not modified.
            ByteBuffer readableView = ((ByteBuffer) o).duplicate();
            byte[] content = new byte[readableView.remaining()];
            readableView.get(content);
            return content;
        case INT:
            return Bytes.toBytes((Integer) o);
        case LONG:
            return Bytes.toBytes((Long) o);
        case FLOAT:
            return Bytes.toBytes((Float) o);
        case DOUBLE:
            return Bytes.toBytes((Double) o);
        case BOOLEAN:
            return (Boolean) o ? new byte[] { 1 } : new byte[] { 0 };
        case ENUM:
            // The enum is stored as its ordinal narrowed to a single byte.
            return new byte[] { (byte) ((Enum<?>) o).ordinal() };
        case UNION:
        case RECORD:
            // Writers are cached per schema name.
            // NOTE(review): the cache key is schema.getFullName(); verify that this
            // is unique per schema for UNION types as well - TODO confirm.
            SpecificDatumWriter writer = writerMap.get(schema.getFullName());
            if (writer == null) {
                // ignore dirty bits
                writer = new SpecificDatumWriter(schema);
                writerMap.put(schema.getFullName(), writer);
            }
            BinaryEncoder encoderFromCache = encoders.get();
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            outputStream.set(bos);
            // A new direct (unbuffered) encoder bound to the fresh stream is used
            // for this call; it is only published to the cache when the cache was
            // empty.
            BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(bos, null);
            if (encoderFromCache == null) {
                encoders.set(encoder);
            }
            //reset the buffers
            ByteArrayOutputStream os = outputStream.get();
            os.reset();
            writer.write(o, encoder);
            encoder.flush();
            return os.toByteArray();
        default:
            throw new RuntimeException("Unknown type: " + type);
    }
}
Also used : Type(org.apache.avro.Schema.Type) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ByteBuffer(java.nio.ByteBuffer) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Example 7 with SpecificDatumWriter

use of org.apache.avro.specific.SpecificDatumWriter in project gora by apache.

the class SolrStore method serializeFieldValue.

/**
 * Converts a field value into the representation that is stored for it.
 * <ul>
 * <li>MAP, ARRAY and RECORD values are serialized to a byte array with the
 * datum writer obtained for the field schema.</li>
 * <li>BYTES values are copied out of the {@link ByteBuffer}, from its position
 * up to its limit; the buffer's own position is left untouched.</li>
 * <li>ENUM and STRING values are converted with {@code toString()}.</li>
 * <li>A two-branch nullable UNION is unwrapped and its selected branch is
 * converted recursively; any other UNION is serialized to a byte array.</li>
 * <li>Values of every other type are returned unchanged.</li>
 * </ul>
 *
 * @param fieldSchema the Avro schema of the field
 * @param fieldValue the value to convert
 * @return the converted value; {@code null} if binary serialization failed
 *         (the failure is logged)
 */
@SuppressWarnings("unchecked")
private Object serializeFieldValue(Schema fieldSchema, Object fieldValue) {
    switch(fieldSchema.getType()) {
        case MAP:
        case ARRAY:
        case RECORD:
            fieldValue = serializeWithDatumWriter(fieldSchema, fieldValue);
            break;
        case BYTES:
            // ByteBuffer.array() would expose the whole backing array (ignoring
            // position, limit and array offset) and fails for direct or read-only
            // buffers. Read through a duplicate so the caller's buffer state is
            // not modified.
            ByteBuffer readableView = ((ByteBuffer) fieldValue).duplicate();
            byte[] content = new byte[readableView.remaining()];
            readableView.get(content);
            fieldValue = content;
            break;
        case ENUM:
        case STRING:
            fieldValue = fieldValue.toString();
            break;
        case UNION:
            if (fieldSchema.getTypes().size() == 2 && isNullable(fieldSchema)) {
                // Nullable two-branch union: handle the value as its selected branch.
                int schemaPos = getUnionSchema(fieldValue, fieldSchema);
                Schema unionSchema = fieldSchema.getTypes().get(schemaPos);
                fieldValue = serializeFieldValue(unionSchema, fieldValue);
            } else {
                // All other types are serialized.
                fieldValue = serializeWithDatumWriter(fieldSchema, fieldValue);
            }
            break;
        default:
            // Remaining types are passed through unchanged.
            break;
    }
    return fieldValue;
}

/**
 * Serializes a value to bytes using the datum writer for the given schema.
 * An {@link IOException} is logged and reported to the caller as {@code null}.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private byte[] serializeWithDatumWriter(Schema schema, Object value) {
    try {
        SpecificDatumWriter writer = getDatumWriter(schema);
        return IOUtils.serialize(writer, value);
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        return null;
    }
}
Also used : Schema(org.apache.avro.Schema) IOException(java.io.IOException) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Example 8 with SpecificDatumWriter

use of org.apache.avro.specific.SpecificDatumWriter in project eiger by wlloyd.

the class SerDeUtils method serializeWithSchema.

/**
     * Writes a record together with its own Schema into a single buffer: first the
     * Schema's string form, then the record's binary encoding. NB: In performance
     * critical code it is <b>much</b> cheaper to keep the Schema separately instead
     * of repeating it with every object.
     * @param o Object to serialize
     * @return a buffer wrapping the encoded schema string followed by the encoded object
     * @throws IOException if encoding fails
     */
public static <T extends SpecificRecord> ByteBuffer serializeWithSchema(T o) throws IOException {
    OutputBuffer sink = new OutputBuffer();
    BinaryEncoder encoder = new BinaryEncoder(sink);

    // The schema goes first, as a string.
    Utf8 schemaText = new Utf8(o.getSchema().toString());
    encoder.writeString(schemaText);

    // The record itself follows, written against the same schema.
    new SpecificDatumWriter<T>(o.getSchema()).write(o, encoder);
    encoder.flush();

    byte[] payload = sink.asByteArray();
    return ByteBuffer.wrap(payload);
}
Also used : BinaryEncoder(org.apache.avro.io.BinaryEncoder) Utf8(org.apache.avro.util.Utf8) OutputBuffer(org.apache.cassandra.io.util.OutputBuffer) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter)

Example 9 with SpecificDatumWriter

use of org.apache.avro.specific.SpecificDatumWriter in project hive by apache.

the class TestHBaseSerDe method getTestAvroBytesFromClass2.

/**
 * Builds a fully populated {@code Employee} test record and returns it in Avro
 * binary form, preceded by the {@code TEST_BYTE_ARRAY} header.
 *
 * @param i suffix appended to the employee name and the address text fields
 * @return the header bytes followed by the binary-encoded employee
 * @throws IOException if writing or encoding fails
 */
private byte[] getTestAvroBytesFromClass2(int i) throws IOException {
    // Phone numbers shared by the contact info and the address union field.
    HomePhone homePhone = new HomePhone();
    homePhone.setAreaCode(999L);
    homePhone.setNumber(1234567890L);

    OfficePhone officePhone = new OfficePhone();
    officePhone.setAreaCode(999L);
    officePhone.setNumber(1234455555L);

    Map<CharSequence, CharSequence> addressMetadata = new HashMap<CharSequence, CharSequence>();
    addressMetadata.put("testkey", "testvalue");

    Address homeAddress = new Address();
    homeAddress.setAddress1("Avro First Address" + i);
    homeAddress.setAddress2("Avro Second Address" + i);
    homeAddress.setCity("Avro City" + i);
    homeAddress.setZipcode(123456L);
    homeAddress.setMetadata(addressMetadata);
    // set value for the union type
    homeAddress.setCounty(homePhone);

    // The same address instance is listed twice.
    List<Address> addressList = new ArrayList<Address>();
    addressList.add(homeAddress);
    addressList.add(homeAddress);

    ContactInfo contactInfo = new ContactInfo();
    contactInfo.setAddress(addressList);
    contactInfo.setHomePhone(homePhone);
    contactInfo.setOfficePhone(officePhone);

    Employee record = new Employee();
    record.setEmployeeName("Avro Employee" + i);
    record.setEmployeeID(11111L);
    record.setGender(Gender.FEMALE);
    record.setAge(25L);
    record.setContactInfo(contactInfo);

    DatumWriter<Employee> datumWriter = new SpecificDatumWriter<Employee>(Employee.class);
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    Encoder binaryEncoder = EncoderFactory.get().binaryEncoder(sink, null);
    // write out a header for the payload
    sink.write(TEST_BYTE_ARRAY);
    datumWriter.write(record, binaryEncoder);
    binaryEncoder.flush();
    return sink.toByteArray();
}
Also used : OfficePhone(org.apache.hadoop.hive.hbase.avro.OfficePhone) Address(org.apache.hadoop.hive.hbase.avro.Address) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) Employee(org.apache.hadoop.hive.hbase.avro.Employee) HomePhone(org.apache.hadoop.hive.hbase.avro.HomePhone) Encoder(org.apache.avro.io.Encoder) ContactInfo(org.apache.hadoop.hive.hbase.avro.ContactInfo)

Example 10 with SpecificDatumWriter

use of org.apache.avro.specific.SpecificDatumWriter in project crunch by cloudera.

the class MultiAvroSchemaJoinTest method setUp.

/**
 * Creates the temporary Avro data files used by the tests: a person file with
 * three {@code Person} records and an employee file with one {@code Employee}
 * record.
 * <p>
 * Each file writer is closed in a {@code finally} block so the underlying file
 * is released even when appending a record fails.
 *
 * @throws Exception if a temporary file cannot be created or written
 */
@Before
public void setUp() throws Exception {
    this.personFile = File.createTempFile("person", ".avro");
    this.employeeFile = File.createTempFile("employee", ".avro");
    DatumWriter<Person> pdw = new SpecificDatumWriter<Person>();
    DataFileWriter<Person> pfw = new DataFileWriter<Person>(pdw);
    // create() stays outside the try block so that close() is only attempted
    // on a writer that was successfully opened.
    pfw.create(Person.SCHEMA$, personFile);
    try {
        Person p1 = new Person();
        p1.setName("Josh");
        p1.setAge(19);
        p1.setSiblingnames(ImmutableList.<CharSequence>of("Kate", "Mike"));
        pfw.append(p1);
        Person p2 = new Person();
        p2.setName("Kate");
        p2.setAge(17);
        p2.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Mike"));
        pfw.append(p2);
        Person p3 = new Person();
        p3.setName("Mike");
        p3.setAge(12);
        p3.setSiblingnames(ImmutableList.<CharSequence>of("Josh", "Kate"));
        pfw.append(p3);
    } finally {
        pfw.close();
    }
    DatumWriter<Employee> edw = new SpecificDatumWriter<Employee>();
    DataFileWriter<Employee> efw = new DataFileWriter<Employee>(edw);
    efw.create(Employee.SCHEMA$, employeeFile);
    try {
        Employee e1 = new Employee();
        e1.setName("Kate");
        e1.setSalary(100000);
        e1.setDepartment("Marketing");
        efw.append(e1);
    } finally {
        efw.close();
    }
}
Also used : Employee(org.apache.crunch.test.Employee) DataFileWriter(org.apache.avro.file.DataFileWriter) Person(org.apache.crunch.test.Person) SpecificDatumWriter(org.apache.avro.specific.SpecificDatumWriter) Before(org.junit.Before)

Aggregations

SpecificDatumWriter (org.apache.avro.specific.SpecificDatumWriter): 17; DataFileWriter (org.apache.avro.file.DataFileWriter): 7; ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5; Schema (org.apache.avro.Schema): 4; BinaryEncoder (org.apache.avro.io.BinaryEncoder): 4; IOException (java.io.IOException): 3; ArrayList (java.util.ArrayList): 3; HashMap (java.util.HashMap): 3; Encoder (org.apache.avro.io.Encoder): 3; Person (org.apache.crunch.test.Person): 3; FileOutputStream (java.io.FileOutputStream): 2; OutputBuffer (org.apache.cassandra.io.util.OutputBuffer): 2; Address (org.apache.flink.api.io.avro.generated.Address): 2; User (org.apache.flink.api.io.avro.generated.User): 2; Address (org.apache.hadoop.hive.hbase.avro.Address): 2; ContactInfo (org.apache.hadoop.hive.hbase.avro.ContactInfo): 2; Employee (org.apache.hadoop.hive.hbase.avro.Employee): 2; HomePhone (org.apache.hadoop.hive.hbase.avro.HomePhone): 2; OfficePhone (org.apache.hadoop.hive.hbase.avro.OfficePhone): 2; Before (org.junit.Before): 2