Search in sources :

Example 36 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestAvroSerializer method canSerializeFixed.

@Test
public void canSerializeFixed() throws SerDeException, IOException {
    // Field declaration for an Avro "fixed" type named "threebytes" with size 3.
    final String fixedFieldJson = "{ \"name\":\"fixed1\", \"type\":{\"type\":\"fixed\", \"name\":\"threebytes\", \"size\":3} }";
    final byte[] payload = "k9@".getBytes();
    final GenericData.Fixed expected = new GenericData.Fixed(buildSchema(fixedFieldJson), payload);

    // Push the value through the serde round trip and pull the field back out.
    final GenericRecord roundTripped = serializeAndDeserialize(fixedFieldJson, "fixed1", expected);
    final GenericData.Fixed actual = (GenericData.Fixed) roundTripped.get("fixed1");

    assertArrayEquals(expected.bytes(), actual.bytes());
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord) GenericData(org.apache.avro.generic.GenericData) Test(org.junit.Test)

Example 37 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestAvroSerializer method singleFieldTest.

/**
 * Round-trips one value through {@code serializeAndDeserialize} and asserts that
 * the field read back from the resulting record equals the value that went in.
 *
 * @param fieldName  name of the single field; also used to look the value up afterwards
 * @param fieldValue value stored in the field before the round trip
 * @param fieldType  type name spliced into the field's JSON declaration
 */
private void singleFieldTest(String fieldName, Object fieldValue, String fieldType) throws SerDeException, IOException {
    final String fieldJson = "{ \"name\":\"" + fieldName + "\", \"type\":\"" + fieldType + "\" }";
    final GenericRecord roundTripped = serializeAndDeserialize(fieldJson, fieldName, fieldValue);
    final Object actual = roundTripped.get(fieldName);
    assertEquals(fieldValue, actual);
}
Also used : GenericRecord(org.apache.avro.generic.GenericRecord)

Example 38 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestAvroSerializer method serializeAndDeserialize.

/**
 * Builds a record holding one field value, passes it through
 * {@code Utils.serializeAndDeserializeRecord}, deserializes the resulting writable
 * and serializes it once more, then returns the record carried by that final output.
 *
 * <p>Asserts that the serializer output is an {@code AvroGenericRecordWritable} and
 * that its record has the same schema the input record was built with.
 *
 * @param recordValue field declaration handed to {@code buildSchema}
 * @param fieldName   name of the field to populate
 * @param fieldValue  value to store under {@code fieldName}
 * @return the record obtained from the re-serialized writable
 */
private GenericRecord serializeAndDeserialize(String recordValue, String fieldName, Object fieldValue) throws SerDeException, IOException {
    final Schema schema = buildSchema(recordValue);

    // Input record with the single field populated.
    final GenericData.Record input = new GenericData.Record(schema);
    input.put(fieldName, fieldValue);

    final AvroSerializer serializer = new AvroSerializer();
    final AvroDeserializer deserializer = new AvroDeserializer();

    // Inspector, column names and column types are all derived from the same schema.
    final AvroObjectInspectorGenerator inspectorGenerator = new AvroObjectInspectorGenerator(schema);
    final ObjectInspector inspector = inspectorGenerator.getObjectInspector();
    final List<String> names = inspectorGenerator.getColumnNames();
    final List<TypeInfo> types = inspectorGenerator.getColumnTypes();

    // Writable -> deserialized object -> serialized writable again.
    final AvroGenericRecordWritable writable = Utils.serializeAndDeserializeRecord(input);
    final Object deserialized = deserializer.deserialize(names, types, writable, schema);
    final Writable reserialized = serializer.serialize(deserialized, inspector, names, types, schema);

    assertTrue(reserialized instanceof AvroGenericRecordWritable);
    final GenericRecord output = ((AvroGenericRecordWritable) reserialized).getRecord();
    assertEquals(schema, output.getSchema());
    return output;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Schema(org.apache.avro.Schema) Writable(org.apache.hadoop.io.Writable) GenericData(org.apache.avro.generic.GenericData) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 39 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project haivvreo by jghoman.

the class TestSchemaReEncoder method schemasCanAddFields.

@Test
public void schemasCanAddFields() throws SerDeException {
    // First pair: a "Line" record, and the same record with an extra defaulted string field.
    final String lineJson = "{\n"
        + "    \"namespace\": \"com.linkedin.haivvreo\",\n"
        + "    \"name\": \"Line\",\n"
        + "    \"type\": \"record\",\n"
        + "    \"fields\": [\n"
        + "        {\n"
        + "            \"name\":\"text\",\n"
        + "            \"type\":\"string\"\n"
        + "        }\n"
        + "    ]\n"
        + "}";
    final String lineWithNewKidJson = "{\n"
        + "    \"namespace\": \"com.linkedin.haivvreo\",\n"
        + "    \"name\": \"Line\",\n"
        + "    \"type\": \"record\",\n"
        + "    \"fields\": [\n"
        + "        {\n"
        + "            \"name\":\"text\",\n"
        + "            \"type\":\"string\"\n"
        + "        },\n"
        + "        {\n"
        + "            \"name\":\"new_kid\",\n"
        + "            \"type\":\"string\",\n"
        + "            \"default\":\"Hi!\"\n"
        + "        }\n"
        + "    ]\n"
        + "}";
    final Schema lineSchema = Schema.parse(lineJson);
    final Schema lineWithNewKidSchema = Schema.parse(lineWithNewKidJson);

    final GenericRecord lineRecord = new GenericData.Record(lineSchema);
    lineRecord.put("text", "it is a far better thing I do, yadda, yadda");
    assertTrue(GenericData.get().validate(lineSchema, lineRecord));

    final AvroDeserializer.SchemaReEncoder reEncoder = new AvroDeserializer.SchemaReEncoder();
    final GenericRecord evolvedLine = reEncoder.reencode(lineRecord, lineWithNewKidSchema);
    assertTrue(GenericData.get().validate(lineWithNewKidSchema, evolvedLine));
    assertEquals("Hi!", evolvedLine.get("new_kid").toString());

    // Second pair: feed the SAME re-encoder instance an unrelated record type to
    // check that it can be reused rather than recreated per schema.
    final String otherJson = "{\n"
        + "    \"namespace\": \"somebody.else\",\n"
        + "    \"name\": \"something_else\",\n"
        + "    \"type\": \"record\",\n"
        + "    \"fields\": [\n"
        + "        {\n"
        + "            \"name\":\"a\",\n"
        + "            \"type\":\"int\"\n"
        + "        }\n"
        + "    ]\n"
        + "}";
    final String otherWithBJson = "{\n"
        + "    \"namespace\": \"somebody.else\",\n"
        + "    \"name\": \"something_else\",\n"
        + "    \"type\": \"record\",\n"
        + "    \"fields\": [\n"
        + "        {\n"
        + "            \"name\":\"a\",\n"
        + "            \"type\":\"int\"\n"
        + "        },\n"
        + "        {\n"
        + "            \"name\":\"b\",\n"
        + "            \"type\":\"long\",\n"
        + "            \"default\":42\n"
        + "        }\n"
        + "    ]\n"
        + "}";
    final Schema otherSchema = Schema.parse(otherJson);
    final Schema otherWithBSchema = Schema.parse(otherWithBJson);

    final GenericRecord otherRecord = new GenericData.Record(otherSchema);
    otherRecord.put("a", 19);
    assertTrue(GenericData.get().validate(otherSchema, otherRecord));

    final GenericRecord evolvedOther = reEncoder.reencode(otherRecord, otherWithBSchema);
    assertTrue(GenericData.get().validate(otherWithBSchema, evolvedOther));
    assertEquals(42L, evolvedOther.get("b"));
}
Also used : Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) GenericData(org.apache.avro.generic.GenericData) Test(org.junit.Test)

Example 40 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

the class ThirdeyeAvroUtils method extractSchemaFromAvro.

/**
 * Extracts the Avro schema from an Avro file.
 *
 * <p>The reader obtained from {@code getAvroReader} is closed before this method
 * returns, including when reading the schema throws.
 *
 * @param avroFile path of the Avro file whose schema is wanted
 * @return the schema reported by the file's data stream reader
 * @throws IOException if obtaining or closing the reader fails; a
 *     {@code FileNotFoundException} is reported through this type as well
 */
public static Schema extractSchemaFromAvro(Path avroFile) throws IOException {
    // try-with-resources guarantees close() even if getSchema() fails, so the
    // underlying stream is never leaked.
    try (DataFileStream<GenericRecord> dataStreamReader = getAvroReader(avroFile)) {
        return dataStreamReader.getSchema();
    }
}
Also used : Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord): 262, Schema (org.apache.avro.Schema): 101, Test (org.junit.Test): 80, GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 46, File (java.io.File): 35, IOException (java.io.IOException): 34, GenericData (org.apache.avro.generic.GenericData): 30, GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 30, ArrayList (java.util.ArrayList): 29, ByteArrayOutputStream (java.io.ByteArrayOutputStream): 27, DataFileWriter (org.apache.avro.file.DataFileWriter): 20, HashMap (java.util.HashMap): 19, ByteBuffer (java.nio.ByteBuffer): 18, BinaryEncoder (org.apache.avro.io.BinaryEncoder): 17, Field (org.apache.avro.Schema.Field): 14, DataFileStream (org.apache.avro.file.DataFileStream): 14, GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 14, Utf8 (org.apache.avro.util.Utf8): 14, Encoder (org.apache.avro.io.Encoder): 12, DatasetRepository (com.cloudera.cdk.data.DatasetRepository): 11