Example 76 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class DbusEventAvroDecoder method dumpMetadata.

public void dumpMetadata(DbusEvent e, FileChannel writeChannel) {
    GenericRecord genericRecord = this.getMetadata(e, null);
    if (genericRecord == null)
        return; // no metadata
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
        String metadataInfo = genericRecord.toString() + "\n";
        baos.write(metadataInfo.getBytes("UTF-8"));
        ByteBuffer writeBuffer = ByteBuffer.wrap(baos.toByteArray());
        writeChannel.write(writeBuffer);
    } catch (UnsupportedEncodingException e1) {
        LOG.error("event metadata serialization error; event = " + e + "; metadata = " + genericRecord, e1);
    } catch (IOException e1) {
        LOG.error("event metadata serialization error; event = " + e + "; metadata = " + genericRecord, e1);
    }
}
Also used : UnsupportedEncodingException(java.io.UnsupportedEncodingException) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer)
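
Outside of databus, the same write-the-JSON-form pattern can be reproduced with plain Avro and NIO alone. The sketch below is a minimal approximation, not databus code: the Metadata schema, field name, output path, and class name are invented for illustration. It builds a GenericRecord and writes its toString() (JSON-like) form to a FileChannel, as dumpMetadata() does above.

import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class MetadataDumpSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical one-field schema standing in for a real metadata schema.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Metadata\","
            + "\"fields\":[{\"name\":\"source\",\"type\":\"string\"}]}");
        GenericRecord record = new GenericData.Record(schema);
        record.put("source", "example-source");
        // Same idea as dumpMetadata(): serialize the record's JSON form and
        // write it to a FileChannel.
        try (FileChannel channel = FileChannel.open(Paths.get("metadata.txt"),
                StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
            byte[] json = (record.toString() + "\n").getBytes(StandardCharsets.UTF_8);
            channel.write(ByteBuffer.wrap(json));
        }
    }
}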

Example 77 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class TestInternalMetadata method testGetMetadata_UnhappyPath_EventHasNoMetadata.

/**
   * Verifies that getMetadata() returns null if event has no metadata.
   */
@Test
public void testGetMetadata_UnhappyPath_EventHasNoMetadata() throws Exception {
    LOG.info("starting testGetMetadata_UnhappyPath_EventHasNoMetadata()");
    // build the event without any metadata
    DbusEvent event = createEvent(null);
    // create a metadata schema set, just because we like to
    VersionedSchemaSet metadataSchemaSet = new VersionedSchemaSet();
    metadataSchemaSet.add(SchemaRegistryService.DEFAULT_METADATA_SCHEMA_SOURCE,
                          METADATA_SCHEMA_VERSION,
                          new SchemaId(METADATA_SCHEMA_CHECKSUM),
                          CORRECT_METADATA_SCHEMA,
                          true); // preserve original string
    // now create the decoder and attempt to use it to extract and decode the event's metadata
    DbusEventAvroDecoder eventDecoder = createDecoder(metadataSchemaSet);
    try {
        GenericRecord reuse = null;
        GenericRecord decodedMetadata = eventDecoder.getMetadata(event, reuse);
        Assert.assertNull(decodedMetadata, "getMetadata() should have returned null;");
    } catch (Exception ex) {
        Assert.fail("getMetadata() should not have thrown exception: " + ex);
    }
    LOG.info("leaving testGetMetadata_UnhappyPath_EventHasNoMetadata()");
}
Also used : DbusEvent(com.linkedin.databus.core.DbusEvent) DbusEventAvroDecoder(com.linkedin.databus.client.DbusEventAvroDecoder) SchemaId(com.linkedin.databus2.schemas.SchemaId) VersionedSchemaSet(com.linkedin.databus2.schemas.VersionedSchemaSet) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.testng.annotations.Test)
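
The reuse argument passed to getMetadata() mirrors Avro's own convention, where DatumReader.read(reuse, decoder) accepts a previously decoded record (or null) so the decoder can avoid fresh allocations. A minimal plain-Avro sketch of that convention, not databus code, with a one-field schema invented for illustration:

import java.io.ByteArrayOutputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class ReuseSketch {
    public static void main(String[] args) throws Exception {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Meta\","
            + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
        // Serialize one record to bytes.
        GenericRecord original = new GenericData.Record(schema);
        original.put("id", 42L);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(original, encoder);
        encoder.flush();
        // Decode it; the first argument is an optional record to reuse,
        // and null is allowed, as in the test above.
        GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
        Decoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
        GenericRecord reuse = null;
        GenericRecord decoded = reader.read(reuse, decoder);
        System.out.println(decoded);
    }
}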

Example 78 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project beam by apache.

the class BigQueryAvroUtils method convertRequiredField.

private static Object convertRequiredField(Type avroType, TableFieldSchema fieldSchema, Object v) {
    // REQUIRED fields are represented as the corresponding Avro types. For example, a BigQuery
    // INTEGER type maps to an Avro LONG type.
    checkNotNull(v, "REQUIRED field %s should not be null", fieldSchema.getName());
    // Per https://cloud.google.com/bigquery/docs/reference/v2/tables#schema, the type field
    // is required, so it may not be null.
    String bqType = fieldSchema.getType();
    Type expectedAvroType = BIG_QUERY_TO_AVRO_TYPES.get(bqType);
    verifyNotNull(expectedAvroType, "Unsupported BigQuery type: %s", bqType);
    verify(avroType == expectedAvroType, "Expected Avro schema type %s, not %s, for BigQuery %s field %s", expectedAvroType, avroType, bqType, fieldSchema.getName());
    switch(fieldSchema.getType()) {
        case "STRING":
        case "DATE":
        case "DATETIME":
        case "TIME":
            // Avro will use a CharSequence to represent String objects, but it may not always use
            // java.lang.String; for example, it may prefer org.apache.avro.util.Utf8.
            verify(v instanceof CharSequence, "Expected CharSequence (String), got %s", v.getClass());
            return v.toString();
        case "INTEGER":
            verify(v instanceof Long, "Expected Long, got %s", v.getClass());
            return ((Long) v).toString();
        case "FLOAT":
            verify(v instanceof Double, "Expected Double, got %s", v.getClass());
            return v;
        case "BOOLEAN":
            verify(v instanceof Boolean, "Expected Boolean, got %s", v.getClass());
            return v;
        case "TIMESTAMP":
            // TIMESTAMP data types are represented as Avro LONG types. They are converted back to
            // Strings with variable precision (up to six digits) to match the JSON files exported
            // by BigQuery.
            verify(v instanceof Long, "Expected Long, got %s", v.getClass());
            Double doubleValue = ((Long) v) / 1000000.0;
            return formatTimestamp(doubleValue.toString());
        case "RECORD":
            verify(v instanceof GenericRecord, "Expected GenericRecord, got %s", v.getClass());
            return convertGenericRecordToTableRow((GenericRecord) v, fieldSchema.getFields());
        case "BYTES":
            verify(v instanceof ByteBuffer, "Expected ByteBuffer, got %s", v.getClass());
            ByteBuffer byteBuffer = (ByteBuffer) v;
            byte[] bytes = new byte[byteBuffer.limit()];
            byteBuffer.get(bytes);
            return BaseEncoding.base64().encode(bytes);
        default:
            throw new UnsupportedOperationException(String.format("Unexpected BigQuery field schema type %s for field named %s", fieldSchema.getType(), fieldSchema.getName()));
    }
}
Also used : Type(org.apache.avro.Schema.Type) GenericRecord(org.apache.avro.generic.GenericRecord) ByteBuffer(java.nio.ByteBuffer)
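
The BYTES branch copies the ByteBuffer into a byte[] before base64-encoding it. A standalone sketch of that copy-then-encode step, using java.util.Base64 in place of Guava's BaseEncoding (an assumed substitution) and hypothetical input bytes:

import java.nio.ByteBuffer;
import java.util.Base64;

public class BytesFieldSketch {
    public static void main(String[] args) {
        // Same copy-then-encode pattern as the BYTES case in convertRequiredField().
        ByteBuffer byteBuffer = ByteBuffer.wrap(new byte[] { 1, 2, 3, 4 });
        byte[] bytes = new byte[byteBuffer.limit()];
        byteBuffer.get(bytes);
        System.out.println(Base64.getEncoder().encodeToString(bytes)); // prints AQIDBA==
    }
}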

Example 79 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project beam by apache.

the class AvroSourceTest method testCreationWithSchema.

@Test
public void testCreationWithSchema() throws Exception {
    List<Bird> expected = createRandomRecords(100);
    String filename = generateTestFile("tmp.avro", expected, SyncBehavior.SYNC_DEFAULT, 0, AvroCoder.of(Bird.class), DataFileConstants.NULL_CODEC);
    // Create a source with a schema object
    Schema schema = ReflectData.get().getSchema(Bird.class);
    AvroSource<GenericRecord> source = AvroSource.from(filename).withSchema(schema);
    List<GenericRecord> records = SourceTestUtils.readFromSource(source, null);
    assertEqualsWithGeneric(expected, records);
    // Create a source with a JSON schema
    String schemaString = ReflectData.get().getSchema(Bird.class).toString();
    source = AvroSource.from(filename).withSchema(schemaString);
    records = SourceTestUtils.readFromSource(source, null);
    assertEqualsWithGeneric(expected, records);
    // Create a source with no schema
    source = AvroSource.from(filename);
    records = SourceTestUtils.readFromSource(source, null);
    assertEqualsWithGeneric(expected, records);
}
Also used : Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)
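
Conceptually, reading the file as GenericRecord is what plain Avro's DataFileReader does underneath. The sketch below is a minimal non-Beam approximation (the file path is hypothetical); with a no-argument GenericDatumReader, the reader picks up the writer schema embedded in the Avro file.

import java.io.File;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadGenericSketch {
    public static void main(String[] args) throws Exception {
        File file = new File("tmp.avro");
        try (DataFileReader<GenericRecord> reader =
                new DataFileReader<GenericRecord>(file, new GenericDatumReader<GenericRecord>())) {
            // Iterate over every record in the container file as a GenericRecord.
            for (GenericRecord record : reader) {
                System.out.println(record);
            }
        }
    }
}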

Example 80 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project beam by apache.

the class AvroCoderTest method testGenericRecordEncoding.

@Test
public void testGenericRecordEncoding() throws Exception {
    String schemaString = "{\"namespace\": \"example.avro\",\n" + " \"type\": \"record\",\n" + " \"name\": \"User\",\n" + " \"fields\": [\n" + "     {\"name\": \"name\", \"type\": \"string\"},\n" + "     {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + "     {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + " ]\n" + "}";
    Schema schema = (new Schema.Parser()).parse(schemaString);
    GenericRecord before = new GenericData.Record(schema);
    before.put("name", "Bob");
    before.put("favorite_number", 256);
    // Leave favorite_color null
    AvroCoder<GenericRecord> coder = AvroCoder.of(GenericRecord.class, schema);
    CoderProperties.coderDecodeEncodeEqual(coder, before);
    Assert.assertEquals(schema, coder.getSchema());
}
Also used : Schema(org.apache.avro.Schema) AvroSchema(org.apache.avro.reflect.AvroSchema) GenericRecord(org.apache.avro.generic.GenericRecord) Matchers.containsString(org.hamcrest.Matchers.containsString) Test(org.junit.Test)
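
For comparison, the same User record can be assembled with GenericRecordBuilder (one of the helpers listed under Aggregations below), which validates field names and values against the schema at build time. A minimal sketch, assuming the same User schema as the test above:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class UserBuilderSketch {
    public static void main(String[] args) {
        String schemaString = "{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": \"User\", "
            + "\"fields\": [{\"name\": \"name\", \"type\": \"string\"}, "
            + "{\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]}, "
            + "{\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}]}";
        Schema schema = new Schema.Parser().parse(schemaString);
        // Each set() call is validated against the field's schema; build()
        // fails if a field without a default was never set.
        GenericRecord user = new GenericRecordBuilder(schema)
            .set("name", "Bob")
            .set("favorite_number", 256)
            .set("favorite_color", null)
            .build();
        System.out.println(user);
    }
}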

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord) 262
Schema (org.apache.avro.Schema) 101
Test (org.junit.Test) 80
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter) 46
File (java.io.File) 35
IOException (java.io.IOException) 34
GenericData (org.apache.avro.generic.GenericData) 30
GenericDatumReader (org.apache.avro.generic.GenericDatumReader) 30
ArrayList (java.util.ArrayList) 29
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 27
DataFileWriter (org.apache.avro.file.DataFileWriter) 20
HashMap (java.util.HashMap) 19
ByteBuffer (java.nio.ByteBuffer) 18
BinaryEncoder (org.apache.avro.io.BinaryEncoder) 17
Field (org.apache.avro.Schema.Field) 14
DataFileStream (org.apache.avro.file.DataFileStream) 14
GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder) 14
Utf8 (org.apache.avro.util.Utf8) 14
Encoder (org.apache.avro.io.Encoder) 12
DatasetRepository (com.cloudera.cdk.data.DatasetRepository) 11