Search in sources :

Example 46 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project rest.li by linkedin.

the class TestSchemaTranslator method testToAvroSchema.

/**
 * Translates a Pegasus schema to Avro under every {@link EmbedSchemaMode} and each listed
 * {@link OptionalDefaultMode}, and verifies the outcome against the expectations in {@code row}.
 *
 * <p>Layout of {@code row} as read by this method (index 0 is not read here; presumably it holds
 * the schema text supplied separately as {@code schemaText} - TODO confirm against the data provider):
 * each {@code row[i]} for {@code i >= 1} is an {@code Object[]} where
 * <ul>
 *   <li>{@code [0]} is the {@code OptionalDefaultMode[]} to run with,</li>
 *   <li>{@code [1]} is either the expected Avro schema text ({@code String}) or the expected
 *       exception {@code Class},</li>
 *   <li>{@code [2]} is the writer schema text (success case, optional) or a substring expected in
 *       the exception message (failure case),</li>
 *   <li>{@code [3]} is an Avro JSON value to read using the writer schema (optional),</li>
 *   <li>{@code [4]} is the expected JSON form of the resulting generic record (optional).</li>
 * </ul>
 *
 * @param schemaText the Pegasus schema text to translate
 * @param row the expectations described above
 * @throws IOException propagated from the parsing/translation helpers
 */
private void testToAvroSchema(String schemaText, Object[] row) throws IOException {
    boolean debug = false;
    if (debug) {
        System.out.println(schemaText);
    }
    for (int i = 1; i < row.length; i++) {
        Object[] modeInputs = (Object[]) row[i];
        OptionalDefaultMode[] optionalDefaultModes = (OptionalDefaultMode[]) modeInputs[0];
        Object expected = modeInputs[1];
        for (EmbedSchemaMode embedSchemaMode : EmbedSchemaMode.values()) {
            for (OptionalDefaultMode optionalDefaultMode : optionalDefaultModes) {
                DataSchema schema = TestUtil.dataSchemaFromString(schemaText);
                // captured so we can later verify that translation did not mutate the input schema
                String preTranslateSchemaText = schema.toString();
                Exception exc = null;
                String avroTextFromSchema = null;
                try {
                    avroTextFromSchema = SchemaTranslator.dataToAvroSchemaJson(schema, new DataToAvroSchemaTranslationOptions(optionalDefaultMode, JsonBuilder.Pretty.SPACES, embedSchemaMode));
                    if (debug) {
                        System.out.println("EmbeddedSchema: " + embedSchemaMode + ", OptionalDefaultMode: " + optionalDefaultMode + ", Avro Schema: " + avroTextFromSchema);
                    }
                } catch (Exception e) {
                    // kept for inspection below; the failure branch asserts on its type and message
                    exc = e;
                    if (debug) {
                        e.printStackTrace();
                    }
                }
                if (expected instanceof String) {
                    assertNull(exc);
                    String expectedAvroText = (String) expected;
                    if (embedSchemaMode == EmbedSchemaMode.ROOT_ONLY && hasEmbeddedSchema(schema)) {
                        // when embeddedSchema is enabled
                        // for map, array, enums. and records, we embed the original Pegasus schema
                        DataMap expectedAvroDataMap = TestUtil.dataMapFromString(expectedAvroText);
                        DataMap resultAvroDataMap = TestUtil.dataMapFromString(avroTextFromSchema);
                        // strip the embedded data property so the remainder can be compared to the expected Avro schema
                        Object dataProperty = resultAvroDataMap.remove(SchemaTranslator.DATA_PROPERTY);
                        assertEquals(resultAvroDataMap, expectedAvroDataMap);
                        // look for embedded schema
                        assertNotNull(dataProperty);
                        assertTrue(dataProperty instanceof DataMap);
                        Object schemaProperty = ((DataMap) dataProperty).get(SchemaTranslator.SCHEMA_PROPERTY);
                        assertNotNull(schemaProperty);
                        assertTrue(schemaProperty instanceof DataMap);
                        // make sure embedded schema is same as the original schema
                        PegasusSchemaParser schemaParser = TestUtil.schemaParserFromObjects(Arrays.asList(schemaProperty));
                        DataSchema embeddedSchema = schemaParser.topLevelDataSchemas().get(0);
                        assertEquals(embeddedSchema, schema.getDereferencedDataSchema());
                        // look for optional default mode
                        Object optionalDefaultModeProperty = ((DataMap) dataProperty).get(SchemaTranslator.OPTIONAL_DEFAULT_MODE_PROPERTY);
                        // assert on the embedded property itself, not on the loop variable
                        assertNotNull(optionalDefaultModeProperty);
                        assertEquals(optionalDefaultModeProperty, optionalDefaultMode.toString());
                    } else {
                        // for unions and primitives, we never embed the pegasus schema
                        if (embedSchemaMode == EmbedSchemaMode.NONE && hasEmbeddedSchema(schema)) {
                            // make sure there is no embedded schema when embedding is disabled
                            DataMap resultAvroDataMap = TestUtil.dataMapFromString(avroTextFromSchema);
                            assertFalse(resultAvroDataMap.containsKey(SchemaTranslator.DATA_PROPERTY));
                        }
                        assertEquals(avroTextFromSchema, expectedAvroText);
                    }
                    // translation must not have modified the input schema
                    String postTranslateSchemaText = schema.toString();
                    assertEquals(preTranslateSchemaText, postTranslateSchemaText);
                    // make sure Avro accepts it
                    Schema avroSchema = Schema.parse(avroTextFromSchema);
                    if (debug) {
                        System.out.println("AvroSchema: " + avroSchema);
                    }
                    // make sure the Pegasus SchemaParser also accepts it when run in Avro union mode
                    SchemaParser parser = new SchemaParser();
                    ValidationOptions options = new ValidationOptions();
                    options.setAvroUnionMode(true);
                    parser.setValidationOptions(options);
                    parser.parse(avroTextFromSchema);
                    assertFalse(parser.hasError(), parser.errorMessage());
                    if (optionalDefaultMode == DataToAvroSchemaTranslationOptions.DEFAULT_OPTIONAL_DEFAULT_MODE) {
                        // use other dataToAvroSchemaJson
                        String avroSchema2Json = SchemaTranslator.dataToAvroSchemaJson(TestUtil.dataSchemaFromString(schemaText));
                        String avroSchema2JsonCompact = SchemaTranslator.dataToAvroSchemaJson(TestUtil.dataSchemaFromString(schemaText), new DataToAvroSchemaTranslationOptions());
                        assertEquals(avroSchema2Json, avroSchema2JsonCompact);
                        Schema avroSchema2 = Schema.parse(avroSchema2Json);
                        assertEquals(avroSchema2, avroSchema);
                        // use dataToAvroSchema
                        Schema avroSchema3 = SchemaTranslator.dataToAvroSchema(TestUtil.dataSchemaFromString(schemaText));
                        assertEquals(avroSchema3, avroSchema2);
                    }
                    if (modeInputs.length >= 4) {
                        // check if the translated default value is good by using it.
                        // writer schema and Avro JSON value should not include fields with default values.
                        String writerSchemaText = (String) modeInputs[2];
                        String avroValueJson = (String) modeInputs[3];
                        Schema writerSchema = Schema.parse(writerSchemaText);
                        GenericRecord genericRecord = genericRecordFromString(avroValueJson, writerSchema, avroSchema);
                        if (modeInputs.length >= 5) {
                            // compare as DataMaps so that the comparison does not depend on the textual form
                            String genericRecordJson = (String) modeInputs[4];
                            String genericRecordAsString = genericRecord.toString();
                            DataMap expectedGenericRecord = TestUtil.dataMapFromString(genericRecordJson);
                            DataMap resultGenericRecord = TestUtil.dataMapFromString(genericRecordAsString);
                            assertEquals(resultGenericRecord, expectedGenericRecord);
                        }
                    }
                    if (embedSchemaMode == EmbedSchemaMode.ROOT_ONLY && hasEmbeddedSchema(schema)) {
                        // if embedded schema is enabled, translate Avro back to Pegasus schema.
                        // the output Pegasus schema should be exactly same the input schema
                        // taking into account typeref.
                        AvroToDataSchemaTranslationOptions avroToDataSchemaMode = new AvroToDataSchemaTranslationOptions(AvroToDataSchemaTranslationMode.VERIFY_EMBEDDED_SCHEMA);
                        DataSchema embeddedSchema = SchemaTranslator.avroToDataSchema(avroTextFromSchema, avroToDataSchemaMode);
                        assertEquals(embeddedSchema, schema.getDereferencedDataSchema());
                    }
                } else {
                    // failure case: expected is the exception class, modeInputs[2] a substring of its message
                    Class<?> expectedExceptionClass = (Class<?>) expected;
                    String expectedString = (String) modeInputs[2];
                    assertNotNull(exc);
                    assertNull(avroTextFromSchema);
                    assertTrue(expectedExceptionClass.isInstance(exc));
                    assertTrue(exc.getMessage().contains(expectedString), "\"" + exc.getMessage() + "\" does not contain \"" + expectedString + "\"");
                }
            }
        }
    }
}
Also used : PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) DataSchema(com.linkedin.data.schema.DataSchema) Schema(org.apache.avro.Schema) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) ValidationOptions(com.linkedin.data.schema.validation.ValidationOptions) IOException(java.io.IOException) DataMap(com.linkedin.data.DataMap) DataSchema(com.linkedin.data.schema.DataSchema) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 47 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project rest.li by linkedin.

the class AvroUtil method jsonFromGenericRecord.

/**
 * Serializes a {@link GenericRecord} to JSON text using the record's own schema.
 *
 * <p>The record is written with a {@link GenericDatumWriter} through the JSON encoder obtained
 * from the {@link AvroAdapter}, into an in-memory buffer that is then decoded to a string.
 *
 * @param record the record to serialize; its schema is used for both the writer and the encoder
 * @return the JSON text produced by the encoder
 * @throws IOException if creating the encoder, writing, or flushing fails
 */
public static String jsonFromGenericRecord(GenericRecord record) throws IOException {
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    AvroAdapter avroAdapter = AvroAdapterFinder.getAvroAdapter();
    Encoder jsonEncoder = avroAdapter.createJsonEncoder(record.getSchema(), outputStream);
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>();
    writer.setSchema(record.getSchema());
    writer.write(record, jsonEncoder);
    // the encoder may buffer; flush so that all bytes reach the output stream before decoding
    jsonEncoder.flush();
    // Decode explicitly as UTF-8 rather than with the platform default charset, which would
    // corrupt non-ASCII characters on platforms whose default is not UTF-8.
    // NOTE(review): assumes the adapter's JSON encoder emits UTF-8 (as Avro's JsonEncoder does) - confirm.
    return outputStream.toString("UTF-8");
}
Also used : AvroAdapter(com.linkedin.data.avro.AvroAdapter) Encoder(org.apache.avro.io.Encoder) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 48 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project rest.li by linkedin.

the class AnyRecordTranslator method avroGenericToData.

/**
 * Translates an Avro generic record with {@code TYPE} and {@code VALUE} fields into a
 * single-entry {@link DataMap} keyed by the type string, whose value is the {@link DataMap}
 * decoded from the bytes of the value field.
 *
 * <p>Problems are reported by appending a message to {@code context} and returning
 * {@code null}; no exception is thrown for malformed input. Type checks are done with
 * {@code instanceof} instead of catching {@link ClassCastException}, so a {@code null}
 * {@code avroData} is reported as "not a GenericRecord" rather than failing with a
 * {@link NullPointerException}.
 *
 * @param context receives error messages
 * @param avroData the Avro datum, expected to be a {@link GenericRecord}
 * @param avroSchema not used by this implementation
 * @param schema not used by this implementation
 * @return the translated {@link DataMap}, or {@code null} if translation failed
 */
@Override
public Object avroGenericToData(DataTranslatorContext context, Object avroData, Schema avroSchema, DataSchema schema) {
    if (!(avroData instanceof GenericRecord)) {
        context.appendMessage("Error translating %1$s, it is not a GenericRecord", avroData);
        return null;
    }
    GenericRecord genericRecord = (GenericRecord) avroData;

    // Null fields are reported separately below, so only non-null values of the wrong type
    // count as "not a Utf8" here. VALUE is only read once TYPE has been accepted.
    Object typeObject = genericRecord.get(TYPE);
    boolean typeAcceptable = typeObject == null || typeObject instanceof Utf8;
    Object valueObject = typeAcceptable ? genericRecord.get(VALUE) : null;
    boolean valueAcceptable = valueObject == null || valueObject instanceof Utf8;
    if (!typeAcceptable || !valueAcceptable) {
        context.appendMessage("Error translating %1$s, \"type\" or \"value\" is not a %2$s", avroData, Utf8.class.getSimpleName());
        return null;
    }
    if (typeObject == null || valueObject == null) {
        context.appendMessage("Error translating %1$s, \"type\" or \"value\" is null", avroData);
        return null;
    }

    Utf8 type = (Utf8) typeObject;
    Utf8 value = (Utf8) valueObject;
    try {
        // NOTE(review): Utf8.getBytes() exposes the backing array, which Avro documents as valid
        // only up to the Utf8's byte length - confirm the codec tolerates any trailing bytes.
        DataMap valueDataMap = _codec.bytesToMap(value.getBytes());
        DataMap anyDataMap = new DataMap(2);
        anyDataMap.put(type.toString(), valueDataMap);
        return anyDataMap;
    } catch (IOException e) {
        context.appendMessage("Error translating %1$s, %2$s", avroData, e);
        return null;
    }
}
Also used : Utf8(org.apache.avro.util.Utf8) IOException(java.io.IOException) GenericRecord(org.apache.avro.generic.GenericRecord) DataMap(com.linkedin.data.DataMap)

Example 49 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class AvroConverter method convert.

/**
 * Reads Avro records from {@code in} according to the configured input format and returns them
 * resolved against the output schema.
 *
 * <ul>
 *   <li>{@code BINARY} / {@code JSON}: a single record is read from the stream.</li>
 *   <li>{@code JSON_LINES}: every line of the stream is decoded as one JSON-encoded record.
 *       The stream is closed once all lines have been read.</li>
 * </ul>
 *
 * @param in the stream to read from
 * @return the records read, in input order
 * @throws IOException if reading or decoding fails
 * @throws RuntimeException if the configured input format is not handled by this method
 */
public List<GenericRecord> convert(InputStream in) throws IOException {
    // JSON_LINES builds one decoder per line inside the switch, so none is created up front for it
    Decoder inputDecoder = (_inputFormat == AvroFormat.BINARY) ? DecoderFactory.defaultFactory().createBinaryDecoder(in, null) : (AvroFormat.JSON == _inputFormat) ? new JsonDecoder(_inputSchema, in) : null;
    ArrayList<GenericRecord> result = new ArrayList<GenericRecord>();
    // only set up schema resolution when reader and writer schemas are different objects
    GenericDatumReader<GenericRecord> genericReader = _inputSchema != _outputSchema ? new GenericDatumReader<GenericRecord>(_inputSchema, _outputSchema) : new GenericDatumReader<GenericRecord>(_inputSchema);
    switch(_inputFormat) {
        case BINARY:
        case JSON:
            {
                GenericRecord r = genericReader.read(null, inputDecoder);
                result.add(r);
                break;
            }
        case JSON_LINES:
            {
                // NOTE(review): the reader uses the platform default charset - confirm whether UTF-8 should be forced.
                InputStreamReader inReader = new InputStreamReader(in);
                try {
                    BufferedReader lineIn = new BufferedReader(inReader);
                    try {
                        String line;
                        // consume every line; a "break" in this loop would stop after the first record
                        while (null != (line = lineIn.readLine())) {
                            inputDecoder = new JsonDecoder(_inputSchema, line);
                            GenericRecord r = genericReader.read(null, inputDecoder);
                            result.add(r);
                        }
                    } finally {
                        lineIn.close();
                    }
                } finally {
                    inReader.close();
                }
                // leave the switch here; without this the case falls through to "default" and throws
                break;
            }
        default:
            {
                throw new RuntimeException("Unimplemented input format: " + _inputFormat);
            }
    }
    return result;
}
Also used : JsonDecoder(org.apache.avro.io.JsonDecoder) InputStreamReader(java.io.InputStreamReader) ArrayList(java.util.ArrayList) BufferedReader(java.io.BufferedReader) JsonDecoder(org.apache.avro.io.JsonDecoder) Decoder(org.apache.avro.io.Decoder) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 50 with GenericRecord

use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

the class GenericRecordDtailPrinter method printEvent.

/**
 * Prints the event's payload, its metadata, or a record combining both, depending on the
 * configured metadata output mode. When the selected payload or metadata record is
 * {@code null} (modes {@code NONE} and {@code ONLY}) nothing is printed and
 * {@code SUCCESS} is returned; an unrecognized mode is logged and reported as a fatal error.
 *
 * @see com.linkedin.databus2.tools.dtail.DtailPrinter#printEvent(com.linkedin.databus.core.DbusEventInternalReadable, com.linkedin.databus.client.pub.DbusEventDecoder)
 */
@Override
public ConsumerCallbackResult printEvent(DbusEventInternalReadable e, DbusEventDecoder eventDecoder) {
    DbusEventAvroDecoder avroDecoder = (DbusEventAvroDecoder) eventDecoder;
    switch(_metadataOutput) {
        case NONE: {
            // payload only
            GenericRecord payloadRecord = eventDecoder.getGenericRecord(e, null);
            if (payloadRecord == null) {
                return ConsumerCallbackResult.SUCCESS;
            }
            return printGenericRecord(payloadRecord);
        }
        case ONLY: {
            // metadata only
            GenericRecord metadataRecord = avroDecoder.getMetadata(e, null);
            if (metadataRecord == null) {
                return ConsumerCallbackResult.SUCCESS;
            }
            return printGenericRecord(metadataRecord);
        }
        case INCLUDE: {
            GenericRecord payloadRecord = avroDecoder.getGenericRecord(e, null);
            GenericRecord metadataRecord = avroDecoder.getMetadata(e, null);

            // "payload" field: union of the event's payload schema and null
            Schema payloadUnion = Schema.createUnion(Arrays.asList(avroDecoder.getPayloadSchema(e).getSchema(), Schema.create(Type.NULL)));
            Field payloadField = new Field("payload", payloadUnion, "payload", null);

            // "metadata" field: union of the metadata schema and null; when the event has no
            // metadata schema, a union of int and null is used in its place
            VersionedSchema versionedMetadataSchema = avroDecoder.getMetadataSchema(e);
            Schema metadataUnion;
            if (versionedMetadataSchema != null) {
                metadataUnion = Schema.createUnion(Arrays.asList(versionedMetadataSchema.getSchema(), Schema.create(Type.NULL)));
            } else {
                metadataUnion = Schema.createUnion(Arrays.asList(Schema.create(Type.INT), Schema.create(Type.NULL)));
            }
            Field metadataField = new Field("metadata", metadataUnion, "metadata", null);

            // wrap both into one record: position 0 is the payload, position 1 the metadata
            Schema combinedSchema = Schema.createRecord(Arrays.asList(payloadField, metadataField));
            GenericRecord combinedRecord = new GenericData.Record(combinedSchema);
            combinedRecord.put(0, payloadRecord);
            combinedRecord.put(1, metadataRecord);
            return printGenericRecord(combinedRecord);
        }
        default: {
            LOG.error("unknown metadata output mode: " + _metadataOutput);
            return ConsumerCallbackResult.ERROR_FATAL;
        }
    }
}
Also used : Field(org.apache.avro.Schema.Field) DbusEventAvroDecoder(com.linkedin.databus.client.DbusEventAvroDecoder) Schema(org.apache.avro.Schema) VersionedSchema(com.linkedin.databus2.schemas.VersionedSchema) GenericRecord(org.apache.avro.generic.GenericRecord) GenericRecord(org.apache.avro.generic.GenericRecord) VersionedSchema(com.linkedin.databus2.schemas.VersionedSchema)

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord)262 Schema (org.apache.avro.Schema)101 Test (org.junit.Test)80 GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter)46 File (java.io.File)35 IOException (java.io.IOException)34 GenericData (org.apache.avro.generic.GenericData)30 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)30 ArrayList (java.util.ArrayList)29 ByteArrayOutputStream (java.io.ByteArrayOutputStream)27 DataFileWriter (org.apache.avro.file.DataFileWriter)20 HashMap (java.util.HashMap)19 ByteBuffer (java.nio.ByteBuffer)18 BinaryEncoder (org.apache.avro.io.BinaryEncoder)17 Field (org.apache.avro.Schema.Field)14 DataFileStream (org.apache.avro.file.DataFileStream)14 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)14 Utf8 (org.apache.avro.util.Utf8)14 Encoder (org.apache.avro.io.Encoder)12 DatasetRepository (com.cloudera.cdk.data.DatasetRepository)11