Search in sources :

Example 1 with SchemaParserFactory

Use of com.linkedin.data.schema.SchemaParserFactory in the rest.li project by LinkedIn.

From the class SchemaTranslator, method avroToDataSchema.

/**
 * Translate an Avro {@link Schema} to a {@link DataSchema}.
 * <p>
 * If the translation mode is {@link AvroToDataSchemaTranslationMode#RETURN_EMBEDDED_SCHEMA}
 * and a {@link DataSchema} is embedded in the Avro schema, then return the embedded schema.
 * An embedded schema is present if the Avro {@link Schema} has a "com.linkedin.data" property
 * that is a map and that map contains a "schema" property that is itself a map.
 * The "schema" property provides the embedded {@link DataSchema}.
 * The "optionalDefaultMode" property provides how optional default values were translated;
 * it is only read (and is then required) when the embedded schema is verified.
 * <p>
 * If the translation mode is {@link AvroToDataSchemaTranslationMode#VERIFY_EMBEDDED_SCHEMA}
 * and a {@link DataSchema} is embedded in the Avro schema, then verify that the embedded schema
 * translates to the input Avro schema. If the translated and embedded schema is the same,
 * then return the embedded schema, else throw {@link IllegalArgumentException}.
 * <p>
 * If the translation mode is {@link AvroToDataSchemaTranslationMode#TRANSLATE}
 * or no embedded {@link DataSchema} is present, then this method
 * translates the provided Avro {@link Schema} to a {@link DataSchema}
 * as follows:
 * <p>
 * This method translates union with null record fields in Avro {@link Schema}
 * to optional fields in {@link DataSchema}. Record fields
 * whose type is a union with null will be translated to a new type, and the field becomes optional.
 * If the Avro union has two types (one of them is the null type), then the new type of the
 * field is the non-null member type of the union. If the Avro union does not have two types
 * (one of them is the null type) then the new type of the field is a union type with the null type
 * removed from the original union.
 * <p>
 * This method also translates default values. If the field's type is a union with null
 * and has a default value, then this method also translates the default value of the field
 * to comply with the new type of the field. If the default value is null,
 * then remove the default value. If new type is not a union and the default value
 * is of the non-null member type, then assign the default value to the
 * non-null value within the union value (i.e. the value of the only entry within the
 * JSON object.) If the new type is a union and the default value is of the
 * non-null member type, then assign the default value to a JSON object
 * containing a single entry with the key being the member type discriminator of
 * the first union member and the value being the actual member value.
 * <p>
 * Both the schema and default value translation take into account that default value
 * representation for Avro unions does not include the member type discriminator and
 * the type of the default value is always the 1st member of the union.
 *
 * @param avroSchemaInJson provides the JSON representation of the Avro {@link Schema}.
 * @param options specifies the {@link AvroToDataSchemaTranslationOptions}.
 * @return the translated {@link DataSchema}.
 * @throws IllegalArgumentException if the Avro {@link Schema} cannot be parsed, if the embedded
 *         schema is invalid, or if verification of the embedded schema fails (including when the
 *         "optionalDefaultMode" property needed for verification is missing).
 */
public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSchemaTranslationOptions options) throws IllegalArgumentException {
    // Parse the Avro JSON with the Pegasus schema parser switched to Avro union mode.
    ValidationOptions validationOptions = SchemaParser.getDefaultSchemaParserValidationOptions();
    validationOptions.setAvroUnionMode(true);
    SchemaParserFactory parserFactory = SchemaParserFactory.instance(validationOptions);
    DataSchemaResolver resolver = getResolver(parserFactory, options);
    PegasusSchemaParser parser = parserFactory.create(resolver);
    parser.parse(avroSchemaInJson);
    if (parser.hasError()) {
        throw new IllegalArgumentException(parser.errorMessage());
    }
    assert (parser.topLevelDataSchemas().size() == 1);
    DataSchema dataSchema = parser.topLevelDataSchemas().get(0);

    DataSchema resultDataSchema = null;
    AvroToDataSchemaTranslationMode translationMode = options.getTranslationMode();
    boolean verifyEmbedded = translationMode == AvroToDataSchemaTranslationMode.VERIFY_EMBEDDED_SCHEMA;
    if (verifyEmbedded || translationMode == AvroToDataSchemaTranslationMode.RETURN_EMBEDDED_SCHEMA) {
        // check for embedded schema
        Object dataProperty = dataSchema.getProperties().get(SchemaTranslator.DATA_PROPERTY);
        if (dataProperty != null && dataProperty.getClass() == DataMap.class) {
            DataMap dataPropertyMap = (DataMap) dataProperty;
            Object schemaProperty = dataPropertyMap.get(SchemaTranslator.SCHEMA_PROPERTY);
            // A missing "schema" entry means there is no embedded schema; fall through to
            // translation instead of failing with a NullPointerException.
            if (schemaProperty != null && schemaProperty.getClass() == DataMap.class) {
                SchemaParser embeddedSchemaParser = SchemaParserFactory.instance().create(null);
                embeddedSchemaParser.parse(Arrays.asList(schemaProperty));
                if (embeddedSchemaParser.hasError()) {
                    throw new IllegalArgumentException("Embedded schema is invalid\n" + embeddedSchemaParser.errorMessage());
                }
                assert (embeddedSchemaParser.topLevelDataSchemas().size() == 1);
                resultDataSchema = embeddedSchemaParser.topLevelDataSchemas().get(0);
                if (verifyEmbedded) {
                    // additional verification to make sure that embedded schema translates to Avro schema
                    Object optionalDefaultModeProperty = dataPropertyMap.get(SchemaTranslator.OPTIONAL_DEFAULT_MODE_PROPERTY);
                    if (optionalDefaultModeProperty == null) {
                        // Without the mode the embedded schema cannot be translated back for comparison;
                        // report it through the documented exception type rather than a NullPointerException.
                        throw new IllegalArgumentException("Embedded schema cannot be verified, \"" + SchemaTranslator.OPTIONAL_DEFAULT_MODE_PROPERTY + "\" property is missing: " + avroSchemaInJson);
                    }
                    DataToAvroSchemaTranslationOptions dataToAvroSchemaOptions = new DataToAvroSchemaTranslationOptions();
                    dataToAvroSchemaOptions.setOptionalDefaultMode(OptionalDefaultMode.valueOf(optionalDefaultModeProperty.toString()));
                    Schema avroSchemaFromEmbedded = dataToAvroSchema(resultDataSchema, dataToAvroSchemaOptions);
                    Schema avroSchemaFromJson = Schema.parse(avroSchemaInJson);
                    if (!avroSchemaFromEmbedded.equals(avroSchemaFromJson)) {
                        throw new IllegalArgumentException("Embedded schema does not translate to input Avro schema: " + avroSchemaInJson);
                    }
                }
            }
        }
    }
    if (resultDataSchema == null) {
        // translationMode == TRANSLATE or no embedded schema
        DataSchemaTraverse traverse = new DataSchemaTraverse();
        traverse.traverse(dataSchema, AvroToDataSchemaConvertCallback.INSTANCE);
        // convert default values
        traverse.traverse(dataSchema, DefaultAvroToDataConvertCallback.INSTANCE);
        // make sure it can round-trip
        String dataSchemaJson = dataSchema.toString();
        resultDataSchema = DataTemplateUtil.parseSchema(dataSchemaJson);
    }
    return resultDataSchema;
}
Also used : PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) SchemaParserFactory(com.linkedin.data.schema.SchemaParserFactory) FixedDataSchema(com.linkedin.data.schema.FixedDataSchema) DataSchema(com.linkedin.data.schema.DataSchema) UnionDataSchema(com.linkedin.data.schema.UnionDataSchema) MapDataSchema(com.linkedin.data.schema.MapDataSchema) EnumDataSchema(com.linkedin.data.schema.EnumDataSchema) Schema(org.apache.avro.Schema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayDataSchema(com.linkedin.data.schema.ArrayDataSchema) ByteString(com.linkedin.data.ByteString) ValidationOptions(com.linkedin.data.schema.validation.ValidationOptions) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) DataMap(com.linkedin.data.DataMap) FixedDataSchema(com.linkedin.data.schema.FixedDataSchema) DataSchema(com.linkedin.data.schema.DataSchema) UnionDataSchema(com.linkedin.data.schema.UnionDataSchema) MapDataSchema(com.linkedin.data.schema.MapDataSchema) EnumDataSchema(com.linkedin.data.schema.EnumDataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayDataSchema(com.linkedin.data.schema.ArrayDataSchema) FileDataSchemaResolver(com.linkedin.data.schema.resolver.FileDataSchemaResolver) DataSchemaResolver(com.linkedin.data.schema.DataSchemaResolver) DefaultDataSchemaResolver(com.linkedin.data.schema.resolver.DefaultDataSchemaResolver) DataSchemaTraverse(com.linkedin.data.schema.DataSchemaTraverse)

Aggregations

ByteString (com.linkedin.data.ByteString)1 DataMap (com.linkedin.data.DataMap)1 ArrayDataSchema (com.linkedin.data.schema.ArrayDataSchema)1 DataSchema (com.linkedin.data.schema.DataSchema)1 DataSchemaResolver (com.linkedin.data.schema.DataSchemaResolver)1 DataSchemaTraverse (com.linkedin.data.schema.DataSchemaTraverse)1 EnumDataSchema (com.linkedin.data.schema.EnumDataSchema)1 FixedDataSchema (com.linkedin.data.schema.FixedDataSchema)1 MapDataSchema (com.linkedin.data.schema.MapDataSchema)1 PegasusSchemaParser (com.linkedin.data.schema.PegasusSchemaParser)1 RecordDataSchema (com.linkedin.data.schema.RecordDataSchema)1 SchemaParser (com.linkedin.data.schema.SchemaParser)1 SchemaParserFactory (com.linkedin.data.schema.SchemaParserFactory)1 UnionDataSchema (com.linkedin.data.schema.UnionDataSchema)1 DefaultDataSchemaResolver (com.linkedin.data.schema.resolver.DefaultDataSchemaResolver)1 FileDataSchemaResolver (com.linkedin.data.schema.resolver.FileDataSchemaResolver)1 ValidationOptions (com.linkedin.data.schema.validation.ValidationOptions)1 Schema (org.apache.avro.Schema)1