Search in sources:

Example 11 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class AbstractGenerator method parseSchema.

/**
   * Parse a source file to obtain the data schemas contained within.
   * <p>
   * If the parser reports an error, the path of the source file and the parser's
   * error message are appended to the buffer returned by {@code getMessage()} and
   * an empty list is returned.
   *
   * @param schemaSourceFile provides the source file.
   * @return the data schemas within the source file, or an empty list if the parser reported an error.
   * @throws IOException if there is a file access error.
   */
protected List<DataSchema> parseSchema(final File schemaSourceFile) throws IOException {
    PegasusSchemaParser parser = new SchemaParser(getSchemaResolver());
    FileInputStream schemaStream = new SchemaFileInputStream(schemaSourceFile);
    try {
        parser.setLocation(new FileDataSchemaLocation(schemaSourceFile));
        parser.parse(schemaStream);
        if (parser.hasError()) {
            return Collections.emptyList();
        }
        return parser.topLevelDataSchemas();
    } finally {
        try {
            // Record the parser diagnostics before closing the stream, so that an
            // IOException thrown by close() cannot cause them to be lost.
            if (parser.hasError()) {
                getMessage().append(schemaSourceFile.getPath() + ",");
                getMessage().append(parser.errorMessage());
            }
        } finally {
            // Always release the file handle, even if recording diagnostics failed.
            schemaStream.close();
        }
    }
}
Also used : FileDataSchemaLocation(com.linkedin.data.schema.resolver.FileDataSchemaLocation) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) FileInputStream(java.io.FileInputStream)

Example 12 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestDataSchemaResolver method lookup.

/**
 * Looks up each name in {@code lookups} through a parser backed by {@code resolver}
 * and asserts the outcome described by the entry.
 * <p>
 * Each entry is laid out as:
 * <ul>
 *   <li>{@code entry[0]} - the name to look up;</li>
 *   <li>{@code entry[1]} - the expected outcome: {@code ERROR}, {@code FOUND} or {@code NOT_FOUND};</li>
 *   <li>{@code entry[2]} - text expected in the error message (for {@code ERROR} and
 *       {@code NOT_FOUND}, may be {@code null}) or in the schema text (for {@code FOUND});</li>
 *   <li>{@code entry[3]} - only read for {@code FOUND}: the suffix expected at the end of the
 *       location recorded by the resolver for the name.</li>
 * </ul>
 *
 * @param resolver provides the resolver used to look up names and whose bindings are verified.
 * @param lookups provides the lookups to perform and their expected outcomes.
 * @param separator provides the separator character used to normalize the expected location.
 * @param debug if true, prints the name, error message and schema text of each lookup.
 */
public void lookup(DataSchemaResolver resolver, String[][] lookups, char separator, boolean debug) {
    PegasusSchemaParser parser = new SchemaParser(resolver);
    for (String[] entry : lookups) {
        String name = entry[0];
        String expectFound = entry[1];
        String expected = entry[2];
        DataSchema schema = parser.lookupName(name);
        if (debug) {
            out.println("----" + name + "-----");
        }
        String errorMessage = parser.errorMessage();
        if (debug && !errorMessage.isEmpty()) {
            out.println(errorMessage);
        }
        // Compare by value rather than by reference so that the check does not
        // depend on the expectation being the very same String instance.
        if (ERROR.equals(expectFound)) {
            assertTrue(parser.hasError());
            assertTrue(expected == null || errorMessage.contains(expected));
        } else if (FOUND.equals(expectFound)) {
            assertTrue(schema != null);
            String schemaText = schema.toString();
            if (debug) {
                out.println(schemaText);
            }
            assertFalse(parser.hasError());
            assertTrue(schema instanceof NamedDataSchema);
            NamedDataSchema namedSchema = (NamedDataSchema) schema;
            assertEquals(namedSchema.getFullName(), name);
            assertTrue(schemaText.contains(expected));
            assertTrue(resolver.bindings().containsKey(name));
            assertSame(resolver.bindings().get(name), namedSchema);
            String location = entry[3];
            DataSchemaLocation namedSchemalocation = resolver.nameToDataSchemaLocations().get(name);
            // Check for a missing location before dereferencing it, so that it is
            // reported as an assertion failure instead of a NullPointerException.
            assertNotNull(namedSchemalocation);
            String actualLocation = namedSchemalocation.toString();
            String locationNorm;
            if (actualLocation.contains(".jar")) {
                // locations containing ".jar" are compared using '/' in the expected text
                locationNorm = location.replace(separator, '/');
            } else {
                // other locations are compared using the supplied separator
                locationNorm = location.replace('/', separator);
            }
            // the first occurrence of the normalized location must be at the very end of the actual location
            assertEquals(actualLocation.indexOf(locationNorm), actualLocation.length() - locationNorm.length());
            assertTrue(resolver.locationResolved(namedSchemalocation));
        } else if (NOT_FOUND.equals(expectFound)) {
            assertTrue(schema == null);
            assertFalse(parser.hasError());
            assertTrue(expected == null || errorMessage.contains(expected));
            assertFalse(resolver.bindings().containsKey(name));
        } else {
            // unknown expectation, fail the test
            assertTrue(false);
        }
    }
}
Also used : DataSchema(com.linkedin.data.schema.DataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) DataSchemaLocation(com.linkedin.data.schema.DataSchemaLocation)

Example 13 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestConversions method testConvertDataMapToDataSchema.

/**
 * Checks {@code Conversions.dataMapToDataSchema} against every entry of
 * {@code goodInputs} and {@code badInputs}.
 * <p>
 * For a good input, the schema converted from the data map must equal the schema
 * obtained from the same text. For a bad input, the conversion must return null
 * and leave the parser in an error state.
 */
@Test
public void testConvertDataMapToDataSchema() throws IOException {
    for (final String goodInput : goodInputs) {
        final NamedDataSchema expectedSchema = (NamedDataSchema) TestUtil.dataSchemaFromString(goodInput);
        final DataMap inputMap = TestUtil.dataMapFromString(goodInput);
        final PegasusSchemaParser schemaParser = new SchemaParser();
        final DataSchema convertedSchema = Conversions.dataMapToDataSchema(inputMap, schemaParser);
        assertEquals(convertedSchema, expectedSchema);
    }

    for (final String badInput : badInputs) {
        final DataMap inputMap = TestUtil.dataMapFromString(badInput);
        final PegasusSchemaParser schemaParser = new SchemaParser();
        final DataSchema convertedSchema = Conversions.dataMapToDataSchema(inputMap, schemaParser);
        // a failed conversion yields no schema and is reported through the parser
        assertNull(convertedSchema);
        assertTrue(schemaParser.hasError());
    }
}
Also used : NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) DataSchema(com.linkedin.data.schema.DataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) DataMap(com.linkedin.data.DataMap) Test(org.testng.annotations.Test)

Example 14 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestSchemaFilter method dataSchemaFromString.

/**
 * Parses the text and returns the last top-level schema that was parsed.
 *
 * @param s provides the schema text to parse.
 * @param isAvroUnionMode provides the Avro union mode set on the parser's validation options.
 * @return the last top-level schema cast to {@link NamedDataSchema}, or null if the
 *         parser reported an error (the error message is printed to {@code TestUtil.out}).
 * @throws IOException declared by the original signature.
 */
private static NamedDataSchema dataSchemaFromString(String s, boolean isAvroUnionMode) throws IOException {
    final SchemaParser schemaParser = new SchemaParser();
    schemaParser.getValidationOptions().setAvroUnionMode(isAvroUnionMode);
    schemaParser.parse(TestUtil.inputStreamFromString(s));

    if (!schemaParser.hasError()) {
        // the schema of interest is the last top-level schema parsed
        final int lastIndex = schemaParser.topLevelDataSchemas().size() - 1;
        return (NamedDataSchema) schemaParser.topLevelDataSchemas().get(lastIndex);
    }

    TestUtil.out.println("ERROR: " + schemaParser.errorMessage());
    return null;
}
Also used : NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) SchemaParser(com.linkedin.data.schema.SchemaParser)

Example 15 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class SchemaTranslator method avroToDataSchema.

/**
   * Translate an Avro {@link Schema} to a {@link DataSchema}.
   * <p>
   * If the translation mode is {@link AvroToDataSchemaTranslationMode#RETURN_EMBEDDED_SCHEMA}
   * and a {@link DataSchema} is embedded in the Avro schema, then return the embedded schema.
   * An embedded schema is present if the Avro {@link Schema} has a "com.linkedin.data" property and the
   * "com.linkedin.data" property contains both "schema" and "optionalDefaultMode" properties.
   * The "schema" property provides the embedded {@link DataSchema}.
   * The "optionalDefaultMode" property provides how optional default values were translated.
   * <p>
   * If the translation mode is {@link AvroToDataSchemaTranslationMode#VERIFY_EMBEDDED_SCHEMA}
   * and a {@link DataSchema} is embedded in the Avro schema, then verify that the embedded schema
   * translates to the input Avro schema. If the translated and embedded schema is the same,
   * then return the embedded schema, else throw {@link IllegalArgumentException}.
   * <p>
   * If the translation mode is {@link com.linkedin.data.avro.AvroToDataSchemaTranslationMode#TRANSLATE}
   * or no embedded {@link DataSchema} is present, then this method
   * translates the provided Avro {@link Schema} to a {@link DataSchema}
   * as follows:
   * <p>
   * This method translates union with null record fields in Avro {@link Schema}
   * to optional fields in {@link DataSchema}. Record fields
   * whose type is a union with null will be translated to a new type, and the field becomes optional.
   * If the Avro union has two types (one of them is the null type), then the new type of the
   * field is the non-null member type of the union. If the Avro union does not have two types
   * (one of them is the null type) then the new type of the field is a union type with the null type
   * removed from the original union.
   * <p>
   * This method also translates default values. If the field's type is a union with null
   * and has a default value, then this method also translates the default value of the field
   * to comply with the new type of the field. If the default value is null,
   * then remove the default value. If new type is not a union and the default value
   * is of the non-null member type, then assign the default value to the
   * non-null value within the union value (i.e. the value of the only entry within the
   * JSON object.) If the new type is a union and the default value is of the
   * non-null member type, then assign the default value to a JSON object
   * containing a single entry with the key being the member type discriminator of
   * the first union member and the value being the actual member value.
   * <p>
   * Both the schema and default value translation takes into account that default value
   * representation for Avro unions does not include the member type discriminator and
   * the type of the default value is always the 1st member of the union.
   *
   * @param avroSchemaInJson provides the JSON representation of the Avro {@link Schema}.
   * @param options specifies the {@link AvroToDataSchemaTranslationOptions}.
   * @return the translated {@link DataSchema}.
   * @throws IllegalArgumentException if the Avro {@link Schema} cannot be translated, if the
   *         embedded schema is invalid, or if the embedded schema cannot be verified against
   *         the input Avro schema.
   */
public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSchemaTranslationOptions options) throws IllegalArgumentException {
    // parse the Avro JSON with Avro union mode enabled
    ValidationOptions validationOptions = SchemaParser.getDefaultSchemaParserValidationOptions();
    validationOptions.setAvroUnionMode(true);
    SchemaParserFactory parserFactory = SchemaParserFactory.instance(validationOptions);
    DataSchemaResolver resolver = getResolver(parserFactory, options);
    PegasusSchemaParser parser = parserFactory.create(resolver);
    parser.parse(avroSchemaInJson);
    if (parser.hasError()) {
        throw new IllegalArgumentException(parser.errorMessage());
    }
    assert (parser.topLevelDataSchemas().size() == 1);
    DataSchema dataSchema = parser.topLevelDataSchemas().get(0);
    DataSchema resultDataSchema = null;
    AvroToDataSchemaTranslationMode translationMode = options.getTranslationMode();
    if (translationMode == AvroToDataSchemaTranslationMode.RETURN_EMBEDDED_SCHEMA || translationMode == AvroToDataSchemaTranslationMode.VERIFY_EMBEDDED_SCHEMA) {
        // check for embedded schema
        Object dataProperty = dataSchema.getProperties().get(SchemaTranslator.DATA_PROPERTY);
        if (dataProperty != null && dataProperty.getClass() == DataMap.class) {
            DataMap dataMap = (DataMap) dataProperty;
            Object schemaProperty = dataMap.get(SchemaTranslator.SCHEMA_PROPERTY);
            // A missing "schema" entry means there is no embedded schema: fall through
            // to regular translation instead of failing with a NullPointerException.
            if (schemaProperty != null && schemaProperty.getClass() == DataMap.class) {
                SchemaParser embeddedSchemaParser = SchemaParserFactory.instance().create(null);
                embeddedSchemaParser.parse(Arrays.asList(schemaProperty));
                if (embeddedSchemaParser.hasError()) {
                    throw new IllegalArgumentException("Embedded schema is invalid\n" + embeddedSchemaParser.errorMessage());
                }
                assert (embeddedSchemaParser.topLevelDataSchemas().size() == 1);
                resultDataSchema = embeddedSchemaParser.topLevelDataSchemas().get(0);
                if (translationMode == AvroToDataSchemaTranslationMode.VERIFY_EMBEDDED_SCHEMA) {
                    // additional verification to make sure that embedded schema translates to Avro schema
                    Object optionalDefaultModeProperty = dataMap.get(SchemaTranslator.OPTIONAL_DEFAULT_MODE_PROPERTY);
                    if (optionalDefaultModeProperty == null) {
                        // without the optional default mode the embedded schema cannot be translated back for comparison
                        throw new IllegalArgumentException("Embedded schema cannot be verified, \"" + SchemaTranslator.OPTIONAL_DEFAULT_MODE_PROPERTY + "\" property is missing: " + avroSchemaInJson);
                    }
                    DataToAvroSchemaTranslationOptions dataToAvroSchemaOptions = new DataToAvroSchemaTranslationOptions();
                    dataToAvroSchemaOptions.setOptionalDefaultMode(OptionalDefaultMode.valueOf(optionalDefaultModeProperty.toString()));
                    Schema avroSchemaFromEmbedded = dataToAvroSchema(resultDataSchema, dataToAvroSchemaOptions);
                    Schema avroSchemaFromJson = Schema.parse(avroSchemaInJson);
                    if (!avroSchemaFromEmbedded.equals(avroSchemaFromJson)) {
                        throw new IllegalArgumentException("Embedded schema does not translate to input Avro schema: " + avroSchemaInJson);
                    }
                }
            }
        }
    }
    if (resultDataSchema == null) {
        // translationMode == TRANSLATE or no embedded schema
        DataSchemaTraverse traverse = new DataSchemaTraverse();
        traverse.traverse(dataSchema, AvroToDataSchemaConvertCallback.INSTANCE);
        // convert default values
        traverse.traverse(dataSchema, DefaultAvroToDataConvertCallback.INSTANCE);
        // make sure it can round-trip
        String dataSchemaJson = dataSchema.toString();
        resultDataSchema = DataTemplateUtil.parseSchema(dataSchemaJson);
    }
    return resultDataSchema;
}
Also used : PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) SchemaParserFactory(com.linkedin.data.schema.SchemaParserFactory) FixedDataSchema(com.linkedin.data.schema.FixedDataSchema) DataSchema(com.linkedin.data.schema.DataSchema) UnionDataSchema(com.linkedin.data.schema.UnionDataSchema) MapDataSchema(com.linkedin.data.schema.MapDataSchema) EnumDataSchema(com.linkedin.data.schema.EnumDataSchema) Schema(org.apache.avro.Schema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayDataSchema(com.linkedin.data.schema.ArrayDataSchema) ByteString(com.linkedin.data.ByteString) ValidationOptions(com.linkedin.data.schema.validation.ValidationOptions) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) DataMap(com.linkedin.data.DataMap) FixedDataSchema(com.linkedin.data.schema.FixedDataSchema) DataSchema(com.linkedin.data.schema.DataSchema) UnionDataSchema(com.linkedin.data.schema.UnionDataSchema) MapDataSchema(com.linkedin.data.schema.MapDataSchema) EnumDataSchema(com.linkedin.data.schema.EnumDataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayDataSchema(com.linkedin.data.schema.ArrayDataSchema) FileDataSchemaResolver(com.linkedin.data.schema.resolver.FileDataSchemaResolver) DataSchemaResolver(com.linkedin.data.schema.DataSchemaResolver) DefaultDataSchemaResolver(com.linkedin.data.schema.resolver.DefaultDataSchemaResolver) DataSchemaTraverse(com.linkedin.data.schema.DataSchemaTraverse)

Aggregations

SchemaParser (com.linkedin.data.schema.SchemaParser): 16, DataSchema (com.linkedin.data.schema.DataSchema): 11, PegasusSchemaParser (com.linkedin.data.schema.PegasusSchemaParser): 10, NamedDataSchema (com.linkedin.data.schema.NamedDataSchema): 9, Test (org.testng.annotations.Test): 7, RecordDataSchema (com.linkedin.data.schema.RecordDataSchema): 6, DataMap (com.linkedin.data.DataMap): 5, Schema (org.apache.avro.Schema): 5, Predicate (com.linkedin.data.it.Predicate): 4, GenericRecord (org.apache.avro.generic.GenericRecord): 4, ValidationOptions (com.linkedin.data.schema.validation.ValidationOptions): 3, FileInputStream (java.io.FileInputStream): 2, IOException (java.io.IOException): 2, BeforeTest (org.testng.annotations.BeforeTest): 2, ByteString (com.linkedin.data.ByteString): 1, ArrayDataSchema (com.linkedin.data.schema.ArrayDataSchema): 1, DataSchemaLocation (com.linkedin.data.schema.DataSchemaLocation): 1, DataSchemaResolver (com.linkedin.data.schema.DataSchemaResolver): 1, DataSchemaTraverse (com.linkedin.data.schema.DataSchemaTraverse): 1, EnumDataSchema (com.linkedin.data.schema.EnumDataSchema): 1