Search in sources :

Example 1 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestSchemaTranslator method testUnionDefaultValues.

/**
 * Checks that record schemas whose union-typed fields carry default values are accepted
 * both by Avro and by {@link SchemaParser} when Avro union mode is switched on.
 *
 * <p>Two reader schemas are exercised: one puts the default for the nested union field
 * {@code b1} on the enclosing field {@code f2}, the other puts it on {@code b1} itself and
 * gives {@code f2} an empty-record default. For each, an empty record written with a
 * field-less schema is passed to {@code genericRecordFromString} together with the reader
 * schema (helper defined elsewhere; presumably this resolves the defaults - confirm).
 *
 * @throws IOException declared for the helper calls used by this test
 */
@Test
public void testUnionDefaultValues() throws IOException {
    // NOTE(review): debug output is enabled here, unlike the sibling tests; left as is so
    // the console output of this test is unchanged.
    boolean debug = true;
    final String emptySchemaText =
        "{ "
        + "  \"type\" : \"record\", "
        + "  \"name\" : \"foo\", "
        + "  \"fields\" : [] "
        + "}";
    final Schema emptySchema = Schema.parse(emptySchemaText);
    final String emptyRecord = "{}";
    final String[] input = {
        // default for b1 supplied through the default of the enclosing field f2
        "{ "
        + "  \"type\" : \"record\", "
        + "  \"name\" : \"foo\", "
        + "  \"fields\" : [ "
        + "    { "
        + "      \"name\" : \"f1\", "
        + "      \"type\" : [ \"int\", \"null\" ], "
        + "      \"default\" : 42 "
        + "    }, "
        + "    { "
        + "      \"name\" : \"f2\", "
        + "      \"type\" : { "
        + "        \"type\" : \"record\", "
        + "        \"name\" : \"bar\", "
        + "        \"fields\" : [ "
        + "          { "
        + "            \"name\" : \"b1\", \"type\" : [ \"string\", \"null\" ] "
        + "          } "
        + "        ] "
        + "      }, "
        + "      \"default\" : { \"b1\" : \"abc\" } "
        + "    } "
        + "  ] "
        + "}",
        // default for b1 supplied on b1 itself; f2 defaults to an empty record
        "{ "
        + "  \"type\" : \"record\", "
        + "  \"name\" : \"foo\", "
        + "  \"fields\" : [ "
        + "    { "
        + "      \"name\" : \"f1\", "
        + "      \"type\" : [ \"int\", \"null\" ], "
        + "      \"default\" : 42 "
        + "    }, "
        + "    { "
        + "      \"name\" : \"f2\", "
        + "      \"type\" : { "
        + "        \"type\" : \"record\", "
        + "        \"name\" : \"bar\", "
        + "        \"fields\" : [ "
        + "          { "
        + "            \"name\" : \"b1\", \"type\" : [ \"string\", \"null\" ], \"default\" : \"abc\" "
        + "          } "
        + "        ] "
        + "      }, "
        + "      \"default\" : { } "
        + "    } "
        + "  ] "
        + "}"
    };
    for (String readerSchemaText : input) {
        // Avro must accept the schema and be able to read the empty record with it.
        final Schema readerSchema = Schema.parse(readerSchemaText);
        GenericRecord record = genericRecordFromString(emptyRecord, emptySchema, readerSchema);
        if (debug) {
            System.out.println(record);
        }
        // The same schema text must also parse cleanly in Avro union mode.
        SchemaParser parser = new SchemaParser();
        parser.getValidationOptions().setAvroUnionMode(true);
        parser.parse(readerSchemaText);
        if (debug) {
            System.out.println(parser.errorMessage());
        }
        // Surface the parser's error text in the failure report, as the sibling tests do.
        assertFalse(parser.hasError(), parser.errorMessage());
    }
}
Also used : DataSchema(com.linkedin.data.schema.DataSchema) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) Test(org.testng.annotations.Test)

Example 2 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestSchemaTranslator method testToAvroSchema.

/**
 * Translates a Pegasus schema to Avro under every {@link EmbedSchemaMode} and each requested
 * {@link OptionalDefaultMode}, then checks the outcome against the expectations in {@code row}.
 *
 * <p>Every element of {@code row} from index 1 onwards is an {@code Object[]} read as follows:
 * <ul>
 *   <li>[0] {@code OptionalDefaultMode[]} - the modes to translate with</li>
 *   <li>[1] the expected Avro schema JSON ({@code String}), or otherwise the expected
 *       exception class</li>
 *   <li>[2] writer schema text (success case, read only when at least four elements are
 *       present) or a substring the exception message must contain (failure case)</li>
 *   <li>[3] Avro JSON value to decode (read only when at least four elements are present)</li>
 *   <li>[4] expected JSON form of the decoded record (read only when at least five elements
 *       are present)</li>
 * </ul>
 * Index 0 of {@code row} is not read here; presumably it holds the schema text that the
 * caller passes as {@code schemaText} - confirm against the data provider.
 *
 * @param schemaText Pegasus schema source to translate
 * @param row expectation row laid out as described above
 * @throws IOException declared for the parsing and decoding helpers used by this method
 */
private void testToAvroSchema(String schemaText, Object[] row) throws IOException {
    boolean debug = false;
    if (debug) {
        System.out.println(schemaText);
    }
    for (int i = 1; i < row.length; i++) {
        Object[] modeInputs = (Object[]) row[i];
        OptionalDefaultMode[] optionalDefaultModes = (OptionalDefaultMode[]) modeInputs[0];
        Object expected = modeInputs[1];
        for (EmbedSchemaMode embedSchemaMode : EmbedSchemaMode.values()) {
            for (OptionalDefaultMode optionalDefaultMode : optionalDefaultModes) {
                DataSchema schema = TestUtil.dataSchemaFromString(schemaText);
                String preTranslateSchemaText = schema.toString();
                Exception exc = null;
                String avroTextFromSchema = null;
                try {
                    avroTextFromSchema = SchemaTranslator.dataToAvroSchemaJson(schema, new DataToAvroSchemaTranslationOptions(optionalDefaultMode, JsonBuilder.Pretty.SPACES, embedSchemaMode));
                    if (debug) {
                        System.out.println("EmbeddedSchema: " + embedSchemaMode + ", OptionalDefaultMode: " + optionalDefaultMode + ", Avro Schema: " + avroTextFromSchema);
                    }
                } catch (Exception e) {
                    // Captured rather than rethrown: some rows expect the translation to fail.
                    exc = e;
                    if (debug) {
                        e.printStackTrace();
                    }
                }
                if (expected instanceof String) {
                    assertNull(exc, "unexpected translation failure: " + exc);
                    String expectedAvroText = (String) expected;
                    if (embedSchemaMode == EmbedSchemaMode.ROOT_ONLY && hasEmbeddedSchema(schema)) {
                        // when embeddedSchema is enabled
                        // for maps, arrays, enums, and records, we embed the original Pegasus schema
                        DataMap expectedAvroDataMap = TestUtil.dataMapFromString(expectedAvroText);
                        DataMap resultAvroDataMap = TestUtil.dataMapFromString(avroTextFromSchema);
                        // Strip the embedded data property so the remainder can be compared
                        // with the expected Avro schema.
                        Object dataProperty = resultAvroDataMap.remove(SchemaTranslator.DATA_PROPERTY);
                        assertEquals(resultAvroDataMap, expectedAvroDataMap);
                        // look for embedded schema
                        assertNotNull(dataProperty);
                        assertTrue(dataProperty instanceof DataMap);
                        Object schemaProperty = ((DataMap) dataProperty).get(SchemaTranslator.SCHEMA_PROPERTY);
                        assertNotNull(schemaProperty);
                        assertTrue(schemaProperty instanceof DataMap);
                        // make sure embedded schema is same as the original schema
                        PegasusSchemaParser schemaParser = TestUtil.schemaParserFromObjects(Arrays.asList(schemaProperty));
                        DataSchema embeddedSchema = schemaParser.topLevelDataSchemas().get(0);
                        assertEquals(embeddedSchema, schema.getDereferencedDataSchema());
                        // look for optional default mode
                        Object optionalDefaultModeProperty = ((DataMap) dataProperty).get(SchemaTranslator.OPTIONAL_DEFAULT_MODE_PROPERTY);
                        // Check the property that was just looked up, not the loop variable.
                        assertNotNull(optionalDefaultModeProperty);
                        assertEquals(optionalDefaultModeProperty, optionalDefaultMode.toString());
                    } else {
                        // for unions and primitives, we never embed the pegasus schema
                        if (embedSchemaMode == EmbedSchemaMode.NONE && hasEmbeddedSchema(schema)) {
                            // make sure there is no embedded schema when embedding is turned off
                            DataMap resultAvroDataMap = TestUtil.dataMapFromString(avroTextFromSchema);
                            assertFalse(resultAvroDataMap.containsKey(SchemaTranslator.DATA_PROPERTY));
                        }
                        assertEquals(avroTextFromSchema, expectedAvroText);
                    }
                    // Translation must not mutate the input schema (actual first, expected second).
                    String postTranslateSchemaText = schema.toString();
                    assertEquals(postTranslateSchemaText, preTranslateSchemaText);
                    // make sure Avro accepts it
                    Schema avroSchema = Schema.parse(avroTextFromSchema);
                    if (debug) {
                        System.out.println("AvroSchema: " + avroSchema);
                    }
                    SchemaParser parser = new SchemaParser();
                    ValidationOptions options = new ValidationOptions();
                    options.setAvroUnionMode(true);
                    parser.setValidationOptions(options);
                    parser.parse(avroTextFromSchema);
                    assertFalse(parser.hasError(), parser.errorMessage());
                    if (optionalDefaultMode == DataToAvroSchemaTranslationOptions.DEFAULT_OPTIONAL_DEFAULT_MODE) {
                        // use other dataToAvroSchemaJson
                        String avroSchema2Json = SchemaTranslator.dataToAvroSchemaJson(TestUtil.dataSchemaFromString(schemaText));
                        String avroSchema2JsonCompact = SchemaTranslator.dataToAvroSchemaJson(TestUtil.dataSchemaFromString(schemaText), new DataToAvroSchemaTranslationOptions());
                        assertEquals(avroSchema2Json, avroSchema2JsonCompact);
                        Schema avroSchema2 = Schema.parse(avroSchema2Json);
                        assertEquals(avroSchema2, avroSchema);
                        // use dataToAvroSchema
                        Schema avroSchema3 = SchemaTranslator.dataToAvroSchema(TestUtil.dataSchemaFromString(schemaText));
                        assertEquals(avroSchema3, avroSchema2);
                    }
                    if (modeInputs.length >= 4) {
                        // check if the translated default value is good by using it.
                        // writer schema and Avro JSON value should not include fields with default values.
                        String writerSchemaText = (String) modeInputs[2];
                        String avroValueJson = (String) modeInputs[3];
                        Schema writerSchema = Schema.parse(writerSchemaText);
                        GenericRecord genericRecord = genericRecordFromString(avroValueJson, writerSchema, avroSchema);
                        if (modeInputs.length >= 5) {
                            // Compare as DataMaps rather than as raw strings.
                            String genericRecordJson = (String) modeInputs[4];
                            String genericRecordAsString = genericRecord.toString();
                            DataMap expectedGenericRecord = TestUtil.dataMapFromString(genericRecordJson);
                            DataMap resultGenericRecord = TestUtil.dataMapFromString(genericRecordAsString);
                            assertEquals(resultGenericRecord, expectedGenericRecord);
                        }
                    }
                    if (embedSchemaMode == EmbedSchemaMode.ROOT_ONLY && hasEmbeddedSchema(schema)) {
                        // if embedded schema is enabled, translate Avro back to Pegasus schema.
                        // the output Pegasus schema should be exactly the same as the input schema,
                        // taking typerefs into account.
                        AvroToDataSchemaTranslationOptions avroToDataSchemaMode = new AvroToDataSchemaTranslationOptions(AvroToDataSchemaTranslationMode.VERIFY_EMBEDDED_SCHEMA);
                        DataSchema embeddedSchema = SchemaTranslator.avroToDataSchema(avroTextFromSchema, avroToDataSchemaMode);
                        assertEquals(embeddedSchema, schema.getDereferencedDataSchema());
                    }
                } else {
                    // Failure case: [1] is the exception class, [2] a required message fragment.
                    Class<?> expectedExceptionClass = (Class<?>) expected;
                    String expectedString = (String) modeInputs[2];
                    assertNotNull(exc);
                    assertNull(avroTextFromSchema);
                    assertTrue(expectedExceptionClass.isInstance(exc));
                    // Fail with a readable assertion instead of a NullPointerException when the
                    // exception carries no message.
                    String actualMessage = exc.getMessage();
                    assertNotNull(actualMessage, exc.getClass().getName() + " has no message; expected it to contain \"" + expectedString + "\"");
                    assertTrue(actualMessage.contains(expectedString), "\"" + actualMessage + "\" does not contain \"" + expectedString + "\"");
                }
            }
        }
    }
}
Also used : PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) DataSchema(com.linkedin.data.schema.DataSchema) Schema(org.apache.avro.Schema) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) ValidationOptions(com.linkedin.data.schema.validation.ValidationOptions) IOException(java.io.IOException) DataMap(com.linkedin.data.DataMap) DataSchema(com.linkedin.data.schema.DataSchema) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 3 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestSchemaTranslator method testToAvroSchemaInternal.

/**
 * Translates a Pegasus schema to Avro under every {@link EmbedSchemaMode} and each given
 * {@link OptionalDefaultMode}, excluding the typeref properties {@code "validate"} and
 * {@code "java"}, and verifies the result.
 *
 * <p>For each combination the method checks that the Avro text matches {@code expected}
 * (ignoring the embedded data property when the schema is embedded), that the input schema
 * is unchanged by translation, that Avro and {@link SchemaParser} (Avro union mode) both
 * accept the output, and - when embedding is active - that translating back yields the
 * dereferenced input schema.
 *
 * @param schemaText Pegasus schema source to translate
 * @param optionalDefaultModes modes to translate with
 * @param expected expected Avro schema JSON
 * @param writerSchemaText Avro writer schema used to decode {@code avroValueJson}; must be
 *        non-null whenever {@code avroValueJson} is non-null
 * @param avroValueJson Avro JSON value handed to {@code genericRecordFromString}; the
 *        decoding step is skipped when this and {@code writerSchemaText} are both null
 * @param expectedGenericRecordJson when non-null, compared (after
 *        {@code TestAvroUtil.serializedEnumValueProcessor}) with the decoded record's
 *        {@code toString()}
 * @throws IOException declared for the parsing and decoding helpers used by this method
 */
private void testToAvroSchemaInternal(String schemaText, OptionalDefaultMode[] optionalDefaultModes, String expected, String writerSchemaText, String avroValueJson, String expectedGenericRecordJson) throws IOException {
    for (EmbedSchemaMode embedSchemaMode : EmbedSchemaMode.values()) {
        for (OptionalDefaultMode optionalDefaultMode : optionalDefaultModes) {
            DataSchema schema = TestUtil.dataSchemaFromString(schemaText);
            String preTranslateSchemaText = schema.toString();
            DataToAvroSchemaTranslationOptions transOptions = new DataToAvroSchemaTranslationOptions(optionalDefaultMode, JsonBuilder.Pretty.SPACES, embedSchemaMode);
            transOptions.setTyperefPropertiesExcludeSet(new HashSet<>(Arrays.asList("validate", "java")));
            String avroTextFromSchema = SchemaTranslator.dataToAvroSchemaJson(schema, transOptions);
            if (embedSchemaMode == EmbedSchemaMode.ROOT_ONLY && hasEmbeddedSchema(schema)) {
                // when embeddedSchema is enabled
                // for maps, arrays, enums, and records, we embed the original Pegasus schema
                DataMap expectedAvroDataMap = TestUtil.dataMapFromString(expected);
                DataMap resultAvroDataMap = TestUtil.dataMapFromString(avroTextFromSchema);
                // Strip the embedded data property so the remainder can be compared with
                // the expected Avro schema.
                Object dataProperty = resultAvroDataMap.remove(SchemaTranslator.DATA_PROPERTY);
                assertEquals(resultAvroDataMap, expectedAvroDataMap);
                // look for embedded schema
                assertNotNull(dataProperty);
                assertTrue(dataProperty instanceof DataMap);
                Object schemaProperty = ((DataMap) dataProperty).get(SchemaTranslator.SCHEMA_PROPERTY);
                assertNotNull(schemaProperty);
                assertTrue(schemaProperty instanceof DataMap);
                // make sure embedded schema is same as the original schema
                PegasusSchemaParser schemaParser = TestUtil.schemaParserFromObjects(Arrays.asList(schemaProperty));
                DataSchema embeddedSchema = schemaParser.topLevelDataSchemas().get(0);
                assertEquals(embeddedSchema, schema.getDereferencedDataSchema());
                // look for optional default mode
                Object optionalDefaultModeProperty = ((DataMap) dataProperty).get(SchemaTranslator.OPTIONAL_DEFAULT_MODE_PROPERTY);
                // Check the property that was just looked up, not the loop variable.
                assertNotNull(optionalDefaultModeProperty);
                assertEquals(optionalDefaultModeProperty, optionalDefaultMode.toString());
            } else {
                // for unions and primitives, we never embed the pegasus schema
                if (embedSchemaMode == EmbedSchemaMode.NONE && hasEmbeddedSchema(schema)) {
                    // make sure there is no embedded schema when embedding is turned off
                    DataMap resultAvroDataMap = TestUtil.dataMapFromString(avroTextFromSchema);
                    assertFalse(resultAvroDataMap.containsKey(SchemaTranslator.DATA_PROPERTY));
                }
                assertEquals(avroTextFromSchema, expected);
            }
            // Translation must not mutate the input schema.
            String postTranslateSchemaText = schema.toString();
            assertEquals(postTranslateSchemaText, preTranslateSchemaText);
            // make sure Avro accepts it
            Schema avroSchema = Schema.parse(avroTextFromSchema);
            SchemaParser parser = new SchemaParser();
            ValidationOptions options = new ValidationOptions();
            options.setAvroUnionMode(true);
            parser.setValidationOptions(options);
            parser.parse(avroTextFromSchema);
            assertFalse(parser.hasError(), parser.errorMessage());
            if (optionalDefaultMode == DataToAvroSchemaTranslationOptions.DEFAULT_OPTIONAL_DEFAULT_MODE) {
                // use other dataToAvroSchemaJson
                String avroSchema2Json = SchemaTranslator.dataToAvroSchemaJson(TestUtil.dataSchemaFromString(schemaText));
                String avroSchema2JsonCompact = SchemaTranslator.dataToAvroSchemaJson(TestUtil.dataSchemaFromString(schemaText), new DataToAvroSchemaTranslationOptions());
                assertEquals(avroSchema2Json, avroSchema2JsonCompact);
                Schema avroSchema2 = Schema.parse(avroSchema2Json);
                assertEquals(avroSchema2, avroSchema);
                // use dataToAvroSchema
                Schema avroSchema3 = SchemaTranslator.dataToAvroSchema(TestUtil.dataSchemaFromString(schemaText));
                assertEquals(avroSchema3, avroSchema2);
            }
            if (writerSchemaText != null || avroValueJson != null) {
                // check if the translated default value is good by using it.
                // writer schema and Avro JSON value should not include fields with default values.
                // The branch is entered when only avroValueJson is supplied; report that
                // misconfiguration clearly instead of failing inside Schema.parse(null).
                assertNotNull(writerSchemaText, "writerSchemaText must be provided when avroValueJson is provided");
                Schema writerSchema = Schema.parse(writerSchemaText);
                GenericRecord genericRecord = genericRecordFromString(avroValueJson, writerSchema, avroSchema);
                if (expectedGenericRecordJson != null) {
                    String genericRecordAsString = genericRecord.toString();
                    assertEquals(genericRecordAsString, TestAvroUtil.serializedEnumValueProcessor(expectedGenericRecordJson));
                }
            }
            if (embedSchemaMode == EmbedSchemaMode.ROOT_ONLY && hasEmbeddedSchema(schema)) {
                // if embedded schema is enabled, translate Avro back to Pegasus schema.
                // the output Pegasus schema should be exactly the same as the input schema,
                // taking typerefs into account.
                AvroToDataSchemaTranslationOptions avroToDataSchemaMode = new AvroToDataSchemaTranslationOptions(AvroToDataSchemaTranslationMode.VERIFY_EMBEDDED_SCHEMA);
                DataSchema embeddedSchema = SchemaTranslator.avroToDataSchema(avroTextFromSchema, avroToDataSchemaMode);
                assertEquals(embeddedSchema, schema.getDereferencedDataSchema());
            }
        }
    }
}
Also used : PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) DataSchema(com.linkedin.data.schema.DataSchema) Schema(org.apache.avro.Schema) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) ValidationOptions(com.linkedin.data.schema.validation.ValidationOptions) DataMap(com.linkedin.data.DataMap) DataSchema(com.linkedin.data.schema.DataSchema) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 4 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestSchemaTranslator method testPegasusDefaultToAvroOptionalSchemaTranslation.

/**
 * Translates each supplied Pegasus schema to Avro with
 * {@code PegasusToAvroDefaultFieldTranslationMode.DO_NOT_TRANSLATE} and compares the result
 * with the expected Avro schema.
 *
 * <p>If the schema text contains the marker {@code ##T_START}, two variants are tested: one
 * with the {@code ##T_START}/{@code ##T_END} markers removed, and one where they are replaced
 * by a typeref named {@code Ref} wrapping the enclosed type. Otherwise the text is tested as
 * given. Each translated schema must also be accepted by Avro and by {@link SchemaParser} in
 * Avro union mode.
 *
 * @param testSchemaTextAndExpected element 0 is the Pegasus schema text, element 1 the
 *        expected Avro schema JSON
 * @throws IOException declared for the parsing helpers used by this test
 */
@Test(dataProvider = "pegasusDefaultToAvroOptionalSchemaTranslationProvider", description = "Test schemaTranslator for default fields to optional fields translation, in different schema translation modes")
public void testPegasusDefaultToAvroOptionalSchemaTranslation(String... testSchemaTextAndExpected) throws IOException {
    final String schemaTemplate = testSchemaTextAndExpected[0];
    final String expectedAvroSchema = testSchemaTextAndExpected[1];

    // Expand the template into the concrete schema texts to exercise.
    final List<String> schemaVariants;
    if (!schemaTemplate.contains("##T_START")) {
        schemaVariants = Arrays.asList(schemaTemplate);
    } else {
        String withoutTyperef = schemaTemplate.replace("##T_START", "").replace("##T_END", "");
        String withTyperef = schemaTemplate
            .replace("##T_START", "{ \"type\" : \"typeref\", \"name\" : \"Ref\", \"ref\" : ")
            .replace("##T_END", "}");
        schemaVariants = Arrays.asList(withoutTyperef, withTyperef);
    }

    for (String variantText : schemaVariants) {
        DataSchema pegasusSchema = TestUtil.dataSchemaFromString(variantText);
        DataToAvroSchemaTranslationOptions translationOptions =
            new DataToAvroSchemaTranslationOptions(PegasusToAvroDefaultFieldTranslationMode.DO_NOT_TRANSLATE);
        String translatedAvroText = SchemaTranslator.dataToAvroSchemaJson(pegasusSchema, translationOptions);

        // Compare as DataMaps rather than as raw strings.
        DataMap actualAvro = TestUtil.dataMapFromString(translatedAvroText);
        DataMap expectedAvro = TestUtil.dataMapFromString(expectedAvroSchema);
        assertEquals(actualAvro, expectedAvro);

        // Avro itself must be able to parse the output; the parsed schema is not needed.
        Schema.parse(translatedAvroText);

        // The output must also parse without errors in Avro union mode.
        ValidationOptions unionModeOptions = new ValidationOptions();
        unionModeOptions.setAvroUnionMode(true);
        SchemaParser validatingParser = new SchemaParser();
        validatingParser.setValidationOptions(unionModeOptions);
        validatingParser.parse(translatedAvroText);
        assertFalse(validatingParser.hasError(), validatingParser.errorMessage());
    }
}
Also used : DataSchema(com.linkedin.data.schema.DataSchema) DataSchema(com.linkedin.data.schema.DataSchema) Schema(org.apache.avro.Schema) SchemaParser(com.linkedin.data.schema.SchemaParser) PegasusSchemaParser(com.linkedin.data.schema.PegasusSchemaParser) ValidationOptions(com.linkedin.data.schema.validation.ValidationOptions) DataMap(com.linkedin.data.DataMap) Test(org.testng.annotations.Test)

Example 5 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class TestFilteredSchemaDataTranslation method testFilteredDataSchemaDataTranslation.

/**
 * Translates Avro records against a Pegasus schema from which a field has been removed.
 *
 * <p>Each input row holds, in order: the Avro schema text, the predicate selecting what to
 * remove, the expected Pegasus schema after filtering, and then any number of
 * (source Avro JSON, expected translated JSON) pairs. The Avro schema is converted to a
 * Pegasus schema, filtered with {@code Filters.removeByPredicate}, compared with the
 * expected schema, and finally used to translate every source record.
 *
 * @throws IOException declared for the parsing and translation helpers used by this test
 */
@Test
public void testFilteredDataSchemaDataTranslation() throws IOException {
    Object[][] inputs = {
        {
            // Avro schema including the field that will be filtered out
            "{ "
            + "  \"type\" : \"record\", "
            + "  \"name\" : \"Foo\", "
            + "  \"fields\" : [ "
            + "    { \"name\" : \"a\", \"type\" : \"int\" }, "
            + "    { \"name\" : \"b\", \"type\" : [ \"null\", \"int\" ], \"default\" : null }, "
            + "    { \"name\" : \"removeMe\", \"type\" : \"int\" } "
            + "  ] "
            + "}",
            // predicate selecting the field to remove
            Predicates.hasChildWithNameValue("name", "removeMe"),
            // expected Pegasus schema after filtering
            "{ "
            + "  \"type\" : \"record\", "
            + "  \"name\" : \"Foo\", "
            + "  \"fields\" : [ "
            + "    { \"name\" : \"a\", \"type\" : \"int\" }, "
            + "    { \"name\" : \"b\", \"type\" : \"int\", \"optional\" : true } "
            + "  ] "
            + "}",
            // "removeMe" is dropped from output because it is not in output schema
            "{ \"a\" : 1, \"b\" : { \"int\" : 2 }, \"removeMe\" : 3 }",
            "{ \"a\" : 1, \"b\" : 2 }",
            // "b" has a null value so it is dropped from output; "removeMe" is dropped from
            // output because it is not in output schema
            "{ \"a\" : 1, \"b\" : null, \"removeMe\" : 3 }",
            "{ \"a\" : 1 }"
        }
    };
    for (Object[] testCase : inputs) {
        // Fixed leading columns of the row.
        String avroSchemaText = (String) testCase[0];
        Predicate removalPredicate = (Predicate) testCase[1];
        String expectedSchemaText = (String) testCase[2];

        Schema avroSchema = Schema.parse(avroSchemaText);
        System.out.println(avroSchema);

        RecordDataSchema pegasusSchema = (RecordDataSchema) SchemaTranslator.avroToDataSchema(avroSchema);
        RecordDataSchema filteredSchema = (RecordDataSchema) Filters.removeByPredicate(pegasusSchema, removalPredicate, new SchemaParser());
        DataSchema expectedSchema = TestUtil.dataSchemaFromString(expectedSchemaText);
        System.out.println(filteredSchema);
        assertEquals(filteredSchema, expectedSchema);

        // The remaining columns come in (source JSON, expected JSON) pairs.
        for (int pairStart = 3; pairStart < testCase.length; pairStart += 2) {
            String translationSourceJson = (String) testCase[pairStart];
            String translationExpectedJson = (String) testCase[pairStart + 1];
            GenericRecord sourceRecord = AvroUtil.genericRecordFromJson(translationSourceJson, avroSchema);
            DataMap translated = DataTranslator.genericRecordToDataMap(sourceRecord, filteredSchema, avroSchema);
            assertEquals(translated, TestUtil.dataMapFromString(translationExpectedJson));
        }
    }
}
Also used : DataSchema(com.linkedin.data.schema.DataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) Schema(org.apache.avro.Schema) DataSchema(com.linkedin.data.schema.DataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) SchemaParser(com.linkedin.data.schema.SchemaParser) GenericRecord(org.apache.avro.generic.GenericRecord) Predicate(com.linkedin.data.it.Predicate) DataMap(com.linkedin.data.DataMap) Test(org.testng.annotations.Test)

Aggregations

SchemaParser (com.linkedin.data.schema.SchemaParser)20 DataSchema (com.linkedin.data.schema.DataSchema)16 PegasusSchemaParser (com.linkedin.data.schema.PegasusSchemaParser)14 NamedDataSchema (com.linkedin.data.schema.NamedDataSchema)11 Test (org.testng.annotations.Test)10 RecordDataSchema (com.linkedin.data.schema.RecordDataSchema)8 Schema (org.apache.avro.Schema)8 DataMap (com.linkedin.data.DataMap)7 PdlSchemaParser (com.linkedin.data.schema.grammar.PdlSchemaParser)6 ValidationOptions (com.linkedin.data.schema.validation.ValidationOptions)5 GenericRecord (org.apache.avro.generic.GenericRecord)5 Predicate (com.linkedin.data.it.Predicate)4 BeforeTest (org.testng.annotations.BeforeTest)3 DataSchemaLocation (com.linkedin.data.schema.DataSchemaLocation)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 DataSchemaResolver (com.linkedin.data.schema.DataSchemaResolver)1 DataSchemaTraverse (com.linkedin.data.schema.DataSchemaTraverse)1 SchemaParserFactory (com.linkedin.data.schema.SchemaParserFactory)1 DefaultDataSchemaResolver (com.linkedin.data.schema.resolver.DefaultDataSchemaResolver)1