Search in sources :

Example 1 with NamedDataSchema

use of com.linkedin.data.schema.NamedDataSchema in project rest.li by linkedin.

the class AvroSchemaGenerator method targetFiles.

/**
 * Determines the Avro schema files to generate under the given directory.
 *
 * <p>Iterates over every name-to-location entry reported by the schema resolver. An entry is
 * processed when its location is contained in {@code _sourceLocations} or its full name is
 * contained in {@code _sources}, and the schema bound to that name is a
 * {@link RecordDataSchema}. Each such schema is translated to Avro schema JSON, and the JSON is
 * stored in {@code _fileToAvroSchemaMap} keyed by the file it belongs to.
 *
 * @param targetDirectory directory passed to {@code fileForAvroSchema} to derive each output file
 * @return the output files, one per translated record schema, in the iteration order of the
 *         resolver's name-to-location map
 */
protected List<File> targetFiles(File targetDirectory) {
    DataSchemaResolver schemaResolver = getSchemaResolver();
    Map<String, DataSchemaLocation> locationsByName = schemaResolver.nameToDataSchemaLocations();
    Map<String, NamedDataSchema> schemasByName = schemaResolver.bindings();
    List<File> outputFiles = new ArrayList<File>();

    for (Map.Entry<String, DataSchemaLocation> nameAndLocation : locationsByName.entrySet()) {
        String schemaName = nameAndLocation.getKey();
        DataSchemaLocation schemaLocation = nameAndLocation.getValue();

        boolean requested = _sourceLocations.contains(schemaLocation) || _sources.contains(schemaName);
        if (!requested) {
            continue;
        }

        NamedDataSchema boundSchema = schemasByName.get(schemaName);
        if (!(boundSchema instanceof RecordDataSchema)) {
            // Only record schemas get an Avro file; other named types (and unbound names) are skipped.
            continue;
        }
        RecordDataSchema record = (RecordDataSchema) boundSchema;

        File outputFile = fileForAvroSchema(schemaName, targetDirectory);
        outputFiles.add(outputFile);

        // Snapshot the schema text before and after translation so the assertion below can
        // detect a translation that modified the input schema.
        String textBefore = record.toString();
        _fileToAvroSchemaMap.put(outputFile, SchemaTranslator.dataToAvroSchemaJson(record, _options));
        String textAfter = record.toString();
        assert textBefore.equals(textAfter);
    }
    return outputFiles;
}
Also used : NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) DataSchemaResolver(com.linkedin.data.schema.DataSchemaResolver) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayList(java.util.ArrayList) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) FileDataSchemaLocation(com.linkedin.data.schema.resolver.FileDataSchemaLocation) DataSchemaLocation(com.linkedin.data.schema.DataSchemaLocation)

Example 2 with NamedDataSchema

use of com.linkedin.data.schema.NamedDataSchema in project rest.li by linkedin.

the class TestFilteredSchemaDataTranslation method testFilteredAvroSchemaDataTranslation.

/**
 * Checks that fields matched by the "derived" predicate are removed before the schema is
 * translated to Avro, and that data translated against the filtered Avro schema matches the
 * expected JSON.
 *
 * <p>Each row of the input table is laid out as: source schema text, removal predicate, expected
 * Avro schema text, followed by any number of (source data JSON, expected Avro JSON) pairs.
 */
@Test
public void testFilteredAvroSchemaDataTranslation() throws IOException {
    String fooSchemaText =
        "{ "
        + "  \"type\" : \"record\", "
        + "  \"name\" : \"Foo\", "
        + "  \"fields\" : [ "
        + "    { \"name\" : \"a\", \"type\" : \"int\" }, "
        + "    { \"name\" : \"b\", \"type\" : \"int\", \"optional\" : true }, "
        + "    { \"name\" : \"c\", \"type\" : \"int\", \"optional\" : true, \"derived\" : true } "
        + "  ] "
        + "}";
    String fooAvroSchemaText =
        "{ "
        + "  \"type\" : \"record\", "
        + "  \"name\" : \"Foo\", "
        + "  \"fields\" : [ "
        + "    { \"name\" : \"a\", \"type\" : \"int\" }, "
        + "    { \"name\" : \"b\", \"type\" : [ \"null\", \"int\" ], \"default\" : null } "
        + "  ] "
        + "}";

    Object[][] inputs = {
        {
            fooSchemaText,
            Predicates.hasChildWithNameValue("derived", true),
            fooAvroSchemaText,
            // "c" is dropped from output because it is not in the output schema
            "{ \"a\" : 1, \"b\" : 2, \"c\" : 3 }",
            "{ \"a\" : 1, \"b\" : { \"int\" : 2 } }",
            // "b" is translated to null and "c" is dropped from output because it is not in the output schema
            "{ \"a\" : 1, \"c\" : 3 }",
            "{ \"a\" : 1, \"b\" : null }"
        }
    };

    for (Object[] testCase : inputs) {
        String pegasusSchemaText = (String) testCase[0];
        Predicate removalPredicate = (Predicate) testCase[1];
        String expectedAvroSchemaText = (String) testCase[2];

        RecordDataSchema originalSchema = (RecordDataSchema) TestUtil.dataSchemaFromString(pegasusSchemaText);
        NamedDataSchema prunedSchema = Filters.removeByPredicate(originalSchema, removalPredicate, new SchemaParser());
        Schema prunedAvroSchema = SchemaTranslator.dataToAvroSchema(prunedSchema);
        Schema expectedAvroSchema = Schema.parse(expectedAvroSchemaText);
        assertEquals(prunedAvroSchema, expectedAvroSchema);

        // The remaining cells come in (source data, expected Avro JSON) pairs.
        for (int cell = 3; cell < testCase.length; cell += 2) {
            String sourceJson = (String) testCase[cell];
            String expectedJson = (String) testCase[cell + 1];

            DataMap sourceData = TestUtil.dataMapFromString(sourceJson);
            GenericRecord translated = DataTranslator.dataMapToGenericRecord(sourceData, originalSchema, prunedAvroSchema);
            DataMap actualData = TestUtil.dataMapFromString(AvroUtil.jsonFromGenericRecord(translated));
            assertEquals(actualData, TestUtil.dataMapFromString(expectedJson));
        }
    }
}
Also used : NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) Schema(org.apache.avro.Schema) DataSchema(com.linkedin.data.schema.DataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) SchemaParser(com.linkedin.data.schema.SchemaParser) GenericRecord(org.apache.avro.generic.GenericRecord) Predicate(com.linkedin.data.it.Predicate) DataMap(com.linkedin.data.DataMap) Test(org.testng.annotations.Test)

Example 3 with NamedDataSchema

use of com.linkedin.data.schema.NamedDataSchema in project rest.li by linkedin.

the class PdlSchemaParser method parseIncludes.

/**
 * Resolves the schemas listed in an include clause and gathers the fields they contribute.
 *
 * <p>An include is accepted when it resolves to a {@link NamedDataSchema} whose dereferenced
 * schema is a {@link RecordDataSchema}. For every accepted include the record's fields are
 * collected, the named schema is recorded as an include, and it is additionally recorded as
 * declared inline when {@code isDeclaredInline} says so. An include that cannot be resolved, or
 * that is not a record (or typeref to a record), gets an error message appended through
 * {@code startErrorMessage} and is otherwise skipped.
 *
 * @param includeSet the include clause from the parse tree; may be {@code null}, in which case
 *                   the result holds three empty collections
 * @return the collected fields, includes, and inline-declared includes
 * @throws ParseException declared by this method; not thrown directly by the code visible here
 */
private FieldsAndIncludes parseIncludes(PdlParser.FieldIncludesContext includeSet) throws ParseException {
    List<Field> inheritedFields = new ArrayList<>();
    List<NamedDataSchema> includedSchemas = new ArrayList<>();
    Set<NamedDataSchema> inlineDeclaredIncludes = new HashSet<>();

    if (includeSet == null) {
        return new FieldsAndIncludes(inheritedFields, includedSchemas, inlineDeclaredIncludes);
    }

    for (TypeAssignmentContext includeContext : includeSet.typeAssignment()) {
        DataSchema resolved = toDataSchema(includeContext);
        if (resolved == null) {
            startErrorMessage(includeContext).append("Unable to resolve included schema: ").append(includeContext).append(NEWLINE);
            continue;
        }

        DataSchema dereferenced = resolved.getDereferencedDataSchema();
        boolean includable = resolved instanceof NamedDataSchema && dereferenced instanceof RecordDataSchema;
        if (!includable) {
            startErrorMessage(includeContext).append("Include is not a record type or a typeref to a record type: ").append(includeContext).append(NEWLINE);
            continue;
        }

        // Fields come from the dereferenced record, while the include list keeps the schema as
        // it was referenced (before dereferencing).
        NamedDataSchema namedInclude = (NamedDataSchema) resolved;
        inheritedFields.addAll(((RecordDataSchema) dereferenced).getFields());
        includedSchemas.add(namedInclude);
        if (isDeclaredInline(includeContext)) {
            inlineDeclaredIncludes.add(namedInclude);
        }
    }
    return new FieldsAndIncludes(inheritedFields, includedSchemas, inlineDeclaredIncludes);
}
Also used : NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) UnionDataSchema(com.linkedin.data.schema.UnionDataSchema) EnumDataSchema(com.linkedin.data.schema.EnumDataSchema) TyperefDataSchema(com.linkedin.data.schema.TyperefDataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayDataSchema(com.linkedin.data.schema.ArrayDataSchema) FixedDataSchema(com.linkedin.data.schema.FixedDataSchema) DataSchema(com.linkedin.data.schema.DataSchema) MapDataSchema(com.linkedin.data.schema.MapDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) Field(com.linkedin.data.schema.RecordDataSchema.Field) TypeAssignmentContext(com.linkedin.data.grammar.PdlParser.TypeAssignmentContext) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet)

Example 4 with NamedDataSchema

use of com.linkedin.data.schema.NamedDataSchema in project rest.li by linkedin.

the class PdlSchemaParser method parse.

/**
 * Parses a document's parse tree into its single top-level {@link DataSchema}.
 *
 * <p>The current namespace, package and imports are set from the document before the type
 * declaration is parsed: the namespace falls back to the empty string and the package to
 * {@code null} when the document does not declare them. The parsed schema is registered through
 * {@code addTopLevelSchema} before being returned.
 *
 * @param document provides the source code in AST form
 * @return the schema parsed from the document's type declaration
 * @throws ParseException if a named top-level type has a namespace different from the current
 *                        (file) namespace, or if the type declaration is neither named nor
 *                        anonymous
 */
private DataSchema parse(DocumentContext document) throws ParseException {
    PdlParser.NamespaceDeclarationContext namespaceContext = document.namespaceDeclaration();
    if (namespaceContext == null) {
        setCurrentNamespace("");
    } else {
        setCurrentNamespace(namespaceContext.qualifiedIdentifier().value);
    }

    if (document.packageDeclaration() == null) {
        setCurrentPackage(null);
    } else {
        setCurrentPackage(document.packageDeclaration().qualifiedIdentifier().value);
    }

    setCurrentImports(document.importDeclarations());

    TypeDeclarationContext declaration = document.typeDeclaration();
    DataSchema parsedSchema;
    if (declaration.namedTypeDeclaration() != null) {
        NamedDataSchema named = parseNamedType(declaration.namedTypeDeclaration());
        boolean inFileNamespace = named.getNamespace().equals(getCurrentNamespace());
        if (!inFileNamespace) {
            throw new ParseException(declaration, "Top level type declaration may not be qualified with a namespace different than the file namespace: " + declaration.getText());
        }
        parsedSchema = named;
    } else if (declaration.anonymousTypeDeclaration() != null) {
        parsedSchema = parseAnonymousType(declaration.anonymousTypeDeclaration());
    } else {
        throw new ParseException(declaration, "Unrecognized type declaration: " + declaration.getText());
    }

    addTopLevelSchema(parsedSchema);
    return parsedSchema;
}
Also used : UnionDataSchema(com.linkedin.data.schema.UnionDataSchema) EnumDataSchema(com.linkedin.data.schema.EnumDataSchema) TyperefDataSchema(com.linkedin.data.schema.TyperefDataSchema) RecordDataSchema(com.linkedin.data.schema.RecordDataSchema) ArrayDataSchema(com.linkedin.data.schema.ArrayDataSchema) FixedDataSchema(com.linkedin.data.schema.FixedDataSchema) DataSchema(com.linkedin.data.schema.DataSchema) MapDataSchema(com.linkedin.data.schema.MapDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) PdlParser(com.linkedin.data.grammar.PdlParser) TypeDeclarationContext(com.linkedin.data.grammar.PdlParser.TypeDeclarationContext) NamedTypeDeclarationContext(com.linkedin.data.grammar.PdlParser.NamedTypeDeclarationContext) AnonymousTypeDeclarationContext(com.linkedin.data.grammar.PdlParser.AnonymousTypeDeclarationContext)

Example 5 with NamedDataSchema

use of com.linkedin.data.schema.NamedDataSchema in project rest.li by linkedin.

the class Filters method removeByPredicate.

/**
 * Remove parts of a {@link NamedDataSchema} that match the specified predicate.
 *
 * This method obtains a {@link DataMap} representation of the {@link NamedDataSchema}
 * by invoking {@link com.linkedin.data.schema.util.Conversions#dataSchemaToDataMap}.
 * Then it performs a pre-order traversal of this {@link DataMap} and evaluates the
 * provided predicate on each Data object visited. If the predicate evaluates to true, the
 * matching Data object will be removed. After the {@link DataMap} has been traversed and
 * matching Data objects have been removed, the provided {@link PegasusSchemaParser} will be used
 * to parse the filtered {@link DataMap}. If there are no parsing errors, this method returns
 * the {@link NamedDataSchema} parsed from it.
 *
 * If nothing is left after removal, an error is appended to the parser's error message
 * builder and {@code null} is returned without attempting to parse. If there are parsing
 * errors, the errors may be obtained from the provided {@link PegasusSchemaParser}.
 *
 * @param schema provides the {@link NamedDataSchema} to be filtered.
 * @param predicate provides the {@link Predicate} to be evaluated.
 * @param parser provides the {@link PegasusSchemaParser} to be used to parse the filtered
 *               {@link DataMap}; it also receives any error messages.
 * @return a filtered {@link NamedDataSchema} if the filtered schema is valid, else return null.
 */
public static NamedDataSchema removeByPredicate(NamedDataSchema schema, Predicate predicate, PegasusSchemaParser parser) {
    DataMap schemaAsDataMap = dataSchemaToDataMap(schema);
    DataMap map = (DataMap) Builder.create(schemaAsDataMap, null, IterationOrder.PRE_ORDER).filterBy(predicate).remove();
    if (map == null) {
        // Nothing remains to parse. Report it and stop here rather than handing a null map to
        // the converter, which would either fail or add a second, misleading error.
        parser.errorMessageBuilder().append(NO_SCHEMA_LEFT);
        return null;
    }
    DataSchema resultDataSchema = dataMapToDataSchema(map, parser);
    if (resultDataSchema == null) {
        // Include the filtered map in the message so the invalid remainder can be inspected.
        parser.errorMessageBuilder().append(INVALID_SCHEMA_LEFT + map);
    }
    return (NamedDataSchema) resultDataSchema;
}
Also used : DataSchema(com.linkedin.data.schema.DataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) Conversions.dataMapToDataSchema(com.linkedin.data.schema.util.Conversions.dataMapToDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) DataMap(com.linkedin.data.DataMap) Conversions.dataSchemaToDataMap(com.linkedin.data.schema.util.Conversions.dataSchemaToDataMap)

Aggregations

NamedDataSchema (com.linkedin.data.schema.NamedDataSchema)46 DataSchema (com.linkedin.data.schema.DataSchema)25 RecordDataSchema (com.linkedin.data.schema.RecordDataSchema)16 ArrayDataSchema (com.linkedin.data.schema.ArrayDataSchema)12 MapDataSchema (com.linkedin.data.schema.MapDataSchema)11 TyperefDataSchema (com.linkedin.data.schema.TyperefDataSchema)11 UnionDataSchema (com.linkedin.data.schema.UnionDataSchema)11 DataMap (com.linkedin.data.DataMap)8 IOException (java.io.IOException)8 DataSchemaLocation (com.linkedin.data.schema.DataSchemaLocation)7 PrimitiveDataSchema (com.linkedin.data.schema.PrimitiveDataSchema)7 EnumDataSchema (com.linkedin.data.schema.EnumDataSchema)6 FixedDataSchema (com.linkedin.data.schema.FixedDataSchema)6 SchemaParser (com.linkedin.data.schema.SchemaParser)6 ComplexDataSchema (com.linkedin.data.schema.ComplexDataSchema)5 File (java.io.File)5 Test (org.testng.annotations.Test)5 CustomInfoSpec (com.linkedin.pegasus.generator.spec.CustomInfoSpec)4 RestLiInternalException (com.linkedin.restli.internal.server.RestLiInternalException)4 FileInputStream (java.io.FileInputStream)4