Search in sources :

Example 1 with NumberFeature

use of org.apache.stanbol.enhancer.nlp.morpho.NumberFeature in project stanbol by apache.

In class CeliLemmatizerEnhancementEngineTest, method validateMorphoFeatureProperty.

/**
 * Validates the morphological information attached to a TextAnnotation for
 * the input constant {@code TERM}.
 * <p>
 * Checks, in order: the {@code RDF_TYPE} values (part of speech, expected
 * {@link LexicalCategory#Noun}), the gender (expected {@link Gender#Feminine}),
 * the number (expected {@link NumberFeature#Singular}) and the lemma form
 * (expected to equal {@code TERM}). Part of speech, gender and number values
 * must be non-empty {@link IRI}s; they are only compared if they start with
 * {@code OLIA_NAMESPACE}. The lemma form must be a non-empty {@link Literal}.
 *
 * @param enhancements The graph with the enhancements
 * @param textAnnotation the TextAnnotation to check
 */
private void validateMorphoFeatureProperty(Graph enhancements, BlankNodeOrIRI textAnnotation) {
    // This test checks for known morpho features of a given input (constant TERM)
    Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, RDF_TYPE, null);
    assertTrue("No POS Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    // all type values are inspected, as only some of them may be in the OLIA namespace
    while (morphoFeatureIterator.hasNext()) {
        String key = oliaKeyOf(morphoFeatureIterator.next().getObject());
        if (key != null) {
            LexicalCategory cat = LexicalCategory.valueOf(key);
            assertTrue("Part of Speech of " + TERM + " should be " + LexicalCategory.Noun, (cat == LexicalCategory.Noun));
        }
    }

    // for gender and number only the first value is checked
    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_GENDER, null);
    assertTrue("No Gender Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    String genderKey = oliaKeyOf(morphoFeatureIterator.next().getObject());
    if (genderKey != null) {
        Gender gender = Gender.valueOf(genderKey);
        assertTrue("Gender of " + TERM + " should be " + Gender.Feminine, (gender == Gender.Feminine));
    }

    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_NUMBER, null);
    assertTrue("No Number Morpho Feature value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    String numberKey = oliaKeyOf(morphoFeatureIterator.next().getObject());
    if (numberKey != null) {
        NumberFeature number = NumberFeature.valueOf(numberKey);
        assertTrue("Number of " + TERM + " should be " + NumberFeature.Singular, (number == NumberFeature.Singular));
    }

    morphoFeatureIterator = enhancements.filter(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, null);
    assertTrue("No Lemma Form value found for TextAnnotation " + textAnnotation + "!", morphoFeatureIterator.hasNext());
    RDFTerm lemmaForm = morphoFeatureIterator.next().getObject();
    assertTrue("Lemma Forms value are expected of type Literal", lemmaForm instanceof Literal);
    String lemma = ((Literal) lemmaForm).getLexicalForm();
    assertFalse("Lemma forms MUST NOT be empty", lemma.isEmpty());
    assertTrue("Lemma of " + TERM + " should be " + TERM, lemma.equals(TERM));
}

/**
 * Asserts that the parsed morpho feature value is a non-empty {@link IRI} and
 * extracts the part following {@code OLIA_NAMESPACE}.
 *
 * @param morphoFeature the object of a morpho feature triple
 * @return the IRI with the {@code OLIA_NAMESPACE} prefix removed, or
 *         {@code null} if the IRI does not start with that namespace
 */
private String oliaKeyOf(RDFTerm morphoFeature) {
    assertTrue("Morpho Feature values are expected to be IRIs", morphoFeature instanceof IRI);
    String feature = ((IRI) morphoFeature).getUnicodeString();
    assertFalse("Morpho Feature MUST NOT be empty", feature.isEmpty());
    if (!feature.startsWith(OLIA_NAMESPACE)) {
        return null;
    }
    return feature.substring(OLIA_NAMESPACE.length());
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) NumberFeature(org.apache.stanbol.enhancer.nlp.morpho.NumberFeature) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Gender(org.apache.stanbol.enhancer.nlp.morpho.Gender) LexicalCategory(org.apache.stanbol.enhancer.nlp.pos.LexicalCategory)

Example 2 with NumberFeature

use of org.apache.stanbol.enhancer.nlp.morpho.NumberFeature in project stanbol by apache.

In class MorphoFeaturesSupport, method parse.

/**
 * Parses a {@link MorphoFeatures} instance from its JSON representation.
 * <p>
 * The field {@code lemma} is required and must be textual. The fields
 * {@code case}, {@code gender}, {@code number}, {@code pos}, {@code tense}
 * and {@code verb-mood} are optional and, if present, expected to be JSON
 * arrays of objects; {@code definitness} and {@code person} are read via
 * {@code JsonUtils.parseEnum}. Array fields of an unexpected type and
 * array members that cannot be parsed are reported with a warning and
 * skipped.
 *
 * @param jMorpho the JSON object to parse
 * @param at the analysed text, passed on to the PosTag parser
 * @return the parsed morpho features
 * @throws IllegalStateException if the field {@code lemma} is missing or
 *         is not a String value
 */
@Override
public MorphoFeatures parse(ObjectNode jMorpho, AnalysedText at) {
    JsonNode jLemma = jMorpho.path("lemma");
    if (!jLemma.isTextual()) {
        throw new IllegalStateException("Field 'lemma' MUST provide a String value (parsed JSON: " + jMorpho);
    }
    MorphoFeatures morpho = new MorphoFeatures(jLemma.asText());

    // --- case ---
    JsonNode node = jMorpho.path("case");
    if (node.isArray()) {
        ArrayNode jCases = (ArrayNode) node;
        for (int i = 0; i < jCases.size(); i++) {
            JsonNode member = jCases.get(i);
            if (member.isObject()) {
                ObjectNode jCase = (ObjectNode) member;
                JsonNode tag = jCase.path("tag");
                if (tag.isTextual()) {
                    // 'type' is optional; if several values are present only the first is used
                    EnumSet<Case> type = JsonUtils.parseEnum(jCase, "type", Case.class);
                    if (type.isEmpty()) {
                        morpho.addCase(new CaseTag(tag.getTextValue()));
                    } else {
                        morpho.addCase(new CaseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse CaseTag becuase 'tag' value is " + "missing or is not a String (json: " + jCase.toString() + ")");
                }
            } else {
                log.warn("Unable to parse CaseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        // field present but not an array
        log.warn("Unable to parse CaseTags (Json Array expected as value for field 'case' but was " + node);
    }

    // --- definitness ---
    if (jMorpho.has("definitness")) {
        for (Definitness d : JsonUtils.parseEnum(jMorpho, "definitness", Definitness.class)) {
            morpho.addDefinitness(d);
        }
    }

    // --- gender ---
    node = jMorpho.path("gender");
    if (node.isArray()) {
        ArrayNode jGenders = (ArrayNode) node;
        for (int i = 0; i < jGenders.size(); i++) {
            JsonNode member = jGenders.get(i);
            if (member.isObject()) {
                ObjectNode jGender = (ObjectNode) member;
                JsonNode tag = jGender.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Gender> type = JsonUtils.parseEnum(jGender, "type", Gender.class);
                    if (type.isEmpty()) {
                        morpho.addGender(new GenderTag(tag.getTextValue()));
                    } else {
                        morpho.addGender(new GenderTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse GenderTag becuase 'tag' value is " + "missing or is not a String (json: " + jGender.toString() + ")");
                }
            } else {
                log.warn("Unable to parse GenderTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse GenderTag (Json Array expected as value for field 'gender' but was " + node);
    }

    // --- number ---
    node = jMorpho.path("number");
    if (node.isArray()) {
        ArrayNode jNumbers = (ArrayNode) node;
        for (int i = 0; i < jNumbers.size(); i++) {
            JsonNode member = jNumbers.get(i);
            if (member.isObject()) {
                ObjectNode jNumber = (ObjectNode) member;
                JsonNode tag = jNumber.path("tag");
                if (tag.isTextual()) {
                    EnumSet<NumberFeature> type = JsonUtils.parseEnum(jNumber, "type", NumberFeature.class);
                    if (type.isEmpty()) {
                        morpho.addNumber(new NumberTag(tag.getTextValue()));
                    } else {
                        morpho.addNumber(new NumberTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse NumberTag becuase 'tag' value is " + "missing or is not a String (json: " + jNumber.toString() + ")");
                }
            } else {
                log.warn("Unable to parse NumberTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse NumberTag (Json Array expected as value for field 'number' but was " + node);
    }

    // --- person ---
    if (jMorpho.has("person")) {
        for (Person p : JsonUtils.parseEnum(jMorpho, "person", Person.class)) {
            morpho.addPerson(p);
        }
    }

    // --- pos (delegated to the PosTag parser) ---
    node = jMorpho.path("pos");
    if (node.isArray()) {
        ArrayNode jPosTags = (ArrayNode) node;
        for (int i = 0; i < jPosTags.size(); i++) {
            JsonNode member = jPosTags.get(i);
            if (member.isObject()) {
                ObjectNode jPosTag = (ObjectNode) member;
                morpho.addPos(getPosTagParser().parse(jPosTag, at));
            } else {
                log.warn("Unable to parse PosTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse PosTag (Json Array expected as value for field 'pos' but was " + node);
    }

    // --- tense ---
    node = jMorpho.path("tense");
    if (node.isArray()) {
        ArrayNode jTenses = (ArrayNode) node;
        for (int i = 0; i < jTenses.size(); i++) {
            JsonNode member = jTenses.get(i);
            if (member.isObject()) {
                ObjectNode jTense = (ObjectNode) member;
                JsonNode tag = jTense.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Tense> type = JsonUtils.parseEnum(jTense, "type", Tense.class);
                    if (type.isEmpty()) {
                        morpho.addTense(new TenseTag(tag.getTextValue()));
                    } else {
                        morpho.addTense(new TenseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse TenseTag becuase 'tag' value is " + "missing or is not a String (json: " + jTense.toString() + ")");
                }
            } else {
                log.warn("Unable to parse TenseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse TenseTag (Json Array expected as value for field 'tense' but was " + node);
    }

    // --- verb-mood ---
    node = jMorpho.path("verb-mood");
    if (node.isArray()) {
        ArrayNode jVerbMoods = (ArrayNode) node;
        for (int i = 0; i < jVerbMoods.size(); i++) {
            JsonNode member = jVerbMoods.get(i);
            if (member.isObject()) {
                ObjectNode jVerbMood = (ObjectNode) member;
                JsonNode tag = jVerbMood.path("tag");
                if (tag.isTextual()) {
                    EnumSet<VerbMood> type = JsonUtils.parseEnum(jVerbMood, "type", VerbMood.class);
                    if (type.isEmpty()) {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue()));
                    } else {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse VerbMoodTag becuase 'tag' value is " + "missing or is not a String (json: " + jVerbMood.toString() + ")");
                }
            } else {
                log.warn("Unable to parse VerbMoodTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse VerbMoodTag (Json Array expected as value for field 'verb-mood' but was " + node);
    }
    return morpho;
}
Also used : Tense(org.apache.stanbol.enhancer.nlp.morpho.Tense) CaseTag(org.apache.stanbol.enhancer.nlp.morpho.CaseTag) ObjectNode(org.codehaus.jackson.node.ObjectNode) Definitness(org.apache.stanbol.enhancer.nlp.morpho.Definitness) JsonNode(org.codehaus.jackson.JsonNode) VerbMoodTag(org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag) Gender(org.apache.stanbol.enhancer.nlp.morpho.Gender) Case(org.apache.stanbol.enhancer.nlp.morpho.Case) NumberFeature(org.apache.stanbol.enhancer.nlp.morpho.NumberFeature) NumberTag(org.apache.stanbol.enhancer.nlp.morpho.NumberTag) VerbMood(org.apache.stanbol.enhancer.nlp.morpho.VerbMood) ArrayNode(org.codehaus.jackson.node.ArrayNode) TenseTag(org.apache.stanbol.enhancer.nlp.morpho.TenseTag) MorphoFeatures(org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures) Person(org.apache.stanbol.enhancer.nlp.morpho.Person) GenderTag(org.apache.stanbol.enhancer.nlp.morpho.GenderTag)

Aggregations

Gender (org.apache.stanbol.enhancer.nlp.morpho.Gender)2 NumberFeature (org.apache.stanbol.enhancer.nlp.morpho.NumberFeature)2 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)1 IRI (org.apache.clerezza.commons.rdf.IRI)1 Literal (org.apache.clerezza.commons.rdf.Literal)1 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)1 Triple (org.apache.clerezza.commons.rdf.Triple)1 Case (org.apache.stanbol.enhancer.nlp.morpho.Case)1 CaseTag (org.apache.stanbol.enhancer.nlp.morpho.CaseTag)1 Definitness (org.apache.stanbol.enhancer.nlp.morpho.Definitness)1 GenderTag (org.apache.stanbol.enhancer.nlp.morpho.GenderTag)1 MorphoFeatures (org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures)1 NumberTag (org.apache.stanbol.enhancer.nlp.morpho.NumberTag)1 Person (org.apache.stanbol.enhancer.nlp.morpho.Person)1 Tense (org.apache.stanbol.enhancer.nlp.morpho.Tense)1 TenseTag (org.apache.stanbol.enhancer.nlp.morpho.TenseTag)1 VerbMood (org.apache.stanbol.enhancer.nlp.morpho.VerbMood)1 VerbMoodTag (org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag)1 LexicalCategory (org.apache.stanbol.enhancer.nlp.pos.LexicalCategory)1 JsonNode (org.codehaus.jackson.JsonNode)1