Search in sources :

Example 1 with NumberTag

use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by apache.

the class AnalyzedTextSerializerAndParserTest method setup.

/**
 * Creates the shared fixtures used by the tests of this class: the content
 * item, its "text/plain" blob and an analysed text that is populated with one
 * sentence, three tokens ("The", "Stanbol", "enhancer") and one chunk
 * ("Stanbol enhancer"), each carrying annotations.
 * <p>
 * Every span added here is also registered, together with the text it is
 * expected to cover, in {@code expectedSentences}, {@code expectedTokens} or
 * {@code expectedChunks}.
 *
 * @throws IOException declared by the signature; presumably raised while the
 *         content item or its blob is created (not verifiable from this method)
 */
@BeforeClass
public static final void setup() throws IOException {
    ci = ciFactory.createContentItem(new StringSource(text));
    textBlob = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    analysedTextWithData = createAnalysedText();

    // The first sentence spans from offset 0 up to and including the first '.'
    int firstSentenceEnd = text.indexOf('.') + 1;
    Sentence firstSentence = analysedTextWithData.addSentence(0, firstSentenceEnd);
    expectedSentences.put(firstSentence, "The Stanbol enhancer can detect famous " + "cities such as Paris and people such as Bob Marley.");

    // Token "The": created from fixed offsets, one POS annotation
    Token theToken = firstSentence.addToken(0, 3);
    expectedTokens.put(theToken, "The");
    theToken.addAnnotation(NlpAnnotations.POS_ANNOTATION,
            Value.value(new PosTag("PREP", Pos.Preposition), 0.85));

    // Token "Stanbol": created from fixed offsets, POS and sentiment annotations
    Token stanbolToken = firstSentence.addToken(4, 11);
    expectedTokens.put(stanbolToken, "Stanbol");
    stanbolToken.addAnnotation(NlpAnnotations.POS_ANNOTATION,
            Value.value(new PosTag("PN", Pos.ProperNoun), 0.95));
    stanbolToken.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, Value.value(0.5));

    // Token "enhancer": offsets are looked up in the sentence text instead of
    // being hard coded
    String enhancerWord = "enhancer";
    int enhancerBegin = firstSentence.getSpan().indexOf(enhancerWord);
    int enhancerEnd = enhancerBegin + enhancerWord.length();
    Token enhancerToken = firstSentence.addToken(enhancerBegin, enhancerEnd);
    expectedTokens.put(enhancerToken, enhancerWord);
    // two POS annotations on the same token
    enhancerToken.addAnnotation(NlpAnnotations.POS_ANNOTATION,
            Value.value(new PosTag("PN", Pos.ProperNoun), 0.95));
    enhancerToken.addAnnotation(NlpAnnotations.POS_ANNOTATION,
            Value.value(new PosTag("N", LexicalCategory.Noun), 0.87));

    // Morphological features for "enhancer" covering every feature kind
    MorphoFeatures enhancerMorpho = new MorphoFeatures("enhance");
    enhancerMorpho.addCase(new CaseTag("test-case-1", Case.Comitative));
    enhancerMorpho.addCase(new CaseTag("test-case-2", Case.Abessive));
    enhancerMorpho.addDefinitness(Definitness.Definite);
    enhancerMorpho.addPerson(Person.First);
    enhancerMorpho.addPos(new PosTag("PN", Pos.ProperNoun));
    enhancerMorpho.addGender(new GenderTag("test-gender", Gender.Masculine));
    enhancerMorpho.addNumber(new NumberTag("test-number", NumberFeature.Plural));
    enhancerMorpho.addTense(new TenseTag("test-tense", Tense.Present));
    enhancerMorpho.addVerbForm(new VerbMoodTag("test-verb-mood", VerbMood.ConditionalVerb));
    enhancerToken.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, Value.value(enhancerMorpho));

    // Chunk reaching from the start of "Stanbol" to the end of "enhancer"
    Chunk stanbolEnhancerChunk = analysedTextWithData.addChunk(
            stanbolToken.getStart(), enhancerToken.getEnd());
    expectedChunks.put(stanbolEnhancerChunk, "Stanbol enhancer");
    stanbolEnhancerChunk.addAnnotation(NlpAnnotations.NER_ANNOTATION,
            Value.value(new NerTag("organization", DBPEDIA_ORGANISATION)));
    stanbolEnhancerChunk.addAnnotation(NlpAnnotations.PHRASE_ANNOTATION,
            Value.value(new PhraseTag("NP", LexicalCategory.Noun), 0.98));
}
Also used : CaseTag(org.apache.stanbol.enhancer.nlp.morpho.CaseTag) NerTag(org.apache.stanbol.enhancer.nlp.ner.NerTag) Token(org.apache.stanbol.enhancer.nlp.model.Token) VerbMoodTag(org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag) Chunk(org.apache.stanbol.enhancer.nlp.model.Chunk) PhraseTag(org.apache.stanbol.enhancer.nlp.phrase.PhraseTag) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) NumberTag(org.apache.stanbol.enhancer.nlp.morpho.NumberTag) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TenseTag(org.apache.stanbol.enhancer.nlp.morpho.TenseTag) MorphoFeatures(org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures) Sentence(org.apache.stanbol.enhancer.nlp.model.Sentence) GenderTag(org.apache.stanbol.enhancer.nlp.morpho.GenderTag) BeforeClass(org.junit.BeforeClass)

Example 2 with NumberTag

use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by apache.

the class MorphoFeaturesSupport method serialize.

/**
 * Converts the parsed {@link MorphoFeatures} into a JSON object.
 * <p>
 * The field "lemma" is always written. The fields "case", "gender", "number",
 * "tense" and "verb-mood" are written as arrays of objects with a "tag" and,
 * if the tag is mapped to a feature, a "type" holding the enum name. The field
 * "pos" is an array produced by the PosTag serializer. The fields
 * "definitness" and "person" hold a single enum name if there is exactly one
 * value and an array of names otherwise. Fields for empty lists are omitted.
 *
 * @param mapper the mapper used to create the JSON nodes
 * @param morpho the features to serialize
 * @return the JSON object representing the parsed features
 */
@Override
public ObjectNode serialize(ObjectMapper mapper, MorphoFeatures morpho) {
    ObjectNode json = mapper.createObjectNode();
    json.put("lemma", morpho.getLemma());

    // --- case ---
    List<CaseTag> cases = morpho.getCaseList();
    if (!cases.isEmpty()) {
        ArrayNode caseArray = mapper.createArrayNode();
        for (CaseTag current : cases) {
            ObjectNode entry = mapper.createObjectNode();
            entry.put("tag", current.getTag());
            if (current.getCase() != null) {
                // only mapped tags provide a type
                entry.put("type", current.getCase().name());
            }
            caseArray.add(entry);
        }
        json.put("case", caseArray);
    }

    // --- definitness: plain value for one entry, array for several ---
    List<Definitness> definitnessValues = morpho.getDefinitnessList();
    if (definitnessValues.size() == 1) {
        json.put("definitness", definitnessValues.get(0).name());
    } else if (!definitnessValues.isEmpty()) {
        ArrayNode definitnessArray = mapper.createArrayNode();
        for (Definitness value : definitnessValues) {
            definitnessArray.add(value.name());
        }
        json.put("definitness", definitnessArray);
    }

    // --- gender ---
    List<GenderTag> genders = morpho.getGenderList();
    if (!genders.isEmpty()) {
        ArrayNode genderArray = mapper.createArrayNode();
        for (GenderTag current : genders) {
            ObjectNode entry = mapper.createObjectNode();
            entry.put("tag", current.getTag());
            if (current.getGender() != null) {
                entry.put("type", current.getGender().name());
            }
            genderArray.add(entry);
        }
        json.put("gender", genderArray);
    }

    // --- number ---
    List<NumberTag> numbers = morpho.getNumberList();
    if (!numbers.isEmpty()) {
        ArrayNode numberArray = mapper.createArrayNode();
        for (NumberTag current : numbers) {
            ObjectNode entry = mapper.createObjectNode();
            entry.put("tag", current.getTag());
            if (current.getNumber() != null) {
                entry.put("type", current.getNumber().name());
            }
            numberArray.add(entry);
        }
        json.put("number", numberArray);
    }

    // --- person: plain value for one entry, array for several ---
    List<Person> personValues = morpho.getPersonList();
    if (personValues.size() == 1) {
        json.put("person", personValues.get(0).name());
    } else if (!personValues.isEmpty()) {
        ArrayNode personArray = mapper.createArrayNode();
        for (Person value : personValues) {
            personArray.add(value.name());
        }
        json.put("person", personArray);
    }

    // --- pos: delegated to the PosTag serializer ---
    List<PosTag> posTags = morpho.getPosList();
    if (!posTags.isEmpty()) {
        ArrayNode posArray = mapper.createArrayNode();
        for (PosTag current : posTags) {
            posArray.add(getPosTagSerializer().serialize(mapper, current));
        }
        json.put("pos", posArray);
    }

    // --- tense ---
    List<TenseTag> tenses = morpho.getTenseList();
    if (!tenses.isEmpty()) {
        ArrayNode tenseArray = mapper.createArrayNode();
        for (TenseTag current : tenses) {
            ObjectNode entry = mapper.createObjectNode();
            entry.put("tag", current.getTag());
            if (current.getTense() != null) {
                entry.put("type", current.getTense().name());
            }
            tenseArray.add(entry);
        }
        json.put("tense", tenseArray);
    }

    // --- verb-mood ---
    List<VerbMoodTag> verbMoods = morpho.getVerbMoodList();
    if (!verbMoods.isEmpty()) {
        ArrayNode verbMoodArray = mapper.createArrayNode();
        for (VerbMoodTag current : verbMoods) {
            ObjectNode entry = mapper.createObjectNode();
            entry.put("tag", current.getTag());
            if (current.getVerbForm() != null) {
                entry.put("type", current.getVerbForm().name());
            }
            verbMoodArray.add(entry);
        }
        json.put("verb-mood", verbMoodArray);
    }
    return json;
}
Also used : CaseTag(org.apache.stanbol.enhancer.nlp.morpho.CaseTag) ObjectNode(org.codehaus.jackson.node.ObjectNode) Definitness(org.apache.stanbol.enhancer.nlp.morpho.Definitness) VerbMoodTag(org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) NumberTag(org.apache.stanbol.enhancer.nlp.morpho.NumberTag) ArrayNode(org.codehaus.jackson.node.ArrayNode) TenseTag(org.apache.stanbol.enhancer.nlp.morpho.TenseTag) Person(org.apache.stanbol.enhancer.nlp.morpho.Person) GenderTag(org.apache.stanbol.enhancer.nlp.morpho.GenderTag)

Example 3 with NumberTag

use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by apache.

the class MorphoFeaturesSupport method parse.

/**
 * Reads a {@link MorphoFeatures} instance from its JSON representation.
 * <p>
 * The field "lemma" is required and must be textual. The fields "case",
 * "gender", "number", "tense" and "verb-mood" are expected to be arrays of
 * objects with a textual "tag" and an optional "type"; "pos" is expected to be
 * an array of objects that are handed to the PosTag parser; "definitness" and
 * "person" are parsed as enum values. Array members that cannot be parsed, and
 * fields that are present but not arrays, are reported with a warning and
 * skipped. Each warning names the field it is about.
 *
 * @param jMorpho the JSON object to parse
 * @param at the analysed text, passed on to the PosTag parser
 * @return the parsed features
 * @throws IllegalStateException if the field "lemma" is missing or not a String
 */
@Override
public MorphoFeatures parse(ObjectNode jMorpho, AnalysedText at) {
    JsonNode jLemma = jMorpho.path("lemma");
    if (!jLemma.isTextual()) {
        throw new IllegalStateException("Field 'lemma' MUST provide a String value (parsed JSON: " + jMorpho + ")");
    }
    MorphoFeatures morpho = new MorphoFeatures(jLemma.asText());
    JsonNode node = jMorpho.path("case");
    if (node.isArray()) {
        ArrayNode jCases = (ArrayNode) node;
        for (int i = 0; i < jCases.size(); i++) {
            JsonNode member = jCases.get(i);
            if (member.isObject()) {
                ObjectNode jCase = (ObjectNode) member;
                JsonNode tag = jCase.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Case> type = JsonUtils.parseEnum(jCase, "type", Case.class);
                    if (type.isEmpty()) {
                        // tag without a mapped feature
                        morpho.addCase(new CaseTag(tag.getTextValue()));
                    } else {
                        // only the first parsed type is used
                        morpho.addCase(new CaseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse CaseTag because 'tag' value is " + "missing or is not a String (json: " + jCase.toString() + ")");
                }
            } else {
                log.warn("Unable to parse CaseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        // field is present but has an unexpected JSON type
        log.warn("Unable to parse CaseTags (Json Array expected as value for field 'case' but was " + node + ")");
    }
    if (jMorpho.has("definitness")) {
        for (Definitness d : JsonUtils.parseEnum(jMorpho, "definitness", Definitness.class)) {
            morpho.addDefinitness(d);
        }
    }
    node = jMorpho.path("gender");
    if (node.isArray()) {
        ArrayNode jGenders = (ArrayNode) node;
        for (int i = 0; i < jGenders.size(); i++) {
            JsonNode member = jGenders.get(i);
            if (member.isObject()) {
                ObjectNode jGender = (ObjectNode) member;
                JsonNode tag = jGender.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Gender> type = JsonUtils.parseEnum(jGender, "type", Gender.class);
                    if (type.isEmpty()) {
                        morpho.addGender(new GenderTag(tag.getTextValue()));
                    } else {
                        morpho.addGender(new GenderTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse GenderTag because 'tag' value is " + "missing or is not a String (json: " + jGender.toString() + ")");
                }
            } else {
                log.warn("Unable to parse GenderTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse GenderTag (Json Array expected as value for field 'gender' but was " + node + ")");
    }
    node = jMorpho.path("number");
    if (node.isArray()) {
        ArrayNode jNumbers = (ArrayNode) node;
        for (int i = 0; i < jNumbers.size(); i++) {
            JsonNode member = jNumbers.get(i);
            if (member.isObject()) {
                ObjectNode jNumber = (ObjectNode) member;
                JsonNode tag = jNumber.path("tag");
                if (tag.isTextual()) {
                    EnumSet<NumberFeature> type = JsonUtils.parseEnum(jNumber, "type", NumberFeature.class);
                    if (type.isEmpty()) {
                        morpho.addNumber(new NumberTag(tag.getTextValue()));
                    } else {
                        morpho.addNumber(new NumberTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse NumberTag because 'tag' value is " + "missing or is not a String (json: " + jNumber.toString() + ")");
                }
            } else {
                log.warn("Unable to parse NumberTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse NumberTag (Json Array expected as value for field 'number' but was " + node + ")");
    }
    if (jMorpho.has("person")) {
        for (Person p : JsonUtils.parseEnum(jMorpho, "person", Person.class)) {
            morpho.addPerson(p);
        }
    }
    node = jMorpho.path("pos");
    if (node.isArray()) {
        ArrayNode jPosTags = (ArrayNode) node;
        for (int i = 0; i < jPosTags.size(); i++) {
            JsonNode member = jPosTags.get(i);
            if (member.isObject()) {
                ObjectNode jPosTag = (ObjectNode) member;
                // PosTags are parsed by the dedicated PosTag parser
                morpho.addPos(getPosTagParser().parse(jPosTag, at));
            } else {
                log.warn("Unable to parse PosTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse PosTag (Json Array expected as value for field 'pos' but was " + node + ")");
    }
    node = jMorpho.path("tense");
    if (node.isArray()) {
        ArrayNode jTenses = (ArrayNode) node;
        for (int i = 0; i < jTenses.size(); i++) {
            JsonNode member = jTenses.get(i);
            if (member.isObject()) {
                ObjectNode jTense = (ObjectNode) member;
                JsonNode tag = jTense.path("tag");
                if (tag.isTextual()) {
                    EnumSet<Tense> type = JsonUtils.parseEnum(jTense, "type", Tense.class);
                    if (type.isEmpty()) {
                        morpho.addTense(new TenseTag(tag.getTextValue()));
                    } else {
                        morpho.addTense(new TenseTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse TenseTag because 'tag' value is " + "missing or is not a String (json: " + jTense.toString() + ")");
                }
            } else {
                log.warn("Unable to parse TenseTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse TenseTag (Json Array expected as value for field 'tense' but was " + node + ")");
    }
    node = jMorpho.path("verb-mood");
    if (node.isArray()) {
        ArrayNode jVerbMoods = (ArrayNode) node;
        for (int i = 0; i < jVerbMoods.size(); i++) {
            JsonNode member = jVerbMoods.get(i);
            if (member.isObject()) {
                ObjectNode jVerbMood = (ObjectNode) member;
                JsonNode tag = jVerbMood.path("tag");
                if (tag.isTextual()) {
                    EnumSet<VerbMood> type = JsonUtils.parseEnum(jVerbMood, "type", VerbMood.class);
                    if (type.isEmpty()) {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue()));
                    } else {
                        morpho.addVerbForm(new VerbMoodTag(tag.getTextValue(), type.iterator().next()));
                    }
                } else {
                    log.warn("Unable to parse VerbMoodTag because 'tag' value is " + "missing or is not a String (json: " + jVerbMood.toString() + ")");
                }
            } else {
                log.warn("Unable to parse VerbMoodTag from " + member.toString());
            }
        }
    } else if (!node.isMissingNode()) {
        log.warn("Unable to parse VerbMoodTag (Json Array expected as value for field 'verb-mood' but was " + node + ")");
    }
    return morpho;
}
Also used : Tense(org.apache.stanbol.enhancer.nlp.morpho.Tense) CaseTag(org.apache.stanbol.enhancer.nlp.morpho.CaseTag) ObjectNode(org.codehaus.jackson.node.ObjectNode) Definitness(org.apache.stanbol.enhancer.nlp.morpho.Definitness) JsonNode(org.codehaus.jackson.JsonNode) VerbMoodTag(org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag) Gender(org.apache.stanbol.enhancer.nlp.morpho.Gender) Case(org.apache.stanbol.enhancer.nlp.morpho.Case) NumberFeature(org.apache.stanbol.enhancer.nlp.morpho.NumberFeature) NumberTag(org.apache.stanbol.enhancer.nlp.morpho.NumberTag) VerbMood(org.apache.stanbol.enhancer.nlp.morpho.VerbMood) ArrayNode(org.codehaus.jackson.node.ArrayNode) TenseTag(org.apache.stanbol.enhancer.nlp.morpho.TenseTag) MorphoFeatures(org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures) Person(org.apache.stanbol.enhancer.nlp.morpho.Person) GenderTag(org.apache.stanbol.enhancer.nlp.morpho.GenderTag)

Example 4 with NumberTag

use of org.apache.stanbol.enhancer.nlp.morpho.NumberTag in project stanbol by apache.

the class CeliMorphoFeatures method featuresAsTriples.

/**
 * Converts the morphological features of this instance into triples that all
 * use the parsed text annotation as subject.
 * <p>
 * A triple for the lemma (as plain literal in the parsed language) is always
 * added. For POS tags only mapped tags contribute, with one {@code RDF_TYPE}
 * triple per lexical category. Number, gender, case, verb mood and tense tags
 * contribute only if they are mapped to a feature (the feature is not
 * {@code null}). Person and definitness values always contribute.
 *
 * @param textAnnotation the subject used for all created triples
 * @param lang the language used for the lemma literal
 * @return the created triples
 */
public Collection<? extends Triple> featuresAsTriples(IRI textAnnotation, Language lang) {
    // The collection is only filled by this call, so the synchronization of the
    // legacy java.util.Vector is not needed. ArrayList is fully qualified so
    // that no additional import is required.
    Collection<TripleImpl> result = new java.util.ArrayList<TripleImpl>();
    result.add(new TripleImpl(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, new PlainLiteralImpl(getLemma(), lang)));
    for (PosTag pos : getPosList()) {
        if (pos.isMapped()) {
            for (LexicalCategory cat : pos.getCategories()) {
                result.add(new TripleImpl(textAnnotation, RDF_TYPE, cat.getUri()));
            }
        }
    }
    for (NumberTag num : getNumberList()) {
        if (num.getNumber() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_NUMBER, num.getNumber().getUri()));
        }
    }
    for (Person pers : getPersonList()) {
        result.add(new TripleImpl(textAnnotation, HAS_PERSON, pers.getUri()));
    }
    for (GenderTag gender : getGenderList()) {
        if (gender.getGender() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_GENDER, gender.getGender().getUri()));
        }
    }
    for (Definitness def : getDefinitnessList()) {
        result.add(new TripleImpl(textAnnotation, HAS_DEFINITENESS, def.getUri()));
    }
    for (CaseTag caseFeat : getCaseList()) {
        if (caseFeat.getCase() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_CASE, caseFeat.getCase().getUri()));
        }
    }
    for (VerbMoodTag vf : getVerbMoodList()) {
        if (vf.getVerbForm() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_MOOD, vf.getVerbForm().getUri()));
        }
    }
    for (TenseTag tense : getTenseList()) {
        if (tense.getTense() != null) {
            result.add(new TripleImpl(textAnnotation, HAS_TENSE, tense.getTense().getUri()));
        }
    }
    return result;
}
Also used : CaseTag(org.apache.stanbol.enhancer.nlp.morpho.CaseTag) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Definitness(org.apache.stanbol.enhancer.nlp.morpho.Definitness) VerbMoodTag(org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag) LexicalCategory(org.apache.stanbol.enhancer.nlp.pos.LexicalCategory) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) NumberTag(org.apache.stanbol.enhancer.nlp.morpho.NumberTag) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) TenseTag(org.apache.stanbol.enhancer.nlp.morpho.TenseTag) Vector(java.util.Vector) Person(org.apache.stanbol.enhancer.nlp.morpho.Person) GenderTag(org.apache.stanbol.enhancer.nlp.morpho.GenderTag)

Aggregations

CaseTag (org.apache.stanbol.enhancer.nlp.morpho.CaseTag)4 GenderTag (org.apache.stanbol.enhancer.nlp.morpho.GenderTag)4 NumberTag (org.apache.stanbol.enhancer.nlp.morpho.NumberTag)4 TenseTag (org.apache.stanbol.enhancer.nlp.morpho.TenseTag)4 VerbMoodTag (org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag)4 Definitness (org.apache.stanbol.enhancer.nlp.morpho.Definitness)3 Person (org.apache.stanbol.enhancer.nlp.morpho.Person)3 PosTag (org.apache.stanbol.enhancer.nlp.pos.PosTag)3 MorphoFeatures (org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures)2 ArrayNode (org.codehaus.jackson.node.ArrayNode)2 ObjectNode (org.codehaus.jackson.node.ObjectNode)2 Vector (java.util.Vector)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)1 Chunk (org.apache.stanbol.enhancer.nlp.model.Chunk)1 Sentence (org.apache.stanbol.enhancer.nlp.model.Sentence)1 Token (org.apache.stanbol.enhancer.nlp.model.Token)1 Case (org.apache.stanbol.enhancer.nlp.morpho.Case)1 Gender (org.apache.stanbol.enhancer.nlp.morpho.Gender)1 NumberFeature (org.apache.stanbol.enhancer.nlp.morpho.NumberFeature)1