Search in sources :

Example 6 with PhraseTag

use of org.apache.stanbol.enhancer.nlp.phrase.PhraseTag in project stanbol by apache.

the class EntityCoReferenceEngine method extractNersAndNounPhrases.

/**
 * Collects, sentence by sentence, the NER chunks and the noun phrases found in the
 * {@link AnalysedText} of the given content item and adds them to the given collections.
 * <p>
 * Sentences are numbered in iteration order starting at 1. If the analysed text reports
 * no sentences, the whole text is processed as one single sentence (number 1).
 *
 * @param ci the content item whose analysed text is processed
 * @param ners output map: for every sentence that contains at least one chunk with a
 *        {@link NlpAnnotations#NER_ANNOTATION}, the sentence number is mapped to the list
 *        of those chunks. Sentences without such chunks get no entry.
 * @param nounPhrases output list: receives one {@link NounPhrase} for every chunk whose
 *        {@link NlpAnnotations#PHRASE_ANNOTATION} has the category
 *        {@link LexicalCategory#Noun}. Each noun phrase is filled with the tokens and
 *        the NER chunks of its sentence for which {@code containsSpan} returns true.
 */
private void extractNersAndNounPhrases(ContentItem ci, Map<Integer, List<Span>> ners, List<NounPhrase> nounPhrases) {
    AnalysedText at = NlpEngineHelper.getAnalysedText(this, ci, true);
    Iterator<? extends Section> sections = at.getSentences();
    if (!sections.hasNext()) {
        // process as single sentence
        sections = Collections.singleton(at).iterator();
    }
    int sentenceCnt = 0;
    while (sections.hasNext()) {
        sentenceCnt++;
        Section section = sections.next();
        List<NounPhrase> sectionNounPhrases = new ArrayList<NounPhrase>();
        List<Span> sectionNers = new ArrayList<Span>();
        // First pass: classify the chunks of this section. A chunk may be both
        // a NER and a noun phrase, so the two checks are independent.
        Iterator<Span> chunks = section.getEnclosed(EnumSet.of(SpanTypeEnum.Chunk));
        while (chunks.hasNext()) {
            Span chunk = chunks.next();
            Value<NerTag> ner = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION);
            if (ner != null) {
                sectionNers.add(chunk);
            }
            Value<PhraseTag> phrase = chunk.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION);
            if (phrase != null && phrase.value().getCategory() == LexicalCategory.Noun) {
                sectionNounPhrases.add(new NounPhrase(chunk, sentenceCnt));
            }
        }
        if (!sectionNounPhrases.isEmpty()) {
            // The tokens of the section are the same for every noun phrase, so read
            // them once instead of re-iterating the section for each noun phrase.
            List<Span> sectionTokens = new ArrayList<Span>();
            Iterator<Span> tokens = section.getEnclosed(EnumSet.of(SpanTypeEnum.Token));
            while (tokens.hasNext()) {
                sectionTokens.add(tokens.next());
            }
            // Second pass: attach the contained tokens and NER chunks to each noun phrase.
            for (NounPhrase nounPhrase : sectionNounPhrases) {
                for (Span token : sectionTokens) {
                    if (nounPhrase.containsSpan(token)) {
                        nounPhrase.addToken(token);
                    }
                }
                for (Span sectionNer : sectionNers) {
                    if (nounPhrase.containsSpan(sectionNer)) {
                        nounPhrase.addNerChunk(sectionNer);
                    }
                }
            }
        }
        nounPhrases.addAll(sectionNounPhrases);
        if (!sectionNers.isEmpty()) {
            ners.put(sentenceCnt, sectionNers);
        }
    }
}
Also used : NerTag(org.apache.stanbol.enhancer.nlp.ner.NerTag) NounPhrase(org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase) ArrayList(java.util.ArrayList) PhraseTag(org.apache.stanbol.enhancer.nlp.phrase.PhraseTag) Section(org.apache.stanbol.enhancer.nlp.model.Section) Span(org.apache.stanbol.enhancer.nlp.model.Span) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText)

Example 7 with PhraseTag

use of org.apache.stanbol.enhancer.nlp.phrase.PhraseTag in project stanbol by apache.

the class OpenNlpChunkingEngine method getPhraseTag.

/**
 * Resolves the {@link PhraseTag} for the given tag string.
 * <p>
 * Lookup order: the tag set of the model first, then the ad-hoc tags created by
 * earlier calls. If neither knows the tag, a new {@link PhraseTag} is created from
 * the tag string, stored in {@code adhocTags} and reported once at INFO level.
 *
 * @param model the tag set consulted first
 * @param adhocTags cache of tags that are not part of the model; a newly created tag
 *        is added to this map
 * @param tag the tag string to resolve
 * @param language the language, used only for the log message
 * @return the phrase tag from the model, from the ad-hoc cache, or a newly created one
 */
private PhraseTag getPhraseTag(TagSet<PhraseTag> model, Map<String, PhraseTag> adhocTags, String tag, String language) {
    PhraseTag phraseTag = model.getTag(tag);
    if (phraseTag != null) {
        return phraseTag;
    }
    phraseTag = adhocTags.get(tag);
    if (phraseTag != null) {
        return phraseTag;
    }
    // Unknown to both the model and the cache: create it and remember it, so the
    // message below is logged only on the first encounter for this map.
    phraseTag = new PhraseTag(tag);
    adhocTags.put(tag, phraseTag);
    // This method resolves phrase tags, not POS tags, so the message says so.
    log.info("Encountered unknown phrase tag '{}' for language '{}'", tag, language);
    return phraseTag;
}
Also used : PhraseTag(org.apache.stanbol.enhancer.nlp.phrase.PhraseTag)

Example 8 with PhraseTag

use of org.apache.stanbol.enhancer.nlp.phrase.PhraseTag in project stanbol by apache.

the class Nif20Helper method writePhrase.

/**
 * Writes the {@link NlpAnnotations#PHRASE_ANNOTATION} of the given element to the
 * RDF graph, using the {@code Nif20} vocabulary and the segmentUri as subject.
 * <p>
 * Nothing is written if the element has no phrase annotation, or if the lexical
 * category of the phrase tag has no entry in {@code LEXICAL_TYPE_TO_PHRASE_TYPE}.
 *
 * @param graph the graph the triples are added to
 * @param annotated the annotated element (e.g. a {@link Chunk})
 * @param segmentUri the URI of the resource representing the annotated element
 *        in the graph
 */
public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PhraseTag> phraseAnnotation = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION);
    if (phraseAnnotation == null) {
        // element carries no phrase annotation
        return;
    }
    IRI oliaPhraseType = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseAnnotation.value().getCategory());
    if (oliaPhraseType == null) {
        // no phrase type mapped for this lexical category
        return;
    }
    // link the segment to its phrase type
    graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(), oliaPhraseType));
    setOliaConf(graph, segmentUri, phraseAnnotation);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) PhraseTag(org.apache.stanbol.enhancer.nlp.phrase.PhraseTag)

Aggregations

PhraseTag (org.apache.stanbol.enhancer.nlp.phrase.PhraseTag)8 IRI (org.apache.clerezza.commons.rdf.IRI)3 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)3 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)3 Chunk (org.apache.stanbol.enhancer.nlp.model.Chunk)3 Token (org.apache.stanbol.enhancer.nlp.model.Token)3 NerTag (org.apache.stanbol.enhancer.nlp.ner.NerTag)3 PosTag (org.apache.stanbol.enhancer.nlp.pos.PosTag)3 ArrayList (java.util.ArrayList)2 Section (org.apache.stanbol.enhancer.nlp.model.Section)2 Sentence (org.apache.stanbol.enhancer.nlp.model.Sentence)2 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)2 ChunkerME (opennlp.tools.chunker.ChunkerME)1 Graph (org.apache.clerezza.commons.rdf.Graph)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 NounPhrase (org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase)1 CorefFeature (org.apache.stanbol.enhancer.nlp.coref.CorefFeature)1 Span (org.apache.stanbol.enhancer.nlp.model.Span)1 CaseTag (org.apache.stanbol.enhancer.nlp.morpho.CaseTag)1 GenderTag (org.apache.stanbol.enhancer.nlp.morpho.GenderTag)1