
Example 16 with Token

use of org.apache.stanbol.enhancer.nlp.model.Token in project stanbol by apache.

the class DependencyRelationSupportTest method initDepTreeAnnotations.

private static void initDepTreeAnnotations() {
    //the first sentence of the test text contains the tokens "Obama", "visited" and "China"
    Sentence sentence = at.addSentence(0, text.indexOf(".") + 1);
    Token obama = sentence.addToken(0, "Obama".length());
    int visitedStartIdx = sentence.getSpan().indexOf("visited");
    Token visited = sentence.addToken(visitedStartIdx, visitedStartIdx + "visited".length());
    int chinaStartIdx = sentence.getSpan().indexOf("China");
    Token china = sentence.addToken(chinaStartIdx, chinaStartIdx + "China".length());
    //"Obama" is the nominal subject (nsubj) of "visited"
    GrammaticalRelationTag nSubjGrammRelTag = new GrammaticalRelationTag("nsubj", GrammaticalRelation.NominalSubject);
    obama.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION, Value.value(new DependencyRelation(nSubjGrammRelTag, true, visited)));
    //"visited" is the root of the dependency tree, with "Obama" (nsubj) and "China" (dobj) as its dependents
    GrammaticalRelationTag rootGrammRelTag = new GrammaticalRelationTag("root", GrammaticalRelation.Root);
    GrammaticalRelationTag dobjGrammRelTag = new GrammaticalRelationTag("dobj", GrammaticalRelation.DirectObject);
    visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION, Value.value(new DependencyRelation(rootGrammRelTag, true, null)));
    visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION, Value.value(new DependencyRelation(nSubjGrammRelTag, false, obama)));
    visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION, Value.value(new DependencyRelation(dobjGrammRelTag, false, china)));
    //"China" is the direct object (dobj) of "visited"
    china.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION, Value.value(new DependencyRelation(dobjGrammRelTag, true, visited)));
}
Also used : Token(org.apache.stanbol.enhancer.nlp.model.Token) GrammaticalRelationTag(org.apache.stanbol.enhancer.nlp.dependency.GrammaticalRelationTag) Sentence(org.apache.stanbol.enhancer.nlp.model.Sentence) DependencyRelation(org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation)
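
The annotations created above can later be read back from the Tokens. Below is a minimal, hedged sketch of such a consumer: the use of getAnnotations(..) to return all values of an annotation, GrammaticalRelationTag.getTag(), and the DependencyRelation accessors (getGrammaticalRelationTag(), isDependent(), getPartner()) are assumed names that are not confirmed by the snippet above.

import java.util.List;

import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
import org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;

public class DependencyRelationReader {

    /**
     * Logs every dependency relation attached to the given token.
     * Sketch only: the accessor names on DependencyRelation are assumptions.
     */
    public static void printDependencies(Token token) {
        List<Value<DependencyRelation>> relations =
                token.getAnnotations(NlpAnnotations.DEPENDENCY_ANNOTATION);
        for (Value<DependencyRelation> value : relations) {
            DependencyRelation relation = value.value();
            //assumed accessors: getGrammaticalRelationTag(), getPartner(), isDependent()
            System.out.println(token.getSpan()
                    + " -" + relation.getGrammaticalRelationTag().getTag() + "-> "
                    + (relation.getPartner() == null ? "ROOT" : relation.getPartner().getSpan())
                    + (relation.isDependent() ? " (dependent)" : " (governor)"));
        }
    }
}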

Example 17 with Token

use of org.apache.stanbol.enhancer.nlp.model.Token in project stanbol by apache.

the class CeliAnalyzedTextSentimentAnalysisEngine method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText at = getAnalysedText(this, ci, true);
    String language = getLanguage(this, ci, true);
    isLangaugeConfigured(this, languageConfig, language, true);
    List<SentimentExpression> seList;
    try {
        seList = this.client.extractSentimentExpressions(at.getSpan(), language);
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
    }
    for (SentimentExpression se : seList) {
        //Add the Sentiment Expression as a Token to the text. NOTE: if a Token with the same
        //start/end positions already exists, this method returns the existing instance.
        Token token = at.addToken(se.getStartSnippet(), se.getEndSnippet());
        token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, new Value<Double>(se.getSentimentPolarityAsDoubleValue()));
    }
}
Also used : NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) SOAPException(javax.xml.soap.SOAPException) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Token(org.apache.stanbol.enhancer.nlp.model.Token) IOException(java.io.IOException)
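
Downstream components can read these token-level sentiment values back from the AnalysedText. A minimal sketch, assuming access to the same AnalysedText instance; the averaging helper is illustrative and not part of the engine above.

import java.util.Iterator;

import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;

public class SentimentTokenReader {

    /**
     * Averages the sentiment polarity over all annotated tokens.
     * Illustrative helper, not part of the engine shown above.
     */
    public static double averageSentiment(AnalysedText at) {
        double sum = 0;
        int count = 0;
        Iterator<Token> tokens = at.getTokens();
        while (tokens.hasNext()) {
            Value<Double> sentiment = tokens.next().getAnnotation(
                NlpAnnotations.SENTIMENT_ANNOTATION);
            if (sentiment != null && sentiment.value() != null) {
                sum += sentiment.value();
                count++;
            }
        }
        return count == 0 ? 0d : sum / count;
    }
}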

Example 18 with Token

use of org.apache.stanbol.enhancer.nlp.model.Token in project stanbol by apache.

the class SentimentSummarizationEngine method getSentimentContext.

private Integer[] getSentimentContext(Integer index, Sentiment sentiment, NavigableMap<Integer, Token> verbs, NavigableMap<Integer, Token> conjunctions, NavigableMap<Integer, Token> nouns, Integer[] sectionSpan) {
    Integer[] context;
    PosTag pos = sentiment.getPosTag();
    boolean isPredicative;
    if (pos != null && pos.getPosHierarchy().contains(Pos.PredicativeAdjective)) {
        isPredicative = true;
    } else if (pos != null && pos.hasCategory(LexicalCategory.Adjective) && //Adjective that are not directly in front of a Noun
    nouns.get(Integer.valueOf(index + 1)) == null) {
        isPredicative = true;
    } else {
        isPredicative = false;
    }
    if (isPredicative) {
        //            Integer floorConjunction = conjunctions.floorKey(index);
        //            if(floorConjunction != null && floorConjunction.compareTo(
        //                Integer.valueOf(Math.max(index-conjuctionContext,sectionSpan[0]))) >= 0){
        //                lowIndex = Integer.valueOf(floorConjunction-1);
        //            }
        //            Integer ceilingConjunction = conjunctions.ceilingKey(index);
        //            if(ceilingConjunction != null && ceilingConjunction.compareTo(
        //                Integer.valueOf(Math.min(index+conjuctionContext,sectionSpan[1]))) <= 0){
        //                highIndex = Integer.valueOf(ceilingConjunction+1);
        //            }
        //use the verb as context
        Integer floorNoun = nouns.floorKey(index);
        Entry<Integer, Token> floorVerb = verbs.floorEntry(index);
        Integer ceilingNoun = nouns.ceilingKey(index);
        Entry<Integer, Token> ceilingVerb = verbs.ceilingEntry(index);
        //do not use verbs outside the section or with a noun in-between
        floorVerb = floorVerb == null || floorVerb.getKey().compareTo(sectionSpan[0]) < 0
                || (floorNoun != null && floorVerb.getKey().compareTo(floorNoun) < 0) ? null : floorVerb;
        ceilingVerb = ceilingVerb == null || ceilingVerb.getKey().compareTo(sectionSpan[1]) > 0
                || (ceilingNoun != null && ceilingVerb.getKey().compareTo(ceilingNoun) > 0) ? null : ceilingVerb;
        Entry<Integer, Token> verb;
        if (ceilingVerb != null && floorVerb != null) {
            verb = (index - floorVerb.getKey()) < (ceilingVerb.getKey() - index) ? floorVerb : ceilingVerb;
        } else if (ceilingVerb != null) {
            verb = ceilingVerb;
        } else if (floorVerb != null) {
            verb = floorVerb;
        } else {
            //no verb that can be used as context ... return an area around the current pos.
            verb = null;
        }
        if (verb != null) {
            if (verb.getKey().compareTo(index) < 0) {
                Integer floorConjunction = conjunctions.floorKey(verb.getKey());
                if (floorConjunction != null && floorConjunction.compareTo(Integer.valueOf(Math.max(verb.getKey() - conjuctionContext, sectionSpan[0]))) >= 0) {
                    //search for another verb in the same direction
                    floorVerb = verbs.floorEntry(floorConjunction);
                    if (floorVerb != null && floorVerb.getKey().compareTo(sectionSpan[0]) >= 0 && //do not step over an noun
                    (floorNoun == null || floorVerb.getKey().compareTo(floorNoun) >= 0)) {
                        verb = floorVerb;
                    }
                }
            } else if (verb.getKey().compareTo(index) > 0) {
                Integer ceilingConjunction = conjunctions.ceilingKey(verb.getKey());
                if (ceilingConjunction != null && ceilingConjunction.compareTo(Integer.valueOf(Math.min(verb.getKey() + conjuctionContext, sectionSpan[1]))) >= 0) {
                    //search for another verb in the same direction
                    ceilingVerb = verbs.floorEntry(ceilingConjunction);
                    if (ceilingVerb != null && ceilingVerb.getKey().compareTo(sectionSpan[1]) <= 0 && //do not step over an noun
                    (ceilingNoun == null || ceilingVerb.getKey().compareTo(ceilingNoun) <= 0)) {
                        verb = ceilingVerb;
                    }
                }
            }
            context = new Integer[] { Integer.valueOf(verb.getKey() - nounContext), Integer.valueOf(verb.getKey() + nounContext) };
            sentiment.setVerb(verb.getValue());
        } else {
            context = new Integer[] { Integer.valueOf(index - nounContext), Integer.valueOf(index + nounContext) };
        }
    } else if (pos != null && pos.hasCategory(LexicalCategory.Adjective)) {
        //for all other adjectives the affected noun is expected directly
        //after the adjective
        context = new Integer[] { index, Integer.valueOf(index + 1) };
    } else if (pos != null && pos.hasCategory(LexicalCategory.Noun)) {
        //a noun with a sentiment
        context = new Integer[] { index, index };
    } else {
        //else (includes pos == null) return default
        context = new Integer[] { Integer.valueOf(index - nounContext), Integer.valueOf(index + nounContext) };
    }
    //ensure the returned context does not exceed the parsed sectionSpan 
    if (context[0].compareTo(sectionSpan[0]) < 0) {
        context[0] = sectionSpan[0];
    }
    if (context[1].compareTo(sectionSpan[1]) > 0) {
        context[1] = sectionSpan[1];
    }
    return context;
}
Also used : PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) Token(org.apache.stanbol.enhancer.nlp.model.Token)
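
The method leans on NavigableMap.floorEntry(..)/ceilingEntry(..) to pick the verb closest to the sentiment word on either side. The following standalone sketch shows that nearest-key pattern with a plain TreeMap and hypothetical token positions (no Stanbol types involved).

import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.TreeMap;

public class NearestKeyDemo {

    public static void main(String[] args) {
        //token index -> token text, hypothetical positions within a sentence
        NavigableMap<Integer, String> verbs = new TreeMap<Integer, String>();
        verbs.put(2, "visited");
        verbs.put(9, "praised");

        int sentimentIndex = 6;
        //closest verb at or before index 6
        Entry<Integer, String> floor = verbs.floorEntry(sentimentIndex);
        //closest verb at or after index 6
        Entry<Integer, String> ceiling = verbs.ceilingEntry(sentimentIndex);

        //pick whichever verb is nearer, mirroring the distance check above
        Entry<Integer, String> nearest;
        if (floor != null && ceiling != null) {
            nearest = (sentimentIndex - floor.getKey()) < (ceiling.getKey() - sentimentIndex) ? floor : ceiling;
        } else {
            nearest = floor != null ? floor : ceiling;
        }
        //prints "context verb: praised" (distance 3 vs 4)
        System.out.println("context verb: " + nearest.getValue());
    }
}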

Example 19 with Token

use of org.apache.stanbol.enhancer.nlp.model.Token in project stanbol by apache.

the class PosChunkerEngine method computeEnhancements.

/**
     * Compute enhancements for supplied ContentItem. The results of the process
     * are expected to be stored in the metadata of the content item.
     * <p/>
     * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
     * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
     *
     * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
     *          if the underlying process failed to work as
     *          expected
     */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText at = getAnalysedText(this, ci, true);
    String language = getLanguage(this, ci, true);
    isLangaugeConfigured(this, languageConfiguration, language, true);
    //init the PhraseBuilder
    ChunkFactory chunkFactory = new ChunkFactoryImpl(at, ci.getLock());
    List<PhraseBuilder> phraseBuilders = new ArrayList<PhraseBuilder>(phraseTypeDefinitions.size());
    for (PhraseTypeDefinition ptd : phraseTypeDefinitions) {
        phraseBuilders.add(new PhraseBuilder(ptd, chunkFactory, minPosScore));
    }
    Iterator<? extends Section> sentences = at.getSentences();
    if (!sentences.hasNext()) {
        //no sentences ... iterate over the whole text
        sentences = Collections.singleton(at).iterator();
    }
    while (sentences.hasNext()) {
        // (1) get Tokens and POS information for the sentence
        Section sentence = sentences.next();
        for (PhraseBuilder pb : phraseBuilders) {
            pb.nextSection(sentence);
        }
        Iterator<Token> tokens = sentence.getTokens();
        while (tokens.hasNext()) {
            Token token = tokens.next();
            for (PhraseBuilder pb : phraseBuilders) {
                pb.nextToken(token);
            }
        }
    }
    //signal the end of the document
    for (PhraseBuilder pb : phraseBuilders) {
        pb.nextSection(null);
    }
//        if(log.isTraceEnabled()){
//            logChunks(at);
//        }
}
Also used : NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) ArrayList(java.util.ArrayList) Token(org.apache.stanbol.enhancer.nlp.model.Token) PhraseBuilder(org.apache.stanbol.enhancer.engines.poschunker.PhraseBuilder) ChunkFactory(org.apache.stanbol.enhancer.engines.poschunker.PhraseBuilder.ChunkFactory) Section(org.apache.stanbol.enhancer.nlp.model.Section) PhraseTypeDefinition(org.apache.stanbol.enhancer.engines.poschunker.PhraseTypeDefinition)
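
The sentence loop above falls back to processing the whole AnalysedText as a single Section when no sentences were detected. A reduced sketch of that iteration pattern; processToken(..) is a placeholder for whatever per-token work an engine performs.

import java.util.Collections;
import java.util.Iterator;

import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Section;
import org.apache.stanbol.enhancer.nlp.model.Token;

public class SectionWalker {

    public void walk(AnalysedText at) {
        Iterator<? extends Section> sections = at.getSentences();
        if (!sections.hasNext()) {
            //no sentence annotations ... process the whole text as a single section
            sections = Collections.singleton(at).iterator();
        }
        while (sections.hasNext()) {
            Section section = sections.next();
            Iterator<Token> tokens = section.getTokens();
            while (tokens.hasNext()) {
                processToken(tokens.next());
            }
        }
    }

    //placeholder for engine-specific per-token processing
    protected void processToken(Token token) {
        //no-op in this sketch
    }
}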

Example 20 with Token

use of org.apache.stanbol.enhancer.nlp.model.Token in project stanbol by apache.

the class SentimentSummarizationEngine method extractSentiments.

/**
     * Extracts {@link Sentiment}s for words with a {@link NlpAnnotations#SENTIMENT_ANNOTATION}.
     * The {@link NlpAnnotations#POS_ANNOTATION}s are used to link those words with
     * {@link LexicalCategory#Noun}s.
     * @param at the AnalysedText to process
     * @param language the language of the analysed text
     * @return the {@link Sentiment} instances organised along {@link Sentence}s. If
     * no {@link Sentence}s are present on the parsed {@link AnalysedText}, then all
     * {@link Sentiment}s are added to the {@link AnalysedText}. Otherwise only
     * {@link Sentiment}s not contained within a {@link Sentence} are added to the
     * {@link AnalysedText}.
     */
private List<SentimentPhrase> extractSentiments(AnalysedText at, String language) {
    //we use Sentences (optional) and Tokens (required)
    Iterator<Span> tokenIt = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Token));
    List<Sentiment> sentimentTokens = new ArrayList<Sentiment>(32);
    NavigableMap<Integer, Token> negations = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> nounsAndPronouns = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> verbs = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> conjuctions = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> sectionBorders = new TreeMap<Integer, Token>();
    boolean firstTokenInSentence = true;
    Sentence sentence = null;
    final List<SentimentPhrase> sentimentPhrases = new ArrayList<SentimentPhrase>();
    while (tokenIt.hasNext()) {
        Span span = tokenIt.next();
        switch(span.getType()) {
            case Token:
                Token word = (Token) span;
                Integer wordIndex = sentimentTokens.size();
                Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
                boolean addToList = false;
                Sentiment sentiment = null;
                if (sentimentAnnotation != null && sentimentAnnotation.value() != null && !sentimentAnnotation.value().equals(ZERO)) {
                    sentiment = new Sentiment(word, sentimentAnnotation.value(), sentence == null || word.getEnd() > sentence.getEnd() ? null : sentence);
                    addToList = true;
                }
                if (isNegation((Token) span, language)) {
                    addToList = true;
                    negations.put(wordIndex, word);
                } else if (isNoun(word, firstTokenInSentence, language) || isPronoun(word, language)) {
                    addToList = true;
                    nounsAndPronouns.put(wordIndex, word);
                } else if (isSectionBorder(word, language)) {
                    addToList = true;
                    sectionBorders.put(wordIndex, word);
                } else if (isVerb(word, language)) {
                    addToList = true;
                    verbs.put(wordIndex, word);
                } else if (isCoordinatingConjuction(word, language)) {
                    addToList = true;
                    conjuctions.put(wordIndex, word);
                } else if (isCountable(word, language)) {
                    addToList = true;
                }
                if (log.isDebugEnabled()) {
                    Value<PosTag> pos = word.getAnnotation(NlpAnnotations.POS_ANNOTATION);
                    log.debug(" [{}] '{}' pos: {}, sentiment {}", new Object[] { addToList ? sentimentTokens.size() : "-", word.getSpan(), pos.value().getCategories(), sentiment == null ? "none" : sentiment.getValue() });
                }
                if (addToList) {
                    //add the token
                    sentimentTokens.add(sentiment);
                }
                firstTokenInSentence = false;
                break;
            case Sentence:
                //cleanup the previous sentence
                sentimentPhrases.addAll(summarizeSentence(sentimentTokens, negations, nounsAndPronouns, verbs, conjuctions, sectionBorders));
                negations.clear();
                nounsAndPronouns.clear();
                sentimentTokens.clear();
                verbs.clear();
                sectionBorders.clear();
                firstTokenInSentence = true;
                sentence = (Sentence) span;
                break;
            case TextSection:
                break;
            default:
                break;
        }
    }
    sentimentPhrases.addAll(summarizeSentence(sentimentTokens, negations, nounsAndPronouns, verbs, conjuctions, sectionBorders));
    return sentimentPhrases;
}
Also used : ArrayList(java.util.ArrayList) Token(org.apache.stanbol.enhancer.nlp.model.Token) TreeMap(java.util.TreeMap) Span(org.apache.stanbol.enhancer.nlp.model.Span) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) Sentence(org.apache.stanbol.enhancer.nlp.model.Sentence)
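
The traversal above relies on getEnclosed(..) streaming Sentence and Token spans in document order, so a switch on Span.getType() can reset per-sentence state whenever a new Sentence arrives. A reduced sketch of that pattern; the per-sentence token counting is illustrative only, and the SpanTypeEnum import path is an assumption (it may also be available as a nested type of Span).

import java.util.EnumSet;
import java.util.Iterator;

import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Span;
//import path assumed; see note above
import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum;

public class SpanTraversalDemo {

    public static void countTokensPerSentence(AnalysedText at) {
        Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Token));
        int tokenCount = 0;
        Span currentSentence = null;
        while (spans.hasNext()) {
            Span span = spans.next();
            switch (span.getType()) {
                case Sentence:
                    //a new sentence starts: report the previous one and reset the running count
                    if (currentSentence != null) {
                        System.out.println(currentSentence.getSpan() + ": " + tokenCount + " tokens");
                    }
                    currentSentence = span;
                    tokenCount = 0;
                    break;
                case Token:
                    tokenCount++;
                    break;
                default:
                    break;
            }
        }
        //report the last sentence
        if (currentSentence != null) {
            System.out.println(currentSentence.getSpan() + ": " + tokenCount + " tokens");
        }
    }
}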

Aggregations

Token (org.apache.stanbol.enhancer.nlp.model.Token) 23
AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText) 13
PosTag (org.apache.stanbol.enhancer.nlp.pos.PosTag) 12
Sentence (org.apache.stanbol.enhancer.nlp.model.Sentence) 9
EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException) 8
ArrayList (java.util.ArrayList) 7
Chunk (org.apache.stanbol.enhancer.nlp.model.Chunk) 7
Section (org.apache.stanbol.enhancer.nlp.model.Section) 5
NerTag (org.apache.stanbol.enhancer.nlp.ner.NerTag) 5
NlpEngineHelper.getAnalysedText (org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) 5
IOException (java.io.IOException) 4
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) 4
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) 4
NlpEngineHelper.initAnalysedText (org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.initAnalysedText) 4
StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource) 4
IRI (org.apache.clerezza.commons.rdf.IRI) 3
Value (org.apache.stanbol.enhancer.nlp.model.annotation.Value) 3
MorphoFeatures (org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures) 3
PhraseTag (org.apache.stanbol.enhancer.nlp.phrase.PhraseTag) 3
ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem) 3