Search in sources :

Example 1 with SentimentClassifier

Use of org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier in the Apache Stanbol project.

Class SentimentEngine, method computeEnhancements.

/**
 * Compute enhancements for supplied ContentItem. The results of the process
 * are expected to be stored in the metadata of the content item.
 * <p/>
 * This implementation looks up the {@link SentimentClassifier} registered for
 * the language of the content item, iterates over all {@link Token}s of its
 * {@link AnalysedText} and adds a {@code SENTIMENT_ANNOTATION} to every token
 * for which the classifier reports a sentiment other than {@code 0.0}.
 * <ul>
 * <li>If {@code adjectivesOnly} is disabled every token is classified. The
 * lexical categories of the token's POS annotation are used if that annotation
 * is present and its probability is unknown or at least half of
 * {@code minPOSConfidence}; otherwise the word is classified without a
 * category.</li>
 * <li>If {@code adjectivesOnly} is enabled a token is only classified if one
 * of its POS annotations maps to an adjective or noun category with a
 * sufficient probability.</li>
 * </ul>
 * <p/>
 * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
 * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
 *
 * @param ci the content item to enhance
 * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
 *          if the underlying process failed to work as
 *          expected
 * @throws IllegalStateException
 *          if no sentiment classifier is available for the language of the
 *          content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText analysedText = getAnalysedText(this, ci, true);
    String language = getLanguage(this, ci, true);
    SentimentClassifier classifier = classifiers.get(language);
    if (classifier == null) {
        throw new IllegalStateException("Sentiment Classifier for language '" + language + "' not available. As this is also checked in " + " canEnhance this may indicate an Bug in the used " + "EnhancementJobManager!");
    }
    // TODO: locking for AnalysedText not yet defined
    // ci.getLock().writeLock().lock();
    // try {
    Iterator<Token> tokens = analysedText.getTokens();
    while (tokens.hasNext()) {
        Token token = tokens.next();
        Set<LexicalCategory> cats = null;
        boolean process = false;
        if (!adjectivesOnly) {
            // process all tokens; POS tags are only used to narrow the lookup
            process = true;
            Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
            // The null check must guard BOTH probability checks: '&&' binds
            // tighter than '||', so without the parentheses a token without
            // a POS annotation would cause a NullPointerException.
            if (posTag != null && (posTag.probability() == Value.UNKNOWN_PROBABILITY || posTag.probability() >= (minPOSConfidence / 2.0))) {
                cats = classifier.getCategories(posTag.value());
            } else {
                // no POS tags or probability to low
                cats = Collections.emptySet();
            }
        } else {
            // check PosTags if we need to lookup this word
            Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator();
            boolean ignore = false;
            // stop as soon as a POS tag either accepts or rejects the token
            while (!ignore && !process && posTags.hasNext()) {
                Value<PosTag> value = posTags.next();
                PosTag tag = value.value();
                cats = classifier.getCategories(tag);
                boolean state = cats.contains(LexicalCategory.Adjective) || cats.contains(LexicalCategory.Noun);
                // rejecting a token requires the full confidence threshold ...
                ignore = !state && (value.probability() == Value.UNKNOWN_PROBABILITY || value.probability() >= minPOSConfidence);
                // ... while accepting it only requires half of it
                process = state && (value.probability() == Value.UNKNOWN_PROBABILITY || value.probability() >= (minPOSConfidence / 2.0));
            }
        }
        if (process) {
            String word = token.getSpan();
            double sentiment = 0.0;
            if (cats.isEmpty()) {
                // no lexical category known: classify the plain word
                sentiment = classifier.classifyWord(null, word);
            } else {
                // in case of multiple Lexical Cats
                // we build the average over NOT NULL sentiments for the word
                int catSentNum = 0;
                for (LexicalCategory cat : cats) {
                    double catSent = classifier.classifyWord(cat, word);
                    if (catSent != 0.0) {
                        catSentNum++;
                        sentiment = sentiment + catSent;
                    }
                }
                if (catSentNum > 0) {
                    sentiment = sentiment / (double) catSentNum;
                }
            }
            if (sentiment != 0.0) {
                token.addAnnotation(SENTIMENT_ANNOTATION, new Value<Double>(sentiment));
            }
        // else do not set sentiments with 0.0
        }
    // else do not process
    }
// } finally {
// ci.getLock().writeLock().unlock();
// }
}
Also used : Token(org.apache.stanbol.enhancer.nlp.model.Token) LexicalCategory(org.apache.stanbol.enhancer.nlp.pos.LexicalCategory) NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) SentimentClassifier(org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) Value(org.apache.stanbol.enhancer.nlp.model.annotation.Value)

Aggregations

SentimentClassifier (org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier)1 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)1 Token (org.apache.stanbol.enhancer.nlp.model.Token)1 Value (org.apache.stanbol.enhancer.nlp.model.annotation.Value)1 LexicalCategory (org.apache.stanbol.enhancer.nlp.pos.LexicalCategory)1 PosTag (org.apache.stanbol.enhancer.nlp.pos.PosTag)1 NlpEngineHelper.getAnalysedText (org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText)1