Use of org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier in project Stanbol by Apache.
The class SentimentEngine, method computeEnhancements.
/**
 * Compute enhancements for supplied ContentItem. The results of the process
 * are expected to be stored in the metadata of the content item.
 * <p>
 * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
 * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
 * <p>
 * This implementation iterates over all tokens of the AnalysedText of the
 * content item, asks the language specific {@code SentimentClassifier} for
 * the sentiment of each selected word and adds a {@code SENTIMENT_ANNOTATION}
 * to every token with a sentiment different from {@code 0.0}.
 *
 * @param ci the content item to enhance
 * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
 * if the underlying process failed to work as
 * expected
 * @throws IllegalStateException
 * if no sentiment classifier is registered for the language of the
 * content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText analysedText = getAnalysedText(this, ci, true);
    String language = getLanguage(this, ci, true);
    SentimentClassifier classifier = classifiers.get(language);
    if (classifier == null) {
        throw new IllegalStateException("Sentiment Classifier for language '" + language + "' not available. As this is also checked in " + " canEnhance this may indicate an Bug in the used " + "EnhancementJobManager!");
    }
    // TODO: locking for AnalysedText not yet defined
    // ci.getLock().writeLock().lock();
    // try {
    Iterator<Token> tokens = analysedText.getTokens();
    while (tokens.hasNext()) {
        Token token = tokens.next();
        Set<LexicalCategory> cats = null;
        boolean process = false;
        if (!adjectivesOnly) {
            // process all tokens; the POS tag is only used to narrow down
            // the lexical categories passed to the classifier
            process = true;
            Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
            // The null check must guard BOTH probability alternatives,
            // otherwise tokens without a POS annotation cause a
            // NullPointerException.
            if (posTag != null
                    && (posTag.probability() == Value.UNKNOWN_PROBABILITY
                            || posTag.probability() >= (minPOSConfidence / 2.0))) {
                cats = classifier.getCategories(posTag.value());
            } else {
                // no POS tag or probability too low
                cats = Collections.emptySet();
            }
        } else {
            // check PosTags if we need to lookup this word
            Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator();
            boolean ignore = false;
            while (!ignore && !process && posTags.hasNext()) {
                Value<PosTag> value = posTags.next();
                PosTag tag = value.value();
                cats = classifier.getCategories(tag);
                boolean state = cats.contains(LexicalCategory.Adjective) || cats.contains(LexicalCategory.Noun);
                // a confident tag that is neither adjective nor noun
                // stops the search and skips the token
                ignore = !state && (value.probability() == Value.UNKNOWN_PROBABILITY || value.probability() >= minPOSConfidence);
                // adjectives/nouns are accepted with half the confidence
                process = state && (value.probability() == Value.UNKNOWN_PROBABILITY || value.probability() >= (minPOSConfidence / 2.0));
            }
        }
        if (process) {
            String word = token.getSpan();
            double sentiment = 0.0;
            if (cats.isEmpty()) {
                // no lexical category known: classify the plain word
                sentiment = classifier.classifyWord(null, word);
            } else {
                // in case of multiple Lexical Cats
                // we build the average over the non-zero sentiments for the word
                int catSentNum = 0;
                for (LexicalCategory cat : cats) {
                    double catSent = classifier.classifyWord(cat, word);
                    if (catSent != 0.0) {
                        catSentNum++;
                        sentiment = sentiment + catSent;
                    }
                }
                if (catSentNum > 0) {
                    sentiment = sentiment / (double) catSentNum;
                }
            }
            if (sentiment != 0.0) {
                token.addAnnotation(SENTIMENT_ANNOTATION, new Value<Double>(sentiment));
            }
            // else do not set sentiments with 0.0
        }
        // else do not process
    }
    // } finally {
    // ci.getLock().writeLock().unlock();
    // }
}
Aggregations