Example use of org.apache.stanbol.enhancer.nlp.model.Token in the Apache Stanbol project:
class DependencyRelationSupportTest, method initDepTreeAnnotations.
/**
 * Builds the dependency-tree fixture for the sentence "Obama visited China."
 * used by the tests: {@code visited} is the root verb governing {@code obama}
 * (nominal subject) and {@code china} (direct object).
 */
private static void initDepTreeAnnotations() {
    // The test sentence spans from the start of the text to (and including) the first '.'
    Sentence sentence = at.addSentence(0, text.indexOf(".") + 1);

    // Grammatical-relation tags shared by the dependency annotations below.
    GrammaticalRelationTag nsubjTag = new GrammaticalRelationTag("nsubj", GrammaticalRelation.NominalSubject);
    GrammaticalRelationTag rootTag = new GrammaticalRelationTag("root", GrammaticalRelation.Root);
    GrammaticalRelationTag dobjTag = new GrammaticalRelationTag("dobj", GrammaticalRelation.DirectObject);

    // Tokens for the three words; offsets are computed against the sentence span
    // (addToken is given [start, start + wordLength) — assumes sentence-relative
    // offsets, consistent with indexOf on sentence.getSpan()).
    Token obama = sentence.addToken(0, "Obama".length());
    int visitedStart = sentence.getSpan().indexOf("visited");
    Token visited = sentence.addToken(visitedStart, visitedStart + "visited".length());
    int chinaStart = sentence.getSpan().indexOf("China");
    Token china = sentence.addToken(chinaStart, chinaStart + "China".length());

    // obama --nsubj--> visited (obama is the dependent side of the relation)
    obama.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
        Value.value(new DependencyRelation(nsubjTag, true, visited)));
    // visited is the root and, as governor, carries the inverse nsubj/dobj relations
    visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
        Value.value(new DependencyRelation(rootTag, true, null)));
    visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
        Value.value(new DependencyRelation(nsubjTag, false, obama)));
    visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
        Value.value(new DependencyRelation(dobjTag, false, china)));
    // china --dobj--> visited
    china.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
        Value.value(new DependencyRelation(dobjTag, true, visited)));
}
Example use of org.apache.stanbol.enhancer.nlp.model.Token in the Apache Stanbol project:
class CeliAnalyzedTextSentimentAnalysisEngine, method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
AnalysedText at = getAnalysedText(this, ci, true);
String language = getLanguage(this, ci, true);
isLangaugeConfigured(this, languageConfig, language, true);
List<SentimentExpression> seList;
try {
seList = this.client.extractSentimentExpressions(at.getSpan(), language);
} catch (IOException e) {
throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
} catch (SOAPException e) {
throw new EngineException("Error wile encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
}
for (SentimentExpression se : seList) {
//Add the Sentiment Expression as Token to the Text. NOTE that if a Token with the same start/end positions already exist this
//Method returns the existing instance
Token token = at.addToken(se.getStartSnippet(), se.getEndSnippet());
token.addAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION, new Value<Double>(se.getSentimentPolarityAsDoubleValue()));
}
}
Example use of org.apache.stanbol.enhancer.nlp.model.Token in the Apache Stanbol project:
class SentimentSummarizationEngine, method getSentimentContext.
/**
 * Determines the token-index context window affected by a sentiment word.
 * <p>
 * Predicative adjectives are linked to the nearest verb (optionally stepping
 * over a coordinating conjunction to a further verb in the same direction);
 * attributive adjectives affect the directly following token; nouns affect
 * only themselves; everything else falls back to a {@code nounContext}-sized
 * window around the word. The returned window is clipped to
 * {@code sectionSpan}.
 *
 * @param index index of the sentiment word within the token list
 * @param sentiment the sentiment (provides the POS tag; its verb is set when found)
 * @param verbs verb tokens keyed by token index
 * @param conjunctions coordinating-conjunction tokens keyed by token index
 * @param nouns noun tokens keyed by token index
 * @param sectionSpan inclusive [start, end] token-index bounds of the section
 * @return inclusive [start, end] token-index context, clipped to sectionSpan
 */
private Integer[] getSentimentContext(Integer index, Sentiment sentiment, NavigableMap<Integer, Token> verbs, NavigableMap<Integer, Token> conjunctions, NavigableMap<Integer, Token> nouns, Integer[] sectionSpan) {
    Integer[] context;
    PosTag pos = sentiment.getPosTag();
    boolean isPredicative;
    if (pos != null && pos.getPosHierarchy().contains(Pos.PredicativeAdjective)) {
        isPredicative = true;
    } else if (pos != null && pos.hasCategory(LexicalCategory.Adjective) && //Adjective that are not directly in front of a Noun
        nouns.get(Integer.valueOf(index + 1)) == null) {
        isPredicative = true;
    } else {
        isPredicative = false;
    }
    if (isPredicative) {
        //use the verb as context
        Integer floorNoun = nouns.floorKey(index);
        Entry<Integer, Token> floorVerb = verbs.floorEntry(index);
        Integer ceilingNoun = nouns.ceilingKey(index);
        Entry<Integer, Token> ceilingVerb = verbs.ceilingEntry(index);
        floorVerb = floorVerb == null || floorVerb.getKey().compareTo(sectionSpan[0]) < 0 || //do not use verbs with an noun in-between
            (floorNoun != null && floorVerb.getKey().compareTo(floorNoun) < 0) ? null : floorVerb;
        ceilingVerb = ceilingVerb == null || ceilingVerb.getKey().compareTo(sectionSpan[1]) > 0 || //do not use verbs with an noun in-between
            (ceilingNoun != null && ceilingVerb.getKey().compareTo(ceilingNoun) > 0) ? null : ceilingVerb;
        Entry<Integer, Token> verb;
        if (ceilingVerb != null && floorVerb != null) {
            //both candidates in range: take the closer one (ties go to ceiling)
            verb = (index - floorVerb.getKey()) < (ceilingVerb.getKey() - index) ? floorVerb : ceilingVerb;
        } else if (ceilingVerb != null) {
            verb = ceilingVerb;
        } else if (floorVerb != null) {
            verb = floorVerb;
        } else {
            //no verb that can be used as context ... return an area around the current pos.
            verb = null;
        }
        if (verb != null) {
            if (verb.getKey().compareTo(index) < 0) {
                //verb is before the sentiment word: a conjunction shortly before the
                //verb may chain it to an earlier verb
                Integer floorConjunction = conjunctions.floorKey(verb.getKey());
                if (floorConjunction != null && floorConjunction.compareTo(Integer.valueOf(Math.max(verb.getKey() - conjuctionContext, sectionSpan[0]))) >= 0) {
                    //search an other verb in the same direction
                    floorVerb = verbs.floorEntry(floorConjunction);
                    if (floorVerb != null && floorVerb.getKey().compareTo(sectionSpan[0]) >= 0 && //do not step over an noun
                        (floorNoun == null || floorVerb.getKey().compareTo(floorNoun) >= 0)) {
                        verb = floorVerb;
                    }
                }
            } else if (verb.getKey().compareTo(index) > 0) {
                Integer ceilingConjunction = conjunctions.ceilingKey(verb.getKey());
                //BUGFIX: the conjunction must lie WITHIN the forward window, i.e. be
                //<= min(verb+conjuctionContext, sectionSpan[1]) — the original ">= 0"
                //mirrored the floor branch's comparison without flipping it
                if (ceilingConjunction != null && ceilingConjunction.compareTo(Integer.valueOf(Math.min(verb.getKey() + conjuctionContext, sectionSpan[1]))) <= 0) {
                    //search an other verb in the same direction
                    //BUGFIX: searching forward requires ceilingEntry (the original
                    //floorEntry would just re-find the current verb)
                    ceilingVerb = verbs.ceilingEntry(ceilingConjunction);
                    if (ceilingVerb != null && ceilingVerb.getKey().compareTo(sectionSpan[1]) <= 0 && //do not step over an noun
                        (ceilingNoun == null || ceilingVerb.getKey().compareTo(ceilingNoun) <= 0)) {
                        verb = ceilingVerb;
                    }
                }
            }
            context = new Integer[] { Integer.valueOf(verb.getKey() - nounContext), Integer.valueOf(verb.getKey() + nounContext) };
            sentiment.setVerb(verb.getValue());
        } else {
            context = new Integer[] { Integer.valueOf(index - nounContext), Integer.valueOf(index + nounContext) };
        }
    } else if (pos != null && pos.hasCategory(LexicalCategory.Adjective)) {
        //for all other adjective the affected noun is expected directly
        //after the noun
        context = new Integer[] { index, Integer.valueOf(index + 1) };
    } else if (pos != null && pos.hasCategory(LexicalCategory.Noun)) {
        //a noun with an sentiment
        context = new Integer[] { index, index };
    } else {
        //else (includes pos == null) return default
        context = new Integer[] { Integer.valueOf(index - nounContext), Integer.valueOf(index + nounContext) };
    }
    //ensure the returned context does not exceed the parsed sectionSpan
    if (context[0].compareTo(sectionSpan[0]) < 0) {
        context[0] = sectionSpan[0];
    }
    if (context[1].compareTo(sectionSpan[1]) > 0) {
        context[1] = sectionSpan[1];
    }
    return context;
}
Example use of org.apache.stanbol.enhancer.nlp.model.Token in the Apache Stanbol project:
class PosChunkerEngine, method computeEnhancements.
/**
 * Compute enhancements for supplied ContentItem. The results of the process
 * are expected to be stored in the metadata of the content item.
 * <p/>
 * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
 * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
 *
 * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
 *             if the underlying process failed to work as
 *             expected
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText at = getAnalysedText(this, ci, true);
    String language = getLanguage(this, ci, true);
    isLangaugeConfigured(this, languageConfiguration, language, true);
    //one PhraseBuilder per configured phrase type, all sharing one ChunkFactory
    ChunkFactory chunkFactory = new ChunkFactoryImpl(at, ci.getLock());
    List<PhraseBuilder> builders = new ArrayList<PhraseBuilder>(phraseTypeDefinitions.size());
    for (PhraseTypeDefinition phraseType : phraseTypeDefinitions) {
        builders.add(new PhraseBuilder(phraseType, chunkFactory, minPosScore));
    }
    //iterate sentence by sentence; fall back to the whole text when the
    //AnalysedText contains no sentence annotations
    Iterator<? extends Section> sections = at.getSentences();
    if (!sections.hasNext()) {
        sections = Collections.singleton(at).iterator();
    }
    while (sections.hasNext()) {
        Section section = sections.next();
        //announce the new section, then feed every token to every builder
        for (PhraseBuilder builder : builders) {
            builder.nextSection(section);
        }
        Iterator<Token> tokenIt = section.getTokens();
        while (tokenIt.hasNext()) {
            Token token = tokenIt.next();
            for (PhraseBuilder builder : builders) {
                builder.nextToken(token);
            }
        }
    }
    //signal the end of the document so builders can flush pending phrases
    for (PhraseBuilder builder : builders) {
        builder.nextSection(null);
    }
    // if(log.isTraceEnabled()){
    // logChunks(at);
    // }
}
Example use of org.apache.stanbol.enhancer.nlp.model.Token in the Apache Stanbol project:
class SentimentSummarizationEngine, method extractSentiments.
/**
 * Extracts {@link Sentiment}s for words with a {@link NlpAnnotations#SENTIMENT_ANNOTATION}.
 * The {@link NlpAnnotations#POS_ANNOTATION}s are used to link those words with
 * {@link LexicalCategory#Noun}s.
 * @param at the AnalyzedText to process
 * @param language the language of the text (used by the is* classifier helpers)
 * @return the {@link Sentiment} instances organised along {@link Sentence}s. If
 * no {@link Sentence}s are present on the parsed {@link AnalysedText}, than all
 * {@link Sentiment}s are added to the {@link AnalysedText}. Otherwise only
 * {@link Sentiment}s not contained within a {@link Sentence} are added to the
 * {@link AnalysedText} key.
 */
private List<SentimentPhrase> extractSentiments(AnalysedText at, String language) {
    //we do use Sentences (optional) and Tokens (required)
    Iterator<Span> tokenIt = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Token));
    List<Sentiment> sentimentTokens = new ArrayList<Sentiment>(32);
    //per-sentence indexes (token-list position -> token) used by summarizeSentence
    NavigableMap<Integer, Token> negations = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> nounsAndPronouns = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> verbs = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> conjuctions = new TreeMap<Integer, Token>();
    NavigableMap<Integer, Token> sectionBorders = new TreeMap<Integer, Token>();
    boolean firstTokenInSentence = true;
    Sentence sentence = null;
    final List<SentimentPhrase> sentimentPhrases = new ArrayList<SentimentPhrase>();
    while (tokenIt.hasNext()) {
        Span span = tokenIt.next();
        switch(span.getType()) {
            case Token:
                Token word = (Token) span;
                Integer wordIndex = sentimentTokens.size();
                Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
                boolean addToList = false;
                Sentiment sentiment = null;
                if (sentimentAnnotation != null && sentimentAnnotation.value() != null && !sentimentAnnotation.value().equals(ZERO)) {
                    sentiment = new Sentiment(word, sentimentAnnotation.value(), sentence == null || word.getEnd() > sentence.getEnd() ? null : sentence);
                    addToList = true;
                }
                //classify the token; the first matching category wins
                if (isNegation((Token) span, language)) {
                    addToList = true;
                    negations.put(wordIndex, word);
                } else if (isNoun(word, firstTokenInSentence, language) || isPronoun(word, language)) {
                    addToList = true;
                    nounsAndPronouns.put(wordIndex, word);
                } else if (isSectionBorder(word, language)) {
                    addToList = true;
                    sectionBorders.put(wordIndex, word);
                } else if (isVerb(word, language)) {
                    addToList = true;
                    verbs.put(wordIndex, word);
                } else if (isCoordinatingConjuction(word, language)) {
                    addToList = true;
                    conjuctions.put(wordIndex, word);
                } else if (isCountable(word, language)) {
                    addToList = true;
                }
                if (log.isDebugEnabled()) {
                    Value<PosTag> pos = word.getAnnotation(NlpAnnotations.POS_ANNOTATION);
                    //BUGFIX: guard against a missing POS annotation — the original
                    //dereferenced pos.value() unconditionally and could NPE here
                    log.debug(" [{}] '{}' pos: {}, sentiment {}", new Object[] { addToList ? sentimentTokens.size() : "-", word.getSpan(), pos == null ? "none" : pos.value().getCategories(), sentiment == null ? "none" : sentiment.getValue() });
                }
                if (addToList) {
                    //add the token
                    sentimentTokens.add(sentiment);
                }
                firstTokenInSentence = false;
                break;
            case Sentence:
                //cleanup the previous sentence
                sentimentPhrases.addAll(summarizeSentence(sentimentTokens, negations, nounsAndPronouns, verbs, conjuctions, sectionBorders));
                negations.clear();
                nounsAndPronouns.clear();
                sentimentTokens.clear();
                verbs.clear();
                //BUGFIX: conjuctions was the only index not cleared, leaking
                //conjunction positions from previous sentences into later ones
                conjuctions.clear();
                sectionBorders.clear();
                firstTokenInSentence = true;
                sentence = (Sentence) span;
                break;
            case TextSection:
                break;
            default:
                break;
        }
    }
    //flush the (tokens of the) last sentence
    sentimentPhrases.addAll(summarizeSentence(sentimentTokens, negations, nounsAndPronouns, verbs, conjuctions, sectionBorders));
    return sentimentPhrases;
}
End of aggregated usage examples.