Use of org.apache.clerezza.commons.rdf.Language in project stanbol by apache:
class FstLinkingEngine, method writeEnhancements.
/**
 * Writes the Enhancements for the {@link LinkedEntity LinkedEntities}
 * extracted from the parsed ContentItem.
 *
 * @param ci the ContentItem whose metadata graph is enriched
 * @param text the plain text content; used to extract the selection context
 * @param tags the tags (occurrences with their entity suggestions) to write
 * @param language the language of the text, or <code>null</code>/empty if unknown
 * @param writeRankings if <code>true</code> the fise:entity-ranking is written
 *            for suggestions that provide one
 */
private void writeEnhancements(ContentItem ci, String text, Collection<Tag> tags, String language, boolean writeRankings) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    // collects the fise:TextAnnotation of every processed tag.
    // NOTE: this was previously (re)created inside the loop, so it could never
    // hold more than a single element despite being sized with tags.size().
    Collection<IRI> textAnnotations = new ArrayList<IRI>(tags.size());
    for (Tag tag : tags) {
        // first create the TextAnnotations for the Occurrences
        Literal startLiteral = literalFactory.createTypedLiteral(tag.getStart());
        Literal endLiteral = literalFactory.createTypedLiteral(tag.getEnd());
        // search for an existing text annotation with the same [start,end) selection
        Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
        IRI textAnnotation = null;
        while (it.hasNext()) {
            Triple t = it.next();
            if (metadata.filter(t.getSubject(), ENHANCER_END, endLiteral).hasNext() && metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()) {
                textAnnotation = (IRI) t.getSubject();
                break;
            }
        }
        if (textAnnotation == null) {
            // not found ... create a new one
            textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, tag.getAnchor(), tag.getStart()), languageObject)));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(tag.getAnchor(), languageObject)));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(tag.getScore())));
        } else {
            // if existing add this engine as contributor
            metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
        }
        // add dc:types (even to existing)
        for (IRI dcType : getDcTypes(tag.getSuggestions())) {
            metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, dcType));
        }
        textAnnotations.add(textAnnotation);
        // now the EntityAnnotations for the Suggestions
        for (Match match : tag.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            // should we use the label used for the match, or search the
            // representation for the best label ... currently its the matched one
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, match.getMatchLabel()));
            metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, new IRI(match.getUri())));
            for (IRI type : match.getTypes()) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, type));
            }
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(match.getScore())));
            // add the relation to the fise:TextAnnotation (the tag)
            metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation));
            // write origin information
            if (indexConfig.getOrigin() != null) {
                metadata.add(new TripleImpl(entityAnnotation, FISE_ORIGIN, indexConfig.getOrigin()));
            }
            if (writeRankings) {
                Double ranking = match.getRanking();
                if (ranking != null) {
                    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_RANKING, literalFactory.createTypedLiteral(ranking)));
                }
            }
            // TODO: dereferencing
            // if(linkerConfig.isDereferenceEntitiesEnabled() &&
            // dereferencedEntitis.add(entity.getUri())){ //not yet dereferenced
            // //add all outgoing triples for this entity
            // //NOTE: do not add all triples as there might be other data in the graph
            // for(Iterator<Triple> triples = entity.getData().filter(entity.getUri(), null, null);
            // triples.hasNext();metadata.add(triples.next()));
            // }
        }
    }
}
Use of org.apache.clerezza.commons.rdf.Language in project stanbol by apache:
class SentimentSummarizationEngine, method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    // resolve the language and the AnalysedText content part (both required,
    // hence the 'true' flag: the helpers throw if they are missing)
    String language = NlpEngineHelper.getLanguage(this, ci, true);
    AnalysedText at = NlpEngineHelper.getAnalysedText(this, ci, true);
    // extract the sentiment phrases from the analysed text
    List<SentimentPhrase> sentiments = extractSentiments(at, language);
    // language used for the plain literals written to the metadata graph
    String detectedLang = EnhancementEngineHelper.getLanguage(ci);
    Language literalLanguage = detectedLang != null ? new Language(detectedLang) : null;
    // writing enhancements requires the write lock on the ContentItem
    ci.getLock().writeLock().lock();
    try {
        writeSentimentEnhancements(ci, sentiments, at, literalLanguage);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Use of org.apache.clerezza.commons.rdf.Language in project stanbol by apache:
class RdfResourceUtils, method getLiteralValues.
/**
 * Extracts the literal values for the given list of languages (<code>null</code>
 * is supported).
 * <p>
 * Multiple languages are supported by this method to allow parsing
 * <code>null</code> in addition to a language. This is often used by applications
 * to search for literals in a given language in addition to literals with no
 * defined language.
 * <p>
 * As a convenience this method adds literals with a language tag to the
 * front of the list and literals with no language tag to the end.
 *
 * @param literals the iterator over the literals
 * @param languages the array of languages (<code>null</code> is supported).
 * @return The collection with all the literal values.
 */
public static List<String> getLiteralValues(Iterator<Literal> literals, String... languages) {
    // permits the null element (matches literals without a language tag)
    Set<Language> languageSet = new HashSet<Language>();
    for (String lang : languages) {
        languageSet.add(getLanguage(lang));
    }
    boolean containsNull = languageSet.contains(null);
    // collect into two buckets rather than inserting at index 0 of the result
    // list: ArrayList.add(0, e) shifts all elements and made this O(n^2)
    List<String> tagged = new ArrayList<String>();
    List<String> untagged = new ArrayList<String>();
    while (literals.hasNext()) {
        Literal act = literals.next();
        if (act.getLanguage() != null) {
            if (languageSet.contains(act.getLanguage())) {
                tagged.add(act.getLexicalForm());
            }
        } else if (containsNull) {
            // also accept typed literals, because they do not define a language
            untagged.add(act.getLexicalForm());
        }
    }
    // preserve the historic ordering: language-tagged literals were inserted
    // at the front (i.e. reverse encounter order), untagged appended at the end
    List<String> results = new ArrayList<String>(tagged.size() + untagged.size());
    for (int i = tagged.size() - 1; i >= 0; i--) {
        results.add(tagged.get(i));
    }
    results.addAll(untagged);
    return results;
}
Use of org.apache.clerezza.commons.rdf.Language in project stanbol by apache:
class DBPSpotlightDisambiguateEnhancementEngine, method computeEnhancements.
/**
 * Calculate the enhancements by doing a POST request to the DBpedia
 * Spotlight endpoint and processing the results.
 *
 * @param ci the {@link ContentItem}
 * @throws EngineException if the remote Spotlight request fails
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Language language = SpotlightEngineUtils.getContentLanguage(ci);
    String text = SpotlightEngineUtils.getPlainContent(ci);
    // Retrieve the existing text annotations (requires read lock)
    Graph graph = ci.getMetadata();
    String xmlTextAnnotations = this.getSpottedXml(text, graph);
    Collection<Annotation> dbpslGraph = doPostRequest(text, xmlTextAnnotations, ci.getUri());
    if (dbpslGraph != null) {
        // Acquire a write lock on the ContentItem when adding the
        // enhancements
        ci.getLock().writeLock().lock();
        try {
            createEnhancements(dbpslGraph, ci, language);
            if (log.isDebugEnabled()) {
                // serialize the enriched metadata graph for debugging only
                Serializer serializer = Serializer.getInstance();
                ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
                serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
                try {
                    log.debug("DBpedia Enhancements:\n{}", debugStream.toString("UTF-8"));
                } catch (UnsupportedEncodingException e) {
                    // UTF-8 support is mandated by the JVM spec, so this is
                    // effectively unreachable; log it instead of printing to
                    // stderr via printStackTrace()
                    log.warn("Unable to encode debug RDF output as UTF-8", e);
                }
            }
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }
}
Use of org.apache.clerezza.commons.rdf.Language in project stanbol by apache:
class CeliSentimentAnalysisEngine, method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text = "";
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().isEmpty()) {
        // use parameterized logging instead of eager string concatenation
        log.info("No text contained in ContentPart {} of ContentItem {}", contentPart.getKey(), ci.getUri());
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    // used for the plain literals in TextAnnotations
    Language lang = new Language(language);
    try {
        List<SentimentExpression> lista = this.client.extractSentimentExpressions(text, language);
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        Graph g = ci.getMetadata();
        for (SentimentExpression se : lista) {
            try {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // add selected text as PlainLiteral in the language extracted from the text
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(se.getSnippetStr(), lang)));
                g.add(new TripleImpl(textAnnotation, DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION));
                if (se.getStartSnippet() != null && se.getEndSnippet() != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(se.getStartSnippet().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(se.getEndSnippet().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, se.getSnippetStr(), se.getStartSnippet()), lang)));
                    g.add(new TripleImpl(textAnnotation, CeliConstants.HAS_SENTIMENT_EXPRESSION_POLARITY, literalFactory.createTypedLiteral(se.getSentimentPolarityAsDoubleValue())));
                }
            } catch (NoConvertorException e) {
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI Sentiment Analysis service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        // fixed typo in the error message ("wile" -> "while")
        throw new EngineException("Error while encoding/decoding the request/response to the CELI Sentiment Analysis service!", e);
    }
}
Aggregations