Usage of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project:
class SpotlightEngineUtils, method createTextEnhancement.
/**
 * Creates a fise:TextAnnotation for the given surface form occurrence and
 * adds it to the {@link ContentItem#getMetadata()} graph. <p>
 * This method assumes the caller already holds a write lock on the parsed
 * content item.
 * @param occ the surface form occurrence to annotate
 * @param engine the enhancement engine creating the annotation
 * @param ci the content item
 * @param content the plain text the occurrence was spotted in
 * @param lang the language of the content or <code>null</code> if unknown
 * @return the URI of the created fise:TextAnnotation
 */
public static IRI createTextEnhancement(SurfaceForm occ, EnhancementEngine engine, ContentItem ci, String content, Language lang) {
    final Graph metadata = ci.getMetadata();
    final IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, engine);
    //selected text plus its [start,end) character offsets within the content
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
        new PlainLiteralImpl(occ.name, lang)));
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_START,
        literalFactory.createTypedLiteral(occ.offset)));
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_END,
        literalFactory.createTypedLiteral(occ.offset + occ.name.length())));
    final String type = occ.type;
    //dc:type is optional — only written when the surface form carries one
    if (type != null && !type.isEmpty()) {
        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, new IRI(type)));
    }
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT,
        new PlainLiteralImpl(getSelectionContext(content, occ.name, occ.offset), lang)));
    return textAnnotation;
}
Usage of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project:
class DBPSpotlightCandidatesEnhancementEngine, method createEnhancements.
/**
 * This generates enhancement structures for the entities from DBPedia
 * Spotlight and adds them to the content item's metadata. For each surface
 * form a TextAnnotation and the according EntityAnnotations are created.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 * @param text
 *            the plain text the surface forms were spotted in
 * @param language
 *            the language of the text or <code>null</code> if unknown
 */
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String text, Language language) {
    // TODO create TextEnhancement (form, start, end, type?)
    //maps annotation URIs so later occurrences of the same name can be linked
    HashMap<String, IRI> entityAnnotationMap = new HashMap<String, IRI>();
    Graph model = ci.getMetadata();
    for (SurfaceForm occ : occs) {
        //one fise:TextAnnotation per surface form occurrence
        IRI textAnnotation = SpotlightEngineUtils.createTextEnhancement(occ, this, ci, text, language);
        //one fise:EntityAnnotation per candidate resource, linked to the
        //TextAnnotation via dc:relation (done inside createEntityAnnotation)
        Iterator<CandidateResource> resources = occ.resources.iterator();
        while (resources.hasNext()) {
            CandidateResource resource = resources.next();
            IRI entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(resource, this, ci, textAnnotation);
            entityAnnotationMap.put(resource.localName, entityAnnotation);
        }
        //NOTE(review): the map mixes two key spaces — resource.localName
        //(mapped to EntityAnnotation URIs above) and occ.name (mapped to a
        //TextAnnotation URI below). When occ.name collides with a previously
        //stored localName, a dc:relation triple is added from that
        //EntityAnnotation to this TextAnnotation; otherwise the
        //TextAnnotation URI itself is stored under occ.name. Presumably
        //intentional cross-linking, but worth confirming against the
        //Stanbol enhancement structure spec.
        if (entityAnnotationMap.containsKey(occ.name)) {
            model.add(new TripleImpl(entityAnnotationMap.get(occ.name), DC_RELATION, textAnnotation));
        } else {
            entityAnnotationMap.put(occ.name, textAnnotation);
        }
    }
}
Usage of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project:
class SpotlightEngineUtils, method createEntityAnnotation.
/**
 * Creates a fise:EntityAnnotation for the given Spotlight annotation and
 * adds it to the {@link ContentItem#getMetadata()} graph. <p>
 * This method assumes the caller already holds a write lock on the parsed
 * content item.
 * @param annotation the Spotlight annotation to write
 * @param engine the enhancement engine creating the annotation
 * @param ci the content item
 * @param textAnnotation the fise:TextAnnotation the created
 * EntityAnnotation links to by using dc:relation
 * @param language the language of the label of the referenced
 * Entity (or <code>null</code> if none).
 */
public static void createEntityAnnotation(Annotation annotation, EnhancementEngine engine, ContentItem ci, IRI textAnnotation, Language language) {
    final Graph metadata = ci.getMetadata();
    final IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
    //link back to the TextAnnotation and describe the referenced entity
    metadata.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL,
        new PlainLiteralImpl(annotation.surfaceForm.name, language)));
    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, annotation.uri));
    //one fise:entity-type triple per type reported by Spotlight
    for (String typeName : annotation.getTypeNames()) {
        metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(typeName)));
    }
    //the Spotlight similarity score doubles as the generic fise:confidence
    //value (see the original author's TODO asking for review of this choice)
    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_CONFIDENCE,
        literalFactory.createTypedLiteral(annotation.similarityScore)));
    //Spotlight specific ranking metrics
    metadata.add(new TripleImpl(entityAnnotation, PROPERTY_PERCENTAGE_OF_SECOND_RANK,
        literalFactory.createTypedLiteral(annotation.percentageOfSecondRank)));
    metadata.add(new TripleImpl(entityAnnotation, PROPERTY_SUPPORT,
        literalFactory.createTypedLiteral(annotation.support)));
    metadata.add(new TripleImpl(entityAnnotation, PROPERTY_SIMILARITY_SCORE,
        literalFactory.createTypedLiteral(annotation.similarityScore)));
}
Usage of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project:
class TextAnnotationsNewModelEngine, method computeEnhancements.
/**
 * Computes the enhancements on the provided ContentItem: upgrades existing
 * fise:TextAnnotations to the "new" selection model by adding
 * fise:selection-prefix, fise:selection-suffix and (when missing)
 * fise:selected-text or fise:selection-head/fise:selection-tail triples
 * derived from the plain text Blob. Reads under the read lock, collects the
 * triples to add, then writes them under the write lock.
 */
@Override
public void computeEnhancements(ContentItem contentItem) throws EngineException {
    Entry<IRI, Blob> textBlob = getBlob(contentItem, supportedMimeTypes);
    if (textBlob == null) {
        //no plain text Blob — nothing to transform
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(contentItem);
    Language lang = language == null ? null : new Language(language);
    String text;
    try {
        text = ContentItemHelper.getText(textBlob.getValue());
    } catch (IOException e) {
        throw new EngineException(this, contentItem, "Unable to read Plain Text Blob", e);
    }
    //collected here and written after the read lock is released, so the
    //graph is never mutated while being iterated
    Set<Triple> addedTriples = new HashSet<Triple>();
    Graph metadata = contentItem.getMetadata();
    //extract all the necessary information within a read lock
    contentItem.getLock().readLock().lock();
    try {
        Iterator<Triple> it = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (it.hasNext()) {
            BlankNodeOrIRI ta = it.next().getSubject();
            boolean hasPrefix = metadata.filter(ta, ENHANCER_SELECTION_PREFIX, null).hasNext();
            boolean hasSuffix = metadata.filter(ta, ENHANCER_SELECTION_SUFFIX, null).hasNext();
            boolean hasSelected = metadata.filter(ta, ENHANCER_SELECTED_TEXT, null).hasNext();
            if (hasPrefix && hasSuffix && hasSelected) {
                //this TextAnnotation already uses the new model
                continue;
            }
            //start/end are left null when the corresponding triple is not
            //needed or cannot be determined; null disables the related writes
            Integer start;
            if (!hasPrefix) {
                start = EnhancementEngineHelper.get(metadata, ta, ENHANCER_START, Integer.class, lf);
                if (start == null) {
                    log.debug("unable to add fise:selection-prefix to TextAnnotation {} " + "because fise:start is not present", ta);
                } else if (start < 0) {
                    //NOTE(review): the warning says the TextAnnotation will
                    //not be transformed, but the code clamps start to 0 and
                    //proceeds — message and behavior disagree; confirm which
                    //is intended
                    log.warn("fise:start {} of TextAnnotation {} < 0! " + "Will not transform this TextAnnotation", start, ta);
                    start = 0;
                }
            } else {
                start = null;
            }
            Integer end;
            if (!hasSuffix) {
                end = EnhancementEngineHelper.get(metadata, ta, ENHANCER_END, Integer.class, lf);
                if (end == null) {
                    log.debug("unable to add fise:selection-suffix to TextAnnotation {} " + "because fise:end is not present", ta);
                } else if (end > text.length()) {
                    //end past the content: drop end (suffix/selected skipped)
                    log.warn("fise:end {} of TextAnnotation {} > as the content length {}! " + "Will not transform this TextAnnotation", end, ta, text.length());
                    end = null;
                } else if (start != null && end < start) {
                    //inverted range: drop both offsets entirely
                    log.warn("fise:end {} < fise:start {} of TextAnnotation {}! " + "Will not transform this TextAnnotation", end, start, ta);
                    end = null;
                    start = null;
                }
            } else {
                end = null;
            }
            //up to prefixSuffixSize chars of context before the selection
            if (!hasPrefix && start != null) {
                addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_PREFIX, new PlainLiteralImpl(text.substring(Math.max(0, start - prefixSuffixSize), start), lang)));
            }
            //up to prefixSuffixSize chars of context after the selection
            if (!hasSuffix && end != null) {
                addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_SUFFIX, new PlainLiteralImpl(text.substring(end, Math.min(text.length(), end + prefixSuffixSize)), lang)));
            }
            if (!hasSelected && start != null && end != null) {
                //This adds missing fise:selected or fise:head/fise:tail if the selected text is to long
                int length = end - start;
                if (length > 3 * prefixSuffixSize) {
                    //selection too long to inline: store only head and tail
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_HEAD, new PlainLiteralImpl(text.substring(start, start + prefixSuffixSize), lang)));
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_TAIL, new PlainLiteralImpl(text.substring(end - prefixSuffixSize, end), lang)));
                } else {
                    //add missing fise:selected
                    String selection = text.substring(start, end);
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(selection, lang)));
                    //check if we should also add an selection context
                    if (!metadata.filter(ta, ENHANCER_SELECTION_CONTEXT, null).hasNext()) {
                        addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(EnhancementEngineHelper.getSelectionContext(text, selection, start), lang)));
                    }
                }
            }
        }
    } finally {
        contentItem.getLock().readLock().unlock();
    }
    //finally write the prefix/suffix triples within a write lock
    if (!addedTriples.isEmpty()) {
        contentItem.getLock().writeLock().lock();
        try {
            metadata.addAll(addedTriples);
        } finally {
            contentItem.getLock().writeLock().unlock();
        }
    }
}
Usage of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in the Apache Stanbol project:
class SentimentSummarizationEngine, method writeSentimentEnhancements.
/**
 * Writes sentiment enhancement structures for the extracted sentiment
 * phrases to the content item's metadata: optionally one fise:TextAnnotation
 * per phrase, optionally one aggregated annotation per sentence, and
 * optionally one for the whole document (controlled by the
 * writeSentimentPhrases / writeSentencesSentimet / writeDocumentSentiment
 * configuration flags).
 *
 * @param ci the content item to write the annotations to
 * @param sentimentPhrases the extracted sentiment phrases
 * @param at the analysed text the phrase offsets refer to
 * @param lang the language of the text or <code>null</code> if unknown
 */
private void writeSentimentEnhancements(ContentItem ci, List<SentimentPhrase> sentimentPhrases, AnalysedText at, Language lang) {
    Graph metadata = ci.getMetadata();
    //tracks the sentence currently being aggregated for per-sentence output
    Sentence currentSentence = null;
    final List<SentimentPhrase> sentencePhrases = new ArrayList<SentimentPhrase>();
    for (SentimentPhrase sentPhrase : sentimentPhrases) {
        Sentence sentence = sentPhrase.getSentence();
        if (log.isDebugEnabled()) {
            //debug sentiment info
            CharSequence phraseText = at.getText().subSequence(sentPhrase.getStartIndex(), sentPhrase.getEndIndex());
            log.debug("Write SentimentPhrase for {} (sentence: {})", phraseText, sentence == null ? "none" : sentence.getSpan().length() > 17 ? (sentence.getSpan().subSequence(0, 17) + "...") : sentence.getSpan());
            List<Sentiment> sentiments = sentPhrase.getSentiments();
            log.debug(" > {} Sentiments:", sentiments.size());
            for (int i = 0; i < sentiments.size(); i++) {
                log.debug(" {}. {}", i + 1, sentiments.get(i));
            }
        }
        if (writeSentimentPhrases) {
            //one TextAnnotation per sentiment phrase
            IRI enh = createTextEnhancement(ci, this);
            String phraseText = at.getSpan().substring(sentPhrase.getStartIndex(), sentPhrase.getEndIndex());
            metadata.add(new TripleImpl(enh, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(phraseText, lang)));
            //selection context: the containing sentence when known, otherwise
            //a context window computed from the full span
            if (sentPhrase.getSentence() == null) {
                metadata.add(new TripleImpl(enh, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(at.getSpan(), phraseText, sentPhrase.getStartIndex()), lang)));
            } else {
                metadata.add(new TripleImpl(enh, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(sentPhrase.getSentence().getSpan(), lang)));
            }
            metadata.add(new TripleImpl(enh, ENHANCER_START, lf.createTypedLiteral(sentPhrase.getStartIndex())));
            metadata.add(new TripleImpl(enh, ENHANCER_END, lf.createTypedLiteral(sentPhrase.getEndIndex())));
            //positive/negative components are optional; the combined
            //sentiment value is always written
            if (sentPhrase.getPositiveSentiment() != null) {
                metadata.add(new TripleImpl(enh, POSITIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(sentPhrase.getPositiveSentiment())));
            }
            if (sentPhrase.getNegativeSentiment() != null) {
                metadata.add(new TripleImpl(enh, NEGATIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(sentPhrase.getNegativeSentiment())));
            }
            metadata.add(new TripleImpl(enh, SENTIMENT_PROPERTY, lf.createTypedLiteral(sentPhrase.getSentiment())));
            //add the Sentiment type as well as the type of the SSO Ontology
            metadata.add(new TripleImpl(enh, DC_TYPE, SENTIMENT_TYPE));
            IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(SpanTypeEnum.Chunk);
            if (ssoType != null) {
                metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
            }
        }
        if (writeSentencesSentimet && sentence != null) {
            if (sentence.equals(currentSentence)) {
                sentencePhrases.add(sentPhrase);
            } else {
                //NOTE(review): on the first phrase currentSentence is still
                //null and sentencePhrases is empty, so this calls
                //writeSentiment(ci, null, empty) — presumably a no-op in
                //writeSentiment, but worth confirming
                writeSentiment(ci, currentSentence, sentencePhrases);
                //reset
                currentSentence = sentence;
                sentencePhrases.clear();
                sentencePhrases.add(sentPhrase);
            }
        }
    }
    //flush the phrases of the last sentence
    if (!sentencePhrases.isEmpty()) {
        writeSentiment(ci, currentSentence, sentencePhrases);
    }
    if (writeDocumentSentiment) {
        writeSentiment(ci, at, sentimentPhrases);
    }
}
Aggregations