use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class IndexedGraphTest method filterIteratorRemove.
@Test
public void filterIteratorRemove() {
    Graph graph = new IndexedGraph();
    // populate the graph with all five test triples
    graph.add(triple1);
    graph.add(triple2);
    graph.add(triple3);
    graph.add(triple4);
    graph.add(triple5);
    // drain the filter iterator, removing every triple with subject uriRef1
    for (Iterator<Triple> matches = graph.filter(uriRef1, null, null); matches.hasNext(); ) {
        matches.next();
        matches.remove();
    }
    // two of the five triples matched the filter, so three must remain
    Assert.assertEquals(3, graph.size());
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class DisambiguatorEngine method removeOldConfidenceFromGraph.
/*
* Finds values that lie in the intersection of both sets of disambiguations (the one initially suggested
* and the one from dbpedia). Updates the confidence values of those and sets the confidence values of all
* others to 0 in the gainConfidence list.
*/
// NOTE (rwesten): the intersection is calculated as part of the disambiguateSuggestions(..)
// method. Results are stored in the Suggestions (member of SavedEntity) and
// then written back to the EnhancementStructure in a separate step
// protected List<Triple> intersection(List<Suggestion> matches,
// List<IRI> subsumed,
// Graph graph,
// List<Triple> gainConfidence,
// String contentLangauge) {
//
// for (int i = 0; i < subsumed.size(); i++) {
// boolean matchFound = false;
// IRI uri = subsumed.get(i);
//
// IRI uri1 = EnhancementEngineHelper.getReference(graph, uri, new IRI(NamespaceEnum.fise
// + "entity-reference"));
//
// for (int j = 0; j < matches.size(); j++) {
// Suggestion suggestion = matches.get(j);
// String suggestName = suggestion.getURI();
//
// if (suggestName != null && uri1 != null
// && suggestName.compareToIgnoreCase(uri1.getUnicodeString()) == 0) {
// Triple confidenceTriple = new TripleImpl(uri, ENHANCER_CONFIDENCE, LiteralFactory
// .getInstance().createTypedLiteral(suggestion.getScore()));
// Triple contributorTriple = new TripleImpl((IRI) confidenceTriple.getSubject(),
// new IRI(NamespaceEnum.dc + "contributor"), LiteralFactory.getInstance()
// .createTypedLiteral(this.getClass().getName()));
// gainConfidence.add(confidenceTriple);
// gainConfidence.add(contributorTriple);
// matchFound = true;
// }
// }
//
// if (!matchFound) {
// Triple confidenceTriple = new TripleImpl(uri, ENHANCER_CONFIDENCE, LiteralFactory
// .getInstance().createTypedLiteral(0.0));
// Triple contributorTriple = new TripleImpl((IRI) confidenceTriple.getSubject(), new IRI(
// NamespaceEnum.dc + "contributor"), LiteralFactory.getInstance().createTypedLiteral(
// this.getClass().getName()));
// gainConfidence.add(confidenceTriple);
// gainConfidence.add(contributorTriple);
// }
// }
//
// return gainConfidence;
// }
/* Removes every triple collected in the loseConfidence list from the graph */
protected void removeOldConfidenceFromGraph(Graph graph, List<Triple> loseConfidence) {
    // drop each obsolete confidence statement from the enhancement graph
    for (Triple obsolete : loseConfidence) {
        graph.remove(obsolete);
    }
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class DisambiguatorEngine method cloneTextAnnotation.
/**
 * Creates a 'clone' of a fise:EntityAnnotation such that the original no longer has a
 * dc:relation to the parsed fise:TextAnnotation and the created clone has ONLY a dc:relation
 * to the parsed fise:TextAnnotation.
 * <p>
 * This is required by disambiguation because other engines typically create a single
 * fise:EntityAnnotation instance when several fise:TextAnnotations share the same
 * fise:selected-text value. So for a text that mentions the same Entity (e.g. "Paris")
 * multiple times there will be several fise:TextAnnotations selecting the different mentions,
 * but only a single set of suggestions - fise:EntityAnnotations (e.g. "Paris, France" and
 * "Paris, Texas"). Now lets assume a text like
 *
 * <pre>
 * Paris is the capital of France and it is worth a visit for sure. But
 * one can also visit Paris without leaving the United States as there
 * is also a city with the same name in Texas.
 * </pre>
 *
 * Entity Disambiguation needs to be able to assign different fise:confidence values to the
 * first and second mention of Paris, and that is only possible if the fise:TextAnnotations of
 * those mentions do NOT refer to the same set of fise:EntityAnnotations.
 * <p>
 * This method accomplishes exactly that as it
 * <ul>
 * <li>creates a clone of a fise:EntityAnnotation
 * <li>removes the dc:relation link to the 2nd mention of Paris from the original
 * <li>adds a dc:relation to the 2nd mention ONLY to the clone
 * </ul>
 * So in the end there are two fise:EntityAnnotations
 * <ul>
 * <li>the original fise:EntityAnnotation with dc:relations to all fise:TextAnnotations other
 * than the 2nd mention (the one this method was called for)
 * <li>the cloned fise:EntityAnnotation with a dc:relation to the 2nd mention.
 * </ul>
 *
 * @param graph the enhancement graph to read from and write to
 * @param entityAnnotation the fise:EntityAnnotation to clone
 * @param textAnnotation the fise:TextAnnotation whose dc:relation moves to the clone
 * @return the URI of the created clone
 */
public static IRI cloneTextAnnotation(Graph graph, IRI entityAnnotation, IRI textAnnotation) {
    IRI copy = new IRI("urn:enhancement-" + EnhancementEngineHelper.randomUUID());
    // Triples can not be added to the graph while iterating over it, so the
    // statements of the clone are collected first and added afterwards.
    List<Triple> cloneTriples = new ArrayList<Triple>(32);
    Iterator<Triple> triples = graph.filter(entityAnnotation, null, null);
    while (triples.hasNext()) {
        Triple triple = triples.next();
        boolean isDcRelation = DC_RELATION.equals(triple.getPredicate());
        if (isDcRelation && !triple.getObject().equals(textAnnotation)) {
            // a dc:relation to some other fise:TextAnnotation: keep it on the
            // original and do NOT copy it to the clone
            continue;
        }
        if (isDcRelation) {
            // the dc:relation to the currently processed fise:TextAnnotation:
            // remove it from the original (it will only exist on the clone)
            triples.remove();
        }
        // copy the statement to the clone (with the clone as subject)
        cloneTriples.add(new TripleImpl(copy, triple.getPredicate(), triple.getObject()));
    }
    graph.addAll(cloneTriples);
    return copy;
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class EntityCoMentionEngine method writeComentions.
/**
 * Writes fise:TextAnnotations for the parsed co-mentions and links them (via
 * dc:relation) with the suggestions of their initial mention(s). If a
 * co-mention overlaps an already existing fise:TextAnnotation the existing one
 * is reused. Confidence values of pre-existing suggestions that are superseded
 * by suggestions of the initial mention are multiplied by the
 * confidenceAdjustmentFactor; each suggestion is adjusted at most once.
 *
 * @param ci the processed ContentItem (provides the metadata graph)
 * @param comentions the detected co-mentions to write
 * @param language the language of the analyzed text (may be null or empty)
 * @param textAnnotations the URIs of all fise:TextAnnotations of the content
 *            item; newly created TextAnnotations are added to this set
 */
private void writeComentions(ContentItem ci, Collection<LinkedEntity> comentions, String language, Set<IRI> textAnnotations) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    // we MUST adjust the confidence level of existing annotations only once
    // so we need to keep track of those
    Set<BlankNodeOrIRI> adjustedSuggestions = new HashSet<BlankNodeOrIRI>();
    log.debug("Write Co-Mentions:");
    for (LinkedEntity comention : comentions) {
        log.debug(" > {}", comention);
        // URIs of TextAnnotations for the initial mention of this co-mention
        Collection<IRI> initialMentions = new ArrayList<IRI>(comention.getSuggestions().size());
        for (Suggestion suggestion : comention.getSuggestions()) {
            Entity entity = suggestion.getEntity();
            if (textAnnotations.contains(entity.getUri())) {
                // this is a textAnnotation
                initialMentions.add(entity.getUri());
            }
            // else TODO support also Entities!!
        }
        // create the TextAnnotations for the co-mention
        for (Occurrence occurrence : comention.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            // search for an existing text annotation covering this occurrence
            boolean ignore = false;
            IRI textAnnotation = null;
            // (1) TextAnnotations with the same start position
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer end = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_END, Integer.class, literalFactory);
                if (end != null && textAnnotations.contains(t.getSubject())) {
                    textAnnotation = (IRI) t.getSubject();
                    if (end > occurrence.getEnd()) {
                        // there is an other TextAnnotation selecting a bigger Span
                        // so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            // (2) TextAnnotations with the same end position
            it = metadata.filter(null, ENHANCER_END, endLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer start = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_START, Integer.class, literalFactory);
                if (start != null && textAnnotations.contains(t.getSubject())) {
                    textAnnotation = (IRI) t.getSubject();
                    if (start < occurrence.getStart()) {
                        // there is an other TextAnnotation selecting a bigger Span
                        // so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            if (!ignore) {
                // maximum confidence of suggestions of the initial mention
                Double maxConfidence = null;
                // maximum confidence of existing suggestions
                Double maxExistingConfidence = null;
                if (textAnnotation == null) {
                    // not found ... create a new TextAnnotation for the co-mention
                    textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    // add it to the set of TextAnnotations
                    textAnnotations.add(textAnnotation);
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                } else {
                    // if existing add this engine as contributor
                    metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
                }
                // now process initial mention(s) for the co-mention
                Set<IRI> dcTypes = new HashSet<IRI>();
                for (IRI initialMention : initialMentions) {
                    // get the dc:type(s) of the initial mentions
                    Iterator<IRI> dcTypesIt = getReferences(metadata, initialMention, DC_TYPE);
                    while (dcTypesIt.hasNext()) {
                        dcTypes.add(dcTypesIt.next());
                    }
                    // check confidence of the initial mention (fise:TextAnnotation)
                    Double mentionConfidence = EnhancementEngineHelper.get(metadata, initialMention, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                    if (mentionConfidence != null && (maxConfidence == null || maxConfidence.compareTo(mentionConfidence) <= 0)) {
                        maxConfidence = mentionConfidence;
                    }
                    // compare the suggestions of the initial mention(s) with the
                    // existing ones: collect information about the initial suggestions
                    Map<RDFTerm, Double> initialSuggestions = new HashMap<RDFTerm, Double>();
                    Map<RDFTerm, RDFTerm> initialSuggestedEntities = new HashMap<RDFTerm, RDFTerm>();
                    for (Iterator<Triple> suggestions = metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext(); ) {
                        // BUGFIX: the original tested textAnnotations.contains(suggestions)
                        // - the Iterator instance itself - which is always false. The
                        // intended check (see the trailing comment below) is on the
                        // subject of the dc:relation triple.
                        BlankNodeOrIRI suggestion = suggestions.next().getSubject();
                        if (!textAnnotations.contains(suggestion)) {
                            RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, suggestion, ENHANCER_ENTITY_REFERENCE);
                            if (suggestedEntity != null) {
                                // it has a suggestion
                                Double confidence = EnhancementEngineHelper.get(metadata, suggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                // BUGFIX: the original compared against the confidence of
                                // the initial mention ('confidnece') instead of this
                                // suggestion's own confidence value
                                if (maxConfidence == null) {
                                    maxConfidence = confidence;
                                } else if (confidence != null && maxConfidence.compareTo(confidence) <= 0) {
                                    maxConfidence = confidence;
                                }
                                // else nothing to do
                                initialSuggestions.put(suggestion, confidence);
                                initialSuggestedEntities.put(suggestedEntity, suggestion);
                            }
                            // no suggestion (dc:relation to some other resource)
                        }
                        // else ignore dc:relations to other fise:TextAnnotations
                    }
                    // now collect existing Suggestions for this TextAnnotation where we
                    // need to adjust the confidence (quite some things to check ....)
                    Map<BlankNodeOrIRI, Double> existingSuggestions = new HashMap<BlankNodeOrIRI, Double>();
                    if (maxConfidence != null && confidenceAdjustmentFactor < 1) {
                        // suggestions are defined by incoming dc:relation
                        for (Iterator<Triple> esIt = metadata.filter(null, DC_RELATION, textAnnotation); esIt.hasNext(); ) {
                            BlankNodeOrIRI existingSuggestion = esIt.next().getSubject();
                            // but not all of them are suggestions
                            if (!textAnnotations.contains(existingSuggestion)) {
                                // ignore fise:TextAnnotations
                                Double existingConfidence = EnhancementEngineHelper.get(metadata, existingSuggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                // ignore suggestions also suggested for the initial mention
                                if (!initialSuggestions.containsKey(existingSuggestion)) {
                                    RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, existingSuggestion, ENHANCER_ENTITY_REFERENCE);
                                    // suggestions for the initial mention
                                    if (!initialSuggestedEntities.containsKey(suggestedEntity)) {
                                        // finally make sure that we adjust confidences only once
                                        if (!adjustedSuggestions.contains(existingSuggestion)) {
                                            existingSuggestions.put(existingSuggestion, existingConfidence);
                                        }
                                        // else confidence already adjusted
                                    } else {
                                        // different fise:EntityAnnotation, but same referenced
                                        // Entity: we need to check confidences to decide what to do
                                        RDFTerm initialSuggestion = initialSuggestedEntities.get(suggestedEntity);
                                        Double initialConfidence = initialSuggestions.get(initialSuggestion);
                                        if (initialConfidence == null || (existingConfidence != null && existingConfidence.compareTo(initialConfidence) >= 0)) {
                                            // existing confidence >= initial .. keep existing
                                            initialSuggestions.remove(initialSuggestion);
                                            if (maxExistingConfidence == null) {
                                                maxExistingConfidence = existingConfidence;
                                            } else if (maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                                maxExistingConfidence = existingConfidence;
                                            }
                                        } else {
                                            // adjust this one (if not yet adjusted)
                                            if (!adjustedSuggestions.contains(existingSuggestion)) {
                                                existingSuggestions.put(existingSuggestion, existingConfidence);
                                            }
                                        }
                                    }
                                } else {
                                    // an initial suggestion is already present, so there is
                                    // no need to process it again
                                    initialSuggestions.remove(existingSuggestion);
                                    if (maxExistingConfidence == null) {
                                        maxExistingConfidence = existingConfidence;
                                    } else if (existingConfidence != null && maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                        maxExistingConfidence = existingConfidence;
                                    }
                                    // else maxExistingConfidence == null (undefined)
                                }
                            }
                            // else ignore dc:relations to other fise:TextAnnotations
                        }
                        for (Entry<BlankNodeOrIRI, Double> entry : existingSuggestions.entrySet()) {
                            if (entry.getValue() != null) {
                                double adjustedConfidence = entry.getValue() * confidenceAdjustmentFactor;
                                if (maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence) {
                                    maxExistingConfidence = adjustedConfidence;
                                }
                                EnhancementEngineHelper.set(metadata, entry.getKey(), ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
                                // mark as adjusted
                                adjustedSuggestions.add(entry.getKey());
                            }
                        }
                    }
                    // add the suggestions of the initial mention to this one
                    for (RDFTerm suggestion : initialSuggestions.keySet()) {
                        metadata.add(new TripleImpl((BlankNodeOrIRI) suggestion, DC_RELATION, textAnnotation));
                    }
                    // finally link the co-mention with the initial one
                    metadata.add(new TripleImpl(textAnnotation, DC_RELATION, initialMention));
                }
                // Adapt the dc:type values of the fise:TextAnnotation
                // - if Suggestions added by this engine do have the max confidence
                //   use the dc:type values of the initial mention
                // - if the original suggestions do have a higher confidence keep the existing
                // - in case both do have the same confidence we add all dc:types
                boolean removeExistingDcTypes = maxConfidence != null && (maxExistingConfidence == null || maxConfidence.compareTo(maxExistingConfidence) >= 0);
                // '> 0' (strictly greater): the original used '>= 1' which relies on
                // Double.compareTo(..) returning exactly +1; the contract only
                // guarantees a positive value
                boolean addCoMentionDcTypes = maxExistingConfidence == null || (maxConfidence != null && maxConfidence.compareTo(maxExistingConfidence) > 0);
                Iterator<IRI> existingDcTypesIt = getReferences(metadata, textAnnotation, DC_TYPE);
                while (existingDcTypesIt.hasNext()) {
                    if ((!dcTypes.remove(existingDcTypesIt.next()) || !addCoMentionDcTypes) && removeExistingDcTypes) {
                        // remove the dcType
                        existingDcTypesIt.remove();
                    }
                }
                if (addCoMentionDcTypes) {
                    for (IRI dcType : dcTypes) {
                        // add missing
                        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
                    }
                }
                // TODO: support also Entities
                if (maxConfidence != null) {
                    // set the confidence value (if known)
                    EnhancementEngineHelper.set(metadata, textAnnotation, ENHANCER_CONFIDENCE, maxConfidence, literalFactory);
                }
            }
            // else ignore this occurrence
        }
    }
}
use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.
the class LocationEnhancementEngine method computeEnhancements.
/**
 * Looks up all fise:TextAnnotations with dc:type dbpedia:Place in the metadata of the
 * parsed ContentItem, queries the geonames.org service for each distinct selected name
 * and writes fise:EntityAnnotations (including the geographical hierarchy of good
 * matches) back to the metadata graph.
 *
 * @param ci the content item to enhance
 * @throws EngineException if the geonames lookup fails
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
IRI contentItemId = ci.getUri();
Graph graph = ci.getMetadata();
LiteralFactory literalFactory = LiteralFactory.getInstance();
// get all the textAnnotations
/*
* this Map holds the name as key and all the text annotations of
* dc:type dbpedia:Place that select this name as value.
* This map is used to avoid multiple lookups for text annotations
* selecting the same name.
*/
Map<String, Collection<BlankNodeOrIRI>> name2placeEnhancementMap = new HashMap<String, Collection<BlankNodeOrIRI>>();
Iterator<Triple> iterator = graph.filter(null, DC_TYPE, DBPEDIA_PLACE);
while (iterator.hasNext()) {
// the enhancement annotating a place
BlankNodeOrIRI placeEnhancement = iterator.next().getSubject();
// this can still be a TextAnnotation or an EntityAnnotation,
// so we need to filter for TextAnnotations
Triple isTextAnnotation = new TripleImpl(placeEnhancement, RDF_TYPE, ENHANCER_TEXTANNOTATION);
if (graph.contains(isTextAnnotation)) {
// now get the selected name of the mention
String name = EnhancementEngineHelper.getString(graph, placeEnhancement, ENHANCER_SELECTED_TEXT);
if (name == null) {
log.warn("Unable to process TextAnnotation " + placeEnhancement + " because property" + ENHANCER_SELECTED_TEXT + " is not present");
} else {
// group all place TextAnnotations by their selected name
Collection<BlankNodeOrIRI> placeEnhancements = name2placeEnhancementMap.get(name);
if (placeEnhancements == null) {
placeEnhancements = new ArrayList<BlankNodeOrIRI>();
name2placeEnhancementMap.put(name, placeEnhancements);
}
placeEnhancements.add(placeEnhancement);
}
} else {
// TODO: if we also want to process EntityAnnotations with the dc:type dbpedia:Place
// then we need to parse the name based on the enhancer:entity-name property
}
}
// Now we do have all the names we need to lookup
Map<SearchRequestPropertyEnum, Collection<String>> requestParams = new EnumMap<SearchRequestPropertyEnum, Collection<String>>(SearchRequestPropertyEnum.class);
if (getMaxLocationEnhancements() != null) {
requestParams.put(SearchRequestPropertyEnum.maxRows, Collections.singleton(getMaxLocationEnhancements().toString()));
}
for (Map.Entry<String, Collection<BlankNodeOrIRI>> entry : name2placeEnhancementMap.entrySet()) {
List<Toponym> results;
try {
// one geonames request per distinct selected name
requestParams.put(SearchRequestPropertyEnum.name, Collections.singleton(entry.getKey()));
results = geonamesService.searchToponyms(requestParams);
} catch (Exception e) {
/*
* TODO: Review if it makes sense to catch here for each name, or
* to catch the whole loop.
* This depends on whether single requests can result in Exceptions
* (e.g. because of encoding problems) or if usually Exceptions
* are thrown because of general things like connection issues
* or service unavailability.
*/
throw new EngineException(this, ci, e);
}
if (results != null) {
// results are assumed ordered by score; the first one defines the maximum
Double maxScore = results.isEmpty() ? null : results.get(0).getScore();
for (Toponym result : results) {
log.debug("process result {} {}", result.getGeoNameId(), result.getName());
Double score = getToponymScore(result, maxScore);
log.debug(" > score {}", score);
if (score != null) {
if (score < minScore) {
// if the score is lower than the lower bound, then stop
break;
}
} else {
log.warn("NULL returned as Score for " + result.getGeoNameId() + " " + result.getName());
/*
* NOTE: If score is not present all suggestions are
* added as enhancements to the metadata of the content
* item.
*/
}
// write the enhancement!
BlankNodeOrIRI locationEnhancement = writeEntityEnhancement(contentItemId, graph, literalFactory, result, entry.getValue(), null, score);
log.debug(" > {} >= {}", score, minHierarchyScore);
if (score != null && score >= minHierarchyScore) {
log.debug(" > getHierarchy for {} {}", result.getGeoNameId(), result.getName());
// get the hierarchy
try {
Iterator<Toponym> hierarchy = getHierarchy(result).iterator();
for (int level = 0; hierarchy.hasNext(); level++) {
Toponym hierarchyEntry = hierarchy.next();
// maybe add a configuration
if (level == 0) {
// Mother earth -> ignore
continue;
}
// write it as dependent to the locationEnhancement
// NOTE(review): '!=' is only safe if getGeoNameId() returns a
// primitive int; if it returns Integer this compares identity,
// not value - verify against the geonames API
if (result.getGeoNameId() != hierarchyEntry.getGeoNameId()) {
// TODO: add additional checks based on possible
// configuration here!
log.debug(" - write hierarchy {} {}", hierarchyEntry.getGeoNameId(), hierarchyEntry.getName());
/*
* The hierarchy service does not provide a score, because it would be 1.0,
* so we need to set the score to this value.
* Currently it is set to the value of the suggested entry
*/
writeEntityEnhancement(contentItemId, graph, literalFactory, hierarchyEntry, null, Collections.singletonList(locationEnhancement), 1.0);
}
}
} catch (Exception e) {
log.warn("Unable to get Hierarchy for " + result.getGeoNameId() + " " + result.getName(), e);
}
}
}
}
}
}
Aggregations