Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project Stanbol (Apache):
the class ExecutionPlanHelper, method createExecutionPlan.
/**
 * Adds the node representing an ex:ExecutionPlan for the given chain to the
 * parsed graph and returns it. Two triples are written: the rdf:type and the
 * chain name.
 * @param graph the graph the execution plan is written to
 * @param chainName the name of the enhancement chain
 * @param enhProps enhancement properties configured for the chain, or
 * <code>null</code> if there are none
 * @return the ex:ExecutionPlan node added to the graph
 * @throws IllegalArgumentException if <code>graph</code> is <code>null</code>
 * or <code>chainName</code> is <code>null</code> or empty
 * @since 0.12.1
 */
public static BlankNodeOrIRI createExecutionPlan(Graph graph, String chainName, Map<String, Object> enhProps) {
    if (graph == null) {
        throw new IllegalArgumentException("The parsed Graph MUST NOT be NULL!");
    }
    if (chainName == null || chainName.isEmpty()) {
        throw new IllegalArgumentException("The parsed Chain name MUST NOT be NULL nor empty!");
    }
    BlankNodeOrIRI planNode = new BlankNode();
    graph.add(new TripleImpl(planNode, RDF_TYPE, EXECUTION_PLAN));
    graph.add(new TripleImpl(planNode, CHAIN, new PlainLiteralImpl(chainName)));
    //chain scoped enhancement properties (parsed with the null engine key)
    writeEnhancementProperties(graph, planNode, null, enhProps);
    return planNode;
}
Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project Stanbol (Apache):
the class ExecutionPlanHelper, method getExecutionNodes.
/**
 * Collects all ExecutionNodes that are part of the parsed execution plan by
 * following the ep:hasExecutionNode properties of the plan node.
 * @param ep the graph holding the execution plan
 * @param executionPlanNode the node representing the execution plan
 * @return the set of nodes representing the execution nodes of the plan
 * @throws IllegalArgumentException if any parameter is <code>null</code>
 * @throws IllegalStateException if a hasExecutionNode value is a Literal
 */
public static Set<BlankNodeOrIRI> getExecutionNodes(Graph ep, final BlankNodeOrIRI executionPlanNode) {
    if (ep == null) {
        throw new IllegalArgumentException("The parsed graph with the Executionplan MUST NOT be NULL!");
    }
    if (executionPlanNode == null) {
        throw new IllegalArgumentException("The parsed execution plan node MUST NOT be NULL!");
    }
    Set<BlankNodeOrIRI> nodes = new HashSet<BlankNodeOrIRI>();
    for (Iterator<Triple> triples = ep.filter(executionPlanNode, HAS_EXECUTION_NODE, null); triples.hasNext(); ) {
        Triple triple = triples.next();
        RDFTerm value = triple.getObject();
        if (!(value instanceof BlankNodeOrIRI)) {
            //Literal values are illegal for this property
            throw new IllegalStateException("The value of the " + HAS_EXECUTION_NODE + " property MUST BE a BlankNodeOrIRI (triple: " + triple + ")!");
        }
        nodes.add((BlankNodeOrIRI) value);
    }
    return nodes;
}
Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project Stanbol (Apache):
the class EnhancementPropertyTest, method testExecutionPropertySupportOfExecutionPlanHelper.
/**
 * Validates that the {@link ExecutionPlanHelper} writes Enhancement
 * Properties into the Execution plans it generates. <p>
 * NOTE: If this fails also tests testing chain level properties are expected
 * to fail. This only present to validate that the ExecutionPlan is correctly
 * generated by the {@link ExecutionPlanHelper}
 * @throws ChainException
 */
@Test
public void testExecutionPropertySupportOfExecutionPlanHelper() throws ChainException {
    //the values to be set
    Collection<String> dereferenceLanguages = Arrays.asList("en", "de");
    Integer maxSuggestions = Integer.valueOf(5);
    IRI maxSuggestionsProperty = new IRI(NamespaceEnum.ehp + PROPERTY_MAX_SUGGESTIONS);
    IRI dereferenceLanguagesProperty = new IRI(NamespaceEnum.ehp + PROPERTY_DEREFERENCE_LANGUAGES);
    //build the enhancement properties for the chain: the null key holds the
    //chain scoped properties, engine names hold the engine scoped ones
    Map<String, Map<String, Object>> enhancementProperties = new HashMap<String, Map<String, Object>>();
    Map<String, Object> chainScoped = new HashMap<String, Object>();
    chainScoped.put(PROPERTY_MAX_SUGGESTIONS, maxSuggestions);
    enhancementProperties.put(null, chainScoped);
    Map<String, Object> linkingScoped = new HashMap<String, Object>();
    linkingScoped.put(PROPERTY_DEREFERENCE_LANGUAGES, dereferenceLanguages);
    enhancementProperties.put(linking.getName(), linkingScoped);
    //create the ExecutionPlan
    ImmutableGraph ep = ExecutionPlanHelper.calculateExecutionPlan("test", engines, Collections.<String>emptySet(), Collections.<String>emptySet(), enhancementProperties);
    //first assert the chain scoped property is present on the plan node
    BlankNodeOrIRI epNode = ExecutionPlanHelper.getExecutionPlan(ep, "test");
    assertNotNull(epNode);
    Iterator<Triple> maxSuggestionTriples = ep.filter(epNode, maxSuggestionsProperty, null);
    assertTrue(maxSuggestionTriples.hasNext());
    RDFTerm maxSuggestionValue = maxSuggestionTriples.next().getObject();
    //exactly one value is expected
    assertFalse(maxSuggestionTriples.hasNext());
    assertTrue(maxSuggestionValue instanceof Literal);
    assertEquals(maxSuggestions.toString(), ((Literal) maxSuggestionValue).getLexicalForm());
    assertEquals(maxSuggestions, LiteralFactory.getInstance().createObject(Integer.class, (Literal) maxSuggestionValue));
    //second assert the engine scoped property on the linking engine node
    boolean linkingNodeFound = false;
    for (BlankNodeOrIRI executionNode : ExecutionPlanHelper.getExecutionNodes(ep, epNode)) {
        String engineName = ExecutionPlanHelper.getEngine(ep, executionNode);
        if (linking.getName().equals(engineName)) {
            linkingNodeFound = true;
            Iterator<Triple> derefLangTriples = ep.filter(executionNode, dereferenceLanguagesProperty, null);
            assertTrue(derefLangTriples.hasNext());
            int valueCount = 0;
            while (derefLangTriples.hasNext()) {
                RDFTerm value = derefLangTriples.next().getObject();
                assertTrue(value instanceof Literal);
                assertTrue(dereferenceLanguages.contains(((Literal) value).getLexicalForm()));
                valueCount++;
            }
            assertEquals(dereferenceLanguages.size(), valueCount);
        }
    }
    assertTrue("ExecutionNode for the Linking Engine was not present!", linkingNodeFound);
    //NOTE: this does not validate that there are no other (not expected)
    // enhancement properties in the executionPlan
}
Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project Stanbol (Apache):
the class EntityCoMentionEngine, method writeComentions.
/**
 * Writes fise:TextAnnotations for the parsed co-mentions to the metadata of
 * the ContentItem. For every {@link Occurrence} of a co-mention this either
 * reuses an existing fise:TextAnnotation with a matching start/end position
 * or creates a new one; the suggestions of the initial mention are linked to
 * the co-mention and confidence values of competing existing suggestions are
 * adjusted (once) by the confidenceAdjustmentFactor.
 * @param ci the ContentItem the co-mentions are written to
 * @param comentions the co-mentions to write
 * @param language the language of the analyzed text or <code>null</code>/empty
 * if unknown
 * @param textAnnotations the URIs of the fise:TextAnnotations present in the
 * metadata; newly created TextAnnotations are added to this set
 */
private void writeComentions(ContentItem ci, Collection<LinkedEntity> comentions, String language, Set<IRI> textAnnotations) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    //we MUST adjust the confidence level of existing annotations only once
    //so we need to keep track of those
    Set<BlankNodeOrIRI> adjustedSuggestions = new HashSet<BlankNodeOrIRI>();
    log.debug("Write Co-Mentions:");
    for (LinkedEntity comention : comentions) {
        log.debug(" > {}", comention);
        //URIs of TextAnnotations for the initial mention of this co-mention
        Collection<IRI> initialMentions = new ArrayList<IRI>(comention.getSuggestions().size());
        for (Suggestion suggestion : comention.getSuggestions()) {
            Entity entity = suggestion.getEntity();
            if (textAnnotations.contains(entity.getUri())) {
                //this is a textAnnotation
                initialMentions.add(entity.getUri());
            }
            //else TODO support also Entities!!
        }
        //create the TextAnnotations for the co-mention
        for (Occurrence occurrence : comention.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            //search for an existing text annotation covering this occurrence
            boolean ignore = false;
            IRI textAnnotation = null;
            //(1) TextAnnotations with the same start position
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer end = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_END, Integer.class, literalFactory);
                if (end != null && textAnnotations.contains(t.getSubject())) {
                    textAnnotation = (IRI) t.getSubject();
                    if (end > occurrence.getEnd()) {
                        //there is an other TextAnnotation selecting a bigger Span
                        //so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            //(2) TextAnnotations with the same end position
            it = metadata.filter(null, ENHANCER_END, endLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer start = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_START, Integer.class, literalFactory);
                if (start != null && textAnnotations.contains(t.getSubject())) {
                    textAnnotation = (IRI) t.getSubject();
                    if (start < occurrence.getStart()) {
                        //there is an other TextAnnotation selecting a bigger Span
                        //so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            if (!ignore) {
                //maximum confidence of suggestions of the initial mention
                Double maxConfidence = null;
                //maximum confidence of existing suggestions
                Double maxExistingConfidence = null;
                if (textAnnotation == null) {
                    //not found ... create a new TextAnnotation for the co-mention
                    textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    //add it to the set of TextAnnotations
                    textAnnotations.add(textAnnotation);
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                } else {
                    //if existing add this engine as contributor
                    metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
                }
                //now process initial mention(s) for the co-mention
                Set<IRI> dcTypes = new HashSet<IRI>();
                for (IRI initialMention : initialMentions) {
                    //get the dc:type(s) of the initial mentions
                    Iterator<IRI> dcTypesIt = getReferences(metadata, initialMention, DC_TYPE);
                    while (dcTypesIt.hasNext()) {
                        dcTypes.add(dcTypesIt.next());
                    }
                    //check confidence of the initial mention (fise:TextAnnotation)
                    Double mentionConfidence = EnhancementEngineHelper.get(metadata, initialMention, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                    if (mentionConfidence != null && (maxConfidence == null || maxConfidence.compareTo(mentionConfidence) <= 0)) {
                        maxConfidence = mentionConfidence;
                    }
                    //else nothing to do
                    //now we need to compare the suggestions of the initial
                    //mention(s) with the existing one.
                    //Get information about the suggestions of the initial mention
                    Map<RDFTerm, Double> initialSuggestions = new HashMap<RDFTerm, Double>();
                    Map<RDFTerm, RDFTerm> initialSuggestedEntities = new HashMap<RDFTerm, RDFTerm>();
                    for (Iterator<Triple> suggestions = metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext(); ) {
                        //BUGFIX: the original tested textAnnotations.contains(suggestions)
                        //- the Iterator itself, which is never contained - and only called
                        //next() inside that branch. Advance the iterator unconditionally
                        //and test the actual subject of the triple.
                        BlankNodeOrIRI suggestion = suggestions.next().getSubject();
                        if (!textAnnotations.contains(suggestion)) {
                            RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, suggestion, ENHANCER_ENTITY_REFERENCE);
                            if (suggestedEntity != null) {
                                //it has a suggestion
                                Double confidence = EnhancementEngineHelper.get(metadata, suggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                //BUGFIX: the original compared/assigned the confidence of the
                                //initial mention (typo'd 'confidnece') instead of the
                                //confidence of this suggestion
                                if (maxConfidence == null) {
                                    maxConfidence = confidence;
                                } else if (confidence != null && maxConfidence.compareTo(confidence) <= 0) {
                                    maxConfidence = confidence;
                                }
                                //else nothing to do
                                initialSuggestions.put(suggestion, confidence);
                                initialSuggestedEntities.put(suggestedEntity, suggestion);
                            }
                            //no suggestion (dc:relation to some other resource)
                        }
                        //else ignore dc:relation to other fise:TextAnnotations
                    }
                    //now we collect existing Suggestions for this TextAnnoation where we need
                    //to adjust the confidence (quite some things to check ....)
                    Map<BlankNodeOrIRI, Double> existingSuggestions = new HashMap<BlankNodeOrIRI, Double>();
                    if (maxConfidence != null && confidenceAdjustmentFactor < 1) {
                        //suggestions are defined by incoming dc:relation
                        for (Iterator<Triple> esIt = metadata.filter(null, DC_RELATION, textAnnotation); esIt.hasNext(); ) {
                            BlankNodeOrIRI existingSuggestion = esIt.next().getSubject();
                            //but not all of them are suggestions
                            if (!textAnnotations.contains(existingSuggestion)) {
                                //ignore fise:TextAnnotations
                                Double existingConfidence = EnhancementEngineHelper.get(metadata, existingSuggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                //ignore fise:TextAnnotations also suggested for the initial mention
                                if (!initialSuggestions.containsKey(existingSuggestion)) {
                                    RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, existingSuggestion, ENHANCER_ENTITY_REFERENCE);
                                    //suggestions for the initial mention
                                    if (!initialSuggestedEntities.containsKey(suggestedEntity)) {
                                        //finally make sure that we adjust confidences only once
                                        if (!adjustedSuggestions.contains(existingSuggestion)) {
                                            existingSuggestions.put(existingSuggestion, existingConfidence);
                                        }
                                        //else confidence already adjusted
                                    } else {
                                        // different fise:EntityAnnotation, but same reference Entity
                                        //we need to check confidences to decide what to do
                                        RDFTerm initialSuggestion = initialSuggestedEntities.get(suggestedEntity);
                                        Double initialConfidence = initialSuggestions.get(initialSuggestion);
                                        if (initialConfidence == null || (existingConfidence != null && existingConfidence.compareTo(initialConfidence) >= 0)) {
                                            //existing confidence >= initial .. keep existing
                                            initialSuggestions.remove(initialSuggestion);
                                            //BUGFIX: guard against a NULL existingConfidence to
                                            //avoid a NPE (the parallel branch below already does)
                                            if (maxExistingConfidence == null) {
                                                maxExistingConfidence = existingConfidence;
                                            } else if (existingConfidence != null && maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                                maxExistingConfidence = existingConfidence;
                                            }
                                        } else {
                                            //adjust this one (if not yet adjusted)
                                            if (!adjustedSuggestions.contains(existingSuggestion)) {
                                                existingSuggestions.put(existingSuggestion, existingConfidence);
                                            }
                                        }
                                    }
                                } else {
                                    //a initial mention already present
                                    //no need to process initial mention
                                    initialSuggestions.remove(existingSuggestion);
                                    if (maxExistingConfidence == null) {
                                        maxExistingConfidence = existingConfidence;
                                    } else if (existingConfidence != null && maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                        maxExistingConfidence = existingConfidence;
                                    }
                                    //else maxExistingConfidence == null (undefined)
                                }
                            }
                            //else ignore dc:relations to other fise:TextAnnotations
                        }
                        for (Entry<BlankNodeOrIRI, Double> entry : existingSuggestions.entrySet()) {
                            if (entry.getValue() != null) {
                                double adjustedConfidence = entry.getValue() * confidenceAdjustmentFactor;
                                if (maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence) {
                                    maxExistingConfidence = adjustedConfidence;
                                }
                                EnhancementEngineHelper.set(metadata, entry.getKey(), ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
                                //mark as adjusted
                                adjustedSuggestions.add(entry.getKey());
                            }
                        }
                    }
                    //add the suggestions of the initial mention to this one
                    for (RDFTerm suggestion : initialSuggestions.keySet()) {
                        metadata.add(new TripleImpl((BlankNodeOrIRI) suggestion, DC_RELATION, textAnnotation));
                    }
                    //finally link the co-mentation with the initial one
                    metadata.add(new TripleImpl(textAnnotation, DC_RELATION, initialMention));
                }
                // Adapt the dc:type values of the fise:TextAnnotation
                // - if Suggestions added by this engine do have the max confidence
                //   use the dc:type values of the initial mention
                // - if the original suggestions do have a higher confidence keep the
                //   existing
                // - in case both do have the same confidence we add all dc:types
                //BUGFIX: the original used '>= 0' / '>= 1' which, for equal
                //confidences, removed ALL existing dc:types and added none -
                //contradicting the documented intent above. With '> 0' / '>= 0'
                //the equal case keeps existing types AND adds the co-mention ones.
                boolean removeExistingDcTypes = maxConfidence != null && (maxExistingConfidence == null || maxConfidence.compareTo(maxExistingConfidence) > 0);
                boolean addCoMentionDcTypes = maxExistingConfidence == null || (maxConfidence != null && maxConfidence.compareTo(maxExistingConfidence) >= 0);
                Iterator<IRI> existingDcTypesIt = getReferences(metadata, textAnnotation, DC_TYPE);
                while (existingDcTypesIt.hasNext()) {
                    //keep a dc:type only if it is shared with the co-mention (and
                    //co-mention types are added) or removal is not requested
                    if ((!dcTypes.remove(existingDcTypesIt.next()) || !addCoMentionDcTypes) && removeExistingDcTypes) {
                        //remove the dcType
                        existingDcTypesIt.remove();
                    }
                }
                if (addCoMentionDcTypes) {
                    for (IRI dcType : dcTypes) {
                        //add missing
                        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
                    }
                }
                //TODO: support also Entities
                if (maxConfidence != null) {
                    //set the confidence value (if known)
                    EnhancementEngineHelper.set(metadata, textAnnotation, ENHANCER_CONFIDENCE, maxConfidence, literalFactory);
                }
            }
            //else ignore this occurence
        }
    }
}
Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project Stanbol (Apache):
the class EnhancementRDFUtils, method writeEntityAnnotation.
/**
 * Writes a fise:EntityAnnotation for the parsed suggestion: picks the best
 * label for the requested language, relates the annotation to the parsed
 * enhancements and adds entity reference, label, confidence, types and site.
 * @param engine the engine creating the annotation
 * @param literalFactory
 *            the LiteralFactory to use
 * @param graph
 *            the Graph to use
 * @param contentItemId
 *            the contentItemId the enhancement is extracted from
 * @param relatedEnhancements
 *            enhancements this textAnnotation is related to
 * @param suggestion
 *            the entity suggestion
 * @param nameField the field used to extract the name
 * @param lang the preferred language to include or <code>null</code> if none
 * @return the created fise:EntityAnnotation node
 */
public static IRI writeEntityAnnotation(EnhancementEngine engine, LiteralFactory literalFactory, Graph graph, IRI contentItemId, Collection<BlankNodeOrIRI> relatedEnhancements, Suggestion suggestion, String nameField, String lang) {
    Representation rep = suggestion.getEntity().getRepresentation();
    // 1. extract the "best label"
    //Start with the matched one
    Text label = suggestion.getMatchedLabel();
    //if the matched label is not in the requested language
    //BUGFIX: the original called label.getLanguage().startsWith(lang) without
    //checking lang for null, causing a NPE for lang == null when the matched
    //label has a language set
    boolean langMatch = isLangMatch(label, lang);
    //search if a better label is available for this Entity
    if (!langMatch) {
        Iterator<Text> labels = rep.getText(nameField);
        while (labels.hasNext() && !langMatch) {
            Text actLabel = labels.next();
            langMatch = isLangMatch(actLabel, lang);
            if (langMatch) {
                //if the language matches ->
                //override the matched label
                label = actLabel;
            }
        }
    }
    //else the matched label will be the best to use
    Literal literal;
    if (label.getLanguage() == null) {
        literal = new PlainLiteralImpl(label.getText());
    } else {
        literal = new PlainLiteralImpl(label.getText(), new Language(label.getLanguage()));
    }
    // Now create the entityAnnotation
    IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(graph, engine, contentItemId);
    // first relate this entity annotation to the text annotation(s)
    for (BlankNodeOrIRI enhancement : relatedEnhancements) {
        graph.add(new TripleImpl(entityAnnotation, DC_RELATION, enhancement));
    }
    IRI entityUri = new IRI(rep.getId());
    // add the link to the referred entity
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entityUri));
    // add the label parsed above
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, literal));
    if (suggestion.getScore() != null) {
        graph.add(new TripleImpl(entityAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
    }
    Iterator<Reference> types = rep.getReferences(RDF_TYPE.getUnicodeString());
    while (types.hasNext()) {
        graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(types.next().getReference())));
    }
    //add the name of the ReferencedSite that manages the Entity
    if (suggestion.getEntity().getSite() != null) {
        graph.add(new TripleImpl(entityAnnotation, new IRI(RdfResourceEnum.site.getUri()), new PlainLiteralImpl(suggestion.getEntity().getSite())));
    }
    return entityAnnotation;
}

/**
 * Null-safe check if the language of the parsed label matches the requested
 * language: a <code>null</code> requested language matches only labels
 * without a language; otherwise the label language must start with the
 * requested one (so "en-GB" matches a request for "en").
 * @param label the label to check
 * @param lang the requested language or <code>null</code>
 * @return if the label language matches the requested one
 */
private static boolean isLangMatch(Text label, String lang) {
    if (lang == null) {
        return label.getLanguage() == null;
    }
    return label.getLanguage() != null && label.getLanguage().startsWith(lang);
}
Aggregations