use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class ContentItemResource method initOccurrences.
/**
 * Populates {@code extractionsByTypeMap} with the extraction summaries derived
 * from the metadata graph of {@code contentItem}.
 * <p>
 * The work is done in two passes:
 * <ol>
 * <li>every entity annotation is read once (entity reference, label and
 * confidence) and indexed by the text annotations it references through
 * {@code DC_RELATION};</li>
 * <li>for every text annotation and each of its {@code DC_TYPE} values an
 * {@link EntityExtractionSummary} is built. If the map for that type already
 * holds a matching summary only an additional {@link Mention} is added to the
 * existing one, otherwise the new summary is registered.</li>
 * </ol>
 * Text annotations without any {@code DC_TYPE} are registered under the
 * {@code null} type.
 */
private void initOccurrences() {
    Graph graph = contentItem.getMetadata();
    LiteralFactory lf = LiteralFactory.getInstance();
    Map<IRI, Collection<BlankNodeOrIRI>> suggestionMap = new HashMap<IRI, Collection<BlankNodeOrIRI>>();
    // 1) get Entity Annotations
    Map<BlankNodeOrIRI, Map<EAProps, Object>> entitySuggestionMap = new HashMap<BlankNodeOrIRI, Map<EAProps, Object>>();
    Iterator<Triple> entityAnnotations = graph.filter(null, RDF.type, ENHANCER_ENTITYANNOTATION);
    while (entityAnnotations.hasNext()) {
        BlankNodeOrIRI entityAnnotation = entityAnnotations.next().getSubject();
        // to avoid multiple lookups (e.g. if one entityAnnotation links to
        // several TextAnnotations) we cache the data in an intermediate Map
        Map<EAProps, Object> eaData = new EnumMap<EAProps, Object>(EAProps.class);
        eaData.put(EAProps.entity, getReference(graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE));
        eaData.put(EAProps.label, getString(graph, entityAnnotation, ENHANCER_ENTITY_LABEL));
        eaData.put(EAProps.confidence, EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf));
        entitySuggestionMap.put(entityAnnotation, eaData);
        Iterator<IRI> textAnnotations = getReferences(graph, entityAnnotation, DC_RELATION);
        while (textAnnotations.hasNext()) {
            IRI textAnnotation = textAnnotations.next();
            Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
            if (suggestions == null) {
                suggestions = new ArrayList<BlankNodeOrIRI>();
                suggestionMap.put(textAnnotation, suggestions);
            }
            suggestions.add(entityAnnotation);
        }
    }
    // 2) get the TextAnnotations
    Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION);
    while (textAnnotations.hasNext()) {
        BlankNodeOrIRI textAnnotation = textAnnotations.next().getSubject();
        // NOTE: text annotations that are themselves related to other
        // annotations are intentionally NOT skipped; they need to be
        // processed to show multiple mentions.
        // NOTE: text annotations without a selected text are no longer
        // ignored, so 'text' may be null from here on.
        String text = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT);
        Integer start = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_START, Integer.class, lf);
        Integer end = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_END, Integer.class, lf);
        Double confidence = EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
        Iterator<IRI> types = getReferences(graph, textAnnotation, DC_TYPE);
        if (!types.hasNext()) {
            // create an iterator over null in case no types are present
            types = Collections.singleton((IRI) null).iterator();
        }
        while (types.hasNext()) {
            IRI type = types.next();
            Map<EntityExtractionSummary, EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
            if (occurrenceMap == null) {
                occurrenceMap = new TreeMap<EntityExtractionSummary, EntityExtractionSummary>();
                extractionsByTypeMap.put(type, occurrenceMap);
            }
            // The label is resolved per type: in case of a language annotation
            // the detected language is used instead of the selected text. A
            // separate local is used so that this override does not leak into
            // the remaining types of the same text annotation.
            String label = text;
            if (DC_LINGUISTIC_SYSTEM.equals(type)) {
                label = EnhancementEngineHelper.getString(graph, textAnnotation, DC_LANGUAGE);
            }
            EntityExtractionSummary entity = new EntityExtractionSummary(label, type, start, end, confidence, defaultThumbnails);
            Collection<BlankNodeOrIRI> suggestions = suggestionMap.get(textAnnotation);
            if (suggestions != null) {
                for (BlankNodeOrIRI entityAnnotation : suggestions) {
                    Map<EAProps, Object> eaData = entitySuggestionMap.get(entityAnnotation);
                    entity.addSuggestion((IRI) eaData.get(EAProps.entity), (String) eaData.get(EAProps.label), (Double) eaData.get(EAProps.confidence), graph);
                }
            }
            EntityExtractionSummary existingSummary = occurrenceMap.get(entity);
            if (existingSummary == null) {
                // new extraction summary
                occurrenceMap.put(entity, entity);
            } else {
                // extraction summary with this text and suggestions already
                // present ... only add a mention to the existing
                existingSummary.addMention(new Mention(label, start, end, confidence));
            }
        }
    }
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class EnhancementEngineHelper method createEnhancement.
/**
 * Adds a new enhancement resource to the given metadata graph and returns
 * its identifier so that the calling engine can attach further properties.
 * <p>
 * The new resource is typed as an enhancement, linked to the content item it
 * was extracted from, and annotated with the default properties dc:created
 * (the current date) and dc:creator (the class name of the engine).
 * <p>
 * <i>NOTE:</i> This method was protected prior to <code>0.12.1</code> (see
 * <a href="https://issues.apache.org/jira/browse/STANBOL-1321">STANBOL-1321</a>)
 *
 * @param metadata the graph the triples describing the enhancement are added to
 * @param engine the engine performing the analysis; its class name is stored as creator
 * @param contentItemId the IRI of the content item the enhancement was extracted from
 *
 * @return the IRI of the new enhancement instance
 * @since 0.12.1
 */
public static IRI createEnhancement(Graph metadata, EnhancementEngine engine, IRI contentItemId) {
    final LiteralFactory literals = LiteralFactory.getInstance();
    final IRI enhancementId = new IRI("urn:enhancement-" + EnhancementEngineHelper.randomUUID());

    // rdf:type of the enhancement
    metadata.add(new TripleImpl(enhancementId, RDF_TYPE, ENHANCER_ENHANCEMENT));

    // link back to the content item the enhancement was extracted from
    metadata.add(new TripleImpl(enhancementId, ENHANCER_EXTRACTED_FROM, contentItemId));

    // creation date
    final Date createdAt = new Date();
    metadata.add(new TripleImpl(enhancementId, DC_CREATED, literals.createTypedLiteral(createdAt)));

    // The creator is currently the plain class name of the engine.
    // TODO: add some kind of versioning info for the extractor?
    // TODO: use a public dereferencing URI instead? that would allow for
    // explicit versioning too
    /* NOTE (Rupert Westenthaler 2010-05-26):
     * The idea is to use the ComponentContext in the activate() method of
     * an Enhancer to get the bundle name/version and use that as an URI
     * for the creator. This would require adding a getEnhancerID() method
     * to the enhancer interface to access this information.
     */
    final String creatorName = engine.getClass().getName();
    metadata.add(new TripleImpl(enhancementId, DC_CREATOR, literals.createTypedLiteral(creatorName)));

    return enhancementId;
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class EnhancementEngineHelper method createNewExtraction.
/**
 * Adds a new extraction resource to the metadata graph of the given content
 * item and returns its identifier so that the calling engine can attach
 * further properties.
 * <p>
 * The new resource is typed as an extraction, related to the content item,
 * and annotated with the default properties dc:created (the current date)
 * and dc:creator (the class name of the engine).
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 * @return the IRI of the new extraction instance
 * @deprecated will be removed with 1.0
 * @see EnhancementEngineHelper#createEntityEnhancement(ContentItem, EnhancementEngine)
 * @see EnhancementEngineHelper#createTextEnhancement(ContentItem, EnhancementEngine)
 */
@Deprecated
public static IRI createNewExtraction(ContentItem ci, EnhancementEngine engine) {
    final LiteralFactory literals = LiteralFactory.getInstance();
    final Graph graph = ci.getMetadata();
    final IRI extractionId = new IRI("urn:extraction-" + EnhancementEngineHelper.randomUUID());

    // rdf:type of the extraction
    graph.add(new TripleImpl(extractionId, RDF_TYPE, ENHANCER_EXTRACTION));

    // relate the extraction to the content item
    final IRI contentItemRef = new IRI(ci.getUri().getUnicodeString());
    graph.add(new TripleImpl(extractionId, ENHANCER_RELATED_CONTENT_ITEM, contentItemRef));

    // creation date
    final Date createdAt = new Date();
    graph.add(new TripleImpl(extractionId, DC_CREATED, literals.createTypedLiteral(createdAt)));

    // The creator is currently the plain class name of the engine.
    // TODO: add some kind of versioning info for the extractor?
    // TODO: use a public dereferencing URI instead? that would allow for
    // explicit versioning too
    final String creatorName = engine.getClass().getName();
    graph.add(new TripleImpl(extractionId, DC_CREATOR, literals.createTypedLiteral(creatorName)));

    return extractionId;
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class GraphMultiplexer method buildResource.
/**
 * Derives the {@link IRI} used to identify the given {@link OWLOntologyID}.
 * <p>
 * The meta graph is searched for an entry whose ontology IRI equals the one
 * of the public key. When the key carries a version IRI the entry must state
 * exactly that version IRI; when it does not, the entry must not state any
 * version IRI at all. The subject of the first entry satisfying this is
 * returned. If no entry qualifies, a fresh IRI is built from the encoded
 * public key.
 * <p>
 * The result does NOT necessarily correspond to the IRI that identifies the
 * stored graph. In order to obtain that, check the objects of any
 * MAPS_TO_GRAPH assertions.
 *
 * @param publicKey the ontology identifier to build the resource for; must
 *        be non-null and must have an ontology IRI
 * @return the IRI of the matching meta graph entry, or an IRI created from
 *         {@code OntologyUtils.encode(publicKey)} if there is none
 * @throws IllegalArgumentException if {@code publicKey} is {@code null} or
 *         anonymous (has no ontology IRI)
 */
protected IRI buildResource(final OWLOntologyID publicKey) {
    if (publicKey == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on a null public key!");
    }
    // The IRI is of the form ontologyIRI[:::versionIRI] (TODO use something less conventional?)
    // XXX should versionIRI also include the version IRI set by owners? Currently not
    // Remember not to sanitize logical identifiers.
    org.semanticweb.owlapi.model.IRI ontologyIri = publicKey.getOntologyIRI();
    org.semanticweb.owlapi.model.IRI versionIri = publicKey.getVersionIRI();
    if (ontologyIri == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on an anonymous public key!");
    }
    log.debug("Searching for a meta graph entry for public key:");
    log.debug(" -- {}", publicKey);

    LiteralFactory literals = LiteralFactory.getInstance();
    Literal ontologyLiteral = literals.createTypedLiteral(new IRI(ontologyIri.toString()));
    Literal versionLiteral = null;
    if (versionIri != null) {
        versionLiteral = literals.createTypedLiteral(new IRI(versionIri.toString()));
    }

    IRI found = null;
    Iterator<Triple> candidates = meta.filter(null, HAS_ONTOLOGY_IRI_URIREF, ontologyLiteral);
    // Stop at the first qualifying entry; the iterator is not consulted again afterwards.
    while (found == null && candidates.hasNext()) {
        RDFTerm subject = candidates.next().getSubject();
        log.debug(" -- Ontology IRI match found. Scanning");
        log.debug(" -- RDFTerm : {}", subject);
        if (!(subject instanceof IRI)) {
            log.debug(" ---- (uncomparable: skipping...)");
            continue;
        }
        IRI candidate = (IRI) subject;
        if (versionLiteral == null) {
            // Must find unversioned resource
            if (meta.filter(candidate, HAS_VERSION_IRI_URIREF, null).hasNext()) {
                log.debug(" ---- Unexpected version IRI found. Skipping.");
            } else {
                log.debug(" ---- Unversioned match!");
                found = candidate;
            }
        } else if (meta.contains(new TripleImpl(candidate, HAS_VERSION_IRI_URIREF, versionLiteral))) {
            // Must find matching versionIRI
            log.debug(" ---- Version IRI match!");
            found = candidate;
        } else {
            // There could be another with the right versionIRI.
            log.debug(" ---- Expected version IRI match not found.");
        }
    }
    log.debug("Matching IRI in graph : {}", found);
    return found != null ? found : new IRI(OntologyUtils.encode(publicKey));
}
use of org.apache.clerezza.rdf.core.LiteralFactory in project stanbol by apache.
the class MetaGraphManager method buildResource.
/**
 * Derives the {@link IRI} used to identify the given {@link OWLOntologyID}.
 * <p>
 * The graph is searched for an entry whose ontology IRI equals the one of
 * the public key. When the key carries a version IRI the entry must state
 * exactly that version IRI; when it does not, the entry must not state any
 * version IRI at all. The subject of the first entry satisfying this is
 * returned. If no entry qualifies, a fresh IRI is built from the encoded
 * public key.
 *
 * @param publicKey the ontology identifier to build the resource for; must
 *        be non-null and must have an ontology IRI
 * @return the IRI of the matching graph entry, or an IRI created from
 *         {@code OntologyUtils.encode(publicKey)} if there is none
 * @throws IllegalArgumentException if {@code publicKey} is {@code null} or
 *         anonymous (has no ontology IRI)
 */
protected IRI buildResource(final OWLOntologyID publicKey) {
    if (publicKey == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on a null public key!");
    }
    // The IRI is of the form ontologyIRI[:::versionIRI] (TODO use something less conventional?)
    // XXX should versionIRI also include the version IRI set by owners? Currently not
    // Remember not to sanitize logical identifiers.
    org.semanticweb.owlapi.model.IRI ontologyIri = publicKey.getOntologyIRI();
    org.semanticweb.owlapi.model.IRI versionIri = publicKey.getVersionIRI();
    if (ontologyIri == null) {
        throw new IllegalArgumentException("Cannot build a IRI resource on an anonymous public key!");
    }
    log.debug("Searching for a meta graph entry for public key:");
    log.debug(" -- {}", publicKey);

    LiteralFactory literals = LiteralFactory.getInstance();
    Literal ontologyLiteral = literals.createTypedLiteral(new IRI(ontologyIri.toString()));
    Literal versionLiteral = null;
    if (versionIri != null) {
        versionLiteral = literals.createTypedLiteral(new IRI(versionIri.toString()));
    }

    IRI found = null;
    Iterator<Triple> candidates = graph.filter(null, HAS_ONTOLOGY_IRI_URIREF, ontologyLiteral);
    // Stop at the first qualifying entry; the iterator is not consulted again afterwards.
    while (found == null && candidates.hasNext()) {
        RDFTerm subject = candidates.next().getSubject();
        log.debug(" -- Ontology IRI match found. Scanning");
        log.debug(" -- RDFTerm : {}", subject);
        if (!(subject instanceof IRI)) {
            log.debug(" ---- (uncomparable: skipping...)");
            continue;
        }
        IRI candidate = (IRI) subject;
        if (versionLiteral == null) {
            // Must find unversioned resource
            if (graph.filter(candidate, HAS_VERSION_IRI_URIREF, null).hasNext()) {
                log.debug(" ---- Unexpected version IRI found. Skipping.");
            } else {
                log.debug(" ---- Unversioned match!");
                found = candidate;
            }
        } else if (graph.contains(new TripleImpl(candidate, HAS_VERSION_IRI_URIREF, versionLiteral))) {
            // Must find matching versionIRI
            log.debug(" ---- Version IRI match!");
            found = candidate;
        } else {
            // There could be another with the right versionIRI.
            log.debug(" ---- Expected version IRI match not found.");
        }
    }
    log.debug("Matching IRI in graph : {}", found);
    return found != null ? found : new IRI(OntologyUtils.encode(publicKey));
}
Aggregations