use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
the class LocationEnhancementEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
IRI contentItemId = ci.getUri();
Graph graph = ci.getMetadata();
LiteralFactory literalFactory = LiteralFactory.getInstance();
//get all the textAnnotations
/*
* this Map holds the name as key and all the text annotations of
* dc:type dbpedia:Place that select this name as value
* this map is used to avoid multiple lookups for text annotations
* selecting the same name.
*/
Map<String, Collection<BlankNodeOrIRI>> name2placeEnhancementMap = new HashMap<String, Collection<BlankNodeOrIRI>>();
Iterator<Triple> iterator = graph.filter(null, DC_TYPE, DBPEDIA_PLACE);
while (iterator.hasNext()) {
//the enhancement annotating a place
BlankNodeOrIRI placeEnhancement = iterator.next().getSubject();
//this can still be a TextAnnotation or an EntityAnnotation
//so we need to filter for TextAnnotations
Triple isTextAnnotation = new TripleImpl(placeEnhancement, RDF_TYPE, ENHANCER_TEXTANNOTATION);
if (graph.contains(isTextAnnotation)) {
//now get the name
String name = EnhancementEngineHelper.getString(graph, placeEnhancement, ENHANCER_SELECTED_TEXT);
if (name == null) {
log.warn("Unable to process TextAnnotation " + placeEnhancement + " because property " + ENHANCER_SELECTED_TEXT + " is not present");
} else {
Collection<BlankNodeOrIRI> placeEnhancements = name2placeEnhancementMap.get(name);
if (placeEnhancements == null) {
placeEnhancements = new ArrayList<BlankNodeOrIRI>();
name2placeEnhancementMap.put(name, placeEnhancements);
}
placeEnhancements.add(placeEnhancement);
}
} else {
//TODO: if we also want to process EntityAnnotations with the dc:type dbpedia:Place
// then we need to parse the name based on the enhancer:entity-name property
}
}
//Now we do have all the names we need to lookup
Map<SearchRequestPropertyEnum, Collection<String>> requestParams = new EnumMap<SearchRequestPropertyEnum, Collection<String>>(SearchRequestPropertyEnum.class);
if (getMaxLocationEnhancements() != null) {
requestParams.put(SearchRequestPropertyEnum.maxRows, Collections.singleton(getMaxLocationEnhancements().toString()));
}
for (Map.Entry<String, Collection<BlankNodeOrIRI>> entry : name2placeEnhancementMap.entrySet()) {
List<Toponym> results;
try {
requestParams.put(SearchRequestPropertyEnum.name, Collections.singleton(entry.getKey()));
results = geonamesService.searchToponyms(requestParams);
} catch (Exception e) {
/*
* TODO: Review whether it makes sense to catch here for each name,
* or to catch around the whole loop.
* This depends on whether single requests can result in Exceptions
* (e.g. because of encoding problems) or whether Exceptions are
* usually caused by general problems such as connection issues
* or service unavailability.
*/
throw new EngineException(this, ci, e);
}
if (results != null) {
Double maxScore = results.isEmpty() ? null : results.get(0).getScore();
for (Toponym result : results) {
log.debug("process result {} {}", result.getGeoNameId(), result.getName());
Double score = getToponymScore(result, maxScore);
log.debug(" > score {}", score);
if (score != null) {
if (score < minScore) {
//if the score is lower than the lower bound, then stop
break;
}
} else {
log.warn("NULL returned as Score for " + result.getGeoNameId() + " " + result.getName());
/*
* NOTE: If score is not present all suggestions are
* added as enhancements to the metadata of the content
* item.
*/
}
//write the enhancement!
BlankNodeOrIRI locationEnhancement = writeEntityEnhancement(contentItemId, graph, literalFactory, result, entry.getValue(), null, score);
log.debug(" > {} >= {}", score, minHierarchyScore);
if (score != null && score >= minHierarchyScore) {
log.debug(" > getHierarchy for {} {}", result.getGeoNameId(), result.getName());
//get the hierarchy
try {
Iterator<Toponym> hierarchy = getHierarchy(result).iterator();
for (int level = 0; hierarchy.hasNext(); level++) {
Toponym hierarchyEntry = hierarchy.next();
// maybe add a configuration option for this
if (level == 0) {
//Mother earth -> ignore
continue;
}
//write it as dependent to the locationEnhancement
if (result.getGeoNameId() != hierarchyEntry.getGeoNameId()) {
//TODO: add additional checks based on possible
// configuration here!
log.debug(" - write hierarchy {} {}", hierarchyEntry.getGeoNameId(), hierarchyEntry.getName());
/*
* The hierarchy service does not provide a score, because it would be 1.0,
* so we need to set the score to this value.
* Currently it is set to the value of the suggested entry.
*/
writeEntityEnhancement(contentItemId, graph, literalFactory, hierarchyEntry, null, Collections.singletonList(locationEnhancement), 1.0);
}
}
} catch (Exception e) {
log.warn("Unable to get Hierarchy for " + result.getGeoNameId() + " " + result.getName(), e);
}
}
}
}
}
}
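The BlankNodeOrIRI-related pattern here is the first half of the method: filter the metadata Graph for subjects with dc:type dbpedia:Place, keep only those that are also typed as fise:TextAnnotation, and group them by their selected text so each distinct name triggers only one GeoNames lookup. Below is a minimal, self-contained sketch of that grouping step. The class and method names and the spelled-out vocabulary URIs are illustrative assumptions; the engine itself uses the shared Stanbol constants (DC_TYPE, DBPEDIA_PLACE, ENHANCER_SELECTED_TEXT, ...) shown above.

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;

public class PlaceAnnotationGrouping {

    // vocabulary URIs written out for illustration (the engine uses shared constants instead)
    private static final IRI RDF_TYPE = new IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    private static final IRI DC_TYPE = new IRI("http://purl.org/dc/terms/type");
    private static final IRI DBPEDIA_PLACE = new IRI("http://dbpedia.org/ontology/Place");
    private static final IRI TEXT_ANNOTATION = new IRI("http://fise.iks-project.eu/ontology/TextAnnotation");
    private static final IRI SELECTED_TEXT = new IRI("http://fise.iks-project.eu/ontology/selected-text");

    /**
     * Groups all TextAnnotations with dc:type dbpedia:Place by the text they select,
     * so that every distinct name needs only a single lookup.
     */
    public static Map<String, Collection<BlankNodeOrIRI>> groupBySelectedText(Graph metadata) {
        Map<String, Collection<BlankNodeOrIRI>> name2annotations =
                new HashMap<String, Collection<BlankNodeOrIRI>>();
        Iterator<Triple> places = metadata.filter(null, DC_TYPE, DBPEDIA_PLACE);
        while (places.hasNext()) {
            BlankNodeOrIRI annotation = places.next().getSubject();
            // skip subjects that are not TextAnnotations (e.g. EntityAnnotations)
            if (!metadata.filter(annotation, RDF_TYPE, TEXT_ANNOTATION).hasNext()) {
                continue;
            }
            Iterator<Triple> selected = metadata.filter(annotation, SELECTED_TEXT, null);
            if (!selected.hasNext()) {
                continue; // no selected text -> nothing to look up
            }
            RDFTerm value = selected.next().getObject();
            if (value instanceof Literal) {
                String name = ((Literal) value).getLexicalForm();
                Collection<BlankNodeOrIRI> annotations = name2annotations.get(name);
                if (annotations == null) {
                    annotations = new ArrayList<BlankNodeOrIRI>();
                    name2annotations.put(name, annotations);
                }
                annotations.add(annotation);
            }
        }
        return name2annotations;
    }
}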
use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
the class ClerezzaOntologyProvider method toOWLOntology.
/**
* Exports the stored graph with the given name as an OWLOntology.
*
* @param graphName the name of the graph to export
* @param forceMerge
* if set to false, the selected import management policy will be applied.
* @return the OWLOntology built from the stored graph
* @throws OWLOntologyCreationException
*/
protected OWLOntology toOWLOntology(IRI graphName, boolean forceMerge) throws OWLOntologyCreationException {
log.debug("Exporting graph to OWLOntology");
log.debug(" -- ImmutableGraph name : {}", graphName);
OWLOntologyManager mgr = OWLManager.createOWLOntologyManager();
// Never try to import
mgr.addIRIMapper(new PhonyIRIMapper(Collections.<org.semanticweb.owlapi.model.IRI>emptySet()));
Set<OWLOntologyID> loaded = new HashSet<OWLOntologyID>();
Graph graph = store.getGraph(graphName);
IRI ontologyId = null;
// Get the id of this ontology.
Iterator<Triple> itt = graph.filter(null, RDF.type, OWL.Ontology);
if (itt.hasNext()) {
BlankNodeOrIRI nl = itt.next().getSubject();
if (nl instanceof IRI)
ontologyId = (IRI) nl;
}
List<OWLOntologyID> revImps = new Stack<OWLOntologyID>();
List<OWLOntologyID> lvl1 = new Stack<OWLOntologyID>();
fillImportsReverse(keymap.getReverseMapping(graphName), revImps, lvl1);
// If not set to merge (either by policy or by force), adopt the set import policy.
if (!forceMerge && !ImportManagementPolicy.MERGE.equals(getImportManagementPolicy())) {
OWLOntology o = OWLAPIToClerezzaConverter.clerezzaGraphToOWLOntology(graph, mgr);
// TODO make it not flat.
// Examining the reverse imports stack will flatten all imports.
List<OWLOntologyChange> changes = new ArrayList<OWLOntologyChange>();
OWLDataFactory df = OWLManager.getOWLDataFactory();
List<OWLOntologyID> listToUse;
switch(getImportManagementPolicy()) {
case FLATTEN:
listToUse = revImps;
break;
case PRESERVE:
listToUse = lvl1;
break;
default:
listToUse = lvl1;
break;
}
for (OWLOntologyID ref : listToUse) if (!loaded.contains(ref) && !ref.equals(keymap.getReverseMapping(graphName))) {
changes.add(new AddImport(o, df.getOWLImportsDeclaration(ref.getOntologyIRI())));
loaded.add(ref);
}
o.getOWLOntologyManager().applyChanges(changes);
return o;
} else {
// If there is just the root ontology, convert it straight away.
if (revImps.size() == 1 && revImps.contains(graphName)) {
OWLOntology o = OWLAPIToClerezzaConverter.clerezzaGraphToOWLOntology(graph, mgr);
return o;
}
// FIXME when there's more than one ontology, this way of merging them seems inefficient...
Graph tempGraph = new IndexedGraph();
// The set of triples that will be excluded from the merge
Set<Triple> exclusions = new HashSet<Triple>();
// Examine all reverse imports
for (OWLOntologyID ref : revImps) if (!loaded.contains(ref)) {
// Get the triples
Graph imported = getStoredOntology(getKey(ref), Graph.class, false); // store.getTriples(ref);
// For each owl:Ontology
Iterator<Triple> remove = imported.filter(null, RDF.type, OWL.Ontology);
while (remove.hasNext()) {
BlankNodeOrIRI subj = remove.next().getSubject();
/*
* If it's not the root ontology, trash all its triples. If the root ontology is
* anonymous, all ontology annotations are to be trashed without distinction.
*/
if (ontologyId == null || !subj.equals(ontologyId)) {
Iterator<Triple> it = imported.filter(subj, null, null);
while (it.hasNext()) {
Triple t = it.next();
exclusions.add(t);
}
}
}
Iterator<Triple> it = imported.iterator();
while (it.hasNext()) {
Triple t = it.next();
if (!exclusions.contains(t))
tempGraph.add(t);
}
loaded.add(ref);
}
// online.
return OWLAPIToClerezzaConverter.clerezzaGraphToOWLOntology(tempGraph, mgr);
}
}
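A recurring detail above is that Clerezza returns triple subjects as BlankNodeOrIRI, so the code has to narrow them with an instanceof check before treating them as ontology identifiers (the ontologyId lookup at the top, and the per-ontology exclusion logic in the merge branch). The sketch below isolates that narrowing step; note that the method above only inspects the first match, while this sketch keeps scanning for a named node. The helper class and method names are hypothetical, and the rdf/owl URIs are written out only for illustration.

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;

public final class OntologyIdUtil {

    private static final IRI RDF_TYPE = new IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    private static final IRI OWL_ONTOLOGY = new IRI("http://www.w3.org/2002/07/owl#Ontology");

    private OntologyIdUtil() {}

    /**
     * Returns the IRI of the first named owl:Ontology node in the graph, or null if the
     * graph contains no ontology node or only an anonymous (blank node) one.
     */
    public static IRI findOntologyId(Graph graph) {
        Iterator<Triple> it = graph.filter(null, RDF_TYPE, OWL_ONTOLOGY);
        while (it.hasNext()) {
            BlankNodeOrIRI subject = it.next().getSubject();
            if (subject instanceof IRI) {
                return (IRI) subject; // a named ontology
            }
            // anonymous ontology header: keep looking for a named one
        }
        return null;
    }
}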
use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
the class ClerezzaOntologyProvider method getOntologyNetworkConfiguration.
public OntologyNetworkConfiguration getOntologyNetworkConfiguration() {
Map<String, Collection<OWLOntologyID>> coreOntologies = new HashMap<String, Collection<OWLOntologyID>>(), customOntologies = new HashMap<String, Collection<OWLOntologyID>>();
Map<String, Collection<String>> attachedScopes = new HashMap<String, Collection<String>>();
final Graph meta = store.getGraph(new IRI(metaGraphId));
// Scopes first
for (Iterator<Triple> it = meta.filter(null, RDF.type, SCOPE_URIREF); it.hasNext(); ) {
// for each scope
Triple ta = it.next();
BlankNodeOrIRI sub = ta.getSubject();
if (sub instanceof IRI) {
String s = ((IRI) sub).getUnicodeString(), prefix = _NS_STANBOL_INTERNAL + Scope.shortName + "/";
if (s.startsWith(prefix)) {
String scopeId = s.substring(prefix.length());
log.info("Rebuilding scope \"{}\".", scopeId);
coreOntologies.put(scopeId, new TreeSet<OWLOntologyID>());
customOntologies.put(scopeId, new TreeSet<OWLOntologyID>());
IRI core_ur = null, custom_ur = null;
RDFTerm r;
// Check core space
Iterator<Triple> it2 = meta.filter(sub, HAS_SPACE_CORE_URIREF, null);
if (it2.hasNext()) {
r = it2.next().getObject();
if (r instanceof IRI)
core_ur = (IRI) r;
} else {
it2 = meta.filter(null, IS_SPACE_CORE_OF_URIREF, sub);
if (it2.hasNext()) {
r = it2.next().getSubject();
if (r instanceof IRI)
core_ur = (IRI) r;
}
}
// Check custom space
it2 = meta.filter(sub, HAS_SPACE_CUSTOM_URIREF, null);
if (it2.hasNext()) {
r = it2.next().getObject();
if (r instanceof IRI)
custom_ur = (IRI) r;
} else {
it2 = meta.filter(null, IS_SPACE_CUSTOM_OF_URIREF, sub);
if (it2.hasNext()) {
r = it2.next().getSubject();
if (r instanceof IRI)
custom_ur = (IRI) r;
}
}
// retrieve the ontologies
if (core_ur != null) {
for (it2 = meta.filter(core_ur, null, null); it2.hasNext(); ) {
Triple t = it2.next();
IRI predicate = t.getPredicate();
if (predicate.equals(MANAGES_URIREF)) {
if (t.getObject() instanceof IRI)
// FIXME must be very temporary!
coreOntologies.get(scopeId).add(keymap.buildPublicKey((IRI) t.getObject()));
}
}
for (it2 = meta.filter(null, null, core_ur); it2.hasNext(); ) {
Triple t = it2.next();
IRI predicate = t.getPredicate();
if (predicate.equals(IS_MANAGED_BY_URIREF)) {
if (t.getSubject() instanceof IRI)
// FIXME must be very temporary!
coreOntologies.get(scopeId).add(keymap.buildPublicKey((IRI) t.getSubject()));
}
}
}
if (custom_ur != null) {
for (it2 = meta.filter(custom_ur, null, null); it2.hasNext(); ) {
Triple t = it2.next();
IRI predicate = t.getPredicate();
if (predicate.equals(MANAGES_URIREF)) {
if (t.getObject() instanceof IRI)
// FIXME must be very temporary!
customOntologies.get(scopeId).add(keymap.buildPublicKey((IRI) t.getObject()));
}
}
for (it2 = meta.filter(null, null, custom_ur); it2.hasNext(); ) {
Triple t = it2.next();
IRI predicate = t.getPredicate();
if (predicate.equals(IS_MANAGED_BY_URIREF)) {
if (t.getSubject() instanceof IRI)
// FIXME must be very temporary!
customOntologies.get(scopeId).add(keymap.buildPublicKey((IRI) t.getSubject()));
}
}
}
}
}
}
// Sessions next
Map<String, Collection<OWLOntologyID>> sessionOntologies = new HashMap<String, Collection<OWLOntologyID>>();
for (Iterator<Triple> it = meta.filter(null, RDF.type, SESSION_URIREF); it.hasNext(); ) {
// for each session
Triple ta = it.next();
BlankNodeOrIRI sub = ta.getSubject();
if (sub instanceof IRI) {
IRI ses_ur = (IRI) sub;
String s = ((IRI) sub).getUnicodeString();
String prefix = _NS_STANBOL_INTERNAL + Session.shortName + "/";
if (s.startsWith(prefix)) {
String sessionId = s.substring(prefix.length());
log.info("Rebuilding session \"{}\".", sessionId);
sessionOntologies.put(sessionId, new TreeSet<OWLOntologyID>());
attachedScopes.put(sessionId, new TreeSet<String>());
// retrieve the ontologies
if (ses_ur != null) {
for (Iterator<Triple> it2 = meta.filter(ses_ur, MANAGES_URIREF, null); it2.hasNext(); ) {
RDFTerm obj = it2.next().getObject();
if (obj instanceof IRI)
// FIXME must be very temporary!
sessionOntologies.get(sessionId).add(keymap.buildPublicKey((IRI) obj));
}
for (Iterator<Triple> it2 = meta.filter(null, IS_MANAGED_BY_URIREF, ses_ur); it2.hasNext(); ) {
RDFTerm subj = it2.next().getSubject();
if (subj instanceof IRI)
// FIXME must be very temporary!
sessionOntologies.get(sessionId).add(keymap.buildPublicKey((IRI) subj));
}
for (Iterator<Triple> it2 = meta.filter(null, APPENDED_TO_URIREF, ses_ur); it2.hasNext(); ) {
RDFTerm subj = it2.next().getSubject();
if (subj instanceof IRI) {
String s1 = ((IRI) subj).getUnicodeString();
String prefix1 = _NS_STANBOL_INTERNAL + Scope.shortName + "/";
if (s1.startsWith(prefix1)) {
String scopeId = s1.substring(prefix1.length());
attachedScopes.get(sessionId).add(scopeId);
}
}
}
for (Iterator<Triple> it2 = meta.filter(ses_ur, HAS_APPENDED_URIREF, null); it2.hasNext(); ) {
RDFTerm obj = it2.next().getObject();
if (obj instanceof IRI) {
String s1 = ((IRI) obj).getUnicodeString();
String prefix1 = _NS_STANBOL_INTERNAL + Scope.shortName + "/";
if (s1.startsWith(prefix1)) {
String scopeId = s1.substring(prefix1.length());
attachedScopes.get(sessionId).add(scopeId);
}
}
}
}
}
}
}
return new OntologyNetworkConfiguration(coreOntologies, customOntologies, sessionOntologies, attachedScopes);
}
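Both the scope and the session loops repeat the same narrowing idiom: check that a BlankNodeOrIRI (or RDFTerm) is actually an IRI, read its Unicode string, and strip an internal namespace prefix to recover the scope or session id. A tiny helper along the lines of the sketch below could factor that out; its name and location are hypothetical, and only IRI.getUnicodeString() is the Clerezza API used above.

import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.RDFTerm;

public final class InternalIds {

    private InternalIds() {}

    /**
     * If the term is an IRI that starts with the given namespace prefix, returns the
     * local id (the part after the prefix); otherwise returns null.
     */
    public static String localId(RDFTerm term, String prefix) {
        if (!(term instanceof IRI)) {
            return null; // blank nodes and literals carry no usable id
        }
        String s = ((IRI) term).getUnicodeString();
        return s.startsWith(prefix) ? s.substring(prefix.length()) : null;
    }
}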
use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
the class ResourceMapping method apply.
@Override
public boolean apply(Graph graph, BlankNodeOrIRI subject, Metadata metadata) {
boolean added = false;
BlankNodeOrIRI s = new BlankNode();
mappingLogger.log(subject, ontProperty, null, s);
if (!required.isEmpty()) {
Graph g = new SimpleGraph();
for (Mapping m : required) {
if (!m.apply(g, s, metadata)) {
return false;
}
}
graph.addAll(g);
added = true;
}
for (Mapping m : optional) {
if (m.apply(graph, s, metadata)) {
added = true;
}
}
if (added) {
for (Mapping m : additional) {
m.apply(graph, s, metadata);
}
graph.add(new TripleImpl(subject, ontProperty, s));
}
return added;
}
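The interesting BlankNodeOrIRI usage here is that a fresh BlankNode serves as the subject for all mapped metadata: required mappings are first applied to a temporary SimpleGraph so that nothing partial ever reaches the target graph, and only on success is the blank node linked to the original subject. The sketch below shows that staging pattern in isolation; class, method, and parameter names are assumptions, and the impl package paths for TripleImpl and SimpleGraph are given as commonly used in Clerezza's commons-rdf utilities.

import java.util.Map;

import org.apache.clerezza.commons.rdf.BlankNode;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class StagedNodeWriter {

    /**
     * Adds a new blank node with the given required property values to the target graph,
     * but only if every required value is non-null. Returns the node on success, or null
     * if nothing was written.
     */
    public static BlankNodeOrIRI writeIfComplete(Graph target, BlankNodeOrIRI subject,
            IRI linkProperty, Map<IRI, RDFTerm> requiredValues) {
        BlankNodeOrIRI node = new BlankNode();
        Graph staging = new SimpleGraph(); // stage triples so partial data never reaches the target
        for (Map.Entry<IRI, RDFTerm> entry : requiredValues.entrySet()) {
            if (entry.getValue() == null) {
                return null; // a required value is missing -> write nothing at all
            }
            staging.add(new TripleImpl(node, entry.getKey(), entry.getValue()));
        }
        target.addAll(staging);
        // finally link the new node to the original subject
        target.add(new TripleImpl(subject, linkProperty, node));
        return node;
    }
}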
use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
the class TikaEngineTest method testGEOMetadata.
@Test
public void testGEOMetadata() throws EngineException, IOException, ParseException {
log.info(">>> testGEOMetadata <<<");
//first validate the Media Resource Ontology
IRI hasLocation = new IRI(NamespaceEnum.media + "hasLocation");
IRI locationLatitude = new IRI(NamespaceEnum.media + "locationLatitude");
IRI locationLongitude = new IRI(NamespaceEnum.media + "locationLongitude");
//IRI locationAltitude = new IRI(NamespaceEnum.media+"locationAltitude");
//"video/x-ms-asf");
ContentItem ci = createContentItem("testJPEG_GEO.jpg", OCTET_STREAM.toString());
assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
engine.computeEnhancements(ci);
Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(), hasLocation, null);
assertTrue(it.hasNext());
RDFTerm r = it.next().getObject();
assertFalse(it.hasNext());
assertTrue(r instanceof BlankNodeOrIRI);
BlankNodeOrIRI location = verifyBlankNodeOrIRI(ci, hasLocation);
//lat
verifyValue(ci, location, locationLatitude, XSD.double_, "12.54321");
//long
verifyValue(ci, location, locationLongitude, XSD.double_, "-54.1234");
//second the GEO ont
IRI lat = new IRI(NamespaceEnum.geo + "lat");
IRI lon = new IRI(NamespaceEnum.geo + "long");
//lat
verifyValue(ci, lat, XSD.double_, "12.54321");
//long
verifyValue(ci, lon, XSD.double_, "-54.1234");
}
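The test depends on helper methods (verifyBlankNodeOrIRI, verifyValue) that are not part of this excerpt. Based only on how they are called above, a helper of the following shape would cover the first one: filter the metadata for the property, assert that there is exactly one value, and narrow it to a BlankNodeOrIRI. The name and exact assertions of the real TikaEngineTest helpers may differ.

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;

public class MetadataAssertions {

    /**
     * Asserts that the content item itself has exactly one value for the given property
     * and that this value is a BlankNodeOrIRI, so it can be used as the subject of
     * further assertions (e.g. latitude/longitude values on the location node).
     */
    public static BlankNodeOrIRI verifySingleBlankNodeOrIRI(ContentItem ci, IRI property) {
        Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(), property, null);
        assertTrue("missing value for " + property, it.hasNext());
        RDFTerm value = it.next().getObject();
        assertFalse("more than one value for " + property, it.hasNext());
        assertTrue(value + " is not a BlankNodeOrIRI", value instanceof BlankNodeOrIRI);
        return (BlankNodeOrIRI) value;
    }
}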