use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
the class CeliLemmatizerEnhancementEngineTest method validateLemmaFormProperty.
/**
* Validates that the parsed TextAnnotation has [1..*] lemma form values of type {@link PlainLiteral} in the same language as the
* analyzed text
* @param enhancements The graph with the enhancements
* @param textAnnotation the TextAnnotation to check
* @param lang the language of the analyzed text
* @return The number of lemma forms found
*/
private int validateLemmaFormProperty(Graph enhancements, BlankNodeOrIRI textAnnotation, String lang) {
    Iterator<Triple> lemmaFormsIterator = enhancements.filter(textAnnotation, hasLemmaForm, null);
    assertTrue("No lemma form value found for TextAnnotation " + textAnnotation + "!",
        lemmaFormsIterator.hasNext());
    int lemmaFormCount = 0;
    while (lemmaFormsIterator.hasNext()) {
        lemmaFormCount++;
        RDFTerm lemmaForms = lemmaFormsIterator.next().getObject();
        assertTrue("Lemma Form values are expected to be of type Literal",
            lemmaForms instanceof Literal);
        assertFalse("Lemma forms MUST NOT be empty",
            ((Literal) lemmaForms).getLexicalForm().isEmpty());
        assertNotNull("Language of the Lemma Form literal MUST NOT be null",
            ((Literal) lemmaForms).getLanguage());
        assertEquals("Language of the Lemma Form literal MUST BE the same as for the parsed text",
            lang, ((Literal) lemmaForms).getLanguage().toString());
    }
    return lemmaFormCount;
}
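A minimal sketch of a graph that would satisfy this check; the hasLemmaForm property IRI below is a placeholder, as the actual constant is defined by the CELI lemmatizer engine:

IRI hasLemmaForm = new IRI("urn:example:hasLemmaForm"); // hypothetical property IRI
Graph enhancements = new SimpleGraph();
IRI textAnnotation = new IRI("urn:example:textAnnotation-1");
// one lemma form literal in the language of the analyzed text ("it")
enhancements.add(new TripleImpl(textAnnotation, hasLemmaForm,
        new PlainLiteralImpl("essere", new Language("it"))));
// validateLemmaFormProperty(enhancements, textAnnotation, "it") would return 1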
use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
the class SpotlightEngineUtils method createEntityAnnotation.
/**
* Creates a fise:EntityAnnotation for the parsed parameters and
* adds it to the {@link ContentItem#getMetadata()}. <p>
* This method assumes a write lock on the parsed content item.
* @param resource the candidate resource
* @param engine the engine
* @param ci the content item
* @param textAnnotation the fise:TextAnnotation to dc:relate the
* created fise:EntityAnnotation to
* @return the URI of the created fise:EntityAnnotation
*/
public static IRI createEntityAnnotation(CandidateResource resource, EnhancementEngine engine,
        ContentItem ci, IRI textAnnotation) {
    IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
    Graph model = ci.getMetadata();
    Literal label = new PlainLiteralImpl(resource.label, new Language("en"));
    model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, resource.getUri()));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_CONTEXTUAL_SCORE,
        literalFactory.createTypedLiteral(resource.contextualScore)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_PERCENTAGE_OF_SECOND_RANK,
        literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_SUPPORT,
        literalFactory.createTypedLiteral(resource.support)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_PRIOR_SCORE,
        literalFactory.createTypedLiteral(resource.priorScore)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_FINAL_SCORE,
        literalFactory.createTypedLiteral(resource.finalScore)));
    return entityAnnotation;
}
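Because the method assumes a write lock on the content item, a caller inside an enhancement engine might look like the following sketch (resource and textAnnotation are assumed to come from earlier processing steps):

ci.getLock().writeLock().lock();
try {
    IRI entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(
            resource, this, ci, textAnnotation);
    // the new fise:EntityAnnotation is now dc:related to the TextAnnotation
} finally {
    ci.getLock().writeLock().unlock();
}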
use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
the class DBPSpotlightDisambiguateEnhancementEngine method createEnhancements.
/**
* The method adds the returned DBpedia Spotlight annotations to the content
* item's metadata. For each DBpedia resource an EntityAnnotation is created
* and linked to the corresponding TextAnnotation.
*
* @param occs
* a Collection of entity information
* @param ci
* the content item
* @param language
* the language of the analyzed text
*/
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    HashMap<RDFTerm, IRI> entityAnnotationMap = new HashMap<RDFTerm, IRI>();
    for (Annotation occ : occs) {
        if (textAnnotationsMap.get(occ.surfaceForm) != null) {
            IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
            Graph model = ci.getMetadata();
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            entityAnnotationMap.put(occ.uri, entityAnnotation);
            Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
            model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
            Collection<String> t = occ.getTypeNames();
            if (t != null) {
                Iterator<String> it = t.iterator();
                while (it.hasNext()) {
                    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(it.next())));
                }
            }
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
        }
    }
}
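Since every created fise:EntityAnnotation is dc:related to its TextAnnotation, consumers can navigate back from a TextAnnotation to all of its suggestions; a hedged sketch using the same metadata graph:

Iterator<Triple> suggestions = ci.getMetadata().filter(null, DC_RELATION, textAnnotation);
while (suggestions.hasNext()) {
    BlankNodeOrIRI entityAnnotation = suggestions.next().getSubject();
    // read ENHANCER_ENTITY_LABEL / ENHANCER_ENTITY_REFERENCE from this node as needed
}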
use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
the class OpenCalaisEngine method queryModel.
/**
* Extracts the relevant entity information from the Calais RDF data.
* The entities and the related information are extracted by a SPARQL query.
*
* @param model the Graph representing the Calais data
*
* @return a Collection of entity information
* @throws EngineException on a {@link ParseException} while processing the
* SPARQL query.
*/
public Collection<CalaisEntityOccurrence> queryModel(Graph model) throws EngineException {
    //TODO extract also Geo info (latitude/longitude)?
    String query =
        "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
        + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> "
        + "PREFIX p: <http://s.opencalais.com/1/pred/> "
        + "PREFIX t: <http://s.opencalais.com/1/type/em/e/> "
        + "SELECT DISTINCT ?id ?did ?name ?type ?dtype ?offset ?length ?exact ?context ?score WHERE { "
        + "?id p:name ?name ."
        + "?id rdf:type ?type ."
        + "?y p:subject ?id ."
        + "?y p:offset ?offset ."
        + "?y p:length ?length ."
        + "?y p:exact ?exact ."
        + "?y p:detection ?context ."
        + " OPTIONAL { ?z p:subject ?id . ?z p:relevance ?score . } "
        // get disambiguated entity references if available
        + " OPTIONAL { ?did p:subject ?id . ?did p:name ?name . ?did rdf:type ?dtype . } "
        + "FILTER ("
        + "?type = t:Person || "
        + "?type = t:City || "
        + "?type = t:Continent || "
        + "?type = t:Country || "
        + "?type = t:ProvinceOrState || "
        + "?type = t:Region || "
        + "?type = t:Company || "
        + "?type = t:Facility || "
        + "?type = t:Organization "
        + ")"
        + "} ";
    Collection<CalaisEntityOccurrence> result = new ArrayList<CalaisEntityOccurrence>();
    try {
        SelectQuery sQuery = (SelectQuery) QueryParser.getInstance().parse(query);
        ResultSet rs = tcManager.executeSparqlQuery(sQuery, model);
        while (rs.hasNext()) {
            SolutionMapping row = rs.next();
            CalaisEntityOccurrence occ = new CalaisEntityOccurrence();
            RDFTerm disambiguated = row.get("did");
            occ.id = (disambiguated == null ? row.get("id") : disambiguated);
            if (onlyNERMode) {
                occ.type = row.get("type");
            } else {
                occ.type = (disambiguated == null ? row.get("type") : row.get("dtype"));
            }
            if (calaisTypeMap != null) {
                IRI mappedType = calaisTypeMap.get(occ.type);
                if (mappedType != null) {
                    occ.type = mappedType;
                }
            }
            occ.name = ((Literal) row.get("name")).getLexicalForm();
            occ.exact = ((Literal) row.get("exact")).getLexicalForm();
            //TODO for HTML the offsets might not be those of the original document but refer to a cleaned up version?
            occ.offset = Integer.valueOf(((Literal) row.get("offset")).getLexicalForm());
            // remove brackets
            occ.context = ((Literal) row.get("context")).getLexicalForm().replaceAll("[\\[\\]]", "");
            occ.length = Integer.valueOf(((Literal) row.get("length")).getLexicalForm());
            if (row.get("score") != null) {
                occ.relevance = Double.valueOf(((Literal) row.get("score")).getLexicalForm());
            }
            result.add(occ);
        }
    } catch (ParseException e) {
        throw new EngineException("Unable to parse SPARQL query for processing OpenCalais results", e);
    }
    log.info("Found {} occurrences", result.size());
    return result;
}
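A hedged sketch of how the Calais response could be turned into the Graph this method expects, assuming a Clerezza Parser instance is available (e.g. injected via OSGi) and calaisResponse is the RDF/XML stream returned by the service:

ImmutableGraph calaisModel = parser.parse(calaisResponse, SupportedFormat.RDF_XML);
Collection<CalaisEntityOccurrence> occurrences = queryModel(calaisModel);
for (CalaisEntityOccurrence occ : occurrences) {
    log.debug("found {} ({}) at offset {}", new Object[] {occ.name, occ.type, occ.offset});
}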
use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
the class AbstractOntologyCollectorImpl method exportToGraph.
/**
* This method has no conversion calls, so it can be invoked by subclasses that wish to modify it
* afterwards.
*
* @param merge if {@code true}, the triples of the managed ontologies are merged into the
*        exported graph (one level deep); otherwise only owl:imports statements are added
* @param prefix the namespace prefix used to mint the ontology IRI and the import targets
* @return the exported graph
*/
protected Graph exportToGraph(boolean merge, org.semanticweb.owlapi.model.IRI prefix) {
    // if (merge) throw new UnsupportedOperationException(
    // "Merge not implemented yet for Clerezza triple collections.");
    long before = System.currentTimeMillis();
    // No need to store, give it a name, or anything.
    Graph root = new SimpleGraph();
    IRI iri = new IRI(prefix + _id);
    // Add the import declarations for directly managed ontologies.
    if (root != null) {
        // Set the ontology ID
        root.add(new TripleImpl(iri, RDF.type, OWL.Ontology));
        if (merge) {
            log.warn("Merging of Clerezza triple collections is only implemented one level down. "
                    + "Import statements will be preserved for further levels.");
            Iterator<Triple> it;
            Set<RDFTerm> importTargets = new HashSet<RDFTerm>();
            for (OWLOntologyID ontologyId : managedOntologies) {
                ImmutableGraph g = getOntology(ontologyId, ImmutableGraph.class, false);
                root.addAll(g);
                it = g.filter(null, OWL.imports, null);
                while (it.hasNext()) {
                    org.semanticweb.owlapi.model.IRI tgt;
                    RDFTerm r = it.next().getObject();
                    try {
                        if (r instanceof IRI) {
                            tgt = org.semanticweb.owlapi.model.IRI.create(((IRI) r).getUnicodeString());
                        } else if (r instanceof Literal) {
                            tgt = org.semanticweb.owlapi.model.IRI.create(((Literal) r).getLexicalForm());
                        } else {
                            tgt = org.semanticweb.owlapi.model.IRI.create(r.toString());
                        }
                        tgt = URIUtils.sanitize(tgt);
                        importTargets.add(new IRI(tgt.toString()));
                    } catch (Exception ex) {
                        log.error("FAILED to obtain import target from resource {}", r);
                        continue;
                    }
                }
                it = g.filter(null, RDF.type, OWL.Ontology);
                while (it.hasNext()) {
                    BlankNodeOrIRI ontology = it.next().getSubject();
                    log.debug("Removing all triples related to {} from {}", ontology, iri);
                    Iterator<Triple> it2 = g.filter(ontology, null, null);
                    while (it2.hasNext()) {
                        root.remove(it2.next());
                    }
                }
                /*
                 * Reinstate import statements, though. If imported ontologies were not merged
                 * earlier, we are not doing it now anyway.
                 */
                for (RDFTerm target : importTargets) {
                    root.add(new TripleImpl(iri, OWL.imports, target));
                }
            }
        } else {
            String base = prefix + getID();
            for (int i = 0; i < backwardPathLength; i++) {
                base = URIUtils.upOne(URI.create(base)).toString();
            }
            base += "/";
            // The key set of managedOntologies contains the ontology IRIs, not their storage keys.
            for (OWLOntologyID ontologyId : managedOntologies) {
                org.semanticweb.owlapi.model.IRI physIRI =
                        // org.semanticweb.owlapi.model.IRI.create(base + ontologyId.getVersionIRI());
                        org.semanticweb.owlapi.model.IRI.create(base + OntologyUtils.encode(ontologyId));
                root.add(new TripleImpl(iri, OWL.imports, new IRI(physIRI.toString())));
            }
        }
        log.debug("Clerezza export of {} completed in {} ms.", getID(), System.currentTimeMillis() - before);
    }
    return root;
}
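A hedged usage sketch: exporting without merging produces one owl:imports statement per managed ontology (the base namespace below is illustrative):

org.semanticweb.owlapi.model.IRI prefix =
        org.semanticweb.owlapi.model.IRI.create("http://example.org/ontonet/"); // hypothetical base
Graph exported = exportToGraph(false, prefix);
// exported now carries the rdf:type owl:Ontology statement plus one import declaration per managed ontology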