use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class TrackingDereferencerBase method copyLdPath.
/**
* Executes the {@link #ldpathProgram} using the parsed URI as context and
* writes the the results to the parsed ImmutableGraph
* @param uri the context
* @param rdfBackend the RdfBackend the LDPath program is executed on
* @param ldpathProgram The {@link Program} parsed via the dereference context
* @param langs the set of languages to dereference
* @param graph the graph to store the results
* @param writeLock the write lock for the graph
* @throws DereferenceException on any {@link EntityhubException} while
* executing the LDPath program
*/
private void copyLdPath(IRI uri, RDFBackend<Object> rdfBackend, Program<Object> ldpathProgram, Set<String> langs, Graph graph, Lock writeLock) throws DereferenceException {
//A RdfReference needs to be used as context
RdfReference context = valueFactory.createReference(uri);
//create the representation that stores results in an intermediate
//graph (we do not want partial results on an error
Graph ldPathResults = new SimpleGraph();
RdfRepresentation result = valueFactory.createRdfRepresentation(uri, ldPathResults);
//execute the LDPath Program and write results to the RDF ImmutableGraph
try {
for (org.apache.marmotta.ldpath.model.fields.FieldMapping<?, Object> mapping : ldpathProgram.getFields()) {
Collection<?> values;
try {
values = mapping.getValues(rdfBackend, context);
} catch (RuntimeException e) {
throw new DereferenceException(uri, e);
}
if (values != null && !values.isEmpty()) {
String fieldName = mapping.getFieldName();
if (langs == null || langs.isEmpty()) {
result.add(fieldName, values);
} else {
//filter for languages
for (Object value : values) {
if ((!(value instanceof Text)) || langs.contains(((Text) value).getLanguage())) {
result.add(fieldName, value);
}
//else text with filtered language ... do not add
}
}
}
}
} catch (EntityhubException e) {
throw new DereferenceException(uri, e);
}
if (log.isTraceEnabled()) {
log.trace("dereferenced via LDPath {}", ModelUtils.getRepresentationInfo(result));
}
if (!ldPathResults.isEmpty()) {
//copy the results
writeLock.lock();
try {
graph.addAll(ldPathResults);
} finally {
writeLock.unlock();
}
}
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class CeliNamedEntityExtractionEnhancementEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
String text = "";
try {
text = ContentItemHelper.getText(contentPart.getValue());
} catch (IOException e) {
throw new InvalidContentException(this, ci, e);
}
if (text.trim().length() == 0) {
log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
return;
}
String language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
//used for the palin literals in TextAnnotations
Language lang = new Language(language);
try {
List<NamedEntity> lista = this.client.extractEntities(text, language);
LiteralFactory literalFactory = LiteralFactory.getInstance();
Graph g = ci.getMetadata();
for (NamedEntity ne : lista) {
try {
IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
//add selected text as PlainLiteral in the language extracted from the text
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(ne.getFormKind(), lang)));
g.add(new TripleImpl(textAnnotation, DC_TYPE, getEntityRefForType(ne.type)));
if (ne.getFrom() != null && ne.getTo() != null) {
g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(ne.getFrom().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(ne.getTo().intValue())));
g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, ne.getFormKind(), ne.getFrom().intValue()), lang)));
}
} catch (NoConvertorException e) {
log.error(e.getMessage(), e);
}
}
} catch (IOException e) {
throw new EngineException("Error while calling the CELI NER (Named Entity Recognition)" + " service (configured URL: " + serviceURL + ")!", e);
} catch (SOAPException e) {
throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI NER (Named Entity Recognition) service!", e);
}
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class DBPSpotlightSpotEnhancementEngine method createEnhancements.
/**
* The method adds the returned DBpedia Spotlight surface forms to the
* content item's metadata. For each one an TextAnnotation is created.
*
* @param occs
* a Collection of entity information
* @param ci
* the content item
*/
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String content, Language lang) {
HashMap<String, IRI> entityAnnotationMap = new HashMap<String, IRI>();
Graph model = ci.getMetadata();
for (SurfaceForm occ : occs) {
IRI textAnnotation = SpotlightEngineUtils.createTextEnhancement(occ, this, ci, content, lang);
if (entityAnnotationMap.containsKey(occ.name)) {
model.add(new TripleImpl(entityAnnotationMap.get(occ.name), DC_RELATION, textAnnotation));
} else {
entityAnnotationMap.put(occ.name, textAnnotation);
}
}
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class SpotlightEngineUtils method createEntityAnnotation.
/**
* Creates a fise:EntityAnnotation for the parsed parameters and
* adds it the the {@link ContentItem#getMetadata()}. <p>
* This method assumes a write lock on the parsed content item.
* @param resource the candidate resource
* @param engine the engine
* @param ci the content item
* @param textAnnotation the fise:TextAnnotation to dc:relate the
* created fise:EntityAnnotation
* @return the URI of the created fise:TextAnnotation
*/
public static IRI createEntityAnnotation(CandidateResource resource, EnhancementEngine engine, ContentItem ci, IRI textAnnotation) {
IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
Graph model = ci.getMetadata();
Literal label = new PlainLiteralImpl(resource.label, new Language("en"));
model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, resource.getUri()));
model.add(new TripleImpl(entityAnnotation, PROPERTY_CONTEXTUAL_SCORE, literalFactory.createTypedLiteral(resource.contextualScore)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_PERCENTAGE_OF_SECOND_RANK, literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_SUPPORT, literalFactory.createTypedLiteral(resource.support)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_PRIOR_SCORE, literalFactory.createTypedLiteral(resource.priorScore)));
model.add(new TripleImpl(entityAnnotation, PROPERTY_FINAL_SCORE, literalFactory.createTypedLiteral(resource.finalScore)));
return entityAnnotation;
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class DereferenceEngineTest method validateDereferencedEntities.
private void validateDereferencedEntities(Graph metadata, IRI... entityReferenceFields) {
Graph expected = new IndexedGraph();
for (IRI entityReferenceField : entityReferenceFields) {
Iterator<Triple> referenced = metadata.filter(null, entityReferenceField, null);
while (referenced.hasNext()) {
IRI entity = (IRI) referenced.next().getObject();
Iterator<Triple> entityTriples = testData.filter(entity, null, null);
while (entityTriples.hasNext()) {
expected.add(entityTriples.next());
}
}
}
Graph notExpected = new IndexedGraph(testData);
notExpected.removeAll(expected);
Assert.assertTrue(metadata.containsAll(expected));
Assert.assertTrue(Collections.disjoint(metadata, notExpected));
}
Aggregations