use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class EntityLinker method lookupEntities.
/**
 * Searches for Entities in the {@link #entitySearcher} corresponding to the
 * {@link Token#getText() words} of the current {@link #state position} in
 * the text.
 * @param searchStrings the list of {@link Token#getText() words} to search
 * entities for.
 * @return The sorted list with the suggestions.
 * If there are no suggestions an empty list will be returned.
 */
private List<Suggestion> lookupEntities(List<String> searchStrings) throws EngineException {
    Collection<? extends Representation> results;
    try {
        results = entitySearcher.lookup(config.getNameField(), config.getSelectedFields(),
            searchStrings, state.getSentence().getLanguage(), config.getDefaultLanguage());
    } catch (RuntimeException e) {
        throw new EngineException(e.getMessage(), e);
    }
    List<Suggestion> suggestions = new ArrayList<Suggestion>();
    for (Representation result : results) {
        Suggestion match = matchLabels(result);
        if (match.getMatch() != MATCH.NONE) {
            suggestions.add(match);
        }
    }
    // sort the suggestions
    if (suggestions.size() > 1) {
        Collections.sort(suggestions, Suggestion.DEFAULT_SUGGESTION_COMPARATOR);
    }
    // remove all elements > config.getMaxSuggestions()
    return suggestions;
}
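The comment just before the return hints that the list should still be truncated to config.getMaxSuggestions() entries, which the snippet itself never does. A minimal sketch of that post-processing step, assuming a generic comparator and limit; SuggestionUtils and sortAndTruncate are illustrative names, not Stanbol API:

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

public final class SuggestionUtils {

    private SuggestionUtils() {
    }

    // Sorts the suggestions with the given comparator and truncates the
    // result to at most maxSuggestions entries, as hinted (but not
    // implemented) by the comment in lookupEntities() above.
    public static <T> List<T> sortAndTruncate(List<T> suggestions,
            Comparator<? super T> comparator, int maxSuggestions) {
        if (suggestions.size() > 1) {
            Collections.sort(suggestions, comparator);
        }
        if (suggestions.size() > maxSuggestions) {
            return new ArrayList<T>(suggestions.subList(0, maxSuggestions));
        }
        return suggestions;
    }
}

With such a helper, the last lines of lookupEntities() would reduce to a single call: return SuggestionUtils.sortAndTruncate(suggestions, Suggestion.DEFAULT_SUGGESTION_COMPARATOR, config.getMaxSuggestions());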
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class HtmlExtractorEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    HtmlExtractor extractor = new HtmlExtractor(htmlExtractorRegistry, htmlParser);
    Graph model = new SimpleGraph();
    ci.getLock().readLock().lock();
    try {
        extractor.extract(ci.getUri().getUnicodeString(), ci.getStream(), null, ci.getMimeType(), model);
    } catch (ExtractorException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with HtmlExtractor", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    ClerezzaRDFUtils.urifyBlankNodes(model);
    // make the model single rooted
    if (singleRootRdf) {
        ClerezzaRDFUtils.makeConnected(model, ci.getUri(), new IRI(NIE_NS + "contains"));
    }
    // add the extracted triples to the metadata of the ContentItem
    ci.getLock().writeLock().lock();
    try {
        LOG.info("Model: {}", model);
        ci.getMetadata().addAll(model);
        model = null;
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
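The method is a textbook instance of the read-then-write locking discipline: hold the ContentItem's read lock only while consuming the content stream, run the expensive extraction into a local graph, and take the write lock only for the final merge. A self-contained sketch of the same discipline, with a plain ReentrantReadWriteLock standing in for ci.getLock(); ReadThenWriteExample and its fields are illustrative, not Stanbol API:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class ReadThenWriteExample {

    private final ReadWriteLock lock = new ReentrantReadWriteLock();
    private final List<String> metadata = new ArrayList<String>();
    private String content = "some shared content";

    public void enhance() {
        String snapshot;
        lock.readLock().lock();
        try {
            snapshot = content; // read the shared state under the read lock
        } finally {
            lock.readLock().unlock();
        }
        // expensive work happens with no lock held, on a local copy
        String result = snapshot.toUpperCase();
        lock.writeLock().lock();
        try {
            metadata.add(result); // merge the result under the write lock
        } finally {
            lock.writeLock().unlock();
        }
    }
}

Keeping the extraction outside both locks, as the engine does, minimizes the time other engines in the chain are blocked from reading or writing the ContentItem.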
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class NamedEntityTaggingEngine method computeEnhancements.
public void computeEnhancements(ContentItem ci) throws EngineException {
    final Site site;
    if (referencedSiteID != null) {
        // lookup the referenced site
        site = siteManager.getSite(referencedSiteID);
        // ensure that it is present
        if (site == null) {
            String msg = String.format("Unable to enhance %s because Referenced Site %s is currently not active!",
                ci.getUri().getUnicodeString(), referencedSiteID);
            log.warn(msg);
            // throw new EngineException(msg);
            return;
        }
        // and that it supports offline mode if required
        if (isOfflineMode() && !site.supportsLocalMode()) {
            log.warn("Unable to enhance ci {} because OfflineMode is not supported by ReferencedSite {}.",
                ci.getUri().getUnicodeString(), site.getId());
            return;
        }
    } else {
        // null indicates to use the Entityhub to lookup Entities
        site = null;
    }
    Graph graph = ci.getMetadata();
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    // Retrieve the existing text annotations (requires read lock)
    Map<NamedEntity, List<IRI>> textAnnotations = new HashMap<NamedEntity, List<IRI>>();
    // the language extracted for the parsed content, or NULL if not available
    String contentLanguage;
    ci.getLock().readLock().lock();
    try {
        contentLanguage = EnhancementEngineHelper.getLanguage(ci);
        for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION); it.hasNext(); ) {
            IRI uri = (IRI) it.next().getSubject();
            if (graph.filter(uri, Properties.DC_RELATION, null).hasNext()) {
                // skip annotations that point to another annotation via
                // dc:relation, as those are subsumed occurrences
                continue;
            }
            NamedEntity namedEntity = NamedEntity.createFromTextAnnotation(graph, uri);
            if (namedEntity != null) {
                // This is a first occurrence, collect any subsumed annotations
                List<IRI> subsumed = new ArrayList<IRI>();
                for (Iterator<Triple> it2 = graph.filter(null, Properties.DC_RELATION, uri); it2.hasNext(); ) {
                    subsumed.add((IRI) it2.next().getSubject());
                }
                textAnnotations.put(namedEntity, subsumed);
            }
        }
    } finally {
        ci.getLock().readLock().unlock();
    }
    // search the suggestions
    Map<NamedEntity, List<Suggestion>> suggestions = new HashMap<NamedEntity, List<Suggestion>>(textAnnotations.size());
    for (Entry<NamedEntity, List<IRI>> entry : textAnnotations.entrySet()) {
        try {
            List<Suggestion> entitySuggestions = computeEntityRecommentations(site, entry.getKey(), entry.getValue(), contentLanguage);
            if (entitySuggestions != null && !entitySuggestions.isEmpty()) {
                suggestions.put(entry.getKey(), entitySuggestions);
            }
        } catch (EntityhubException e) {
            throw new EngineException(this, ci, e);
        }
    }
    // now write the results (requires write lock)
    ci.getLock().writeLock().lock();
    try {
        RdfValueFactory factory = RdfValueFactory.getInstance();
        Map<String, Representation> entityData = new HashMap<String, Representation>();
        for (Entry<NamedEntity, List<Suggestion>> entitySuggestions : suggestions.entrySet()) {
            List<IRI> subsumed = textAnnotations.get(entitySuggestions.getKey());
            List<BlankNodeOrIRI> annotationsToRelate = new ArrayList<BlankNodeOrIRI>(subsumed);
            annotationsToRelate.add(entitySuggestions.getKey().getEntity());
            for (Suggestion suggestion : entitySuggestions.getValue()) {
                log.debug("Add Suggestion {} for {}", suggestion.getEntity().getId(), entitySuggestions.getKey());
                EnhancementRDFUtils.writeEntityAnnotation(this, literalFactory, graph, ci.getUri(),
                    annotationsToRelate, suggestion, nameField,
                    contentLanguage == null ? DEFAULT_LANGUAGE : contentLanguage);
                if (dereferenceEntities) {
                    entityData.put(suggestion.getEntity().getId(), suggestion.getEntity().getRepresentation());
                }
            }
        }
        // if dereferenceEntities is false, entityData is empty and no
        // Representations are added
        for (Representation rep : entityData.values()) {
            graph.addAll(factory.toRdfRepresentation(rep).getRdfGraph());
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
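Both lock-protected phases above revolve around the dc:relation links between TextAnnotations: an annotation with an outgoing dc:relation is a subsumed occurrence and is skipped, while for a first occurrence the subjects of all incoming dc:relation triples are collected. That inner walk could be factored into a small helper; a sketch assuming the Clerezza commons RDF package layout used by current Stanbol (SubsumptionUtils and subsumedAnnotations are illustrative names, not Stanbol API):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;

public final class SubsumptionUtils {

    private SubsumptionUtils() {
    }

    // Collects the subjects of all TextAnnotations that point to the given
    // annotation via dc:relation -- the same filter(null, DC_RELATION, uri)
    // walk performed inline in computeEnhancements() above.
    public static List<IRI> subsumedAnnotations(Graph graph, IRI annotation) {
        List<IRI> subsumed = new ArrayList<IRI>();
        Iterator<Triple> it = graph.filter(null, Properties.DC_RELATION, annotation);
        while (it.hasNext()) {
            subsumed.add((IRI) it.next().getSubject());
        }
        return subsumed;
    }
}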
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class TestLocationEnhancementEngine method testLocationEnhancementEngine.
@Test
public void testLocationEnhancementEngine() throws IOException, EngineException {
    // create a content item
    ContentItem ci = getContentItem("urn:org.apache:stanbol.enhancer:text:content-item:person", CONTEXT);
    // add three text annotations to be consumed by this test
    getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
    getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
    getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
    // perform the computation of the enhancements
    try {
        locationEnhancementEngine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e, "overloaded with requests");
        return;
    }
    Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
        locationEnhancementEngine.getClass().getName()));
    // adding null as the expected value for confidence makes it a required property
    expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
    /*
     * Note:
     * - Expected results depend on the geonames.org data. So if the test
     *   fails it may also mean that the data provided by geonames.org have
     *   changed.
     */
    int entityAnnotationCount = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    // two suggestions for New Zealand and one hierarchy entry for the first suggestion
    // NOTE 2012-10-10: changed the expected value back to "3" as geonames.org
    // again returns "Oceania" as parent for "New Zealand"
    // NOTE 2012-11-12: deactivated this check, because whether "Oceania" is
    // returned as parent for "New Zealand" changes every few weeks
    // assertEquals(3, entityAnnotationCount);
}
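RemoteServiceHelper.checkServiceUnavailable() rethrows the EngineException unless its cause indicates that geonames.org was merely unreachable or overloaded, in which case the test returns early and skips all assertions. Plain JUnit 4 offers a similar effect via Assume, which marks the test as skipped rather than silently green; a sketch of that alternative, where RemoteEngineToleranceTest and callRemoteService() are placeholders for the engine invocation, not Stanbol code:

import static org.junit.Assume.assumeNoException;

import org.junit.Test;

public class RemoteEngineToleranceTest {

    @Test
    public void testEngineToleratingOutages() {
        try {
            callRemoteService(); // stands in for engine.computeEnhancements(ci)
        } catch (Exception e) {
            // Turns a remote outage into a skipped test instead of a failure,
            // so the build report shows that the assertions never ran.
            assumeNoException(e);
        }
        // assertions on the produced metadata would follow here
    }

    /** Placeholder for a call that may fail while the service is down. */
    private void callRemoteService() throws Exception {
    }
}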
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class RefactorEnhancementEngine method computeEnhancements.
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    // Prepare the OntoNet environment. First we create the OntoNet session in which the whole job will run.
    final Session session;
    try {
        session = sessionManager.createSession();
    } catch (SessionLimitException e1) {
        throw new EngineException("OntoNet session quota reached. The Refactor Engine requires its own new session to execute.", e1);
    }
    if (session == null)
        throw new EngineException("Failed to create OntoNet session. The Refactor Engine requires its own new session to execute.");
    log.debug("Refactor enhancement job will run in session '{}'.", session.getID());
    // Retrieve and filter the metadata graph for entities recognized by the engines.
    final Graph metadataGraph = ci.getMetadata(), signaturesGraph = new IndexedGraph();
    // FIXME the Stanbol Enhancer vocabulary should be retrieved from somewhere in the enhancer API.
    final IRI ENHANCER_ENTITY_REFERENCE = new IRI("http://fise.iks-project.eu/ontology/entity-reference");
    Iterator<Triple> tripleIt = metadataGraph.filter(null, ENHANCER_ENTITY_REFERENCE, null);
    while (tripleIt.hasNext()) {
        // Get the entity URI
        RDFTerm obj = tripleIt.next().getObject();
        if (!(obj instanceof IRI)) {
            log.warn("Invalid IRI for entity reference {}. Skipping.", obj);
            continue;
        }
        final String entityReference = ((IRI) obj).getUnicodeString();
        log.debug("Trying to resolve entity {}", entityReference);
        // Populate the entity signatures graph, by querying either the Entity Hub or the dereferencer.
        if (engineConfiguration.isEntityHubUsed()) {
            Graph result = populateWithEntity(entityReference, signaturesGraph);
            if (result != signaturesGraph && result != null) {
                log.warn("Entity Hub query added triples to a new graph instead of populating the supplied one!"
                    + " New signatures will be discarded.");
            }
        } else {
            try {
                OntologyInputSource<Graph> source = new GraphContentSourceWithPhysicalIRI(
                    dereferencer.resolve(entityReference),
                    org.semanticweb.owlapi.model.IRI.create(entityReference));
                signaturesGraph.addAll(source.getRootOntology());
            } catch (FileNotFoundException e) {
                log.error("Failed to dereference entity " + entityReference + ". Skipping.", e);
                continue;
            }
        }
    }
    try {
        /*
         * The dedicated session for this job will store the following: (1) all the (merged) signatures
         * for all detected entities; (2) the original content metadata graph returned earlier in the
         * chain.
         *
         * There is no chance that (2) could be null, as it was previously checked by the JobManager
         * via the canEnhance() method, and computeEnhancements() is only called if the former
         * returns true.
         */
        session.addOntology(new GraphSource(signaturesGraph));
        session.addOntology(new GraphSource(metadataGraph));
    } catch (UnmodifiableOntologyCollectorException e1) {
        throw new EngineException("Cannot add enhancement graph to OntoNet session for refactoring", e1);
    }
    try {
        /*
         * Export the entire session (incl. entities and enhancement graph) as a single merged ontology.
         *
         * TODO the refactorer should have methods to accommodate an OntologyCollector directly instead.
         */
        OWLOntology ontology = session.export(OWLOntology.class, true);
        log.debug("Refactoring recipe IRI is: " + engineConfiguration.getRecipeId());
        /*
         * We pass the ontology and the recipe IRI to the Stanbol Refactor, which returns the
         * refactored graph expressed in the given vocabulary.
         */
        Recipe recipe = ruleStore.getRecipe(new IRI(engineConfiguration.getRecipeId()));
        log.debug("Recipe {} contains {} rules.", recipe, recipe.getRuleList().size());
        log.debug("The ontology to be refactored is {}", ontology);
        Graph tc = refactorer.graphRefactoring(OWLAPIToClerezzaConverter.owlOntologyToClerezzaGraph(ontology), recipe);
        /*
         * The newly generated ontology is converted to Clerezza format and then either appended to
         * or substituted for the old metadata graph.
         */
        if (engineConfiguration.isInGraphAppendMode()) {
            log.debug("Content metadata will be appended to the existing ones.");
        } else {
            metadataGraph.clear();
            log.debug("Metadata of the content will replace the old ones.");
        }
        metadataGraph.addAll(tc);
    } catch (RefactoringException e) {
        String msg = "Refactor engine execution failed on content item " + ci + ".";
        log.error(msg, e);
        throw new EngineException(msg, e);
    } catch (NoSuchRecipeException e) {
        String msg = "Refactor engine could not find recipe " + engineConfiguration.getRecipeId()
            + " to refactor content item " + ci + ".";
        log.error(msg, e);
        throw new EngineException(msg, e);
    } catch (Exception e) {
        throw new EngineException("Refactor Engine has failed.", e);
    } finally {
        /*
         * The session needs to be destroyed anyhow.
         *
         * Clear contents before destroying (FIXME only do this until this is implemented in the
         * destroySession() method).
         */
        for (OWLOntologyID id : session.listManagedOntologies()) {
            try {
                String key = ontologyProvider.getKey(id.getOntologyIRI());
                ontologyProvider.getStore().deleteGraph(new IRI(key));
            } catch (Exception ex) {
                log.error("Failed to delete triple collection " + id, ex);
                continue;
            }
        }
        sessionManager.destroySession(session.getID());
    }
}
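Everything in this method hangs on the session lifecycle: the session is acquired first (failing fast with an EngineException if the quota is hit or null is returned), all work happens inside try blocks, and the finally clause guarantees cleanup on every exit path. The skeleton of that discipline, with ScratchSession and SessionFactory as illustrative stand-ins for the OntoNet Session and SessionManager types:

public class SessionLifecycleExample {

    interface ScratchSession {
        String getId();
        void close();
    }

    interface SessionFactory {
        ScratchSession create() throws Exception;
    }

    public void runJob(SessionFactory factory) throws Exception {
        // Acquire the scratch resource up front and fail fast if refused.
        ScratchSession session = factory.create();
        if (session == null) {
            throw new IllegalStateException("Failed to create session");
        }
        try {
            // ... add ontologies, export, refactor ...
        } finally {
            // Released on success and on every failure path alike.
            session.close();
        }
    }
}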