use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class EntityCoMentionEngine method computeEnhancements.
/**
 * Finds further mentions ("co-mentions") of entities that are already annotated in the
 * metadata of the parsed content item.
 * <p>
 * All existing TextAnnotations are registered (while holding the read lock) with an
 * in-memory {@code ContentItemMentionBuilder}. An {@code EntityLinker} then processes the
 * analysed text using that builder, and the linked entities are handed to
 * {@code writeComentions(..)} while holding the write lock.
 *
 * @param ci the content item to enhance
 * @throws EngineException if no LabelTokenizer service is available or if the
 *         EntityLinker fails with an EntitySearcherException
 * @throws IllegalStateException if no processing configuration exists for the
 *         language determined for the content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText analysedText = getAnalysedText(this, ci, true);
    String lang = getLanguage(this, ci, true);
    LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(lang);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + lang + "' is not configured "
                + "to be processed by this Engine. As this is already checked within the "
                + "canEnhance(..) method this may indicate an bug in the used "
                + "EnhanceemntJobManager implementation!");
    }
    if (log.isDebugEnabled()) {
        Object[] logArgs = {
            ci.getUri().getUnicodeString(),
            lang,
            StringUtils.abbreviate(analysedText.getSpan(), 100)
        };
        log.debug("compute co-mentions for ContentItem {} language {} text={}", logArgs);
    }

    LabelTokenizer tokenizer = (LabelTokenizer) labelTokenizerTracker.getService();
    if (tokenizer == null) {
        throw new EngineException(this, ci, "No LabelTokenizer available!", null);
    }

    // In-memory index of the entities mentioned by the existing TextAnnotations.
    ContentItemMentionBuilder mentionIndex =
            new ContentItemMentionBuilder(tokenizer, lang, linkerConfig.getDefaultLanguage());
    // Remembers which TextAnnotations were registered with the index.
    Set<IRI> registered = new HashSet<IRI>();
    Graph metadata = ci.getMetadata();

    ci.getLock().readLock().lock();
    try {
        Iterator<Triple> typed = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (typed.hasNext()) {
            IRI annotation = (IRI) typed.next().getSubject();
            mentionIndex.registerTextAnnotation(annotation, metadata);
            registered.add(annotation);
        }
    } finally {
        ci.getLock().readLock().unlock();
    }

    // The mention index is passed twice: once in the searcher position and once as the
    // trailing argument of the EntityLinker constructor.
    EntityLinker linker = new EntityLinker(
            analysedText, lang, langConfig, mentionIndex, linkerConfig, tokenizer, mentionIndex);
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        String failure = "Unable to link Entities with " + linker;
        log.error(failure, e);
        throw new EngineException(this, ci, failure, e);
    }

    // Writing the co-mentions modifies the content item, so take the write lock.
    ci.getLock().writeLock().lock();
    try {
        writeComentions(ci, linker.getLinkedEntities().values(), lang, registered);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class EntityLinkingEngine method computeEnhancements.
/**
 * Links entities in the analysed text of the parsed content item and writes the
 * resulting enhancements to it.
 * <p>
 * An {@code EntityLinker} is run over the AnalysedText using the configured entity
 * searcher; the linked entities are then passed to {@code writeEnhancements(..)} while
 * holding the write lock of the content item.
 *
 * @param ci the content item to enhance
 * @throws EngineException if offline mode is active but the entity searcher does not
 *         support it, or if the EntityLinker fails with an EntitySearcherException
 * @throws IllegalStateException if no processing configuration exists for the
 *         language determined for the content item
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    log.trace(" enhance ci {}", ci.getUri());
    if (isOfflineMode()) {
        if (!entitySearcher.supportsOfflineMode()) {
            throw new EngineException(this, ci,
                    "Offline mode is not supported by the used EntitySearcher!", null);
        }
    }

    AnalysedText analysedText = getAnalysedText(this, ci, true);
    log.debug(" > AnalysedText {}", analysedText);
    String lang = getLanguage(this, ci, true);
    if (log.isDebugEnabled()) {
        Object[] logArgs = {
            ci.getUri().getUnicodeString(),
            lang,
            StringUtils.abbreviate(analysedText.getSpan(), 100)
        };
        log.debug("computeEnhancements for ContentItem {} language {} text={}", logArgs);
    }
    log.debug(" > Language {}", lang);

    LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(lang);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + lang + "' is not configured "
                + "to be processed by this Engine. As this is already checked within the "
                + "canEnhance(..) method this may indicate an bug in the used "
                + "EnhanceemntJobManager implementation!");
    }

    EntityLinker linker = new EntityLinker(
            analysedText, lang, langConfig, entitySearcher, linkerConfig, labelTokenizer);
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        String failure = "Unable to link Entities with " + linker;
        log.error(failure, e);
        throw new EngineException(this, ci, failure, e);
    }
    if (log.isInfoEnabled()) {
        linker.logStatistics(log);
    }

    // Writing the results modifies the content item, so take the write lock.
    ci.getLock().writeLock().lock();
    try {
        writeEnhancements(ci, linker.getLinkedEntities().values(), lang,
                linkerConfig.isWriteEntityRankings());
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class LocationEnhancementEngine method computeEnhancements.
/**
 * Looks up toponyms for the places mentioned in the parsed content item and writes them
 * as entity enhancements.
 * <p>
 * Steps:
 * <ol>
 * <li>Collect all TextAnnotations with dc:type dbpedia:Place, grouped by their selected
 *     text, so that each distinct name is looked up only once.</li>
 * <li>Search the geonames service for every name and write one enhancement per result,
 *     stopping at the first result whose score is below {@code minScore}.</li>
 * <li>For results with a score of at least {@code minHierarchyScore}, also write the
 *     entries of the toponym hierarchy as enhancements that depend on the result's
 *     enhancement.</li>
 * </ol>
 *
 * @param ci the content item to enhance
 * @throws EngineException if a toponym search fails with any exception
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    IRI ciUri = ci.getUri();
    Graph metadata = ci.getMetadata();
    LiteralFactory literals = LiteralFactory.getInstance();

    // name -> all place TextAnnotations selecting that name; avoids repeated lookups
    // for annotations that select the same text.
    Map<String, Collection<BlankNodeOrIRI>> annotationsByName =
            new HashMap<String, Collection<BlankNodeOrIRI>>();
    for (Iterator<Triple> places = metadata.filter(null, DC_TYPE, DBPEDIA_PLACE); places.hasNext();) {
        BlankNodeOrIRI candidate = places.next().getSubject();
        if (!metadata.contains(new TripleImpl(candidate, RDF_TYPE, ENHANCER_TEXTANNOTATION))) {
            // Not a TextAnnotation (could be an EntityAnnotation typed as place).
            // TODO: to also process EntityAnnotations with dc:type dbpedia:Place the
            // name would have to be taken from the enhancer:entity-name property.
            continue;
        }
        String selectedText =
                EnhancementEngineHelper.getString(metadata, candidate, ENHANCER_SELECTED_TEXT);
        if (selectedText == null) {
            log.warn("Unable to process TextAnnotation " + candidate + " because property" + ENHANCER_SELECTED_TEXT + " is not present");
            continue;
        }
        Collection<BlankNodeOrIRI> sameName = annotationsByName.get(selectedText);
        if (sameName == null) {
            sameName = new ArrayList<BlankNodeOrIRI>();
            annotationsByName.put(selectedText, sameName);
        }
        sameName.add(candidate);
    }

    // All names are known now; the request map is reused and only the name is replaced
    // for each lookup.
    Map<SearchRequestPropertyEnum, Collection<String>> request =
            new EnumMap<SearchRequestPropertyEnum, Collection<String>>(SearchRequestPropertyEnum.class);
    if (getMaxLocationEnhancements() != null) {
        request.put(SearchRequestPropertyEnum.maxRows,
                Collections.singleton(getMaxLocationEnhancements().toString()));
    }

    for (Map.Entry<String, Collection<BlankNodeOrIRI>> entry : annotationsByName.entrySet()) {
        List<Toponym> results;
        try {
            request.put(SearchRequestPropertyEnum.name, Collections.singleton(entry.getKey()));
            results = geonamesService.searchToponyms(request);
        } catch (Exception e) {
            // TODO: review whether failures should be handled per name or for the whole
            // loop; that depends on whether exceptions are typically request specific
            // (e.g. encoding problems) or general (connection issues, service down).
            throw new EngineException(this, ci, e);
        }
        if (results == null) {
            continue;
        }

        Collection<BlankNodeOrIRI> annotations = entry.getValue();
        // The score of the first result is used as reference for scoring all results.
        Double maxScore = null;
        if (!results.isEmpty()) {
            maxScore = results.get(0).getScore();
        }

        for (Toponym toponym : results) {
            log.debug("process result {} {}", toponym.getGeoNameId(), toponym.getName());
            Double score = getToponymScore(toponym, maxScore);
            log.debug(" > score {}", score);
            if (score == null) {
                // Without a score the suggestion is still written to the metadata.
                log.warn("NULL returned as Score for " + toponym.getGeoNameId() + " " + toponym.getName());
            } else if (score < minScore) {
                // Below the lower bound: ignore this and all remaining results.
                break;
            }

            BlankNodeOrIRI suggestion = writeEntityEnhancement(
                    ciUri, metadata, literals, toponym, annotations, null, score);

            log.debug(" > {} >= {}", score, minHierarchyScore);
            boolean withHierarchy = score != null && score >= minHierarchyScore;
            if (!withHierarchy) {
                continue;
            }

            log.debug(" > getHierarchy for {} {}", toponym.getGeoNameId(), toponym.getName());
            try {
                Iterator<Toponym> ancestors = getHierarchy(toponym).iterator();
                // The first hierarchy entry (level 0, "Mother earth") is skipped.
                if (ancestors.hasNext()) {
                    ancestors.next();
                }
                while (ancestors.hasNext()) {
                    Toponym ancestor = ancestors.next();
                    // NOTE(review): compared with ==, as in the original; this presumes
                    // getGeoNameId() returns a primitive - confirm.
                    if (toponym.getGeoNameId() == ancestor.getGeoNameId()) {
                        // the hierarchy contains the result itself -> nothing to write
                        continue;
                    }
                    // TODO: add additional checks based on possible configuration here!
                    log.debug(" - write hierarchy {} {}", ancestor.getGeoNameId(), ancestor.getName());
                    // Hierarchy entries are written with a fixed score of 1.0 and as
                    // dependent on the enhancement of the suggested toponym.
                    writeEntityEnhancement(ciUri, metadata, literals, ancestor, null,
                            Collections.singletonList(suggestion), 1.0);
                }
            } catch (Exception e) {
                // A failing hierarchy lookup does not fail the engine; it is only logged.
                log.warn("Unable to get Hierarchy for " + toponym.getGeoNameId() + " " + toponym.getName(), e);
            }
        }
    }
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class EnhancementJobHandler method processEvent.
/**
 * Processes a single execution of an enhancement job: resolves the engine for the
 * execution, asks it whether it can enhance the content item, runs it if so, and
 * records the outcome (completed or failed) on the job.
 * <p>
 * Synchronous engines are run while holding the write lock of the job; asynchronous
 * engines are run without it. An engine that reports it cannot enhance the content item
 * is recorded as completed, unless the check itself failed with an EngineException.
 *
 * @param job the enhancement job the execution belongs to
 * @param execution the execution to process
 */
private void processEvent(EnhancementJob job, BlankNodeOrIRI execution) {
    String engineName = getEngine(job.getExecutionPlan(), job.getExecutionNode(execution));
    EnhancementEngine engine = engineManager.getEngine(engineName);
    if (engine == null) {
        // no engine with that name is available
        job.setFailed(execution, null, null);
        return;
    }

    Exception checkFailure = null;
    int state;
    try {
        state = engine.canEnhance(job.getContentItem());
    } catch (EngineException e) {
        checkFailure = e;
        log.warn("Unable to check if engine '" + engineName + "'(type: " + engine.getClass() + ") can enhance ContentItem '" + job.getContentItem().getUri() + "'!", e);
        state = EnhancementEngine.CANNOT_ENHANCE;
    }

    if (state != EnhancementEngine.ENHANCE_SYNCHRONOUS && state != EnhancementEngine.ENHANCE_ASYNC) {
        // CANNOT_ENHANCE: not an error by itself, the engine just can not enhance this
        // content item. It only counts as a failure if the check threw an exception.
        if (checkFailure == null) {
            job.setCompleted(execution);
        } else {
            job.setFailed(execution, engine, checkFailure);
        }
        return;
    }

    if (state == EnhancementEngine.ENHANCE_ASYNC) {
        try {
            log.trace("++ n: start async execution of Engine {}", engine.getName());
            engine.computeEnhancements(job.getContentItem());
            log.trace("++ n: finished async execution of Engine {}", engine.getName());
            job.setCompleted(execution);
        } catch (EngineException e) {
            log.warn(e.getMessage(), e);
            job.setFailed(execution, engine, e);
        } catch (RuntimeException e) {
            log.warn(e.getMessage(), e);
            job.setFailed(execution, engine, e);
        }
        return;
    }

    // ENHANCE_SYNCHRONOUS: the engine needs exclusive access to the content item.
    log.trace("++ w: {}: {}", "start sync execution", engine.getName());
    job.getLock().writeLock().lock();
    log.trace(">> w: {}: {}", "start sync execution", engine.getName());
    try {
        engine.computeEnhancements(job.getContentItem());
        job.setCompleted(execution);
    } catch (EngineException e) {
        log.warn(e.getMessage(), e);
        job.setFailed(execution, engine, e);
    } catch (RuntimeException e) {
        log.warn(e.getMessage(), e);
        job.setFailed(execution, engine, e);
    } finally {
        log.trace("<< w: {}: {}", "finished sync execution", engine.getName());
        job.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.enhancer.servicesapi.EngineException in project stanbol by apache.
the class CeliLanguageIdentifierEnhancementEngine method computeEnhancements.
/**
 * Identifies the language of the text of the parsed content item by calling the CELI
 * language identifier service and writes the first guessed language as a text
 * enhancement.
 * <p>
 * Texts that split into more than five space separated parts are sent to
 * {@code guessLanguage(..)}, shorter ones to {@code guessQueryLanguage(..)}. Nothing is
 * written if the text is blank or if the service returns no guess at all.
 *
 * @param ci the content item to enhance
 * @throws InvalidContentException if the text can not be read from the content part
 * @throws EngineException if calling the service fails with an IOException or
 *         SOAPException
 * @throws IllegalStateException if the content item has no content part with a
 *         supported mime type
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    try {
        String[] tmps = text.split(" ");
        List<GuessedLanguage> lista;
        if (tmps.length > 5) {
            lista = this.client.guessLanguage(text);
        } else {
            lista = this.client.guessQueryLanguage(text);
        }
        // The service may return no guess at all. Check this before taking the write
        // lock instead of failing with an unchecked exception on lista.get(0).
        if (lista == null || lista.isEmpty()) {
            log.info("No language identified for ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
            return;
        }
        Graph g = ci.getMetadata();
        // in ENHANCE_ASYNC we need to use read/write locks on the ContentItem
        ci.getLock().writeLock().lock();
        try {
            // only the first guess is written
            GuessedLanguage gl = lista.get(0);
            IRI textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
            g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(gl.getLang())));
            g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(gl.getConfidence())));
            g.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI language" + " identifier service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI language identifier service!", e);
    }
}
Aggregations