Use of org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer in the Apache Stanbol project.
Class EntityCoMentionEngine, method computeEnhancements.
/**
 * Computes co-mentions for the parsed ContentItem.
 * <p>
 * All existing TextAnnotations found in the metadata of the ContentItem are
 * registered with an in-memory {@link ContentItemMentionBuilder}. This index
 * is then handed to an {@link EntityLinker} (both as searcher and as
 * additional argument) and the linked entities are written back to the
 * ContentItem.
 * <p>
 * Metadata is read while holding the read lock and results are written while
 * holding the write lock of the ContentItem.
 *
 * @param ci the ContentItem to process
 * @throws EngineException if no {@link LabelTokenizer} is available or if
 *         the linking process fails with an {@link EntitySearcherException}
 * @throws IllegalStateException if no processing configuration exists for
 *         the language of the ContentItem
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    final AnalysedText analysedText = getAnalysedText(this, ci, true);
    final String language = getLanguage(this, ci, true);

    final LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(language);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + language + "' is not configured "
                + "to be processed by this Engine. As this is already checked within the "
                + "canEnhance(..) method this may indicate an bug in the used "
                + "EnhanceemntJobManager implementation!");
    }
    if (log.isDebugEnabled()) {
        log.debug("compute co-mentions for ContentItem {} language {} text={}",
                new Object[] {
                    ci.getUri().getUnicodeString(),
                    language,
                    StringUtils.abbreviate(analysedText.getSpan(), 100)
                });
    }

    final LabelTokenizer tokenizer = (LabelTokenizer) labelTokenizerTracker.getService();
    if (tokenizer == null) {
        throw new EngineException(this, ci, "No LabelTokenizer available!", null);
    }

    // in-memory index holding the entities already mentioned in the ContentItem
    final ContentItemMentionBuilder mentionIndex =
            new ContentItemMentionBuilder(tokenizer, language, linkerConfig.getDefaultLanguage());
    final Graph metadata = ci.getMetadata();
    // remembers every TextAnnotation that was registered with the index
    final Set<IRI> registeredAnnotations = new HashSet<IRI>();

    ci.getLock().readLock().lock();
    try {
        // register each TextAnnotation (entity mention) present in the metadata
        final Iterator<Triple> annotationTriples =
                metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (annotationTriples.hasNext()) {
            final IRI annotation = (IRI) annotationTriples.next().getSubject();
            mentionIndex.registerTextAnnotation(annotation, metadata);
            registeredAnnotations.add(annotation);
        }
    } finally {
        ci.getLock().readLock().unlock();
    }

    final EntityLinker linker = new EntityLinker(analysedText, language, langConfig,
            mentionIndex, linkerConfig, tokenizer, mentionIndex);
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        final String failure = "Unable to link Entities with " + linker;
        log.error(failure, e);
        throw new EngineException(this, ci, failure, e);
    }

    // write the co-mentions while holding the write lock
    ci.getLock().writeLock().lock();
    try {
        writeComentions(ci, linker.getLinkedEntities().values(), language, registeredAnnotations);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Use of org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer in the Apache Stanbol project.
Class MainLabelTokenizer, method tokenize.
/**
 * Tokenizes the label by delegating to the tracked {@link LabelTokenizer}
 * services returned by {@code getTokenizers(language)}.
 * <p>
 * The references are tried in the order they are returned. The result of the
 * first tokenizer that is available and returns a non-{@code null} array is
 * used.
 *
 * @param label the label to tokenize
 * @param language the language of the label
 * @return the tokens, or {@code null} if none of the tokenizers produced a
 *         result for the parsed language
 */
@Override
public String[] tokenize(String label, String language) {
    for (ServiceReference candidateRef : getTokenizers(language)) {
        final LabelTokenizer delegate = (LabelTokenizer) labelTokenizerTracker.getService(candidateRef);
        if (delegate == null) {
            // no service object tracked for this reference -> try the next one
            continue;
        }
        log.trace(" > use Tokenizer {} for language {}", delegate.getClass(), language);
        final String[] result = delegate.tokenize(label, language);
        if (result == null) {
            // this tokenizer produced no result -> try the next one
            continue;
        }
        if (log.isTraceEnabled()) {
            log.trace(" - tokenized {} -> {}", label, Arrays.toString(result));
        }
        return result;
    }
    log.warn("No LabelTokenizer availabel for language {} -> return null", language);
    return null;
}
Use of org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer in the Apache Stanbol project.
Class EntityhubLinkingEngine, method activate.
/**
 * Activates this component based on the parsed configuration.
 * <p>
 * The method
 * <ol>
 * <li>creates the {@link EntityLinkerConfig} and {@link TextProcessingConfig}
 * from the component properties,</li>
 * <li>validates the required {@code SITE_ID} and {@code PROPERTY_NAME}
 * properties,</li>
 * <li>collects the metadata (name and service ranking) for the engine,</li>
 * <li>creates the entity searcher (an {@link EntityhubSearcher} if the site
 * name is one of {@code Entityhub.ENTITYHUB_IDS}, otherwise a
 * {@link ReferencedSiteSearcher}),</li>
 * <li>creates a {@link ServiceTracker} for {@link LabelTokenizer} services
 * that keeps the {@link EntityLinkingEngine} configured with the tokenizer of
 * the last (highest) entry of {@code labelTokenizersRefs}, and</li>
 * <li>creates the {@link EntityLinkingEngine} and opens the tracker and the
 * searcher.</li>
 * </ol>
 *
 * @param ctx the component context providing the configuration and the
 *        bundle context
 * @throws ConfigurationException if {@code SITE_ID} or {@code PROPERTY_NAME}
 *         is missing or empty
 */
@Activate
@SuppressWarnings("unchecked")
protected void activate(ComponentContext ctx) throws ConfigurationException {
    Dictionary<String, Object> properties = ctx.getProperties();
    bundleContext = ctx.getBundleContext();
    EntityLinkerConfig linkerConfig = EntityLinkerConfig.createInstance(properties, prefixService);
    TextProcessingConfig textProcessingConfig = TextProcessingConfig.createInstance(properties);

    // init the EntitySource
    Object value = properties.get(SITE_ID);
    if (value == null) {
        throw new ConfigurationException(SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be NULL!");
    }
    siteName = value.toString();
    if (siteName.isEmpty()) {
        throw new ConfigurationException(SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be an empty String!");
    }

    // get the metadata later set to the enhancement engine
    engineMetadata = new Hashtable<String, Object>();
    value = properties.get(PROPERTY_NAME);
    if (value == null || value.toString().isEmpty()) {
        throw new ConfigurationException(PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
    }
    final String engineName = value.toString();
    engineMetadata.put(PROPERTY_NAME, value);
    value = properties.get(Constants.SERVICE_RANKING);
    engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);

    // init the tracking entity searcher
    trackedServiceCount = 0;
    // Locale.ROOT: the default locale must not influence the comparison with
    // the configured IDs (e.g. 'I' is lower-cased to a dotless 'i' in Turkish)
    if (Entityhub.ENTITYHUB_IDS.contains(siteName.toLowerCase(java.util.Locale.ROOT))) {
        entitySearcher = new EntityhubSearcher(bundleContext, 10, this);
    } else {
        entitySearcher = new ReferencedSiteSearcher(bundleContext, siteName, 10, this);
    }

    labelTokenizerTracker = new ServiceTracker(bundleContext, LabelTokenizer.class.getName(), new ServiceTrackerCustomizer() {

        @Override
        public Object addingService(ServiceReference reference) {
            Object service = bundleContext.getService(reference);
            if (service == null) {
                return service;
            }
            synchronized (labelTokenizersRefs) {
                labelTokenizersRefs.add(reference);
                updateEngineTokenizer(reference, service);
            }
            return service;
        }

        @Override
        public void removedService(ServiceReference reference, Object service) {
            try {
                synchronized (labelTokenizersRefs) {
                    labelTokenizersRefs.remove(reference);
                    EntityLinkingEngine engine = entityLinkingEngine;
                    ServiceTracker tracker = labelTokenizerTracker;
                    // if engine or tracker is null deactivate was already called
                    if (engine != null && tracker != null) {
                        if (labelTokenizersRefs.isEmpty()) {
                            log.info(" ... setting LabelTokenizer of Engine '{}' to null", engine.getName());
                            engine.setLabelTokenizer(null);
                        } else {
                            ServiceReference highest = labelTokenizersRefs.last();
                            LabelTokenizer lt = (LabelTokenizer) tracker.getService(highest);
                            if (lt == null) {
                                // the tracker has no service object for this reference
                                // (e.g. it is currently added or removed); its own
                                // callback is expected to update the engine
                                log.debug(" ... LabelTokenizer for {} not available from the tracker; "
                                        + "keeping current LabelTokenizer of Engine '{}'", highest, engine.getName());
                            } else if (!lt.equals(engine.getLabelTokenizer())) {
                                log.info(" ... setting LabelTokenizer of Engine '{}' to {}", engine.getName(), lt);
                                engine.setLabelTokenizer(lt);
                            }
                        }
                    }
                }
            } finally {
                // always release the service obtained in addingService(..)
                bundleContext.ungetService(reference);
            }
        }

        @Override
        public void modifiedService(ServiceReference reference, Object service) {
            synchronized (labelTokenizersRefs) {
                // remove and re-add the reference so that the set is re-sorted
                labelTokenizersRefs.remove(reference);
                labelTokenizersRefs.add(reference);
                updateEngineTokenizer(reference, service);
            }
        }

        /**
         * Sets the LabelTokenizer of the last (highest) entry of
         * labelTokenizersRefs on the engine, if it differs from the one
         * currently used. Callers MUST hold the monitor of
         * labelTokenizersRefs.
         *
         * @param reference the reference the current callback was called for
         * @param service the service object belonging to the reference
         */
        private void updateEngineTokenizer(ServiceReference reference, Object service) {
            EntityLinkingEngine engine = entityLinkingEngine;
            ServiceTracker tracker = labelTokenizerTracker;
            if (engine == null || tracker == null) {
                // if engine or tracker is null deactivate was already called
                return;
            }
            ServiceReference highest = labelTokenizersRefs.isEmpty() ? null : labelTokenizersRefs.last();
            // for the reference of the current callback use the parsed service
            // object, for any other reference ask the tracker
            Object candidate = (highest == null || reference.equals(highest)) ? service : tracker.getService(highest);
            if (candidate == null) {
                // the tracker has no service object for the highest reference
                // (e.g. it is currently added or removed) -> keep the current one
                log.debug(" ... LabelTokenizer for {} not available from the tracker; "
                        + "keeping current LabelTokenizer of Engine '{}'", highest, engine.getName());
                return;
            }
            LabelTokenizer lt = (LabelTokenizer) candidate;
            if (!lt.equals(engine.getLabelTokenizer())) {
                log.info(" ... setting LabelTokenizer of Engine '{}' to {}", engine.getName(), lt);
                engine.setLabelTokenizer(lt);
            }
        }
    });

    // create the engine; the LabelTokenizer is set later by the tracker callbacks
    entityLinkingEngine = new EntityLinkingEngine(engineName,
            // the searcher might not be available
            entitySearcher, textProcessingConfig, linkerConfig, null);

    // start tracking
    labelTokenizerTracker.open();
    entitySearcher.open();
}
Aggregations