use of org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker in project stanbol by apache.
the class EntityCoMentionEngine method computeEnhancements.
/**
 * Computes co-mentions for the parsed ContentItem.
 * <p>
 * Every subject typed as {@code ENHANCER_TEXTANNOTATION} in the metadata of
 * the ContentItem is registered with a {@link ContentItemMentionBuilder}.
 * An {@link EntityLinker} is then run over the analysed text using that
 * builder, and the linked entities are handed to {@code writeComentions(..)}
 * together with the set of registered TextAnnotations.
 *
 * @param ci the ContentItem to process
 * @throws EngineException e.g. if no LabelTokenizer service is available or
 *         if the EntityLinker fails with an EntitySearcherException
 * @throws IllegalStateException if no processing configuration is present
 *         for the language of the ContentItem
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    final AnalysedText analysedText = getAnalysedText(this, ci, true);
    final String lang = getLanguage(this, ci, true);
    final LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(lang);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + lang + "' is not configured "
            + "to be processed by this Engine. As this is already checked within the "
            + "canEnhance(..) method this may indicate an bug in the used "
            + "EnhanceemntJobManager implementation!");
    }
    if (log.isDebugEnabled()) {
        // arguments are only assembled when debug logging is active
        Object[] logArgs = new Object[] {
            ci.getUri().getUnicodeString(),
            lang,
            StringUtils.abbreviate(analysedText.getSpan(), 100) };
        log.debug("compute co-mentions for ContentItem {} language {} text={}", logArgs);
    }
    final LabelTokenizer tokenizer = (LabelTokenizer) labelTokenizerTracker.getService();
    if (tokenizer == null) {
        throw new EngineException(this, ci, "No LabelTokenizer available!", null);
    }
    // in-memory index holding the entities mentioned by existing TextAnnotations
    final ContentItemMentionBuilder mentionIndex =
        new ContentItemMentionBuilder(tokenizer, lang, linkerConfig.getDefaultLanguage());
    final Graph graph = ci.getMetadata();
    final Set<IRI> registeredAnnotations = new HashSet<IRI>();
    // the metadata is only read here, so the read lock is sufficient
    ci.getLock().readLock().lock();
    try {
        Iterator<Triple> typeTriples = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (typeTriples.hasNext()) {
            IRI textAnnotation = (IRI) typeTriples.next().getSubject();
            mentionIndex.registerTextAnnotation(textAnnotation, graph);
            // remember which TextAnnotations were registered
            registeredAnnotations.add(textAnnotation);
        }
    } finally {
        ci.getLock().readLock().unlock();
    }
    // NOTE(review): the mention index is passed as the 4th and as the last
    // constructor argument - presumably in two different roles; confirm
    // against the EntityLinker constructor.
    final EntityLinker linker = new EntityLinker(
        analysedText, lang, langConfig, mentionIndex, linkerConfig, tokenizer, mentionIndex);
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        String failure = "Unable to link Entities with " + linker;
        log.error(failure, e);
        throw new EngineException(this, ci, failure, e);
    }
    // writing the co-mentions modifies the ContentItem: requires the write lock
    ci.getLock().writeLock().lock();
    try {
        writeComentions(ci, linker.getLinkedEntities().values(), lang, registeredAnnotations);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker in project stanbol by apache.
the class EntityLinkingEngineTest method testEntityLinkerWithProperNouns.
/**
 * Checks which entities the {@link EntityLinker} links when the language
 * configuration uses {@code LanguageProcessingConfig.DEFAULT_LINKED_POS}
 * (see {@link Pos#ProperNoun}) and an empty set of linked lexical categories.
 * <p>
 * With this configuration no entry for "geologist" is expected.
 *
 * @throws Exception on any failure during linking or validation
 */
@Test
public void testEntityLinkerWithProperNouns() throws Exception {
    // expected selected text -> expected entity URIs
    Map<String, List<String>> expected = new HashMap<String, List<String>>();
    List<String> patrickMarshall = new ArrayList<String>(
        Arrays.asList("urn:test:PatrickMarshall"));
    expected.put("Patrick Marshall", patrickMarshall);
    // "geologist" is a common noun and MUST NOT be found with this
    // configuration, therefore no expectation is registered for it
    List<String> newZealand = new ArrayList<String>(
        Arrays.asList("urn:test:NewZealand"));
    expected.put("New Zealand", newZealand);
    List<String> universityOfOtago = new ArrayList<String>(
        Arrays.asList("urn:test:UniversityOfOtago", "urn:test:UniversityOfOtago_Texas"));
    expected.put("University of Otago", universityOfOtago);

    // link based on POS tags only
    LanguageProcessingConfig langCfg = new LanguageProcessingConfig();
    langCfg.setLinkedLexicalCategories(Collections.EMPTY_SET);
    langCfg.setLinkedPos(LanguageProcessingConfig.DEFAULT_LINKED_POS);

    EntityLinkerConfig linkerCfg = new EntityLinkerConfig();
    // this test assumes a minimum of two found tokens
    linkerCfg.setMinFoundTokens(2);
    linkerCfg.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    EntityLinker entityLinker = new EntityLinker(
        TEST_ANALYSED_TEXT, "en", langCfg, searcher, linkerCfg, labelTokenizer);
    entityLinker.process();

    validateEntityLinkerResults(entityLinker, expected);
}
use of org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker in project stanbol by apache.
the class EntityLinkingEngineTest method testEntityLinkerWithNouns.
/**
 * Checks which entities the {@link EntityLinker} links when the language
 * configuration uses
 * {@code LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES}
 * (see {@link LexicalCategory#Noun}) and an empty set of linked POS tags.
 * <p>
 * With this configuration "geologist" is expected to be linked to the
 * redirected entity {@code urn:test:redirect:Geologist}.
 *
 * @throws Exception on any failure during linking or validation
 */
@Test
public void testEntityLinkerWithNouns() throws Exception {
    // expected selected text -> expected entity URIs
    Map<String, List<String>> expected = new HashMap<String, List<String>>();
    List<String> patrickMarshall = new ArrayList<String>(
        Arrays.asList("urn:test:PatrickMarshall"));
    expected.put("Patrick Marshall", patrickMarshall);
    // the redirected entity
    List<String> geologist = new ArrayList<String>(
        Arrays.asList("urn:test:redirect:Geologist"));
    expected.put("geologist", geologist);
    List<String> newZealand = new ArrayList<String>(
        Arrays.asList("urn:test:NewZealand"));
    expected.put("New Zealand", newZealand);
    List<String> universityOfOtago = new ArrayList<String>(
        Arrays.asList("urn:test:UniversityOfOtago", "urn:test:UniversityOfOtago_Texas"));
    expected.put("University of Otago", universityOfOtago);

    // link based on lexical categories only
    LanguageProcessingConfig langCfg = new LanguageProcessingConfig();
    langCfg.setLinkedLexicalCategories(LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);
    langCfg.setLinkedPos(Collections.EMPTY_SET);

    EntityLinkerConfig linkerCfg = new EntityLinkerConfig();
    // this test assumes a minimum of two found tokens
    linkerCfg.setMinFoundTokens(2);
    linkerCfg.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    EntityLinker entityLinker = new EntityLinker(
        TEST_ANALYSED_TEXT, "en", langCfg, searcher, linkerCfg, labelTokenizer);
    entityLinker.process();

    validateEntityLinkerResults(entityLinker, expected);
}
use of org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker in project stanbol by apache.
the class EntityLinkingEngine method computeEnhancements.
/**
 * Links entities for the parsed ContentItem and writes the results as
 * enhancements.
 * <p>
 * An {@link EntityLinker} is run over the analysed text with the configured
 * EntitySearcher; the linked entities are then handed to
 * {@code writeEnhancements(..)} while holding the write lock of the
 * ContentItem.
 *
 * @param ci the ContentItem to process
 * @throws EngineException e.g. if offline mode is active but not supported by
 *         the EntitySearcher, or if the EntityLinker fails with an
 *         EntitySearcherException
 * @throws IllegalStateException if no processing configuration is present
 *         for the language of the ContentItem
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    log.trace(" enhance ci {}", ci.getUri());
    // refuse to work if offline mode is requested but the searcher cannot provide it
    final boolean offlineUnsupported = isOfflineMode() && !entitySearcher.supportsOfflineMode();
    if (offlineUnsupported) {
        throw new EngineException(this, ci,
            "Offline mode is not supported by the used EntitySearcher!", null);
    }
    final AnalysedText analysedText = getAnalysedText(this, ci, true);
    log.debug(" > AnalysedText {}", analysedText);
    final String lang = getLanguage(this, ci, true);
    if (log.isDebugEnabled()) {
        // arguments are only assembled when debug logging is active
        Object[] logArgs = new Object[] {
            ci.getUri().getUnicodeString(),
            lang,
            StringUtils.abbreviate(analysedText.getSpan(), 100) };
        log.debug("computeEnhancements for ContentItem {} language {} text={}", logArgs);
    }
    log.debug(" > Language {}", lang);
    final LanguageProcessingConfig langConfig = textProcessingConfig.getConfiguration(lang);
    if (langConfig == null) {
        throw new IllegalStateException("The language '" + lang + "' is not configured "
            + "to be processed by this Engine. As this is already checked within the "
            + "canEnhance(..) method this may indicate an bug in the used "
            + "EnhanceemntJobManager implementation!");
    }
    final EntityLinker linker = new EntityLinker(
        analysedText, lang, langConfig, entitySearcher, linkerConfig, labelTokenizer);
    try {
        linker.process();
    } catch (EntitySearcherException e) {
        String failure = "Unable to link Entities with " + linker;
        log.error(failure, e);
        throw new EngineException(this, ci, failure, e);
    }
    if (log.isInfoEnabled()) {
        linker.logStatistics(log);
    }
    // writing the results modifies the ContentItem: requires the write lock
    ci.getLock().writeLock().lock();
    try {
        writeEnhancements(ci, linker.getLinkedEntities().values(), lang,
            linkerConfig.isWriteEntityRankings());
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker in project stanbol by apache.
the class EntityLinkingEngineTest method testEntityLinkerWithWrongOrder.
/**
 * Checks which entities the {@link EntityLinker} links for
 * {@code TEST_ANALYSED_TEXT_WO}, where the selected text "Marshall Patrick"
 * is expected to be linked to {@code urn:test:PatrickMarshall}.
 * <p>
 * The language configuration uses
 * {@code LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES}
 * (see {@link LexicalCategory#Noun}), an empty set of linked POS tags and
 * has the "ignore chunks state" flag enabled.
 *
 * @throws Exception on any failure during linking or validation
 */
@Test
public void testEntityLinkerWithWrongOrder() throws Exception {
    // expected selected text -> expected entity URIs
    Map<String, List<String>> expected = new HashMap<String, List<String>>();
    List<String> marshallPatrick = new ArrayList<String>(
        Arrays.asList("urn:test:PatrickMarshall"));
    expected.put("Marshall Patrick", marshallPatrick);
    // the redirected entity
    List<String> geologist = new ArrayList<String>(
        Arrays.asList("urn:test:redirect:Geologist"));
    expected.put("geologist", geologist);
    List<String> newZealand = new ArrayList<String>(
        Arrays.asList("urn:test:NewZealand"));
    expected.put("New Zealand", newZealand);
    List<String> universityOfOtago = new ArrayList<String>(
        Arrays.asList("urn:test:UniversityOfOtago", "urn:test:UniversityOfOtago_Texas"));
    expected.put("University of Otago", universityOfOtago);

    // link based on lexical categories only
    LanguageProcessingConfig langCfg = new LanguageProcessingConfig();
    langCfg.setLinkedLexicalCategories(LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);
    langCfg.setLinkedPos(Collections.EMPTY_SET);
    // emulates the behaviour before STANBOL-1211
    langCfg.setIgnoreChunksState(true);

    EntityLinkerConfig linkerCfg = new EntityLinkerConfig();
    // this test assumes a minimum of two found tokens
    linkerCfg.setMinFoundTokens(2);
    linkerCfg.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    EntityLinker entityLinker = new EntityLinker(
        TEST_ANALYSED_TEXT_WO, "en", langCfg, searcher, linkerCfg, labelTokenizer);
    entityLinker.process();

    validateEntityLinkerResults(entityLinker, expected);
}
Aggregations