Usage of org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig in the Apache Stanbol project.
From class FstLinkingEngineTest, method testFstLinkingWithProperNouns.
/**
 * Runs the {@link FstLinkingEngine} in {@link LinkingModeEnum#LINKABLE_TOKEN}
 * mode with {@code PROCESS_ONLY_PROPER_NOUNS_STATE} enabled and validates
 * the resulting enhancements against the expected selected texts and
 * entity URIs.
 * @throws Exception
 */
@Test
public void testFstLinkingWithProperNouns() throws Exception {
    // Entity linker settings. The expectations validated at the end of
    // this test assume at least two found tokens.
    EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
    linkerConfig.setMinFoundTokens(2);
    linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    // Text processing settings: only proper nouns are processed.
    Dictionary<String, Object> textProcessingProperties = new Hashtable<String, Object>();
    textProcessingProperties.put(PROCESS_ONLY_PROPER_NOUNS_STATE, true);
    textProcessingProperties.put(PROCESSED_LANGUAGES,
            Arrays.asList("en;lmmtip;uc=LINK;prob=0.75;pprob=0.75"));
    TextProcessingConfig textProcessingConfig =
            TextProcessingConfig.createInstance(textProcessingProperties);

    FstLinkingEngine linkingEngine = new FstLinkingEngine(
            "proper-noun-linking",
            LinkingModeEnum.LINKABLE_TOKEN,
            fstConfig,
            textProcessingConfig,
            linkerConfig,
            null);
    processConentItem(linkingEngine);

    validateEnhancements(
            // expected selected texts
            Arrays.asList(
                    "Chancellor",
                    "Angela Merkel",
                    "Greece",
                    "Greeks",
                    "Germany",
                    "SPD"),
            // expected entity URIs
            Arrays.asList(
                    DBPEDIA + "Christian_Democratic_Union_(Germany)",
                    DBPEDIA + "Angela_Merkel",
                    DBPEDIA + "Greece",
                    DBPEDIA + "Germany",
                    DBPEDIA + "Social_Democratic_Party_of_Germany"));
}
Usage of org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig in the Apache Stanbol project.
From class EntityLinkingEngineTest, method testEntityLinkerWithProperNouns.
/**
 * Verifies that the {@link EntityLinker} links the expected entities when
 * only the default linked POS tags (see {@link Pos#ProperNoun}) are
 * configured and no lexical categories are linked.
 * @throws Exception
 */
@Test
public void testEntityLinkerWithProperNouns() throws Exception {
    // Linker settings. The expected results below assume at least two
    // found tokens.
    EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
    linkerConfig.setMinFoundTokens(2);
    linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    // Link by POS tag only: default linked POS, no lexical categories.
    LanguageProcessingConfig languageConfig = new LanguageProcessingConfig();
    languageConfig.setLinkedPos(LanguageProcessingConfig.DEFAULT_LINKED_POS);
    languageConfig.setLinkedLexicalCategories(Collections.EMPTY_SET);

    EntityLinker entityLinker = new EntityLinker(
            TEST_ANALYSED_TEXT, "en", languageConfig, searcher, linkerConfig, labelTokenizer);
    entityLinker.process();

    // Selected text -> expected entity URIs.
    // "geologist" is intentionally absent: it is a common noun and
    // MUST NOT be found with this configuration.
    Map<String, List<String>> expected = new HashMap<String, List<String>>();
    expected.put("Patrick Marshall",
            new ArrayList<String>(Arrays.asList("urn:test:PatrickMarshall")));
    expected.put("New Zealand",
            new ArrayList<String>(Arrays.asList("urn:test:NewZealand")));
    expected.put("University of Otago",
            new ArrayList<String>(Arrays.asList(
                    "urn:test:UniversityOfOtago",
                    "urn:test:UniversityOfOtago_Texas")));

    validateEntityLinkerResults(entityLinker, expected);
}
Usage of org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig in the Apache Stanbol project.
From class EntityLinkingEngineTest, method testEntityLinkerWithNouns.
/**
 * Verifies that the {@link EntityLinker} links the expected entities when
 * the default linked lexical categories (see {@link LexicalCategory#Noun})
 * are configured and no individual POS tags are linked.
 * @throws Exception
 */
@Test
public void testEntityLinkerWithNouns() throws Exception {
    // Linker settings. The expected results below assume at least two
    // found tokens.
    EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
    linkerConfig.setMinFoundTokens(2);
    linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    // Link by lexical category only: default categories, no POS tags.
    LanguageProcessingConfig languageConfig = new LanguageProcessingConfig();
    languageConfig.setLinkedPos(Collections.EMPTY_SET);
    languageConfig.setLinkedLexicalCategories(
            LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);

    EntityLinker entityLinker = new EntityLinker(
            TEST_ANALYSED_TEXT, "en", languageConfig, searcher, linkerConfig, labelTokenizer);
    entityLinker.process();

    // Selected text -> expected entity URIs.
    Map<String, List<String>> expected = new HashMap<String, List<String>>();
    expected.put("Patrick Marshall",
            new ArrayList<String>(Arrays.asList("urn:test:PatrickMarshall")));
    // for "geologist" the redirected entity is expected
    expected.put("geologist",
            new ArrayList<String>(Arrays.asList("urn:test:redirect:Geologist")));
    expected.put("New Zealand",
            new ArrayList<String>(Arrays.asList("urn:test:NewZealand")));
    expected.put("University of Otago",
            new ArrayList<String>(Arrays.asList(
                    "urn:test:UniversityOfOtago",
                    "urn:test:UniversityOfOtago_Texas")));

    validateEntityLinkerResults(entityLinker, expected);
}
Usage of org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig in the Apache Stanbol project.
From class FstLinkingEngineTest, method testFstLinkingWithNouns.
/**
 * Runs the {@link FstLinkingEngine} in {@link LinkingModeEnum#LINKABLE_TOKEN}
 * mode with {@code PROCESS_ONLY_PROPER_NOUNS_STATE} disabled and validates
 * the resulting enhancements against the expected selected texts and
 * entity URIs.
 * @throws Exception
 */
@Test
public void testFstLinkingWithNouns() throws Exception {
    // Entity linker settings. The expectations validated at the end of
    // this test assume at least two found tokens.
    EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
    linkerConfig.setMinFoundTokens(2);
    linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    // Text processing settings: processing is NOT limited to proper nouns.
    Dictionary<String, Object> textProcessingProperties = new Hashtable<String, Object>();
    textProcessingProperties.put(PROCESS_ONLY_PROPER_NOUNS_STATE, false);
    textProcessingProperties.put(PROCESSED_LANGUAGES,
            Arrays.asList("en;lmmtip;uc=LINK;prob=0.75;pprob=0.75"));
    TextProcessingConfig textProcessingConfig =
            TextProcessingConfig.createInstance(textProcessingProperties);

    FstLinkingEngine linkingEngine = new FstLinkingEngine(
            "proper-noun-linking",
            LinkingModeEnum.LINKABLE_TOKEN,
            fstConfig,
            textProcessingConfig,
            linkerConfig,
            null);
    processConentItem(linkingEngine);

    validateEnhancements(
            // expected selected texts
            Arrays.asList(
                    "Chancellor",
                    "Angela Merkel",
                    "Greece",
                    "Greeks",
                    "Germany",
                    "SPD",
                    "change",
                    "election",
                    "party",
                    "policy"),
            // expected entity URIs
            Arrays.asList(
                    DBPEDIA + "Christian_Democratic_Union_(Germany)",
                    DBPEDIA + "Angela_Merkel",
                    DBPEDIA + "Greece",
                    DBPEDIA + "Germany",
                    DBPEDIA + "Social_Democratic_Party_of_Germany",
                    DBPEDIA + "Chancellor",
                    DBPEDIA + "Election",
                    DBPEDIA + "Party",
                    DBPEDIA + "Policy"));
}
Usage of org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig in the Apache Stanbol project.
From class EntityhubLinkingEngine, method activate.
/**
 * Activates this component: parses the linker and text processing
 * configuration from the component properties, validates the required
 * {@code SITE_ID} and {@code PROPERTY_NAME} properties, creates the entity
 * searcher and the {@link EntityLinkingEngine}, and finally starts tracking
 * {@link LabelTokenizer} services and opens the entity searcher.
 *
 * @param ctx the component context providing the configuration properties
 *        and the bundle context
 * @throws ConfigurationException if {@code SITE_ID} is missing or empty, or
 *         if {@code PROPERTY_NAME} is missing or empty. The two
 *         {@code createInstance(..)} calls are declared elsewhere and may
 *         presumably also throw it for invalid values (not visible here).
 */
@Activate
@SuppressWarnings("unchecked")
protected void activate(ComponentContext ctx) throws ConfigurationException {
    Dictionary<String, Object> properties = ctx.getProperties();
    bundleContext = ctx.getBundleContext();
    EntityLinkerConfig linkerConfig = EntityLinkerConfig.createInstance(properties, prefixService);
    TextProcessingConfig textProcessingConfig = TextProcessingConfig.createInstance(properties);

    //init the EntitySource
    Object value = properties.get(SITE_ID);
    if (value == null) {
        throw new ConfigurationException(SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be NULL!");
    }
    siteName = value.toString();
    if (siteName.isEmpty()) {
        throw new ConfigurationException(SITE_ID, "The ID of the Referenced Site is a required Parameter and MUST NOT be an empty String!");
    }

    //get the metadata later set to the enhancement engine
    engineMetadata = new Hashtable<String, Object>();
    value = properties.get(PROPERTY_NAME);
    if (value == null || value.toString().isEmpty()) {
        throw new ConfigurationException(PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
    }
    String engineName = value.toString();
    engineMetadata.put(PROPERTY_NAME, value);
    value = properties.get(Constants.SERVICE_RANKING);
    engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);

    //init the tracking entity searcher
    trackedServiceCount = 0;
    // Locale.ROOT keeps the lower-casing independent of the JVM default
    // locale (e.g. under a Turkish locale 'I' is not lower-cased to 'i',
    // so the configured id would not match the known Entityhub ids).
    if (Entityhub.ENTITYHUB_IDS.contains(siteName.toLowerCase(java.util.Locale.ROOT))) {
        entitySearcher = new EntityhubSearcher(bundleContext, 10, this);
    } else {
        entitySearcher = new ReferencedSiteSearcher(bundleContext, siteName, 10, this);
    }

    // Keeps the engine's LabelTokenizer in sync with the tracked
    // LabelTokenizer services. All updates happen while holding the lock on
    // labelTokenizersRefs; the tokenizer of the last() reference is used
    // (presumably the highest ranked service - the ordering of
    // labelTokenizersRefs is defined outside of this method).
    labelTokenizerTracker = new ServiceTracker(bundleContext, LabelTokenizer.class.getName(), new ServiceTrackerCustomizer() {

        @Override
        public Object addingService(ServiceReference reference) {
            Object service = bundleContext.getService(reference);
            if (service == null) {
                return service;
            }
            synchronized (labelTokenizersRefs) {
                labelTokenizersRefs.add(reference);
                selectTokenizer(reference, service);
            }
            return service;
        }

        @Override
        public void removedService(ServiceReference reference, Object service) {
            synchronized (labelTokenizersRefs) {
                //override
                labelTokenizersRefs.remove(reference);
                EntityLinkingEngine engine = entityLinkingEngine;
                ServiceTracker tracker = labelTokenizerTracker;
                //if engine or tracker is null deactivate was already called
                if (engine != null && tracker != null) {
                    LabelTokenizer replacement = labelTokenizersRefs.isEmpty() ? null
                            : (LabelTokenizer) tracker.getService(labelTokenizersRefs.last());
                    if (replacement == null) {
                        // Either no reference is left, or the tracker could
                        // not resolve the remaining one. In both cases reset
                        // the tokenizer instead of failing with a
                        // NullPointerException, so that the engine does not
                        // keep the service that was just removed.
                        log.info(" ... setting LabelTokenizer of Engine '{}' to null", engine.getName());
                        engine.setLabelTokenizer(null);
                    } else {
                        applyTokenizer(engine, replacement);
                    }
                }
            }
            bundleContext.ungetService(reference);
        }

        @Override
        public void modifiedService(ServiceReference reference, Object service) {
            synchronized (labelTokenizersRefs) {
                //override: remove and re-add the reference so that its
                //position within labelTokenizersRefs is re-evaluated
                labelTokenizersRefs.remove(reference);
                labelTokenizersRefs.add(reference);
                selectTokenizer(reference, service);
            }
        }

        /**
         * Determines the tokenizer of the last() reference in
         * labelTokenizersRefs and applies it to the engine. Callers MUST
         * hold the lock on labelTokenizersRefs.
         * @param reference the reference of the added/modified service
         * @param service the service object of the parsed reference
         */
        private void selectTokenizer(ServiceReference reference, Object service) {
            ServiceReference highest;
            try {
                highest = labelTokenizersRefs.last();
            } catch (NoSuchElementException e) {
                highest = null;
            }
            EntityLinkingEngine engine = entityLinkingEngine;
            ServiceTracker tracker = labelTokenizerTracker;
            if (engine == null || tracker == null) {
                //if engine or tracker is null deactivate was already called
                return;
            }
            LabelTokenizer tokenizer = (LabelTokenizer) (highest == null || reference.equals(highest)
                    ? service : tracker.getService(highest));
            if (tokenizer == null) {
                // ServiceTracker#getService(ServiceReference) returns null
                // for references it does not track. Keep the tokenizer
                // currently used by the engine instead of throwing a
                // NullPointerException.
                log.debug(" ... unable to resolve LabelTokenizer for {}; keeping current LabelTokenizer of Engine '{}'",
                        highest, engine.getName());
                return;
            }
            applyTokenizer(engine, tokenizer);
        }

        /**
         * Sets the parsed tokenizer on the engine if it differs from the
         * one currently used.
         * @param engine the engine to update
         * @param tokenizer the tokenizer to use (MUST NOT be null)
         */
        private void applyTokenizer(EntityLinkingEngine engine, LabelTokenizer tokenizer) {
            if (!tokenizer.equals(engine.getLabelTokenizer())) {
                log.info(" ... setting LabelTokenizer of Engine '{}' to {}", engine.getName(), tokenizer);
                engine.setLabelTokenizer(tokenizer);
            }
        }
    });

    //create the engine (no LabelTokenizer is parsed here; it is set by the
    //tracker callbacks above as soon as a LabelTokenizer service is tracked)
    entityLinkingEngine = new EntityLinkingEngine(engineName, //the searcher might not be available
            entitySearcher, textProcessingConfig, linkerConfig, null);
    //start tracking
    labelTokenizerTracker.open();
    entitySearcher.open();
}
Aggregations