Search in sources :

Example 6 with EntityLinkerConfig

use of org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig in project stanbol by apache.

In class EntityLinkingEngineTest, method testEntityLinkerWithWrongOrder:

/**
 * Runs the {@link EntityLinker} over {@code TEST_ANALYSED_TEXT_WO} and
 * validates that the expected entities are linked for each of the four
 * expected texts (among them "Marshall Patrick").
 * <p>
 * The linker is set up with the default linked lexical categories, an
 * empty set of linked POS tags, the chunk state being ignored, a min
 * found tokens value of two and redirects being followed.
 * @throws Exception on any failure raised while linking or validating
 */
@Test
public void testEntityLinkerWithWrongOrder() throws Exception {
    //the entity URIs expected for each of the linked texts
    List<String> patrickMarshall = new ArrayList<String>(Arrays.asList("urn:test:PatrickMarshall"));
    //the redirected entity
    List<String> geologist = new ArrayList<String>(Arrays.asList("urn:test:redirect:Geologist"));
    List<String> newZealand = new ArrayList<String>(Arrays.asList("urn:test:NewZealand"));
    List<String> otago = new ArrayList<String>(Arrays.asList("urn:test:UniversityOfOtago", "urn:test:UniversityOfOtago_Texas"));
    Map<String, List<String>> expected = new HashMap<String, List<String>>();
    expected.put("Marshall Patrick", patrickMarshall);
    expected.put("geologist", geologist);
    expected.put("New Zealand", newZealand);
    expected.put("University of Otago", otago);

    //language processing: default lexical categories, no explicit POS tags
    LanguageProcessingConfig languageConfig = new LanguageProcessingConfig();
    languageConfig.setLinkedLexicalCategories(LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);
    languageConfig.setLinkedPos(Collections.EMPTY_SET);
    //to emulate pre STANBOL-1211
    languageConfig.setIgnoreChunksState(true);

    //linking: the expectations above assume a min found tokens value of 2
    EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
    linkerConfig.setMinFoundTokens(2);
    linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);

    EntityLinker entityLinker = new EntityLinker(TEST_ANALYSED_TEXT_WO, "en", languageConfig, searcher, linkerConfig, labelTokenizer);
    entityLinker.process();
    validateEntityLinkerResults(entityLinker, expected);
}
Also used: LanguageProcessingConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig), EntityLinkerConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig), HashMap (java.util.HashMap), List (java.util.List), ArrayList (java.util.ArrayList), EntityLinker (org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker), Test (org.junit.Test)

Example 7 with EntityLinkerConfig

use of org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig in project stanbol by apache.

In class EntityLinkingEngineTest, method testEngine:

/**
 * Tests that the enhancements created by the engine conform to the
 * rules defined for the Stanbol Enhancement Structure.
 * <p>
 * An {@link EntityLinkingEngine} is run over a content item created from
 * {@code TEST_TEXT}; afterwards all fise:TextAnnotations and
 * fise:EntityAnnotations are validated and counted.
 * @throws IOException declared by this test; may be raised while creating the content item
 * @throws EngineException declared by this test; may be raised while computing the enhancements
 */
@Test
public void testEngine() throws IOException, EngineException {
    //set up the engine; a min found tokens value of 2 is assumed by this test
    EntityLinkerConfig config = new EntityLinkerConfig();
    config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    config.setMinFoundTokens(2);
    TextProcessingConfig textConfig = new TextProcessingConfig();
    EntityLinkingEngine linkingEngine = new EntityLinkingEngine("dummy", searcher, textConfig, config, labelTokenizer);

    //create the content item for the test text
    StringSource textSource = new StringSource(TEST_TEXT);
    ContentItem contentItem = ciFactory.createContentItem(textSource);
    //mark the content as English text for the engine
    contentItem.getMetadata().add(new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en")));
    //attach the AnalysedText instance used for this test
    contentItem.addPart(AnalysedText.ANALYSED_TEXT_URI, TEST_ANALYSED_TEXT);

    //run the engine
    linkingEngine.computeEnhancements(contentItem);

    //property values every enhancement is validated against
    String creatorName = linkingEngine.getClass().getName();
    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    expected.put(DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(creatorName));
    //a null value marks the confidence as a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);

    //check the created fise:TextAnnotations
    int textAnnotationCount = validateAllTextAnnotations(contentItem.getMetadata(), TEST_TEXT, expected);
    assertEquals("Four fise:TextAnnotations are expected by this Test", 4, textAnnotationCount);
    //check the created fise:EntityAnnotations
    int entityAnnotationCount = validateAllEntityAnnotations(contentItem, expected);
    assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, entityAnnotationCount);
}
Also used: IRI (org.apache.clerezza.commons.rdf.IRI), EntityLinkerConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig), TextProcessingConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig), PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl), HashMap (java.util.HashMap), RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm), StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource), TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl), ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem), Test (org.junit.Test)

Aggregations

EntityLinkerConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig): 7
Test (org.junit.Test): 6
HashMap (java.util.HashMap): 4
TextProcessingConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig): 4
ArrayList (java.util.ArrayList): 3
List (java.util.List): 3
LanguageProcessingConfig (org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig): 3
EntityLinker (org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker): 3
Hashtable (java.util.Hashtable): 2
FstLinkingEngine (org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngine): 2
NoSuchElementException (java.util.NoSuchElementException): 1
IRI (org.apache.clerezza.commons.rdf.IRI): 1
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 1
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 1
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 1
Activate (org.apache.felix.scr.annotations.Activate): 1
LabelTokenizer (org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer): 1
EntityLinkingEngine (org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine): 1
ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 1
StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource): 1