use of org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig in project stanbol by apache.
the class KeywordLinkingEngineTest method testTaxonomyLinker.
/**
 * Tests the {@link EntityLinker}: every entity linked for {@code TEST_TEXT}
 * must be one of the expected selections, each expected selection must be
 * linked exactly once, and the suggestions of every linked entity must be
 * the expected ids in the expected order with non-increasing scores.
 *
 * @throws Exception any exception raised by the calls made in this test is
 * propagated and fails the test
 */
@Test
public void testTaxonomyLinker() throws Exception {
    OpenNlpAnalysedContentFactory acf = OpenNlpAnalysedContentFactory.getInstance(openNLP, new TextAnalyzerConfig());
    EntityLinkerConfig config = new EntityLinkerConfig();
    config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    EntityLinker linker = new EntityLinker(acf.create(TEST_TEXT, "en"), searcher, config);
    linker.process();

    // selected text -> ids of the expected suggestions, best ranked first
    Map<String, List<String>> expectedResults = new HashMap<String, List<String>>();
    expectedResults.put("Patrick Marshall", Arrays.asList("urn:test:PatrickMarshall"));
    // the redirected entity
    expectedResults.put("geologist", Arrays.asList("urn:test:redirect:Geologist"));
    expectedResults.put("New Zealand", Arrays.asList("urn:test:NewZealand"));
    expectedResults.put("University of Otago",
        Arrays.asList("urn:test:UniversityOfOtago", "urn:test:UniversityOfOtago_Texas"));

    for (LinkedEntity linkedEntity : linker.getLinkedEntities().values()) {
        // remove(..) instead of get(..): a second LinkedEntity for the same
        // selected text then yields null and is reported as an error
        List<String> expectedSuggestions = expectedResults.remove(linkedEntity.getSelectedText());
        assertNotNull("LinkedEntity " + linkedEntity.getSelectedText()
            + " is not an expected Result (or was found twice)", expectedSuggestions);
        // expected value first, actual value second
        assertEquals("Number of suggestions for selection " + linkedEntity.getSelectedText(),
            expectedSuggestions.size(), linkedEntity.getSuggestions().size());
        // the score of the LinkedEntity is the upper bound for the first suggestion
        double score = linkedEntity.getScore();
        for (int i = 0; i < expectedSuggestions.size(); i++) {
            Suggestion suggestion = linkedEntity.getSuggestions().get(i);
            assertEquals("Suggestion at rank " + i + " for selection " + linkedEntity.getSelectedText(),
                expectedSuggestions.get(i), suggestion.getRepresentation().getId());
            assertTrue("Score of suggestion " + i + " (" + suggestion.getScore()
                + ") is greater than the score of the previous one (" + score + ")",
                score >= suggestion.getScore());
            score = suggestion.getScore();
        }
    }
    // every processed entry was removed above; anything left was never linked
    assertTrue("Expected LinkedEntities not found for: " + expectedResults.keySet(),
        expectedResults.isEmpty());
}
use of org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig in project stanbol by apache.
the class KeywordLinkingEngineTest method testEngine.
/**
 * Runs the {@link KeywordLinkingEngine} on {@code TEST_TEXT} and validates
 * the fise:TextAnnotations and fise:EntityAnnotations found in the metadata
 * of the ContentItem against the rules of the Stanbol Enhancement Structure.
 * @throws IOException if thrown by any call made in this test
 * @throws EngineException if thrown by any call made in this test
 */
@Test
public void testEngine() throws IOException, EngineException {
    // set up the engine with a linker configuration using RedirectProcessingMode.FOLLOW
    EntityLinkerConfig config = new EntityLinkerConfig();
    config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    KeywordLinkingEngine engine = KeywordLinkingEngine.createInstance(
        openNLP, searcher, new TextAnalyzerConfig(), config);
    engine.referencedSiteName = TEST_REFERENCED_SITE_NAME;

    ContentItem contentItem = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    // a dc:language value of "en" tells the engine that the text is English
    TripleImpl languageTriple = new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en"));
    contentItem.getMetadata().add(languageTriple);

    // let the engine write its enhancements
    engine.computeEnhancements(contentItem);

    // property values that are handed to the validation helpers
    Map<IRI, RDFTerm> requiredValues = new HashMap<IRI, RDFTerm>();
    requiredValues.put(ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    String engineClassName = engine.getClass().getName();
    RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(engineClassName);
    requiredValues.put(DC_CREATOR, creator);
    // a null value for confidence makes it a required property
    requiredValues.put(Properties.ENHANCER_CONFIDENCE, null);

    // the fise:TextAnnotations are validated first ...
    assertEquals("Four fise:TextAnnotations are expected by this Test", 4,
        validateAllTextAnnotations(contentItem.getMetadata(), TEST_TEXT, requiredValues));
    // ... followed by the fise:EntityAnnotations
    assertEquals("Five fise:EntityAnnotations are expected by this Test", 5,
        validateAllEntityAnnotations(contentItem, requiredValues));
}
use of org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig in project stanbol by apache.
the class TextAnalyzerTest method testSingleSentenceChunkerConfig.
/**
 * Analyses {@code SINGLE_SENTENCE} with a configuration on which
 * {@code forcePosTypeChunker(false)} was called, then checks the flags
 * reported by the analyzer and the analysed sentence.
 */
@Test
public void testSingleSentenceChunkerConfig() {
    TextAnalyzerConfig requested = new TextAnalyzerConfig();
    requested.forcePosTypeChunker(false);
    TextAnalyzer textAnalyzer = new TextAnalyzer(openNLP, LANGUAGE, requested);

    AnalysedText result = textAnalyzer.analyseSentence(SINGLE_SENTENCE);
    assertNotNull(result);

    // flags of the configuration as reported by the analyzer
    TextAnalyzerConfig effective = textAnalyzer.getConfig();
    assertFalse(effective.isSimpleTokenizerForced());
    assertTrue(effective.isPosTaggerEnable());
    assertTrue(effective.isChunkerEnabled());
    assertTrue(effective.isPosTypeChunkerEnabled());
    assertFalse(effective.isPosTypeChunkerForced());

    checkSingleSentence(result, SINGLE_SENTENCE_TOKENS, true, true);
}
use of org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig in project stanbol by apache.
the class TextAnalyzerTest method testSingleSentenceNoChunkerNoPosConfig.
/**
 * Analyses {@code SINGLE_SENTENCE} with the POS tagger disabled while the
 * chunker stays enabled, then checks the flags reported by the analyzer and
 * the analysed sentence.
 */
@Test
public void testSingleSentenceNoChunkerNoPosConfig() {
    TextAnalyzerConfig requested = new TextAnalyzerConfig();
    requested.enablePosTagger(false);
    // enabling the chunker must be ignored for Chunks if there is no Pos
    requested.enableChunker(true);
    TextAnalyzer textAnalyzer = new TextAnalyzer(openNLP, LANGUAGE, requested);

    AnalysedText result = textAnalyzer.analyseSentence(SINGLE_SENTENCE);
    assertNotNull(result);

    // flags of the configuration as reported by the analyzer
    TextAnalyzerConfig effective = textAnalyzer.getConfig();
    assertFalse(effective.isSimpleTokenizerForced());
    assertFalse(effective.isPosTaggerEnable());
    assertTrue(effective.isChunkerEnabled());
    assertTrue(effective.isPosTypeChunkerEnabled());
    assertTrue(effective.isPosTypeChunkerForced());

    checkSingleSentence(result, SINGLE_SENTENCE_TOKENS, false, false);
}
use of org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig in project stanbol by apache.
the class TextAnalyzerTest method testSingleSentenceNoChunkerConfig.
/**
 * Analyses {@code SINGLE_SENTENCE} with the chunker disabled, then checks
 * the flags reported by the analyzer and the analysed sentence.
 */
@Test
public void testSingleSentenceNoChunkerConfig() {
    TextAnalyzerConfig requested = new TextAnalyzerConfig();
    requested.enableChunker(false);
    TextAnalyzer textAnalyzer = new TextAnalyzer(openNLP, LANGUAGE, requested);

    AnalysedText result = textAnalyzer.analyseSentence(SINGLE_SENTENCE);
    assertNotNull(result);

    // flags of the configuration as reported by the analyzer
    TextAnalyzerConfig effective = textAnalyzer.getConfig();
    assertFalse(effective.isSimpleTokenizerForced());
    assertTrue(effective.isPosTaggerEnable());
    assertFalse(effective.isChunkerEnabled());
    assertTrue(effective.isPosTypeChunkerEnabled());
    assertTrue(effective.isPosTypeChunkerForced());

    checkSingleSentence(result, SINGLE_SENTENCE_TOKENS, true, false);
}
Aggregations