Use of org.apache.stanbol.enhancer.engines.keywordextraction.impl.Suggestion in the Apache Stanbol project,
in the method testTaxonomyLinker of the class KeywordLinkingEngineTest.
/**
 * Tests the {@link EntityLinker} functionality: links {@code TEST_TEXT} and
 * verifies that exactly the expected entities are linked - none unexpected,
 * none twice and none missing - and that the suggestions of every linked
 * entity have the expected ids, in the expected order, with scores that do
 * not increase from one rank to the next.
 * @throws Exception if creating the analysed content or processing the text fails
 */
@Test
public void testTaxonomyLinker() throws Exception {
    OpenNlpAnalysedContentFactory acf = OpenNlpAnalysedContentFactory.getInstance(openNLP, new TextAnalyzerConfig());
    EntityLinkerConfig config = new EntityLinkerConfig();
    config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    EntityLinker linker = new EntityLinker(acf.create(TEST_TEXT, "en"), searcher, config);
    linker.process();
    //selected text -> ids of the expected suggestions, ordered by rank
    //(the lists are only read, so the fixed-size views are sufficient)
    Map<String, List<String>> expectedResults = new HashMap<String, List<String>>();
    expectedResults.put("Patrick Marshall", Arrays.asList("urn:test:PatrickMarshall"));
    //the redirected entity (redirects are processed with mode FOLLOW)
    expectedResults.put("geologist", Arrays.asList("urn:test:redirect:Geologist"));
    expectedResults.put("New Zealand", Arrays.asList("urn:test:NewZealand"));
    expectedResults.put("University of Otago", Arrays.asList("urn:test:UniversityOfOtago", "urn:test:UniversityOfOtago_Texas"));
    for (LinkedEntity linkedEntity : linker.getLinkedEntities().values()) {
        String selectedText = linkedEntity.getSelectedText();
        //removing the entry ensures that a selection found twice fails below
        List<String> expectedSuggestions = expectedResults.remove(selectedText);
        assertNotNull("LinkedEntity " + selectedText + " is not an expected Result (or was found twice)", expectedSuggestions);
        //JUnit expects the arguments in the order (message, expected, actual)
        assertEquals("Unexpected number of suggestions for selection " + selectedText, expectedSuggestions.size(), linkedEntity.getSuggestions().size());
        //no suggestion may score higher than the linked entity itself,
        //nor higher than the suggestion ranked before it
        double score = linkedEntity.getScore();
        for (int i = 0; i < expectedSuggestions.size(); i++) {
            Suggestion suggestion = linkedEntity.getSuggestions().get(i);
            assertEquals("Unexpected suggestion at rank " + i + " for selection " + selectedText, expectedSuggestions.get(i), suggestion.getRepresentation().getId());
            assertTrue("Score of suggestion " + i + " (" + suggestion.getScore() + ") is greater than the one of the previous one (" + score + ")", score >= suggestion.getScore());
            score = suggestion.getScore();
        }
    }
    //every entry still present was never linked by the EntityLinker
    assertTrue("Expected entities were not linked: " + expectedResults.keySet(), expectedResults.isEmpty());
}
Use of org.apache.stanbol.enhancer.engines.keywordextraction.impl.Suggestion in the Apache Stanbol project,
in the method writeEnhancements of the class KeywordLinkingEngine.
/**
 * Adds the enhancements for the parsed {@link LinkedEntity LinkedEntities}
 * to the metadata of the parsed ContentItem.
 * <p>
 * For every occurrence of a linked entity a text annotation is created
 * (start, end, selection context, selected text, the score of the linked
 * entity as confidence and one dc:type per type of the linked entity). For
 * every suggestion an entity annotation is created (label, reference, types,
 * the score of the suggestion as confidence, a dc:relation to each text
 * annotation of the linked entity and the name of the referenced site). If
 * {@code dereferenceEntitiesState} is set, the RDF data of the suggested
 * entity are added as well.
 * @param ci the ContentItem the enhancements are written to
 * @param linkedEntities the linked entities to write enhancements for
 * @param language the language used for the literals holding the selection
 * context and the selected text; if <code>null</code> or empty those
 * literals are created without a language
 */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language) {
    final Language contentLanguage;
    if (language == null || language.isEmpty()) {
        contentLanguage = null;
    } else {
        contentLanguage = new Language(language);
    }
    Graph graph = ci.getMetadata();
    for (LinkedEntity entity : linkedEntities) {
        Collection<IRI> occurrenceAnnotations = new ArrayList<IRI>(entity.getOccurrences().size());
        //(1) one TextAnnotation for each Occurrence of the linked entity
        for (Occurrence mention : entity.getOccurrences()) {
            IRI occurrenceAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
            occurrenceAnnotations.add(occurrenceAnnotation);
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_START,
                literalFactory.createTypedLiteral(mention.getStart())));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_END,
                literalFactory.createTypedLiteral(mention.getEnd())));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_SELECTION_CONTEXT,
                new PlainLiteralImpl(mention.getContext(), contentLanguage)));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_SELECTED_TEXT,
                new PlainLiteralImpl(mention.getSelectedText(), contentLanguage)));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_CONFIDENCE,
                literalFactory.createTypedLiteral(entity.getScore())));
            for (IRI type : entity.getTypes()) {
                graph.add(new TripleImpl(occurrenceAnnotation, Properties.DC_TYPE, type));
            }
        }
        //(2) one EntityAnnotation for each Suggestion of the linked entity
        for (Suggestion candidate : entity.getSuggestions()) {
            IRI candidateAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            //the label is the one selected by Suggestion#getBestLabel(..)
            //for the configured name field and the language of the content
            Text bestLabel = candidate.getBestLabel(linkerConfig.getNameField(), language);
            PlainLiteralImpl labelLiteral;
            if (bestLabel.getLanguage() == null) {
                labelLiteral = new PlainLiteralImpl(bestLabel.getText());
            } else {
                labelLiteral = new PlainLiteralImpl(bestLabel.getText(), new Language(bestLabel.getLanguage()));
            }
            graph.add(new TripleImpl(candidateAnnotation, Properties.ENHANCER_ENTITY_LABEL, labelLiteral));
            graph.add(new TripleImpl(candidateAnnotation, Properties.ENHANCER_ENTITY_REFERENCE,
                new IRI(candidate.getRepresentation().getId())));
            //the types are the references stored for the configured type field
            for (Iterator<Reference> types = candidate.getRepresentation().getReferences(linkerConfig.getTypeField()); types.hasNext();) {
                graph.add(new TripleImpl(candidateAnnotation, Properties.ENHANCER_ENTITY_TYPE,
                    new IRI(types.next().getReference())));
            }
            graph.add(new TripleImpl(candidateAnnotation, Properties.ENHANCER_CONFIDENCE,
                literalFactory.createTypedLiteral(candidate.getScore())));
            //relate the suggestion with all occurrences of the linked entity
            for (IRI occurrenceAnnotation : occurrenceAnnotations) {
                graph.add(new TripleImpl(candidateAnnotation, Properties.DC_RELATION, occurrenceAnnotation));
            }
            //the name of the ReferencedSite that provided this suggestion
            graph.add(new TripleImpl(candidateAnnotation, new IRI(RdfResourceEnum.site.getUri()),
                new PlainLiteralImpl(referencedSiteName)));
            //optionally include the RDF data of the suggested entity
            if (dereferenceEntitiesState) {
                graph.addAll(RdfValueFactory.getInstance().toRdfRepresentation(candidate.getRepresentation()).getRdfGraph());
            }
        }
    }
}
Aggregations