use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class KeywordLinkingEngine method writeEnhancements.
/**
 * Adds the enhancement triples for the parsed {@link LinkedEntity LinkedEntities}
 * to the metadata graph of the ContentItem.
 * <p>
 * For every linked entity one TextAnnotation is created per occurrence and
 * one EntityAnnotation per suggestion. Each EntityAnnotation is linked via
 * <code>dc:relation</code> to all TextAnnotations of the same linked entity.
 * @param ci the ContentItem whose metadata receives the triples
 * @param linkedEntities the linked entities to write
 * @param language the language used for the selection context and selected
 * text literals; if <code>null</code> or empty a <code>null</code> language
 * is passed to those literals
 */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language) {
    final Language contentLanguage = (language == null || language.isEmpty()) ? null : new Language(language);
    final Graph graph = ci.getMetadata();
    for (LinkedEntity entity : linkedEntities) {
        // (1) one TextAnnotation per occurrence; remember them so that the
        //     EntityAnnotations created below can refer to them
        Collection<IRI> occurrenceAnnotations = new ArrayList<IRI>(entity.getOccurrences().size());
        for (Occurrence occ : entity.getOccurrences()) {
            IRI occurrenceAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
            occurrenceAnnotations.add(occurrenceAnnotation);
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_START,
                literalFactory.createTypedLiteral(occ.getStart())));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_END,
                literalFactory.createTypedLiteral(occ.getEnd())));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_SELECTION_CONTEXT,
                new PlainLiteralImpl(occ.getContext(), contentLanguage)));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_SELECTED_TEXT,
                new PlainLiteralImpl(occ.getSelectedText(), contentLanguage)));
            graph.add(new TripleImpl(occurrenceAnnotation, Properties.ENHANCER_CONFIDENCE,
                literalFactory.createTypedLiteral(entity.getScore())));
            for (IRI type : entity.getTypes()) {
                graph.add(new TripleImpl(occurrenceAnnotation, Properties.DC_TYPE, type));
            }
        }
        // (2) one EntityAnnotation per suggestion
        for (Suggestion candidate : entity.getSuggestions()) {
            IRI suggestionAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            // The label is looked up via getBestLabel(..) for the configured
            // name field and the content language.
            Text bestLabel = candidate.getBestLabel(linkerConfig.getNameField(), language);
            PlainLiteralImpl labelLiteral;
            if (bestLabel.getLanguage() == null) {
                labelLiteral = new PlainLiteralImpl(bestLabel.getText());
            } else {
                labelLiteral = new PlainLiteralImpl(bestLabel.getText(), new Language(bestLabel.getLanguage()));
            }
            graph.add(new TripleImpl(suggestionAnnotation, Properties.ENHANCER_ENTITY_LABEL, labelLiteral));
            IRI entityReference = new IRI(candidate.getRepresentation().getId());
            graph.add(new TripleImpl(suggestionAnnotation, Properties.ENHANCER_ENTITY_REFERENCE, entityReference));
            // copy all values of the configured type field as entity types
            for (Iterator<Reference> types = candidate.getRepresentation().getReferences(linkerConfig.getTypeField()); types.hasNext();) {
                IRI entityType = new IRI(types.next().getReference());
                graph.add(new TripleImpl(suggestionAnnotation, Properties.ENHANCER_ENTITY_TYPE, entityType));
            }
            graph.add(new TripleImpl(suggestionAnnotation, Properties.ENHANCER_CONFIDENCE,
                literalFactory.createTypedLiteral(candidate.getScore())));
            // link the suggestion with every occurrence of the linked entity
            for (IRI occurrenceAnnotation : occurrenceAnnotations) {
                graph.add(new TripleImpl(suggestionAnnotation, Properties.DC_RELATION, occurrenceAnnotation));
            }
            // the name of the ReferencedSite providing this suggestion
            IRI siteProperty = new IRI(RdfResourceEnum.site.getUri());
            graph.add(new TripleImpl(suggestionAnnotation, siteProperty, new PlainLiteralImpl(referencedSiteName)));
            // optionally include the RDF data of the suggested entity
            if (dereferenceEntitiesState) {
                graph.addAll(RdfValueFactory.getInstance().toRdfRepresentation(candidate.getRepresentation()).getRdfGraph());
            }
        }
    }
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class Suggestion method getBestLabel.
/**
 * Getter for the best label in the given language.
 * <p>
 * The search starts with the label stored in this suggestion (the label used
 * for the match, according to the original comments) and then iterates over
 * the values of the name field of the representation. A label whose language
 * starts with the requested language is preferred, so "en" also selects
 * "en-GB" and "en-US". The iteration stops as soon as such a label equals
 * (ignoring case) the text of the stored label.
 * @param nameField the field used to search for labels
 * @param language the language; if <code>null</code> no language preference
 * is applied and the stored label (or, if there is none, the first label of
 * the name field) is returned
 * @return the best match or {@link Suggestion#getMatchedLabel()} if none is found
 */
public Text getBestLabel(String nameField, String language) {
    Representation rep = getRepresentation();
    //start with the stored label -> so if we do not find a better one
    //we will use that one!
    Text matchedLabel = this.label;
    Text label = matchedLabel;
    Iterator<Text> labels = rep.getText(nameField);
    boolean matchFound = false;
    while (labels.hasNext() && !matchFound) {
        Text actLabel = labels.next();
        if (label == null) {
            //take any label at first
            label = actLabel;
        }
        //now we have already a label check the language
        String actLang = actLabel.getLanguage();
        //use startsWith to match also en-GB and en-US ...
        //A null language is a valid input (callers may not know the language
        //of the content); String#startsWith(null) would throw a
        //NullPointerException, so the language check is skipped in that case.
        if (language != null && actLang != null && actLang.startsWith(language)) {
            //prefer labels with the correct language
            label = actLabel;
            //the stored label may be null (see the null check above), so guard
            //before comparing the texts
            if (matchedLabel != null && matchedLabel.getText().equalsIgnoreCase(label.getText())) {
                //found label in that language that exactly matches the
                //label used to match the text
                matchFound = true;
            }
        }
    }
    if (label == null) {
        //if no label was found ... return the one used for the match
        label = getMatchedLabel();
    }
    return label;
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class ValueFactoryTest method testText.
/**
 * Internally used to create and test {@link Text}s for the different tests.
 * <p>
 * Asserts that the created instance and its text are not <code>null</code>,
 * that the text equals the parsed string and that the language matches the
 * parsed one (an empty language may be reported as <code>null</code>).
 *
 * @param textString
 *            the natural language text as string
 * @param language
 *            the language
 * @return the created {@link Text} instance that can be used to perform further tests.
 */
private Text testText(String textString, String language) {
    ValueFactory vf = getValueFactory();
    Text text = vf.createText(textString, language);
    // check the instance itself first, so that a factory returning null fails
    // with an assertion error instead of a NullPointerException
    assertNotNull(text);
    assertNotNull(text.getText());
    // JUnit convention: expected value first, actual value second
    assertEquals(textString, text.getText());
    if (language == null) {
        assertTrue(text.getLanguage() == null);
    } else if (language.isEmpty()) {
        // implementations are free to change an empty language string to null
        // NOTE that it is not allowed to change NULL to an empty String!
        assertTrue(text.getLanguage() == null || text.getLanguage().isEmpty());
    } else {
        assertNotNull(text.getLanguage());
        assertEquals(language, text.getLanguage());
    }
    return text;
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class RepresentationTest method testNonExistingFields.
/**
 * Checks that all value getters of a {@link Representation} return a
 * non-<code>null</code> Iterator without elements when asked for a field
 * that does not exist. Returning <code>null</code> in such cases is not
 * allowed.
 */
@Test
public void testNonExistingFields() {
    final String unknownField = "urn:this.field:does.not:exist";
    final Representation representation = createRepresentation(null);

    // field names of the freshly created representation
    Iterator<String> names = representation.getFieldNames();
    assertNotNull(names);
    assertFalse(names.hasNext());

    // generic values of the unknown field
    Iterator<Object> values = representation.get(unknownField);
    assertNotNull(values);
    assertFalse(values.hasNext());

    // references of the unknown field
    Iterator<Reference> references = representation.getReferences(unknownField);
    assertNotNull(references);
    assertFalse(references.hasNext());

    // natural language texts of the unknown field (null as languages array)
    Iterator<Text> texts = representation.get(unknownField, (String[]) null);
    assertNotNull(texts);
    assertFalse(texts.hasNext());
}
use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.
the class RepresentationTest method testRemoveAllTextsOfMultipleLanguages.
/**
 * Removes the natural language texts of the languages "de" and "de-AT" in a
 * single call and checks that exactly the two texts of those languages are
 * no longer returned for the field.
 */
@Test
public void testRemoveAllTextsOfMultipleLanguages() {
    final String testField = "urn:the.field:used.for.this.Test";
    final Representation representation = initNaturalLanguageTest(testField);
    // starts with all test texts; every text still present in the
    // representation is removed below, so only the deleted ones remain
    final Set<String> missingTexts = new HashSet<String>(NL_TEST_all);
    representation.removeAllNaturalText(testField, "de", "de-AT");
    Iterator<Text> remaining = representation.getText(testField);
    while (remaining.hasNext()) {
        missingTexts.remove(remaining.next().getText());
    }
    assertTrue(missingTexts.size() == 2);
    assertTrue(missingTexts.remove(NL_TEST_de));
    assertTrue(missingTexts.remove(NL_TEST_de_AT));
}
Aggregations