Search in sources :

Example 36 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class KeywordLinkingEngine method writeEnhancements.

/**
 * Adds the enhancement triples for the parsed {@link LinkedEntity LinkedEntities}
 * to the metadata graph of the ContentItem. For every LinkedEntity one
 * TextAnnotation is written per {@link Occurrence} and one EntityAnnotation
 * per {@link Suggestion}; each EntityAnnotation is related (dc:relation) to
 * all TextAnnotations created for the same LinkedEntity.
 * @param ci the ContentItem whose metadata receives the triples
 * @param linkedEntities the linked entities to write
 * @param language the language of the content. If <code>null</code> or empty
 * the selection context and selected text literals carry no language
 */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language) {
    final Language contentLanguage;
    if (language == null || language.isEmpty()) {
        contentLanguage = null;
    } else {
        contentLanguage = new Language(language);
    }
    Graph graph = ci.getMetadata();
    for (LinkedEntity entity : linkedEntities) {
        // collects the TextAnnotations so that the EntityAnnotations
        // written below can refer to them
        Collection<IRI> occurrenceAnnotations = new ArrayList<IRI>(entity.getOccurrences().size());
        // step 1: one TextAnnotation for each Occurrence
        for (Occurrence occ : entity.getOccurrences()) {
            IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
            occurrenceAnnotations.add(annotation);
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_START,
                literalFactory.createTypedLiteral(occ.getStart())));
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_END,
                literalFactory.createTypedLiteral(occ.getEnd())));
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_SELECTION_CONTEXT,
                new PlainLiteralImpl(occ.getContext(), contentLanguage)));
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_SELECTED_TEXT,
                new PlainLiteralImpl(occ.getSelectedText(), contentLanguage)));
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_CONFIDENCE,
                literalFactory.createTypedLiteral(entity.getScore())));
            for (IRI type : entity.getTypes()) {
                graph.add(new TripleImpl(annotation, Properties.DC_TYPE, type));
            }
        }
        // step 2: one EntityAnnotation for each Suggestion
        for (Suggestion candidate : entity.getSuggestions()) {
            IRI annotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            // the label is chosen by Suggestion#getBestLabel(..) for the
            // configured name field and the language of the content
            Text bestLabel = candidate.getBestLabel(linkerConfig.getNameField(), language);
            PlainLiteralImpl labelLiteral;
            if (bestLabel.getLanguage() == null) {
                labelLiteral = new PlainLiteralImpl(bestLabel.getText());
            } else {
                labelLiteral = new PlainLiteralImpl(bestLabel.getText(), new Language(bestLabel.getLanguage()));
            }
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_ENTITY_LABEL, labelLiteral));
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_ENTITY_REFERENCE,
                new IRI(candidate.getRepresentation().getId())));
            for (Iterator<Reference> types = candidate.getRepresentation().getReferences(linkerConfig.getTypeField()); types.hasNext();) {
                graph.add(new TripleImpl(annotation, Properties.ENHANCER_ENTITY_TYPE,
                    new IRI(types.next().getReference())));
            }
            graph.add(new TripleImpl(annotation, Properties.ENHANCER_CONFIDENCE,
                literalFactory.createTypedLiteral(candidate.getScore())));
            // link this EntityAnnotation with every TextAnnotation of the entity
            for (IRI occurrenceAnnotation : occurrenceAnnotations) {
                graph.add(new TripleImpl(annotation, Properties.DC_RELATION, occurrenceAnnotation));
            }
            // name of the ReferencedSite that provided this suggestion
            graph.add(new TripleImpl(annotation, new IRI(RdfResourceEnum.site.getUri()),
                new PlainLiteralImpl(referencedSiteName)));
            // optionally include the RDF data of the suggested entity
            if (dereferenceEntitiesState) {
                graph.addAll(RdfValueFactory.getInstance().toRdfRepresentation(candidate.getRepresentation()).getRdfGraph());
            }
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.keywordextraction.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Reference(org.apache.stanbol.entityhub.servicesapi.model.Reference) ArrayList(java.util.ArrayList) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) Suggestion(org.apache.stanbol.enhancer.engines.keywordextraction.impl.Suggestion) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.keywordextraction.impl.LinkedEntity.Occurrence)

Example 37 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class Suggestion method getBestLabel.

/**
 * Getter for the best label in the given language.
 * <p>
 * The search starts with the label used for the match and iterates over
 * the texts of the parsed name field. Labels whose language starts with
 * the requested language are preferred (so that e.g. "en" also accepts
 * "en-GB" and "en-US"). The search stops as soon as such a label also
 * equals (ignoring case) the text of the matched label.
 * @param nameField the field used to search for labels
 * @param language the language. If <code>null</code> no label is preferred
 * because of its language
 * @return the best match or {@link Suggestion#getMatchedLabel()} if none is found
 */
public Text getBestLabel(String nameField, String language) {
    Representation rep = getRepresentation();
    //start with the matched label -> so if we do not find a better one
    //we will use the matched!
    Text matched = this.label;
    Text best = matched;
    Iterator<Text> labels = rep.getText(nameField);
    while (labels.hasNext()) {
        Text candidate = labels.next();
        if (best == null) {
            //take any label at first
            best = candidate;
        }
        String candidateLang = candidate.getLanguage();
        //a null language can not be matched; guard it instead of failing
        //with a NullPointerException in startsWith(..)
        //use startsWith to match also en-GB and en-US ...
        if (language != null && candidateLang != null && candidateLang.startsWith(language)) {
            //prefer labels with the correct language
            best = candidate;
            //the matched label may be missing (see the null checks above and
            //below), so only compare against it if it is present
            if (matched != null && matched.getText().equalsIgnoreCase(candidate.getText())) {
                //found label in that language that exactly matches the
                //label used to match the text -> no better one possible
                break;
            }
        }
    }
    if (best == null) {
        //if no label was found ... return the one used for the match
        best = getMatchedLabel();
    }
    return best;
}
Also used : Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text)

Example 38 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class ValueFactoryTest method testText.

/**
 * Internally used to create and test {@link Text}s for the different tests.
 * Checks that the created instance and its text are not <code>null</code>,
 * that the text equals the parsed string and that the language is
 * consistent with the parsed one.
 * 
 * @param textString
 *            the natural language text as string
 * @param language
 *            the language
 * @return the created {@link Text} instance that can be used to perform further tests.
 */
private Text testText(String textString, String language) {
    ValueFactory vf = getValueFactory();
    Text text = vf.createText(textString, language);
    // check the created instance itself before accessing its values
    assertNotNull(text);
    assertNotNull(text.getText());
    // expected value first, so that failure messages are reported correctly
    assertEquals(textString, text.getText());
    if (language == null) {
        assertTrue(text.getLanguage() == null);
    } else if (language.isEmpty()) {
        // implementations are free to change an empty language string to null
        // NOTE that it is not allowed to change NULL to an empty String!
        assertTrue(text.getLanguage() == null || text.getLanguage().isEmpty());
    } else {
        assertNotNull(text.getLanguage());
        assertEquals(language, text.getLanguage());
    }
    return text;
}
Also used : Text(org.apache.stanbol.entityhub.servicesapi.model.Text) ValueFactory(org.apache.stanbol.entityhub.servicesapi.model.ValueFactory)

Example 39 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class RepresentationTest method testNonExistingFields.

/**
 * Verifies that the value iterators requested for a field that does not exist
 * are empty Iterators. Returning <code>null</code> in such cases is not
 * acceptable; the same is checked for the field names of a newly created
 * representation.
 */
@Test
public void testNonExistingFields() {
    String missingField = "urn:this.field:does.not:exist";
    Representation representation = createRepresentation(null);
    // for each accessor: the Iterator must exist but must be empty
    // (a) the names of the fields
    Iterator<String> names = representation.getFieldNames();
    assertNotNull(names);
    assertFalse(names.hasNext());
    // (b) the plain values of the field
    Iterator<Object> values = representation.get(missingField);
    assertNotNull(values);
    assertFalse(values.hasNext());
    // (c) the references of the field
    Iterator<Reference> references = representation.getReferences(missingField);
    assertNotNull(references);
    assertFalse(references.hasNext());
    // (d) the natural language texts, requested with a null language array
    Iterator<Text> texts = representation.get(missingField, (String[]) null);
    assertNotNull(texts);
    assertFalse(texts.hasNext());
}
Also used : Reference(org.apache.stanbol.entityhub.servicesapi.model.Reference) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) Test(org.junit.Test)

Example 40 with Text

use of org.apache.stanbol.entityhub.servicesapi.model.Text in project stanbol by apache.

the class RepresentationTest method testRemoveAllTextsOfMultipleLanguages.

/**
 * Removes all natural language texts of the languages "de" and "de-AT" from
 * the test field and checks that exactly the two texts of those languages
 * are no longer returned by the representation.
 */
@Test
public void testRemoveAllTextsOfMultipleLanguages() {
    String field = "urn:the.field:used.for.this.Test";
    Representation rep = initNaturalLanguageTest(field);
    // start with all test texts; everything still present in the
    // representation after the removal is taken out of this set
    Set<String> missingTexts = new HashSet<String>(NL_TEST_all);
    rep.removeAllNaturalText(field, "de", "de-AT");
    Iterator<Text> remaining = rep.getText(field);
    while (remaining.hasNext()) {
        missingTexts.remove(remaining.next().getText());
    }
    // only the two removed texts must be left over
    assertTrue(missingTexts.size() == 2);
    assertTrue(missingTexts.remove(NL_TEST_de));
    assertTrue(missingTexts.remove(NL_TEST_de_AT));
}
Also used : Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

Text (org.apache.stanbol.entityhub.servicesapi.model.Text)50 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)32 Test (org.junit.Test)24 HashSet (java.util.HashSet)14 Reference (org.apache.stanbol.entityhub.servicesapi.model.Reference)12 ArrayList (java.util.ArrayList)11 IRI (org.apache.clerezza.commons.rdf.IRI)6 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)4 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)4 ValueFactory (org.apache.stanbol.entityhub.servicesapi.model.ValueFactory)4 RepresentationTest (org.apache.stanbol.entityhub.test.model.RepresentationTest)4 Graph (org.apache.clerezza.commons.rdf.Graph)3 Language (org.apache.clerezza.commons.rdf.Language)3 Literal (org.apache.clerezza.commons.rdf.Literal)3 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)3 EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)3 FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery)3 TextConstraint (org.apache.stanbol.entityhub.servicesapi.query.TextConstraint)3 URI (java.net.URI)2 URL (java.net.URL)2