Search in sources :

Example 1 with Occurrence

use of org.apache.stanbol.enhancer.engines.keywordextraction.impl.LinkedEntity.Occurrence in project stanbol by apache.

the class KeywordLinkingEngine method writeEnhancements.

/**
     * Writes the Enhancements for the {@link LinkedEntity LinkedEntities}
     * extracted from the parsed ContentItem
     * @param ci
     * @param linkedEntities
     * @param language
     */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    for (LinkedEntity linkedEntity : linkedEntities) {
        Collection<IRI> textAnnotations = new ArrayList<IRI>(linkedEntity.getOccurrences().size());
        //first create the TextAnnotations for the Occurrences
        for (Occurrence occurrence : linkedEntity.getOccurrences()) {
            IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
            textAnnotations.add(textAnnotation);
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, literalFactory.createTypedLiteral(occurrence.getStart())));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, literalFactory.createTypedLiteral(occurrence.getEnd())));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(linkedEntity.getScore())));
            for (IRI dcType : linkedEntity.getTypes()) {
                metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, dcType));
            }
        }
        //now the EntityAnnotations for the Suggestions
        for (Suggestion suggestion : linkedEntity.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            //should we use the label used for the match, or search the
            //representation for the best label ... currently its the matched one
            Text label = suggestion.getBestLabel(linkerConfig.getNameField(), language);
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, label.getLanguage() == null ? new PlainLiteralImpl(label.getText()) : new PlainLiteralImpl(label.getText(), new Language(label.getLanguage()))));
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_REFERENCE, new IRI(suggestion.getRepresentation().getId())));
            Iterator<Reference> suggestionTypes = suggestion.getRepresentation().getReferences(linkerConfig.getTypeField());
            while (suggestionTypes.hasNext()) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, new IRI(suggestionTypes.next().getReference())));
            }
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
            for (IRI textAnnotation : textAnnotations) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation));
            }
            //add the name of the ReferencedSite providing this suggestion
            metadata.add(new TripleImpl(entityAnnotation, new IRI(RdfResourceEnum.site.getUri()), new PlainLiteralImpl(referencedSiteName)));
            //add the RDF data for entities
            if (dereferenceEntitiesState) {
                metadata.addAll(RdfValueFactory.getInstance().toRdfRepresentation(suggestion.getRepresentation()).getRdfGraph());
            }
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.keywordextraction.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Reference(org.apache.stanbol.entityhub.servicesapi.model.Reference) ArrayList(java.util.ArrayList) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) Suggestion(org.apache.stanbol.enhancer.engines.keywordextraction.impl.Suggestion) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.keywordextraction.impl.LinkedEntity.Occurrence)

Aggregations

ArrayList (java.util.ArrayList)1 Graph (org.apache.clerezza.commons.rdf.Graph)1 IRI (org.apache.clerezza.commons.rdf.IRI)1 Language (org.apache.clerezza.commons.rdf.Language)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)1 LinkedEntity (org.apache.stanbol.enhancer.engines.keywordextraction.impl.LinkedEntity)1 Occurrence (org.apache.stanbol.enhancer.engines.keywordextraction.impl.LinkedEntity.Occurrence)1 Suggestion (org.apache.stanbol.enhancer.engines.keywordextraction.impl.Suggestion)1 Reference (org.apache.stanbol.entityhub.servicesapi.model.Reference)1 Text (org.apache.stanbol.entityhub.servicesapi.model.Text)1