Search in sources :

Example 36 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class IndexedGraphTest method createGraph.

/**
 * Adds pseudo-random triples to the parsed collection until it contains
 * {@code triples} elements.
 * <p>
 * A single random value in the range [0..3) is drawn per iteration and
 * decides about everything that is created in that iteration:
 * <ul>
 * <li>[0..1]: a literal object (int, double or text literal)</li>
 * <li>(1..2]: a blank node object (newly created or reused)</li>
 * <li>(2..3): an IRI object; in addition the next predicate is selected
 * and, for values &gt;= 2.5, a new subject is selected</li>
 * </ul>
 *
 * @param tc the collection the triples are added to
 * @param triples the number of elements {@code tc} has to contain on return
 * @param seed the seed for the random number generator or <code>null</code>
 *            to use an unseeded generator
 */
private static void createGraph(Collection<Triple> tc, int triples, Long seed) {
    Random rnd = new Random();
    if (seed != null) {
        rnd.setSeed(seed);
    }
    LiteralFactory lf = LiteralFactory.getInstance();
    // randoms are in the range [0..3)
    // upper bound for literals
    double l = 1.0;
    // upper bound for int literals
    double i = l / 3;
    // upper bound for double literals
    double d = l * 2 / 3;
    // width of each of the three text literal sub-ranges within (d..l]
    double textStep = (l - d) / 3;
    // upper bound for bNodes
    double b = 2.0;
    // upper bound for the creation of new bNodes
    double nb = b - (l * 2 / 3);
    double random;
    BlankNodeOrIRI subject = null;
    IRI predicate = null;
    List<IRI> predicateList = new ArrayList<IRI>();
    predicateList.add(RDF.first);
    predicateList.add(RDF.rest);
    predicateList.add(RDF.type);
    predicateList.add(RDFS.label);
    predicateList.add(RDFS.comment);
    predicateList.add(RDFS.range);
    predicateList.add(RDFS.domain);
    predicateList.add(FOAF.name);
    predicateList.add(FOAF.nick);
    predicateList.add(FOAF.homepage);
    predicateList.add(FOAF.age);
    predicateList.add(FOAF.depiction);
    String URI_PREFIX = "http://www.test.org/bigGraph/ref";
    Language DE = new Language("de");
    Language EN = new Language("en");
    Iterator<IRI> predicates = predicateList.iterator();
    List<BlankNode> bNodes = new ArrayList<BlankNode>();
    bNodes.add(new BlankNode());
    for (int count = 0; tc.size() < triples; count++) {
        random = rnd.nextDouble() * 3;
        // select a new subject (always needed in the first iteration)
        if (random >= 2.5 || count == 0) {
            if (random <= 2.75) {
                subject = new IRI(URI_PREFIX + count);
            } else {
                // map (2.75..3) onto the indexes of the known bNodes; the
                // clamp protects against rounding up to bNodes.size()
                int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
                subject = bNodes.get(Math.min(rndIndex, bNodes.size() - 1));
            }
        }
        // select the next predicate (always needed in the first iteration)
        if (random > 2.0 || count == 0) {
            if (!predicates.hasNext()) {
                // all predicates used: start over in a new random order
                Collections.shuffle(predicateList, rnd);
                predicates = predicateList.iterator();
            }
            predicate = predicates.next();
        }
        if (random <= l) {
            // literal
            if (random <= i) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
            } else if (random <= d) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
            } else {
                // text literal: (d..l] is split into thirds so that plain,
                // English and German literals are all generated
                Literal text;
                if (random <= d + textStep) {
                    text = new PlainLiteralImpl("Literal for " + count);
                } else if (random <= d + 2 * textStep) {
                    text = new PlainLiteralImpl("An English literal for " + count, EN);
                } else {
                    text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
                }
                tc.add(new TripleImpl(subject, predicate, text));
            }
        } else if (random <= b) {
            // bnode
            BlankNode bnode;
            if (random <= nb) {
                bnode = new BlankNode();
                bNodes.add(bnode);
            } else {
                // >nb <=b: reuse a known bNode; the computed index equals
                // bNodes.size() for random == b, so it has to be clamped
                int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
                bnode = bNodes.get(Math.min(rndIndex, bNodes.size() - 1));
            }
            tc.add(new TripleImpl(subject, predicate, bnode));
        } else {
            // IRI
            tc.add(new TripleImpl(subject, predicate, new IRI(URI_PREFIX + count * random)));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Random(java.util.Random) Language(org.apache.clerezza.commons.rdf.Language) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 37 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class UserResource method changeUser.

/**
 * Modify user given a graph describing the change.
 * <p>
 * The first resource typed as {@code Ontology.Change} in the input graph is
 * used as change description. The user is looked up in the system graph by
 * the {@code PLATFORM.userName} given for the change. If an old value is
 * given and the matching triple exists for that user it is removed, then
 * the triple with the new value is added.
 * <p>
 * Reading and writing use separate locks of the system graph, so the
 * lookup and the modification are not a single atomic operation.
 *
 * @param inputGraph change graph
 * @return HTTP response: 204 (no content) after the change was applied,
 *         400 (bad request) if the input graph describes no change
 * @throws java.util.NoSuchElementException if the change description lacks
 *         the user name, predicate or new value, or if no user with the
 *         given name exists
 */
@POST
@Consumes(SupportedFormat.TURTLE)
@Path("change-user")
public Response changeUser(ImmutableGraph inputGraph) {
    Triple oldTriple = null;
    Triple newTriple = null;
    Lock readLock = systemGraph.getLock().readLock();
    readLock.lock();
    try {
        Iterator<Triple> changes = inputGraph.filter(null, null, Ontology.Change);
        if (changes.hasNext()) {
            Triple changeTriple = changes.next();
            BlankNodeOrIRI changeNode = changeTriple.getSubject();
            Literal userName = (Literal) inputGraph.filter(changeNode, PLATFORM.userName, null).next().getObject();
            Iterator<Triple> userTriples = systemGraph.filter(null, PLATFORM.userName, userName);
            // next() fails for unknown users; the lock is released by the
            // finally block in that case
            BlankNodeOrIRI userNode = userTriples.next().getSubject();
            IRI predicateIRI = (IRI) inputGraph.filter(changeNode, Ontology.predicate, null).next().getObject();
            // handle old value (if it exists)
            Iterator<Triple> iterator = inputGraph.filter(changeNode, Ontology.oldValue, null);
            if (iterator.hasNext()) {
                RDFTerm oldValue = iterator.next().getObject();
                // only remove the old value of this user, not of any resource
                Iterator<Triple> oldTriples = systemGraph.filter(userNode, predicateIRI, oldValue);
                if (oldTriples.hasNext()) {
                    oldTriple = oldTriples.next();
                }
            }
            RDFTerm newValue = inputGraph.filter(changeNode, Ontology.newValue, null).next().getObject();
            newTriple = new TripleImpl(userNode, predicateIRI, newValue);
        }
    } finally {
        // always release the lock, also if the change graph is incomplete
        readLock.unlock();
    }
    if (newTriple == null) {
        // no change described: nothing to write (and null must not be added)
        return Response.status(Response.Status.BAD_REQUEST).build();
    }
    Lock writeLock = systemGraph.getLock().writeLock();
    writeLock.lock();
    try {
        if (oldTriple != null) {
            systemGraph.remove(oldTriple);
        }
        systemGraph.add(newTriple);
    } finally {
        writeLock.unlock();
    }
    // seems the most appropriate response
    return Response.noContent().build();
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Lock(java.util.concurrent.locks.Lock) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Consumes(javax.ws.rs.Consumes)

Example 38 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class EntityCoMentionEngine method writeComentions.

/**
 * Writes the parsed co-mentions to the metadata of the content item.
 * <p>
 * For every {@link Occurrence} of a co-mention a known text annotation with
 * the same start or the same end offset is looked up; if none exists a new
 * one is created. Occurrences for which a known text annotation selects a
 * bigger span are ignored. For the others the suggestions of the initial
 * mention(s) are linked (dc:relation) to the text annotation, the confidence
 * of other existing suggestions is multiplied by
 * {@code confidenceAdjustmentFactor} (at most once per suggestion) and the
 * dc:type values and the confidence of the text annotation are updated.
 *
 * @param ci the content item providing the metadata graph that is modified
 * @param comentions the co-mentions to write
 * @param language the language used for created plain literals;
 *            <code>null</code> or empty for none
 * @param textAnnotations the known text annotations; text annotations
 *            created by this method are added to this set
 */
private void writeComentions(ContentItem ci, Collection<LinkedEntity> comentions, String language, Set<IRI> textAnnotations) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    // we MUST adjust the confidence level of existing annotations only once
    // so we need to keep track of those
    Set<BlankNodeOrIRI> adjustedSuggestions = new HashSet<BlankNodeOrIRI>();
    log.debug("Write Co-Mentions:");
    for (LinkedEntity comention : comentions) {
        log.debug(" > {}", comention);
        // URIs of TextAnnotations for the initial mention of this co-mention
        Collection<IRI> initialMentions = new ArrayList<IRI>(comention.getSuggestions().size());
        for (Suggestion suggestion : comention.getSuggestions()) {
            Entity entity = suggestion.getEntity();
            if (textAnnotations.contains(entity.getUri())) {
                // this is a textAnnotation
                initialMentions.add(entity.getUri());
            }
        // else TODO support also Entities!!
        }
        // create the TextAnnotations for the co-mention
        for (Occurrence occurrence : comention.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            // search for existing text annotation
            boolean ignore = false;
            IRI textAnnotation = null;
            // search for textAnnotations with the same start
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer end = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_END, Integer.class, literalFactory);
                if (end != null && textAnnotations.contains(t.getSubject())) {
                    textAnnotation = (IRI) t.getSubject();
                    if (end > occurrence.getEnd()) {
                        // there is an other TextAnnotation selecting a bigger Span
                        // so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            // search for textAnnotations with the same end
            it = metadata.filter(null, ENHANCER_END, endLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer start = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_START, Integer.class, literalFactory);
                if (start != null && textAnnotations.contains(t.getSubject())) {
                    textAnnotation = (IRI) t.getSubject();
                    if (start < occurrence.getStart()) {
                        // there is an other TextAnnotation selecting a bigger Span
                        // so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            if (!ignore) {
                // collect confidence values of co-mentions
                // maximum confidence of suggestions of the initial mention
                Double maxConfidence = null;
                // maximum confidence of existing suggestions
                Double maxExistingConfidence = null;
                if (textAnnotation == null) {
                    // not found ... create a new TextAnnotation for the co-mention
                    textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    // add it to the set of TextAnnotations
                    textAnnotations.add(textAnnotation);
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                } else {
                    // if existing add this engine as contributor
                    metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
                }
                // now process initial mention(s) for the co-mention
                Set<IRI> dcTypes = new HashSet<IRI>();
                for (IRI initialMention : initialMentions) {
                    // get the dc:type(s) of the initial mentions
                    Iterator<IRI> dcTypesIt = getReferences(metadata, initialMention, DC_TYPE);
                    while (dcTypesIt.hasNext()) {
                        dcTypes.add(dcTypesIt.next());
                    }
                    // check confidence of the initial mention (fise:TextAnnotation)
                    Double mentionConfidence = EnhancementEngineHelper.get(metadata, initialMention, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                    if (mentionConfidence != null && (maxConfidence == null || maxConfidence.compareTo(mentionConfidence) <= 0)) {
                        maxConfidence = mentionConfidence;
                    }
                    // else nothing to do
                    // now we need to compare the suggestions of the initial
                    // mention(s) with the existing one.
                    // Get information about the suggestions of the initial mention
                    Map<RDFTerm, Double> initialSuggestions = new HashMap<RDFTerm, Double>();
                    Map<RDFTerm, RDFTerm> initialSuggestedEntities = new HashMap<RDFTerm, RDFTerm>();
                    for (Iterator<Triple> suggestions = metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext(); ) {
                        // always consume the element and test the subject
                        // (not the iterator) against the known TextAnnotations
                        BlankNodeOrIRI suggestion = suggestions.next().getSubject();
                        if (!textAnnotations.contains(suggestion)) {
                            RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, suggestion, ENHANCER_ENTITY_REFERENCE);
                            if (suggestedEntity != null) {
                                // it has a suggestion
                                Double confidence = EnhancementEngineHelper.get(metadata, suggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                // the confidence of the suggestion (not the one of
                                // the initial mention) may raise the maximum
                                if (confidence != null && (maxConfidence == null || maxConfidence.compareTo(confidence) <= 0)) {
                                    maxConfidence = confidence;
                                }
                                // else nothing to do
                                initialSuggestions.put(suggestion, confidence);
                                initialSuggestedEntities.put(suggestedEntity, suggestion);
                            }
                        // no suggestion (dc:relation to some other resource)
                        }
                    // else ignore dc:relation to other fise:TextAnnotations
                    }
                    // now we collect existing Suggestions for this TextAnnotation where we need
                    // to adjust the confidence (quite some things to check ....)
                    Map<BlankNodeOrIRI, Double> existingSuggestions = new HashMap<BlankNodeOrIRI, Double>();
                    if (maxConfidence != null && confidenceAdjustmentFactor < 1) {
                        // suggestions are defined by incoming dc:relation
                        for (Iterator<Triple> esIt = metadata.filter(null, DC_RELATION, textAnnotation); esIt.hasNext(); ) {
                            BlankNodeOrIRI existingSuggestion = esIt.next().getSubject();
                            // but not all of them are suggestions
                            if (!textAnnotations.contains(existingSuggestion)) {
                                // ignore fise:TextAnnotations
                                Double existingConfidence = EnhancementEngineHelper.get(metadata, existingSuggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                // ignore suggestions also suggested for the initial mention
                                if (!initialSuggestions.containsKey(existingSuggestion)) {
                                    RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, existingSuggestion, ENHANCER_ENTITY_REFERENCE);
                                    // check if the same entity is referenced by one of the
                                    // suggestions for the initial mention
                                    if (!initialSuggestedEntities.containsKey(suggestedEntity)) {
                                        // finally make sure that we adjust confidences only once
                                        if (!adjustedSuggestions.contains(existingSuggestion)) {
                                            existingSuggestions.put(existingSuggestion, existingConfidence);
                                        }
                                    // else confidence already adjusted
                                    } else {
                                        // different fise:EntityAnnotation, but same reference Entity
                                        // we need to check confidences to decide what to do
                                        RDFTerm initialSuggestion = initialSuggestedEntities.get(suggestedEntity);
                                        Double initialConfidence = initialSuggestions.get(initialSuggestion);
                                        if (initialConfidence == null || (existingConfidence != null && existingConfidence.compareTo(initialConfidence) >= 0)) {
                                            // existing confidence >= initial .. keep existing
                                            initialSuggestions.remove(initialSuggestion);
                                            // existingConfidence is null if both confidences
                                            // are unknown; Double.compareTo(null) would fail
                                            if (existingConfidence != null && (maxExistingConfidence == null || maxExistingConfidence.compareTo(existingConfidence) <= 0)) {
                                                maxExistingConfidence = existingConfidence;
                                            }
                                        } else {
                                            // adjust this one (if not yet adjusted)
                                            if (!adjustedSuggestions.contains(existingSuggestion)) {
                                                existingSuggestions.put(existingSuggestion, existingConfidence);
                                            }
                                        }
                                    }
                                } else {
                                    // a initial mention already present
                                    // no need to process initial mention
                                    initialSuggestions.remove(existingSuggestion);
                                    if (existingConfidence != null && (maxExistingConfidence == null || maxExistingConfidence.compareTo(existingConfidence) <= 0)) {
                                        maxExistingConfidence = existingConfidence;
                                    }
                                // else maxExistingConfidence unchanged
                                }
                            }
                        // else ignore dc:relations to other fise:TextAnnotations
                        }
                        for (Entry<BlankNodeOrIRI, Double> entry : existingSuggestions.entrySet()) {
                            if (entry.getValue() != null) {
                                double adjustedConfidence = entry.getValue() * confidenceAdjustmentFactor;
                                if (maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence) {
                                    maxExistingConfidence = adjustedConfidence;
                                }
                                EnhancementEngineHelper.set(metadata, entry.getKey(), ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
                                // mark as adjusted
                                adjustedSuggestions.add(entry.getKey());
                            }
                        }
                    }
                    // add the suggestions of the initial mention to this one
                    for (RDFTerm suggestion : initialSuggestions.keySet()) {
                        metadata.add(new TripleImpl((BlankNodeOrIRI) suggestion, DC_RELATION, textAnnotation));
                    }
                    // finally link the co-mention with the initial one
                    metadata.add(new TripleImpl(textAnnotation, DC_RELATION, initialMention));
                }
                // Adapt the dc:type values of the fise:TextAnnotation
                // - if Suggestions added by this engine do have the max confidence
                // use the dc:type values of the initial mention
                // - if the original suggestions do have a higher confidence keep the
                // existing
                // - in case both do have the same confidence we add all dc:types
                // NOTE(review): for equal confidences removeExistingDcTypes is true
                // and addCoMentionDcTypes is false, so the loop below removes every
                // existing dc:type and none is added. This does not match the last
                // point above - confirm the intended behavior.
                boolean removeExistingDcTypes = maxConfidence != null && (maxExistingConfidence == null || maxConfidence.compareTo(maxExistingConfidence) >= 0);
                boolean addCoMentionDcTypes = maxExistingConfidence == null || (maxConfidence != null && maxConfidence.compareTo(maxExistingConfidence) > 0);
                Iterator<IRI> existingDcTypesIt = getReferences(metadata, textAnnotation, DC_TYPE);
                while (existingDcTypesIt.hasNext()) {
                    // existing types are dropped from dcTypes so that only the
                    // missing ones are added below
                    if ((!dcTypes.remove(existingDcTypesIt.next()) || !addCoMentionDcTypes) && removeExistingDcTypes) {
                        // remove the dcType
                        existingDcTypesIt.remove();
                    }
                }
                if (addCoMentionDcTypes) {
                    for (IRI dcType : dcTypes) {
                        // add missing
                        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
                    }
                }
                // TODO: support also Entities
                if (maxConfidence != null) {
                    // set the confidence value (if known)
                    EnhancementEngineHelper.set(metadata, textAnnotation, ENHANCER_CONFIDENCE, maxConfidence, literalFactory);
                }
            }
        // else ignore this occurrence
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph)

Example 39 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class EntityLinkingEngine method writeEnhancements.

/**
 * Writes the Enhancements for the {@link LinkedEntity LinkedEntities}
 * extracted from the parsed ContentItem.
 * <p>
 * For each occurrence of a linked entity an existing text annotation with
 * the same start and end is reused (this engine is added as contributor),
 * otherwise a new one is created. For each suggestion an entity annotation
 * is created and related to all text annotations of the linked entity.
 *
 * @param ci the content item providing the metadata graph to write to
 * @param linkedEntities the linked entities to write
 * @param language the language used for created plain literals and for the
 *            selection of the entity label; <code>null</code> or empty for
 *            plain literals without language
 * @param writeRankings if <code>true</code> the ranking of the suggested
 *            entity (if available) is written as xsd:double literal
 */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language, boolean writeRankings) {
    final Language literalLanguage = (language == null || language.isEmpty()) ? null : new Language(language);
    // URIs of the entities whose data were already copied to the metadata
    final Set<IRI> copiedEntities = new HashSet<IRI>();
    final Graph graph = ci.getMetadata();
    for (LinkedEntity linked : linkedEntities) {
        Collection<IRI> occurrenceAnnotations = new ArrayList<IRI>(linked.getOccurrences().size());
        // step 1: one TextAnnotation per Occurrence
        for (Occurrence occ : linked.getOccurrences()) {
            Literal from = literalFactory.createTypedLiteral(occ.getStart());
            Literal to = literalFactory.createTypedLiteral(occ.getEnd());
            // look for a TextAnnotation selecting exactly the same span;
            // the search stops at the first match
            IRI annotation = null;
            Iterator<Triple> sameStart = graph.filter(null, ENHANCER_START, from);
            while (annotation == null && sameStart.hasNext()) {
                Triple t = sameStart.next();
                if (graph.filter(t.getSubject(), ENHANCER_END, to).hasNext() && graph.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()) {
                    annotation = (IRI) t.getSubject();
                }
            }
            if (annotation != null) {
                // reuse the existing one and register this engine as contributor
                graph.add(new TripleImpl(annotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
            } else {
                // nothing to reuse: create and describe a new TextAnnotation
                annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                graph.add(new TripleImpl(annotation, Properties.ENHANCER_START, from));
                graph.add(new TripleImpl(annotation, Properties.ENHANCER_END, to));
                graph.add(new TripleImpl(annotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occ.getContext(), literalLanguage)));
                graph.add(new TripleImpl(annotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occ.getSelectedText(), literalLanguage)));
                graph.add(new TripleImpl(annotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(linked.getScore())));
            }
            // the dc:types are written for new and reused annotations alike
            for (IRI type : linked.getTypes()) {
                graph.add(new TripleImpl(annotation, Properties.DC_TYPE, type));
            }
            occurrenceAnnotations.add(annotation);
        }
        // step 2: one EntityAnnotation per Suggestion
        for (Suggestion suggestion : linked.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            // the label is the one selected by the suggestion for the
            // configured name field and the language of the content
            Literal bestLabel = suggestion.getBestLabel(linkerConfig.getNameField(), language);
            Entity suggested = suggestion.getEntity();
            graph.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, bestLabel));
            graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, suggested.getUri()));
            for (Iterator<IRI> types = suggested.getReferences(linkerConfig.getTypeField()); types.hasNext(); ) {
                graph.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, types.next()));
            }
            graph.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
            // relate the suggestion to every TextAnnotation of the linked entity
            for (IRI annotation : occurrenceAnnotations) {
                graph.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, annotation));
            }
            // origin information provided by the EntitySearcher
            for (Entry<IRI, Collection<RDFTerm>> origin : entitySearcher.getOriginInformation().entrySet()) {
                IRI originProperty = origin.getKey();
                for (RDFTerm originValue : origin.getValue()) {
                    graph.add(new TripleImpl(entityAnnotation, originProperty, originValue));
                }
            }
            if (writeRankings) {
                Float entityRanking = suggestion.getEntity().getEntityRanking();
                if (entityRanking != null) {
                    // the float ranking is written as xsd:double
                    Literal rankingLiteral = new TypedLiteralImpl(entityRanking.toString(), XSD_DOUBLE);
                    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_RANKING, rankingLiteral));
                }
            }
            // copy the RDF data of the entity (only once per entity)
            if (linkerConfig.isDereferenceEntitiesEnabled() && copiedEntities.add(suggested.getUri())) {
                // only triples with the entity as subject are copied, as the
                // entity data might contain other data as well
                Iterator<Triple> entityTriples = suggested.getData().filter(suggested.getUri(), null, null);
                while (entityTriples.hasNext()) {
                    graph.add(entityTriples.next());
                }
            }
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) Triple(org.apache.clerezza.commons.rdf.Triple) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) Collection(java.util.Collection) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet)

Example 40 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class EnhancementRDFUtils method writeEntityAnnotation.

/**
 * Writes an entity annotation describing the parsed suggestion to the graph
 * and returns the IRI of the newly created annotation.
 * <p>
 * The label written for the annotation is the label matched by the
 * suggestion, unless that label is not in the requested language and the
 * entity has another value of <code>nameField</code> that is; in that case
 * the first such value is used instead.
 *
 * @param engine
 *            the engine passed on when creating the entity enhancement
 * @param literalFactory
 *            the LiteralFactory to use
 * @param graph
 *            the Graph to use
 * @param contentItemId
 *            the contentItemId the enhancement is extracted from
 * @param relatedEnhancements
 *            enhancements this textAnnotation is related to
 * @param suggestion
 *            the entity suggestion
 * @param nameField the field used to extract the name
 * @param lang the preferred language to include or <code>null</code> if none.
 *            With <code>null</code> only labels without a language are
 *            considered a match.
 * @return the IRI of the created entity annotation
 */
public static IRI writeEntityAnnotation(EnhancementEngine engine, LiteralFactory literalFactory, Graph graph, IRI contentItemId, Collection<BlankNodeOrIRI> relatedEnhancements, Suggestion suggestion, String nameField, String lang) {
    Representation rep = suggestion.getEntity().getRepresentation();
    // 1. extract the "best label"
    // Start with the matched one
    Text label = suggestion.getMatchedLabel();
    // if the matched label is not in the requested language
    boolean langMatch = isLanguageMatch(label, lang);
    // search if a better label is available for this Entity
    if (!langMatch) {
        Iterator<Text> labels = rep.getText(nameField);
        while (labels.hasNext() && !langMatch) {
            Text actLabel = labels.next();
            langMatch = isLanguageMatch(actLabel, lang);
            if (langMatch) {
                // if the language matches ->
                // override the matched label
                label = actLabel;
            }
        }
    }
    // else the matched label will be the best to use
    Literal literal;
    if (label.getLanguage() == null) {
        literal = new PlainLiteralImpl(label.getText());
    } else {
        literal = new PlainLiteralImpl(label.getText(), new Language(label.getLanguage()));
    }
    // Now create the entityAnnotation
    IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(graph, engine, contentItemId);
    // first relate this entity annotation to the text annotation(s)
    for (BlankNodeOrIRI enhancement : relatedEnhancements) {
        graph.add(new TripleImpl(entityAnnotation, DC_RELATION, enhancement));
    }
    IRI entityUri = new IRI(rep.getId());
    // add the link to the referred entity
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entityUri));
    // add the label parsed above
    graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, literal));
    // the confidence is only written if the suggestion provides a score
    if (suggestion.getScore() != null) {
        graph.add(new TripleImpl(entityAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
    }
    // copy all rdf:type references of the entity to the annotation
    Iterator<Reference> types = rep.getReferences(RDF_TYPE.getUnicodeString());
    while (types.hasNext()) {
        graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(types.next().getReference())));
    }
    // add the name of the ReferencedSite that manages the Entity
    if (suggestion.getEntity().getSite() != null) {
        graph.add(new TripleImpl(entityAnnotation, new IRI(RdfResourceEnum.site.getUri()), new PlainLiteralImpl(suggestion.getEntity().getSite())));
    }
    return entityAnnotation;
}

/**
 * Null-safe check whether a label is in the requested language.
 * <p>
 * A <code>null</code> language only matches labels without a language;
 * otherwise the label's language must start with the requested one.
 * Checking <code>lang</code> first avoids the NullPointerException that
 * <code>String.startsWith(null)</code> would throw for a label with a
 * language when no language was requested.
 *
 * @param label the label to check
 * @param lang the requested language or <code>null</code> if none
 * @return <code>true</code> if the label is in the requested language
 */
private static boolean isLanguageMatch(Text label, String lang) {
    String labelLang = label.getLanguage();
    if (lang == null) {
        return labelLang == null;
    }
    return labelLang != null && labelLang.startsWith(lang);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Language(org.apache.clerezza.commons.rdf.Language) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Reference(org.apache.stanbol.entityhub.servicesapi.model.Reference) Literal(org.apache.clerezza.commons.rdf.Literal) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Text(org.apache.stanbol.entityhub.servicesapi.model.Text) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)143 IRI (org.apache.clerezza.commons.rdf.IRI)104 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)69 Graph (org.apache.clerezza.commons.rdf.Graph)66 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)49 Triple (org.apache.clerezza.commons.rdf.Triple)41 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)26 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)23 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)20 Literal (org.apache.clerezza.commons.rdf.Literal)20 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)20 IOException (java.io.IOException)18 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)17 Test (org.junit.Test)16 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)15 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)14 HashSet (java.util.HashSet)13 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)13 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)11