Search in sources :

Example 56 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class EntityCoMentionEngine method writeComentions.

private void writeComentions(ContentItem ci, Collection<LinkedEntity> comentions, String language, Set<IRI> textAnnotations) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    //we MUST adjust the confidence level of existing annotations only once
    //se we need to keep track of those
    Set<BlankNodeOrIRI> adjustedSuggestions = new HashSet<BlankNodeOrIRI>();
    log.debug("Write Co-Mentions:");
    for (LinkedEntity comention : comentions) {
        log.debug(" > {}", comention);
        //URIs of TextAnnotations for the initial mention of this co-mention
        Collection<IRI> initialMentions = new ArrayList<IRI>(comention.getSuggestions().size());
        for (Suggestion suggestion : comention.getSuggestions()) {
            Entity entity = suggestion.getEntity();
            if (textAnnotations.contains(entity.getUri())) {
                //                if(entity.getData().filter(entity.getUri(),RDF_TYPE,ENHANCER_TEXTANNOTATION).hasNext()){
                //this is a textAnnotation
                initialMentions.add(entity.getUri());
            }
        //else TODO support also Entities!!
        }
        //create the TextAnnotations for the co-mention
        for (Occurrence occurrence : comention.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            //search for existing text annotation
            boolean ignore = false;
            //search for textAnnotations with the same end
            IRI textAnnotation = null;
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer end = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_END, Integer.class, literalFactory);
                if (end != null && textAnnotations.contains(t.getSubject())) {
                    //metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()){
                    textAnnotation = (IRI) t.getSubject();
                    if (end > occurrence.getEnd()) {
                        // there is an other TextAnnotation selecting a bigger Span
                        //so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            it = metadata.filter(null, ENHANCER_END, endLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer start = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_START, Integer.class, literalFactory);
                if (start != null && textAnnotations.contains(t.getSubject())) {
                    //metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()){
                    textAnnotation = (IRI) t.getSubject();
                    if (start < occurrence.getStart()) {
                        // there is an other TextAnnotation selecting a bigger Span
                        //so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            if (!ignore) {
                //collect confidence values of co-mentions
                //maximum confidence of suggestions of the initial mention
                Double maxConfidence = null;
                //maximum confidence of existing suggestions
                Double maxExistingConfidence = null;
                if (textAnnotation == null) {
                    //not found ... create a new TextAnnotation for the co-mention
                    textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    //add it to the set of TextAnnotations
                    textAnnotations.add(textAnnotation);
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                } else {
                    //if existing add this engine as contributor
                    metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
                //maxConfidence = EnhancementEngineHelper.get(metadata, textAnnotation, 
                //    ENHANCER_CONFIDENCE, Double.class, literalFactory);
                }
                //now process initial mention(s) for the co-mention
                Set<IRI> dcTypes = new HashSet<IRI>();
                for (IRI initialMention : initialMentions) {
                    //get the dc:type(s) of the initial mentions
                    Iterator<IRI> dcTypesIt = getReferences(metadata, initialMention, DC_TYPE);
                    while (dcTypesIt.hasNext()) {
                        dcTypes.add(dcTypesIt.next());
                    }
                    //check confidence of the initial mention (fise:TextAnnotation)
                    Double confidnece = EnhancementEngineHelper.get(metadata, initialMention, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                    if (confidnece != null) {
                        if (maxConfidence == null) {
                            maxConfidence = confidnece;
                        } else if (maxConfidence.compareTo(confidnece) <= 0) {
                            maxConfidence = confidnece;
                        }
                    }
                    //else nothing to do
                    //now we need to compare the suggestions of the initial
                    //mention(s) with the existing one. 
                    //Get information about the suggestions of the initial mention
                    Map<RDFTerm, Double> initialSuggestions = new HashMap<RDFTerm, Double>();
                    Map<RDFTerm, RDFTerm> initialSuggestedEntities = new HashMap<RDFTerm, RDFTerm>();
                    for (Iterator<Triple> suggestions = metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext(); ) {
                        if (!textAnnotations.contains(suggestions)) {
                            BlankNodeOrIRI suggestion = suggestions.next().getSubject();
                            RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, suggestion, ENHANCER_ENTITY_REFERENCE);
                            if (suggestedEntity != null) {
                                //it has a suggestion
                                Double confidence = EnhancementEngineHelper.get(metadata, suggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                if (maxConfidence == null) {
                                    maxConfidence = confidence;
                                } else if (confidnece != null && maxConfidence.compareTo(confidnece) <= 0) {
                                    maxConfidence = confidnece;
                                }
                                //else nothing to do
                                initialSuggestions.put(suggestion, confidence);
                                initialSuggestedEntities.put(suggestedEntity, suggestion);
                            }
                        //no suggestion (dc:relation to some other resource)
                        }
                    // else ignore dc:relation to other fise:TextAnnotations
                    }
                    //now we collect existing Suggestions for this TextAnnoation where we need
                    //to adjust the confidence (quite some things to check ....)
                    Map<BlankNodeOrIRI, Double> existingSuggestions = new HashMap<BlankNodeOrIRI, Double>();
                    if (maxConfidence != null && confidenceAdjustmentFactor < 1) {
                        //suggestions are defined by incoming dc:releation
                        for (Iterator<Triple> esIt = metadata.filter(null, DC_RELATION, textAnnotation); esIt.hasNext(); ) {
                            BlankNodeOrIRI existingSuggestion = esIt.next().getSubject();
                            //but not all of them are suggestions
                            if (!textAnnotations.contains(existingSuggestion)) {
                                //ignore fise:TextAnnotations
                                Double existingConfidence = EnhancementEngineHelper.get(metadata, existingSuggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                //ignore fise:TextAnnotations also suggested for the initial mention
                                if (!initialSuggestions.containsKey(existingSuggestion)) {
                                    RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, existingSuggestion, ENHANCER_ENTITY_REFERENCE);
                                    //suggestions for the initial mention
                                    if (!initialSuggestedEntities.containsKey(suggestedEntity)) {
                                        //finally make sure that we adjust confidences only once
                                        if (!adjustedSuggestions.contains(existingSuggestion)) {
                                            existingSuggestions.put(existingSuggestion, existingConfidence);
                                        }
                                    //else confidence already adjusted
                                    } else {
                                        // different fise:EntityAnnotation, but same reference Entity
                                        //we need to check confidences to decide what to do
                                        RDFTerm initialSuggestion = initialSuggestedEntities.get(suggestedEntity);
                                        Double initialConfidence = initialSuggestions.get(initialSuggestion);
                                        if (initialConfidence == null || (existingConfidence != null && existingConfidence.compareTo(initialConfidence) >= 0)) {
                                            //existing confidence >= initial .. keep existing
                                            initialSuggestions.remove(initialSuggestion);
                                            if (maxExistingConfidence == null) {
                                                maxExistingConfidence = existingConfidence;
                                            } else if (maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                                maxExistingConfidence = existingConfidence;
                                            }
                                        } else {
                                            //adjust this one (if not yet adjusted)
                                            if (!adjustedSuggestions.contains(existingSuggestion)) {
                                                existingSuggestions.put(existingSuggestion, existingConfidence);
                                            }
                                        }
                                    }
                                } else {
                                    //a initial mention already present
                                    //no need to process initial mention
                                    initialSuggestions.remove(existingSuggestion);
                                    if (maxExistingConfidence == null) {
                                        maxExistingConfidence = existingConfidence;
                                    } else if (existingConfidence != null && maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                        maxExistingConfidence = existingConfidence;
                                    }
                                //else maxExistingConfidence == null (undefined)
                                }
                            }
                        //else ignore dc:relations to other fise:TextAnnotations
                        }
                        for (Entry<BlankNodeOrIRI, Double> entry : existingSuggestions.entrySet()) {
                            if (entry.getValue() != null) {
                                double adjustedConfidence = entry.getValue() * confidenceAdjustmentFactor;
                                if (maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence) {
                                    maxExistingConfidence = adjustedConfidence;
                                }
                                EnhancementEngineHelper.set(metadata, entry.getKey(), ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
                                //mark as adjusted
                                adjustedSuggestions.add(entry.getKey());
                            }
                        }
                    }
                    //add the suggestions of the initial mention to this one
                    for (RDFTerm suggestion : initialSuggestions.keySet()) {
                        metadata.add(new TripleImpl((BlankNodeOrIRI) suggestion, DC_RELATION, textAnnotation));
                    }
                    //finally link the co-mentation with the initial one
                    metadata.add(new TripleImpl(textAnnotation, DC_RELATION, initialMention));
                //metadata.add(new TripleImpl(initialMention, DC_RELATION, textAnnotation));
                }
                // Adapt the dc:type values of the fise:TextAnnotation
                // - if Suggestions added by this engine do have the max confidence
                //   use the dc:type values of the initial mention
                // - if the original suggestions do have a higher confidence keep the
                //   existing
                // - in case both do have the same confidence we add all dc:types
                boolean removeExistingDcTypes = maxConfidence != null && (maxExistingConfidence == null || maxConfidence.compareTo(maxExistingConfidence) >= 0);
                boolean addCoMentionDcTypes = maxExistingConfidence == null || (maxConfidence != null && maxConfidence.compareTo(maxExistingConfidence) >= 1);
                Iterator<IRI> existingDcTypesIt = getReferences(metadata, textAnnotation, DC_TYPE);
                while (existingDcTypesIt.hasNext()) {
                    //removeExistingDcTypes == true
                    if ((!dcTypes.remove(existingDcTypesIt.next()) || !addCoMentionDcTypes) && removeExistingDcTypes) {
                        //remove the dcType
                        existingDcTypesIt.remove();
                    }
                }
                if (addCoMentionDcTypes) {
                    for (IRI dcType : dcTypes) {
                        //add missing
                        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
                    }
                }
                //TODO: support also Entities
                if (maxConfidence != null) {
                    //set the confidence value (if known)
                    EnhancementEngineHelper.set(metadata, textAnnotation, ENHANCER_CONFIDENCE, maxConfidence, literalFactory);
                }
            }
        //else ignore this occurence
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph)

Example 57 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class EntityLinkingEngine method writeEnhancements.

/**
     * Writes the Enhancements for the {@link LinkedEntity LinkedEntities}
     * extracted from the parsed ContentItem
     * @param ci
     * @param linkedEntities
     * @param language
     */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language, boolean writeRankings) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Set<IRI> dereferencedEntitis = new HashSet<IRI>();
    Graph metadata = ci.getMetadata();
    for (LinkedEntity linkedEntity : linkedEntities) {
        Collection<IRI> textAnnotations = new ArrayList<IRI>(linkedEntity.getOccurrences().size());
        //first create the TextAnnotations for the Occurrences
        for (Occurrence occurrence : linkedEntity.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            //search for existing text annotation
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            IRI textAnnotation = null;
            while (it.hasNext()) {
                Triple t = it.next();
                if (metadata.filter(t.getSubject(), ENHANCER_END, endLiteral).hasNext() && metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()) {
                    textAnnotation = (IRI) t.getSubject();
                    break;
                }
            }
            if (textAnnotation == null) {
                //not found ... create a new one
                textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(linkedEntity.getScore())));
            } else {
                //if existing add this engine as contributor
                metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
            }
            //add dc:types (even to existing)
            for (IRI dcType : linkedEntity.getTypes()) {
                metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, dcType));
            }
            textAnnotations.add(textAnnotation);
        }
        //now the EntityAnnotations for the Suggestions
        for (Suggestion suggestion : linkedEntity.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            //should we use the label used for the match, or search the
            //representation for the best label ... currently its the matched one
            Literal label = suggestion.getBestLabel(linkerConfig.getNameField(), language);
            Entity entity = suggestion.getEntity();
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, label));
            metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entity.getUri()));
            Iterator<IRI> suggestionTypes = entity.getReferences(linkerConfig.getTypeField());
            while (suggestionTypes.hasNext()) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, suggestionTypes.next()));
            }
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
            for (IRI textAnnotation : textAnnotations) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation));
            }
            //add origin information of the EntiySearcher
            for (Entry<IRI, Collection<RDFTerm>> originInfo : entitySearcher.getOriginInformation().entrySet()) {
                for (RDFTerm value : originInfo.getValue()) {
                    metadata.add(new TripleImpl(entityAnnotation, originInfo.getKey(), value));
                }
            }
            if (writeRankings) {
                Float ranking = suggestion.getEntity().getEntityRanking();
                if (ranking != null) {
                    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_RANKING, //write the float as double
                    new TypedLiteralImpl(ranking.toString(), XSD_DOUBLE)));
                }
            }
            //add the RDF data for entities
            if (linkerConfig.isDereferenceEntitiesEnabled() && dereferencedEntitis.add(entity.getUri())) {
                //NOTE: do not add all triples as there might be other data in the graph
                for (Iterator<Triple> triples = entity.getData().filter(entity.getUri(), null, null); triples.hasNext(); metadata.add(triples.next())) ;
            }
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) Triple(org.apache.clerezza.commons.rdf.Triple) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) Collection(java.util.Collection) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet)

Example 58 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class LocationEnhancementEngine method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    IRI contentItemId = ci.getUri();
    Graph graph = ci.getMetadata();
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    //get all the textAnnotations
    /*
         * this Map holds the name as key and all the text annotations of
         * dc:type dbpedia:Place that select this name as value
         * this map is used to avoid multiple lookups for text annotations
         * selecting the same name.
         */
    Map<String, Collection<BlankNodeOrIRI>> name2placeEnhancementMap = new HashMap<String, Collection<BlankNodeOrIRI>>();
    Iterator<Triple> iterator = graph.filter(null, DC_TYPE, DBPEDIA_PLACE);
    while (iterator.hasNext()) {
        //the enhancement annotating an place
        BlankNodeOrIRI placeEnhancement = iterator.next().getSubject();
        //this can still be an TextAnnotation of an EntityAnnotation
        //so we need to filter TextAnnotation
        Triple isTextAnnotation = new TripleImpl(placeEnhancement, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        if (graph.contains(isTextAnnotation)) {
            //now get the name
            String name = EnhancementEngineHelper.getString(graph, placeEnhancement, ENHANCER_SELECTED_TEXT);
            if (name == null) {
                log.warn("Unable to process TextAnnotation " + placeEnhancement + " because property" + ENHANCER_SELECTED_TEXT + " is not present");
            } else {
                Collection<BlankNodeOrIRI> placeEnhancements = name2placeEnhancementMap.get(name);
                if (placeEnhancements == null) {
                    placeEnhancements = new ArrayList<BlankNodeOrIRI>();
                    name2placeEnhancementMap.put(name, placeEnhancements);
                }
                placeEnhancements.add(placeEnhancement);
            }
        } else {
        //TODO: if we also ant to process EntityAnnotations with the dc:type dbpedia:Place
        //      than we need to parse the name based on the enhancer:entity-name property
        }
    }
    //Now we do have all the names we need to lookup
    Map<SearchRequestPropertyEnum, Collection<String>> requestParams = new EnumMap<SearchRequestPropertyEnum, Collection<String>>(SearchRequestPropertyEnum.class);
    if (getMaxLocationEnhancements() != null) {
        requestParams.put(SearchRequestPropertyEnum.maxRows, Collections.singleton(getMaxLocationEnhancements().toString()));
    }
    for (Map.Entry<String, Collection<BlankNodeOrIRI>> entry : name2placeEnhancementMap.entrySet()) {
        List<Toponym> results;
        try {
            requestParams.put(SearchRequestPropertyEnum.name, Collections.singleton(entry.getKey()));
            results = geonamesService.searchToponyms(requestParams);
        } catch (Exception e) {
            /*
                     * TODO: Review if it makes sense to catch here for each name, or
                     * to catch the whole loop.
                     * This depends if single requests can result in Exceptions
                     * (e.g. because of encoding problems) or if usually Exceptions
                     * are thrown because of general things like connection issues
                     * or service unavailability.
                     */
            throw new EngineException(this, ci, e);
        }
        if (results != null) {
            Double maxScore = results.isEmpty() ? null : results.get(0).getScore();
            for (Toponym result : results) {
                log.debug("process result {} {}", result.getGeoNameId(), result.getName());
                Double score = getToponymScore(result, maxScore);
                log.debug("  > score {}", score);
                if (score != null) {
                    if (score < minScore) {
                        //if score is lower than the under bound, than stop
                        break;
                    }
                } else {
                    log.warn("NULL returned as Score for " + result.getGeoNameId() + " " + result.getName());
                /*
                         * NOTE: If score is not present all suggestions are
                         * added as enhancements to the metadata of the content
                         * item.
                         */
                }
                //write the enhancement!
                BlankNodeOrIRI locationEnhancement = writeEntityEnhancement(contentItemId, graph, literalFactory, result, entry.getValue(), null, score);
                log.debug("  > {}  >= {}", score, minHierarchyScore);
                if (score != null && score >= minHierarchyScore) {
                    log.debug("  > getHierarchy for {} {}", result.getGeoNameId(), result.getName());
                    //get the hierarchy
                    try {
                        Iterator<Toponym> hierarchy = getHierarchy(result).iterator();
                        for (int level = 0; hierarchy.hasNext(); level++) {
                            Toponym hierarchyEntry = hierarchy.next();
                            //  maybe add an configuration
                            if (level == 0) {
                                //Mother earth -> ignore
                                continue;
                            }
                            //write it as dependent to the locationEnhancement
                            if (result.getGeoNameId() != hierarchyEntry.getGeoNameId()) {
                                //TODO: add additional checks based on possible
                                //      configuration here!
                                log.debug("    - write hierarchy {} {}", hierarchyEntry.getGeoNameId(), hierarchyEntry.getName());
                                /*
                                     * The hierarchy service dose not provide a score, because it would be 1.0
                                     * so we need to set the score to this value.
                                     * Currently is is set to the value of the suggested entry
                                     */
                                writeEntityEnhancement(contentItemId, graph, literalFactory, hierarchyEntry, null, Collections.singletonList(locationEnhancement), 1.0);
                            }
                        }
                    } catch (Exception e) {
                        log.warn("Unable to get Hierarchy for " + result.getGeoNameId() + " " + result.getName(), e);
                    }
                }
            }
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) EnumMap(java.util.EnumMap) SearchRequestPropertyEnum(org.apache.stanbol.enhancer.engines.geonames.impl.GeonamesAPIWrapper.SearchRequestPropertyEnum) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) ConfigurationException(org.osgi.service.cm.ConfigurationException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph) Collection(java.util.Collection) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap)

Example 59 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class EntityLinkingEngineTest method setUpServices.

@BeforeClass
public static void setUpServices() throws IOException {
    searcher = new TestSearcherImpl(TEST_REFERENCED_SITE_NAME, NAME, new SimpleLabelTokenizer());
    //add some terms to the searcher
    Graph graph = new IndexedGraph();
    IRI uri = new IRI("urn:test:PatrickMarshall");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Patrick Marshall")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PERSON));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:Geologist");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologist")));
    graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos + "Concept")));
    graph.add(new TripleImpl(uri, REDIRECT, new IRI("urn:test:redirect:Geologist")));
    searcher.addEntity(new Entity(uri, graph));
    //a redirect
    uri = new IRI("urn:test:redirect:Geologist");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologe (redirect)")));
    graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos + "Concept")));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:NewZealand");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("New Zealand")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:UniversityOfOtago");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:University");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University")));
    graph.add(new TripleImpl(uri, TYPE, new IRI(NamespaceEnum.skos + "Concept")));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:Otago");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(uri, graph));
    //add a 2nd Otago (Place and University
    uri = new IRI("urn:test:Otago_Texas");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago (Texas)")));
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(uri, graph));
    uri = new IRI("urn:test:UniversityOfOtago_Texas");
    graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago (Texas)")));
    graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
    searcher.addEntity(new Entity(uri, graph));
    TEST_ANALYSED_TEXT = AnalysedTextFactory.getDefaultInstance().createAnalysedText(ciFactory.createBlob(new StringSource(TEST_TEXT)));
    TEST_ANALYSED_TEXT_WO = AnalysedTextFactory.getDefaultInstance().createAnalysedText(ciFactory.createBlob(new StringSource(TEST_TEXT_WO)));
    initAnalyzedText(TEST_ANALYSED_TEXT);
    TEST_ANALYSED_TEXT.addChunk(0, "Dr. Patrick Marshall".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
    TEST_ANALYSED_TEXT.addToken(4, 11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    TEST_ANALYSED_TEXT.addToken(12, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    initAnalyzedText(TEST_ANALYSED_TEXT_WO);
    TEST_ANALYSED_TEXT_WO.addChunk(0, "Dr. Marshall Patrick".length()).addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
    TEST_ANALYSED_TEXT_WO.addToken(4, 12).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    TEST_ANALYSED_TEXT_WO.addToken(13, 20).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) SimpleLabelTokenizer(org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.SimpleLabelTokenizer) TestSearcherImpl(org.apache.stanbol.enhancer.engines.entitylinking.impl.TestSearcherImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) BeforeClass(org.junit.BeforeClass)

Example 60 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class EntityCoReferenceEngineTest method testSpatialCoref.

@Test
public void testSpatialCoref() throws EngineException, IOException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(SPATIAL_TEXT));
    Graph graph = ci.getMetadata();
    IRI textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, engine);
    graph.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl("en")));
    graph.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, new PlainLiteralImpl("100.0")));
    graph.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
    Entry<IRI, Blob> textBlob = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    AnalysedText at = atFactory.createAnalysedText(ci, textBlob.getValue());
    Sentence sentence1 = at.addSentence(0, SPATIAL_SENTENCE_1.indexOf(".") + 1);
    Chunk angelaMerkel = sentence1.addChunk(0, "Angela Merkel".length());
    angelaMerkel.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(new NerTag("Angela Merkel", OntologicalClasses.DBPEDIA_PERSON)));
    Sentence sentence2 = at.addSentence(SPATIAL_SENTENCE_1.indexOf(".") + 1, SPATIAL_SENTENCE_1.length() + SPATIAL_SENTENCE_2.indexOf(".") + 1);
    int theStartIdx = sentence2.getSpan().indexOf("The");
    int germanStartIdx = sentence2.getSpan().indexOf("German");
    int chancellorStartIdx = sentence2.getSpan().indexOf("politician");
    Token the = sentence2.addToken(theStartIdx, theStartIdx + "The".length());
    the.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(new PosTag("The", LexicalCategory.PronounOrDeterminer, Pos.Determiner)));
    Token german = sentence2.addToken(germanStartIdx, germanStartIdx + "German".length());
    german.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(new PosTag("German", LexicalCategory.Adjective)));
    Token politician = sentence2.addToken(chancellorStartIdx, chancellorStartIdx + "politician".length());
    politician.addAnnotation(NlpAnnotations.POS_ANNOTATION, Value.value(new PosTag("politician", LexicalCategory.Noun)));
    Chunk theGermanChancellor = sentence2.addChunk(theStartIdx, chancellorStartIdx + "politician".length());
    theGermanChancellor.addAnnotation(NlpAnnotations.PHRASE_ANNOTATION, Value.value(new PhraseTag("The German politician", LexicalCategory.Noun)));
    engine.computeEnhancements(ci);
    Value<CorefFeature> representativeCorefValue = angelaMerkel.getAnnotation(NlpAnnotations.COREF_ANNOTATION);
    Assert.assertNotNull(representativeCorefValue);
    CorefFeature representativeCoref = representativeCorefValue.value();
    Assert.assertTrue(representativeCoref.isRepresentative());
    Assert.assertTrue(representativeCoref.getMentions().contains(theGermanChancellor));
    Value<CorefFeature> subordinateCorefValue = theGermanChancellor.getAnnotation(NlpAnnotations.COREF_ANNOTATION);
    Assert.assertNotNull(subordinateCorefValue);
    CorefFeature subordinateCoref = subordinateCorefValue.value();
    Assert.assertTrue(!subordinateCoref.isRepresentative());
    Assert.assertTrue(subordinateCoref.getMentions().contains(angelaMerkel));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) NerTag(org.apache.stanbol.enhancer.nlp.ner.NerTag) CorefFeature(org.apache.stanbol.enhancer.nlp.coref.CorefFeature) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Token(org.apache.stanbol.enhancer.nlp.model.Token) Chunk(org.apache.stanbol.enhancer.nlp.model.Chunk) PhraseTag(org.apache.stanbol.enhancer.nlp.phrase.PhraseTag) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) Graph(org.apache.clerezza.commons.rdf.Graph) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Sentence(org.apache.stanbol.enhancer.nlp.model.Sentence) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Aggregations

Graph (org.apache.clerezza.commons.rdf.Graph)172 IRI (org.apache.clerezza.commons.rdf.IRI)110 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)66 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)57 Triple (org.apache.clerezza.commons.rdf.Triple)45 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)43 Test (org.junit.Test)38 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)36 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)34 IOException (java.io.IOException)27 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)26 HashSet (java.util.HashSet)24 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)24 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)24 InputStream (java.io.InputStream)21 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)17 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)17 ArrayList (java.util.ArrayList)16 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)15