
Example 1 with Entity

Use of org.apache.stanbol.enhancer.engines.entitylinking.Entity in project stanbol by apache.

The class EntityCoMentionEngine, method writeComentions.

private void writeComentions(ContentItem ci, Collection<LinkedEntity> comentions, String language, Set<IRI> textAnnotations) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    //we MUST adjust the confidence level of existing annotations only once
    //so we need to keep track of those
    Set<BlankNodeOrIRI> adjustedSuggestions = new HashSet<BlankNodeOrIRI>();
    log.debug("Write Co-Mentions:");
    for (LinkedEntity comention : comentions) {
        log.debug(" > {}", comention);
        //URIs of TextAnnotations for the initial mention of this co-mention
        Collection<IRI> initialMentions = new ArrayList<IRI>(comention.getSuggestions().size());
        for (Suggestion suggestion : comention.getSuggestions()) {
            Entity entity = suggestion.getEntity();
            if (textAnnotations.contains(entity.getUri())) {
                //                if(entity.getData().filter(entity.getUri(),RDF_TYPE,ENHANCER_TEXTANNOTATION).hasNext()){
                //this is a textAnnotation
                initialMentions.add(entity.getUri());
            }
        //else TODO support also Entities!!
        }
        //create the TextAnnotations for the co-mention
        for (Occurrence occurrence : comention.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            //search for existing text annotation
            boolean ignore = false;
            //search for textAnnotations with the same end
            IRI textAnnotation = null;
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer end = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_END, Integer.class, literalFactory);
                if (end != null && textAnnotations.contains(t.getSubject())) {
                    //metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()){
                    textAnnotation = (IRI) t.getSubject();
                    if (end > occurrence.getEnd()) {
                        // there is another TextAnnotation selecting a bigger span
                        //so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            it = metadata.filter(null, ENHANCER_END, endLiteral);
            while (it.hasNext()) {
                Triple t = it.next();
                Integer start = EnhancementEngineHelper.get(metadata, t.getSubject(), ENHANCER_START, Integer.class, literalFactory);
                if (start != null && textAnnotations.contains(t.getSubject())) {
                    //metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()){
                    textAnnotation = (IRI) t.getSubject();
                    if (start < occurrence.getStart()) {
                        // there is another TextAnnotation selecting a bigger span
                        //so we should ignore this Occurrence
                        ignore = true;
                    }
                }
            }
            if (!ignore) {
                //collect confidence values of co-mentions
                //maximum confidence of suggestions of the initial mention
                Double maxConfidence = null;
                //maximum confidence of existing suggestions
                Double maxExistingConfidence = null;
                if (textAnnotation == null) {
                    //not found ... create a new TextAnnotation for the co-mention
                    textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    //add it to the set of TextAnnotations
                    textAnnotations.add(textAnnotation);
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                    metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                } else {
                    //if existing add this engine as contributor
                    metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
                //maxConfidence = EnhancementEngineHelper.get(metadata, textAnnotation, 
                //    ENHANCER_CONFIDENCE, Double.class, literalFactory);
                }
                //now process initial mention(s) for the co-mention
                Set<IRI> dcTypes = new HashSet<IRI>();
                for (IRI initialMention : initialMentions) {
                    //get the dc:type(s) of the initial mentions
                    Iterator<IRI> dcTypesIt = getReferences(metadata, initialMention, DC_TYPE);
                    while (dcTypesIt.hasNext()) {
                        dcTypes.add(dcTypesIt.next());
                    }
                    //check confidence of the initial mention (fise:TextAnnotation)
                    Double mentionConfidence = EnhancementEngineHelper.get(metadata, initialMention, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                    if (mentionConfidence != null
                            && (maxConfidence == null || maxConfidence.compareTo(mentionConfidence) <= 0)) {
                        maxConfidence = mentionConfidence;
                    }
                    //else nothing to do
                    //now we need to compare the suggestions of the initial
                    //mention(s) with the existing one. 
                    //Get information about the suggestions of the initial mention
                    Map<RDFTerm, Double> initialSuggestions = new HashMap<RDFTerm, Double>();
                    Map<RDFTerm, RDFTerm> initialSuggestedEntities = new HashMap<RDFTerm, RDFTerm>();
                    for (Iterator<Triple> suggestions = metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext(); ) {
                        BlankNodeOrIRI suggestion = suggestions.next().getSubject();
                        if (!textAnnotations.contains(suggestion)) {
                            RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, suggestion, ENHANCER_ENTITY_REFERENCE);
                            if (suggestedEntity != null) {
                                //it has a suggestion
                                Double confidence = EnhancementEngineHelper.get(metadata, suggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                if (maxConfidence == null) {
                                    maxConfidence = confidence;
                                } else if (confidence != null && maxConfidence.compareTo(confidence) <= 0) {
                                    maxConfidence = confidence;
                                }
                                //else nothing to do
                                initialSuggestions.put(suggestion, confidence);
                                initialSuggestedEntities.put(suggestedEntity, suggestion);
                            }
                        //no suggestion (dc:relation to some other resource)
                        }
                    // else ignore dc:relation to other fise:TextAnnotations
                    }
                    //now we collect existing Suggestions for this TextAnnotation where we need
                    //to adjust the confidence (quite some things to check ....)
                    Map<BlankNodeOrIRI, Double> existingSuggestions = new HashMap<BlankNodeOrIRI, Double>();
                    if (maxConfidence != null && confidenceAdjustmentFactor < 1) {
                        //suggestions are defined by incoming dc:relation
                        for (Iterator<Triple> esIt = metadata.filter(null, DC_RELATION, textAnnotation); esIt.hasNext(); ) {
                            BlankNodeOrIRI existingSuggestion = esIt.next().getSubject();
                            //but not all of them are suggestions
                            if (!textAnnotations.contains(existingSuggestion)) {
                                //ignore fise:TextAnnotations
                                Double existingConfidence = EnhancementEngineHelper.get(metadata, existingSuggestion, ENHANCER_CONFIDENCE, Double.class, literalFactory);
                                //fise:EntityAnnotations also suggested for the initial mention are handled separately below
                                if (!initialSuggestions.containsKey(existingSuggestion)) {
                                    RDFTerm suggestedEntity = EnhancementEngineHelper.getReference(metadata, existingSuggestion, ENHANCER_ENTITY_REFERENCE);
                                    //check whether the referenced Entity is also suggested for the initial mention
                                    if (!initialSuggestedEntities.containsKey(suggestedEntity)) {
                                        //finally make sure that we adjust confidences only once
                                        if (!adjustedSuggestions.contains(existingSuggestion)) {
                                            existingSuggestions.put(existingSuggestion, existingConfidence);
                                        }
                                    //else confidence already adjusted
                                    } else {
                                        // different fise:EntityAnnotation, but same reference Entity
                                        //we need to check confidences to decide what to do
                                        RDFTerm initialSuggestion = initialSuggestedEntities.get(suggestedEntity);
                                        Double initialConfidence = initialSuggestions.get(initialSuggestion);
                                        if (initialConfidence == null || (existingConfidence != null && existingConfidence.compareTo(initialConfidence) >= 0)) {
                                            //existing confidence >= initial .. keep existing
                                            initialSuggestions.remove(initialSuggestion);
                                            if (maxExistingConfidence == null) {
                                                maxExistingConfidence = existingConfidence;
                                            } else if (maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                                maxExistingConfidence = existingConfidence;
                                            }
                                        } else {
                                            //adjust this one (if not yet adjusted)
                                            if (!adjustedSuggestions.contains(existingSuggestion)) {
                                                existingSuggestions.put(existingSuggestion, existingConfidence);
                                            }
                                        }
                                    }
                                } else {
                                    //a suggestion of the initial mention is already present
                                    //no need to add it again
                                    initialSuggestions.remove(existingSuggestion);
                                    if (maxExistingConfidence == null) {
                                        maxExistingConfidence = existingConfidence;
                                    } else if (existingConfidence != null && maxExistingConfidence.compareTo(existingConfidence) <= 0) {
                                        maxExistingConfidence = existingConfidence;
                                    }
                                //else maxExistingConfidence == null (undefined)
                                }
                            }
                        //else ignore dc:relations to other fise:TextAnnotations
                        }
                        for (Entry<BlankNodeOrIRI, Double> entry : existingSuggestions.entrySet()) {
                            if (entry.getValue() != null) {
                                double adjustedConfidence = entry.getValue() * confidenceAdjustmentFactor;
                                if (maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence) {
                                    maxExistingConfidence = adjustedConfidence;
                                }
                                EnhancementEngineHelper.set(metadata, entry.getKey(), ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
                                //mark as adjusted
                                adjustedSuggestions.add(entry.getKey());
                            }
                        }
                    }
                    //add the suggestions of the initial mention to this one
                    for (RDFTerm suggestion : initialSuggestions.keySet()) {
                        metadata.add(new TripleImpl((BlankNodeOrIRI) suggestion, DC_RELATION, textAnnotation));
                    }
                    //finally link the co-mention with the initial one
                    metadata.add(new TripleImpl(textAnnotation, DC_RELATION, initialMention));
                //metadata.add(new TripleImpl(initialMention, DC_RELATION, textAnnotation));
                }
                // Adapt the dc:type values of the fise:TextAnnotation
                // - if Suggestions added by this engine do have the max confidence
                //   use the dc:type values of the initial mention
                // - if the original suggestions do have a higher confidence keep the
                //   existing
                // - in case both do have the same confidence we add all dc:types
                boolean removeExistingDcTypes = maxConfidence != null && (maxExistingConfidence == null || maxConfidence.compareTo(maxExistingConfidence) >= 0);
                boolean addCoMentionDcTypes = maxExistingConfidence == null || (maxConfidence != null && maxConfidence.compareTo(maxExistingConfidence) >= 1);
                Iterator<IRI> existingDcTypesIt = getReferences(metadata, textAnnotation, DC_TYPE);
                while (existingDcTypesIt.hasNext()) {
                    //removeExistingDcTypes == true
                    if ((!dcTypes.remove(existingDcTypesIt.next()) || !addCoMentionDcTypes) && removeExistingDcTypes) {
                        //remove the dcType
                        existingDcTypesIt.remove();
                    }
                }
                if (addCoMentionDcTypes) {
                    for (IRI dcType : dcTypes) {
                        //add missing
                        metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
                    }
                }
                //TODO: support also Entities
                if (maxConfidence != null) {
                    //set the confidence value (if known)
                    EnhancementEngineHelper.set(metadata, textAnnotation, ENHANCER_CONFIDENCE, maxConfidence, literalFactory);
                }
            }
        //else ignore this occurrence
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph)
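
The confidence handling above is easiest to follow in isolation. The following is a minimal, framework-free sketch of the "adjust each existing suggestion at most once and track the maximum adjusted confidence" step; the ADJUSTMENT_FACTOR constant and the String suggestion keys are illustrative stand-ins, not the Stanbol types used by EntityCoMentionEngine.

// Minimal, framework-free sketch of the confidence adjustment shown above.
// ADJUSTMENT_FACTOR and the urn:suggestion:* keys are illustrative, not Stanbol API.
import java.util.*;

public class ConfidenceAdjustmentSketch {

    // plays the role of the engine's confidenceAdjustmentFactor (< 1 penalizes old suggestions)
    static final double ADJUSTMENT_FACTOR = 0.33;

    public static void main(String[] args) {
        Map<String, Double> existingSuggestions = new LinkedHashMap<>();
        existingSuggestions.put("urn:suggestion:paris-texas", 0.8);
        existingSuggestions.put("urn:suggestion:paris-france", 0.6);

        // suggestions must only be adjusted once, like the engine's adjustedSuggestions set
        Set<String> adjusted = new HashSet<>();
        Double maxExistingConfidence = null;

        for (Map.Entry<String, Double> entry : existingSuggestions.entrySet()) {
            if (entry.getValue() == null || !adjusted.add(entry.getKey())) {
                continue; // unknown confidence or already adjusted
            }
            double adjustedConfidence = entry.getValue() * ADJUSTMENT_FACTOR;
            if (maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence) {
                maxExistingConfidence = adjustedConfidence; // keep track of the best adjusted value
            }
            entry.setValue(adjustedConfidence);
        }
        System.out.println(existingSuggestions + " max=" + maxExistingConfidence);
    }
}

The same idea drives the engine's adjustedSuggestions set: once an existing EntityAnnotation has been penalized by the adjustment factor, later co-mentions must not penalize it a second time.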

Example 2 with Entity

Use of org.apache.stanbol.enhancer.engines.entitylinking.Entity in project stanbol by apache.

The class EntityhubSearcher, method lookup.

@Override
public Collection<? extends Entity> lookup(IRI field, Set<IRI> includeFields, List<String> search, String[] languages, Integer limit, Integer offset) throws EntitySearcherException {
    Entityhub entityhub = getSearchService();
    if (entityhub == null) {
        throw new EntitySearcherException("The Entityhub is currently not active");
    }
    FieldQuery query = EntitySearcherUtils.createFieldQuery(entityhub.getQueryFactory(), field, includeFields, search, languages);
    if (limit != null && limit > 0) {
        query.setLimit(limit);
    } else if (this.limit != null) {
        query.setLimit(this.limit);
    }
    if (offset != null && offset.intValue() > 0) {
        query.setOffset(offset.intValue());
    }
    QueryResultList<Representation> results;
    try {
        results = entityhub.find(query);
    } catch (EntityhubException e) {
        throw new EntitySearcherException("Exception while searchign for " + search + '@' + Arrays.toString(languages) + "in the Entityhub", e);
    }
    if (!results.isEmpty()) {
        Set<String> languagesSet = new HashSet<String>(Arrays.asList(languages));
        Collection<Entity> entities = new ArrayList<Entity>(results.size());
        for (Representation result : results) {
            entities.add(new EntityhubEntity(result, null, languagesSet));
        }
        return entities;
    } else {
        return Collections.emptyList();
    }
}
Also used : FieldQuery(org.apache.stanbol.entityhub.servicesapi.query.FieldQuery) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) ArrayList(java.util.ArrayList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) EntitySearcherException(org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcherException) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) Entityhub(org.apache.stanbol.entityhub.servicesapi.Entityhub) HashSet(java.util.HashSet)
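
The limit/offset handling in this lookup is a small but easy-to-miss detail: an explicit per-request limit wins, otherwise the searcher's configured default is used, and non-positive offsets are ignored. Below is a tiny, dependency-free sketch of that fallback; QueryStub and DEFAULT_LIMIT are hypothetical stand-ins for the Entityhub FieldQuery and the searcher's configured limit, not real API.

// Illustrative-only sketch of the limit/offset fallback used in lookup() above.
public class LimitOffsetSketch {

    static final Integer DEFAULT_LIMIT = 10; // plays the role of the searcher's configured this.limit

    static class QueryStub {
        Integer limit;
        Integer offset;
        void setLimit(int limit) { this.limit = limit; }
        void setOffset(int offset) { this.offset = offset; }
    }

    static QueryStub configure(Integer requestedLimit, Integer requestedOffset) {
        QueryStub query = new QueryStub();
        if (requestedLimit != null && requestedLimit > 0) {
            query.setLimit(requestedLimit);   // explicit per-request limit wins
        } else if (DEFAULT_LIMIT != null) {
            query.setLimit(DEFAULT_LIMIT);    // otherwise fall back to the configured default
        }
        if (requestedOffset != null && requestedOffset > 0) {
            query.setOffset(requestedOffset); // offsets <= 0 are ignored
        }
        return query;
    }

    public static void main(String[] args) {
        QueryStub q = configure(null, 20);
        System.out.println("limit=" + q.limit + " offset=" + q.offset); // limit=10 offset=20
    }
}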

Example 3 with Entity

Use of org.apache.stanbol.enhancer.engines.entitylinking.Entity in project stanbol by apache.

The class EntityLinkingEngine, method writeEnhancements.

/**
     * Writes the Enhancements for the {@link LinkedEntity LinkedEntities}
     * extracted from the parsed ContentItem
     * @param ci
     * @param linkedEntities
     * @param language
     * @param writeRankings
     */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language, boolean writeRankings) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Set<IRI> dereferencedEntities = new HashSet<IRI>();
    Graph metadata = ci.getMetadata();
    for (LinkedEntity linkedEntity : linkedEntities) {
        Collection<IRI> textAnnotations = new ArrayList<IRI>(linkedEntity.getOccurrences().size());
        //first create the TextAnnotations for the Occurrences
        for (Occurrence occurrence : linkedEntity.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            //search for existing text annotation
            Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
            IRI textAnnotation = null;
            while (it.hasNext()) {
                Triple t = it.next();
                if (metadata.filter(t.getSubject(), ENHANCER_END, endLiteral).hasNext() && metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()) {
                    textAnnotation = (IRI) t.getSubject();
                    break;
                }
            }
            if (textAnnotation == null) {
                //not found ... create a new one
                textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(linkedEntity.getScore())));
            } else {
                //if existing add this engine as contributor
                metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
            }
            //add dc:types (even to existing)
            for (IRI dcType : linkedEntity.getTypes()) {
                metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, dcType));
            }
            textAnnotations.add(textAnnotation);
        }
        //now the EntityAnnotations for the Suggestions
        for (Suggestion suggestion : linkedEntity.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            //should we use the label used for the match, or search the
            //representation for the best label ... currently it's the matched one
            Literal label = suggestion.getBestLabel(linkerConfig.getNameField(), language);
            Entity entity = suggestion.getEntity();
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, label));
            metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entity.getUri()));
            Iterator<IRI> suggestionTypes = entity.getReferences(linkerConfig.getTypeField());
            while (suggestionTypes.hasNext()) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, suggestionTypes.next()));
            }
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
            for (IRI textAnnotation : textAnnotations) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation));
            }
            //add origin information of the EntitySearcher
            for (Entry<IRI, Collection<RDFTerm>> originInfo : entitySearcher.getOriginInformation().entrySet()) {
                for (RDFTerm value : originInfo.getValue()) {
                    metadata.add(new TripleImpl(entityAnnotation, originInfo.getKey(), value));
                }
            }
            if (writeRankings) {
                Float ranking = suggestion.getEntity().getEntityRanking();
                if (ranking != null) {
                    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_RANKING, //write the float as double
                    new TypedLiteralImpl(ranking.toString(), XSD_DOUBLE)));
                }
            }
            //add the RDF data for entities
            if (linkerConfig.isDereferenceEntitiesEnabled() && dereferencedEntities.add(entity.getUri())) {
                //NOTE: do not add all triples as there might be other data in the graph
                Iterator<Triple> triples = entity.getData().filter(entity.getUri(), null, null);
                while (triples.hasNext()) {
                    metadata.add(triples.next());
                }
            }
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) Triple(org.apache.clerezza.commons.rdf.Triple) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) Collection(java.util.Collection) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet)
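
A pattern worth calling out in writeEnhancements is "reuse an existing fise:TextAnnotation for the same (start, end) span, otherwise create a new one". The sketch below shows the same idea with a plain map keyed by the span instead of RDF triple filtering; the urn:enhancement:... identifiers are made up for illustration and are not what EnhancementEngineHelper produces.

// Minimal sketch of the span-reuse pattern from writeEnhancements above.
import java.util.*;

public class SpanReuseSketch {

    private final Map<String, String> annotationsBySpan = new HashMap<>();
    private int counter = 0;

    String getOrCreateTextAnnotation(int start, int end) {
        String span = start + ":" + end;
        return annotationsBySpan.computeIfAbsent(span,
                key -> "urn:enhancement:text-annotation-" + (++counter)); // create only if the span is new
    }

    public static void main(String[] args) {
        SpanReuseSketch sketch = new SpanReuseSketch();
        String first = sketch.getOrCreateTextAnnotation(10, 15);
        String second = sketch.getOrCreateTextAnnotation(10, 15); // same span, so the annotation is reused
        System.out.println(first.equals(second)); // prints true
    }
}

Reusing the annotation keeps the graph compact: additional engines only attach themselves as dc:contributor and add further dc:type and suggestion triples to the existing node, as the real method does.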

Example 4 with Entity

Use of org.apache.stanbol.enhancer.engines.entitylinking.Entity in project stanbol by apache.

The class InMemoryEntityIndex, method addEntity.

public void addEntity(Entity entity) {
    if (log.isDebugEnabled()) {
        log.debug(" > register {}", entity);
    }
    entities.put(entity.getUri(), entity);
    Iterator<Literal> labels = entity.getText(nameField);
    while (labels.hasNext()) {
        Literal label = labels.next();
        String lang = label.getLanguage() == null ? null : label.getLanguage().toString();
        if (indexLanguages.contains(lang)) {
            for (String token : tokenizer.tokenize(label.getLexicalForm(), null)) {
                token = token.toLowerCase(Locale.ROOT);
                Collection<Entity> values = index.get(token);
                if (values == null) {
                    values = new ArrayList<Entity>();
                    index.put(token, values);
                }
                values.add(entity);
            }
        }
    //else ignore labels in other languages
    }
}
Also used : Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) Literal(org.apache.clerezza.commons.rdf.Literal)
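
addEntity effectively builds an inverted index from lower-cased label tokens to the entities carrying those labels. The following stripped-down sketch shows that structure with plain strings and a whitespace tokenizer; it is an illustration of the idea, not the Stanbol Tokenizer or Entity types.

// Stripped-down illustration of the inverted label index built by addEntity above.
import java.util.*;

public class LabelIndexSketch {

    private final Map<String, Collection<String>> index = new HashMap<>();

    void addEntity(String entityUri, String label) {
        for (String token : label.split("\\s+")) {       // trivial whitespace tokenizer stand-in
            token = token.toLowerCase(Locale.ROOT);       // the index is case-insensitive
            index.computeIfAbsent(token, t -> new ArrayList<>()).add(entityUri);
        }
    }

    Collection<String> lookup(String token) {
        return index.getOrDefault(token.toLowerCase(Locale.ROOT), Collections.emptyList());
    }

    public static void main(String[] args) {
        LabelIndexSketch idx = new LabelIndexSketch();
        idx.addEntity("urn:entity:paris", "Paris");
        idx.addEntity("urn:entity:paris-hilton", "Paris Hilton");
        System.out.println(idx.lookup("paris")); // both entities share the token "paris"
    }
}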

Example 5 with Entity

Use of org.apache.stanbol.enhancer.engines.entitylinking.Entity in project stanbol by apache.

The class InMemoryEntityIndex, method lookup.

@Override
public Collection<? extends Entity> lookup(IRI field, Set<IRI> includeFields, List<String> search, String[] languages, Integer numResults, Integer offset) throws IllegalStateException {
    //the nameField is the field
    assert nameField.equals(field);
    //the parsed languages include the language
    assert Arrays.asList(languages).contains(language);
    //NOTES: 
    // We can ignore the following parameters
    // * includeFields: as we will return the Entities as added to the index
    //The syntax requires us to
    //  * AND over the tokenized elements of the search List
    //  * OR over the elements in the search
    //  * Elements that do match more search elements need to be ranked first
    Map<Entity, int[]> results = new HashMap<Entity, int[]>();
    for (String qe : search) {
        Set<Entity> qeResult = join(tokenizer.tokenize(qe, language));
        for (Entity e : qeResult) {
            int[] count = results.get(e);
            if (count != null) {
                count[0] = count[0] + qe.length();
            } else {
                results.put(e, new int[] { qe.length() });
            }
        }
    }
    //TODO how to create generic arrays
    @SuppressWarnings("unchecked") Entry<Entity, int[]>[] resultArray = results.entrySet().toArray(new Entry[results.size()]);
    int index;
    if (offset != null && offset.intValue() > 0) {
        index = offset.intValue();
    } else {
        index = 0;
    }
    if (index >= resultArray.length) {
        //no more results
        return Collections.emptyList();
    }
    //final ranking
    Arrays.sort(resultArray, RESULT_SCORE_COMPARATOR);
    List<Entity> resultList = new ArrayList<Entity>(Math.min(numResults + 3, (resultArray.length - index)));
    int lastScore = -1;
    boolean done = false;
    //start at the parsed offset
    for (; index < resultArray.length && !done; index++) {
        if (index < numResults) {
            resultList.add(resultArray[index].getKey());
            if (index == (numResults - 1)) {
                //memorize the score of the last included
                lastScore = resultArray[index].getValue()[0];
            }
        } else if (lastScore == resultArray[index].getValue()[0]) {
            //include additional results with the same score
            resultList.add(resultArray[index].getKey());
        } else {
            //cut off
            done = true;
        }
    }
    return resultList;
}
Also used : Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Entry(java.util.Map.Entry)
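
The result paging in this lookup has one subtlety: after sorting by score, results beyond numResults are still included as long as they tie with the score of the last included entry. A minimal plain-Java sketch of that cut-off rule, with made-up scores and ids, is shown below.

// Plain-Java sketch of the ranking cut-off used by InMemoryEntityIndex.lookup above.
import java.util.*;

public class RankingCutoffSketch {

    static List<String> topWithTies(Map<String, Integer> scores, int numResults) {
        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(scores.entrySet());
        ranked.sort((a, b) -> Integer.compare(b.getValue(), a.getValue())); // highest score first
        List<String> result = new ArrayList<>();
        int lastScore = -1; // mirrors the sentinel used in the original
        for (Map.Entry<String, Integer> entry : ranked) {
            if (result.size() < numResults) {
                result.add(entry.getKey());
                lastScore = entry.getValue();   // remember the score of the last included entry
            } else if (entry.getValue() == lastScore) {
                result.add(entry.getKey());     // include ties with the cut-off score
            } else {
                break;                          // strictly lower score: cut off
            }
        }
        return result;
    }

    public static void main(String[] args) {
        Map<String, Integer> scores = new LinkedHashMap<>();
        scores.put("e1", 9); scores.put("e2", 7); scores.put("e3", 7); scores.put("e4", 3);
        System.out.println(topWithTies(scores, 2)); // [e1, e2, e3]
    }
}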

Aggregations

Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity): 13
ArrayList (java.util.ArrayList): 6
HashSet (java.util.HashSet): 6
Literal (org.apache.clerezza.commons.rdf.Literal): 5
Graph (org.apache.clerezza.commons.rdf.Graph): 4
IRI (org.apache.clerezza.commons.rdf.IRI): 4
TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 4
Language (org.apache.clerezza.commons.rdf.Language): 3
Triple (org.apache.clerezza.commons.rdf.Triple): 3
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 3
LinkedEntity (org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity): 3
Collection (java.util.Collection): 2
HashMap (java.util.HashMap): 2
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 2
Occurrence (org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence): 2
Suggestion (org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion): 2
NlpEngineHelper.getLanguage (org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage): 2
Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation): 2
FieldQuery (org.apache.stanbol.entityhub.servicesapi.query.FieldQuery): 2
Entry (java.util.Map.Entry): 1