Search in sources :

Example 1 with Hit

use of org.molgenis.semanticsearch.semantic.Hit in project molgenis by molgenis.

the class SemanticSearchServiceImpl method findTags.

/**
 * Finds a tag for every atomic attribute of the given entity type.
 *
 * <p>Each atomic attribute is passed to {@code findTags(Attribute, List)}; attributes for which
 * that call returns {@code null} are left out of the result. The returned map keeps the iteration
 * order of {@code getAtomicAttributes()}.
 *
 * @param entity      identifier passed to {@code metaDataService.getEntityType}
 * @param ontologyIds ontology identifiers forwarded to the per-attribute lookup
 * @return map from attribute to its tag hit, containing only attributes that received a tag
 */
@Override
public Map<Attribute, Hit<OntologyTerm>> findTags(String entity, List<String> ontologyIds) {
    EntityType entityType = metaDataService.getEntityType(entity);
    Map<Attribute, Hit<OntologyTerm>> tagsByAttribute = new LinkedHashMap<>();
    for (Attribute attribute : entityType.getAtomicAttributes()) {
        Hit<OntologyTerm> attributeTag = findTags(attribute, ontologyIds);
        if (attributeTag == null) {
            // No tag found for this attribute: leave it out of the result.
            continue;
        }
        tagsByAttribute.put(attribute, attributeTag);
    }
    return tagsByAttribute;
}
Also used : EntityType(org.molgenis.data.meta.model.EntityType) Hit(org.molgenis.semanticsearch.semantic.Hit) Attribute(org.molgenis.data.meta.model.Attribute) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) OntologyTerm(org.molgenis.ontology.core.model.OntologyTerm)

Example 2 with Hit

use of org.molgenis.semanticsearch.semantic.Hit in project molgenis by molgenis.

the class SemanticSearchServiceImpl method bestMatchingSynonym.

/**
 * Finds the synonym of an ontology term that best matches a set of search terms.<br/>
 * Every synonym of the {@link OntologyTerm} is scored with
 * {@code distanceFrom(synonym, searchTerms, stemmer)} and wrapped in a {@link Hit}; the hit that
 * is greatest according to the hits' natural ordering is returned. Per the original documentation
 * of this method the score is a {@link StringDistance} computed on stemmed terms, where 0 means
 * disjunct and 1 means identical.
 *
 * @param ontologyTerm the {@link OntologyTerm} whose synonyms are scored
 * @param searchTerms  the search terms to compare each synonym against
 * @return the best matching synonym together with its score
 * @throws NoSuchElementException if the ontology term has no synonyms
 */
public Hit<String> bestMatchingSynonym(OntologyTerm ontologyTerm, Set<String> searchTerms) {
    Stemmer stemmer = new Stemmer();
    return ontologyTerm.getSynonyms().stream()
            .map(synonym -> Hit.create(synonym, distanceFrom(synonym, searchTerms, stemmer)))
            .max(Comparator.naturalOrder())
            // Same exception type as the former unchecked Optional.get(), but with a message
            // that identifies the offending term instead of a bare "No value present".
            .orElseThrow(() -> new NoSuchElementException("Cannot determine best matching synonym: ontology term has no synonyms: " + ontologyTerm));
}
Also used : NGramDistanceAlgorithm(org.molgenis.semanticsearch.string.NGramDistanceAlgorithm) java.util(java.util) StringDistance(org.apache.lucene.search.spell.StringDistance) Operator(org.molgenis.data.QueryRule.Operator) LoggerFactory(org.slf4j.LoggerFactory) SemanticSearchService(org.molgenis.semanticsearch.service.SemanticSearchService) QueryImpl(org.molgenis.data.support.QueryImpl) StringUtils(org.apache.commons.lang3.StringUtils) Attribute(org.molgenis.data.meta.model.Attribute) MetaDataService(org.molgenis.data.meta.MetaDataService) Lists(com.google.common.collect.Lists) FluentIterable(com.google.common.collect.FluentIterable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OntologyTerm(org.molgenis.ontology.core.model.OntologyTerm) Objects.requireNonNull(java.util.Objects.requireNonNull) AttributeMetadata(org.molgenis.data.meta.model.AttributeMetadata) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) OntologyService(org.molgenis.ontology.core.service.OntologyService) Splitter(com.google.common.base.Splitter) ATTRIBUTE_META_DATA(org.molgenis.data.meta.model.AttributeMetadata.ATTRIBUTE_META_DATA) Hit(org.molgenis.semanticsearch.semantic.Hit) Stemmer(org.molgenis.semanticsearch.string.Stemmer) Explanation(org.apache.lucene.search.Explanation) Logger(org.slf4j.Logger) ElasticSearchExplainService(org.molgenis.semanticsearch.explain.service.ElasticSearchExplainService) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) EntityType(org.molgenis.data.meta.model.EntityType) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Ontology(org.molgenis.ontology.core.model.Ontology) Stream(java.util.stream.Stream) Ordering(com.google.common.collect.Ordering) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) DataService(org.molgenis.data.DataService) Query(org.molgenis.data.Query) QueryRule(org.molgenis.data.QueryRule) Joiner(com.google.common.base.Joiner) 
Entity(org.molgenis.data.Entity) Hit(org.molgenis.semanticsearch.semantic.Hit) Stemmer(org.molgenis.semanticsearch.string.Stemmer)

Example 3 with Hit

use of org.molgenis.semanticsearch.semantic.Hit in project molgenis by molgenis.

the class SemanticSearchServiceImpl method findTags.

/**
 * Finds the best tag for a single attribute among the given ontologies.
 *
 * <p>The attribute's description (or its label when the description is {@code null}) is split
 * into search terms, candidate ontology terms are fetched from {@code ontologyService}, filtered
 * with {@code filterOntologyTerm}, scored by their best matching synonym and sorted in reverse
 * natural order. The hits are then walked in that order: the first hit is taken as the current
 * result, and each following hit is combined with the current result via
 * {@code OntologyTerm.and}; the combination replaces the current result only when it compares
 * greater than it.
 *
 * @param attribute   the attribute to tag
 * @param ontologyIds ontology identifiers passed to {@code ontologyService.findOntologyTerms}
 * @return the resulting hit when its score is at least {@code CUTOFF}, otherwise {@code null}
 */
@Override
public Hit<OntologyTerm> findTags(Attribute attribute, List<String> ontologyIds) {
    // Prefer the description; fall back to the label when no description is set.
    String description = attribute.getDescription() != null ? attribute.getDescription() : attribute.getLabel();
    Set<String> searchTerms = splitIntoTerms(description);
    Stemmer stemmer = new Stemmer();

    if (LOG.isDebugEnabled()) {
        LOG.debug("findOntologyTerms({},{},{})", ontologyIds, searchTerms, MAX_NUM_TAGS);
    }
    List<OntologyTerm> candidates = ontologyService.findOntologyTerms(ontologyIds, searchTerms, MAX_NUM_TAGS);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Candidates: {}", candidates);
    }

    // Keep the candidates accepted by the filter, score each by its best synonym, best first.
    List<Hit<OntologyTerm>> hits = candidates.stream()
            .filter(candidate -> filterOntologyTerm(splitIntoTerms(Stemmer.stemAndJoin(searchTerms)), candidate, stemmer))
            .map(candidate -> Hit.create(candidate, bestMatchingSynonym(candidate, searchTerms).getScore()))
            .sorted(Ordering.natural().reverse())
            .collect(Collectors.toList());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Hits: {}", hits);
    }

    Hit<OntologyTerm> best = null;
    String bestSynonyms = null;
    for (Hit<OntologyTerm> currentHit : hits) {
        String currentSynonym = bestMatchingSynonym(currentHit.getResult(), searchTerms).getResult();
        if (best == null) {
            // The first hit seeds the result.
            best = currentHit;
            bestSynonyms = currentSynonym;
        } else {
            // Score the combination of the current result with this hit, using the union of
            // the terms of both synonym strings.
            String mergedSynonyms = termJoiner.join(Sets.union(splitIntoTerms(bestSynonyms), splitIntoTerms(currentSynonym)));
            Hit<OntologyTerm> combinedHit = Hit.create(OntologyTerm.and(best.getResult(), currentHit.getResult()), distanceFrom(mergedSynonyms, searchTerms, stemmer));
            if (combinedHit.compareTo(best) > 0) {
                best = combinedHit;
                bestSynonyms = bestSynonyms + " " + currentSynonym;
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("result: {}", best);
        }
    }

    boolean accepted = best != null && best.getScore() >= CUTOFF;
    if (!accepted) {
        return null;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Tag {} with {}", attribute, best);
    }
    return best;
}
Also used : NGramDistanceAlgorithm(org.molgenis.semanticsearch.string.NGramDistanceAlgorithm) java.util(java.util) StringDistance(org.apache.lucene.search.spell.StringDistance) Operator(org.molgenis.data.QueryRule.Operator) LoggerFactory(org.slf4j.LoggerFactory) SemanticSearchService(org.molgenis.semanticsearch.service.SemanticSearchService) QueryImpl(org.molgenis.data.support.QueryImpl) StringUtils(org.apache.commons.lang3.StringUtils) Attribute(org.molgenis.data.meta.model.Attribute) MetaDataService(org.molgenis.data.meta.MetaDataService) Lists(com.google.common.collect.Lists) FluentIterable(com.google.common.collect.FluentIterable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OntologyTerm(org.molgenis.ontology.core.model.OntologyTerm) Objects.requireNonNull(java.util.Objects.requireNonNull) AttributeMetadata(org.molgenis.data.meta.model.AttributeMetadata) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) OntologyService(org.molgenis.ontology.core.service.OntologyService) Splitter(com.google.common.base.Splitter) ATTRIBUTE_META_DATA(org.molgenis.data.meta.model.AttributeMetadata.ATTRIBUTE_META_DATA) Hit(org.molgenis.semanticsearch.semantic.Hit) Stemmer(org.molgenis.semanticsearch.string.Stemmer) Explanation(org.apache.lucene.search.Explanation) Logger(org.slf4j.Logger) ElasticSearchExplainService(org.molgenis.semanticsearch.explain.service.ElasticSearchExplainService) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) EntityType(org.molgenis.data.meta.model.EntityType) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Ontology(org.molgenis.ontology.core.model.Ontology) Stream(java.util.stream.Stream) Ordering(com.google.common.collect.Ordering) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) DataService(org.molgenis.data.DataService) Query(org.molgenis.data.Query) QueryRule(org.molgenis.data.QueryRule) Joiner(com.google.common.base.Joiner) 
Entity(org.molgenis.data.Entity) Hit(org.molgenis.semanticsearch.semantic.Hit) Stemmer(org.molgenis.semanticsearch.string.Stemmer) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) OntologyTerm(org.molgenis.ontology.core.model.OntologyTerm)

Aggregations

Attribute (org.molgenis.data.meta.model.Attribute)3 EntityType (org.molgenis.data.meta.model.EntityType)3 OntologyTerm (org.molgenis.ontology.core.model.OntologyTerm)3 ExplainedAttribute (org.molgenis.semanticsearch.explain.bean.ExplainedAttribute)3 Hit (org.molgenis.semanticsearch.semantic.Hit)3 Joiner (com.google.common.base.Joiner)2 Splitter (com.google.common.base.Splitter)2 FluentIterable (com.google.common.collect.FluentIterable)2 Lists (com.google.common.collect.Lists)2 Ordering (com.google.common.collect.Ordering)2 Sets (com.google.common.collect.Sets)2 java.util (java.util)2 Objects.requireNonNull (java.util.Objects.requireNonNull)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Collectors (java.util.stream.Collectors)2 Stream (java.util.stream.Stream)2 StringUtils (org.apache.commons.lang3.StringUtils)2 QueryParser (org.apache.lucene.queryparser.classic.QueryParser)2 Explanation (org.apache.lucene.search.Explanation)2 StringDistance (org.apache.lucene.search.spell.StringDistance)2