use of org.molgenis.semanticsearch.string.Stemmer in project molgenis by molgenis.
the class SemanticSearchServiceImplTest method testDistanceFrom.
/**
 * Checks the distance that {@code distanceFrom} reports between a synonym and a fixed set of
 * search terms, for a single-word and for a two-word synonym.
 */
@Test
public void testDistanceFrom() {
    final Stemmer stemmer = new Stemmer();
    // Both assertions score against the same search terms.
    final ImmutableSet<String> searchTerms = ImmutableSet.of("history", "hypertens");
    final double tolerance = 0.0001;
    final String failureMessage = "String distance should be equal";

    double singleWordDistance = semanticSearchService.distanceFrom("Hypertension", searchTerms, stemmer);
    Assert.assertEquals(singleWordDistance, .6923, tolerance, failureMessage);

    double twoWordDistance = semanticSearchService.distanceFrom("Maternal Hypertension", searchTerms, stemmer);
    Assert.assertEquals(twoWordDistance, .5454, tolerance, failureMessage);
}
use of org.molgenis.semanticsearch.string.Stemmer in project molgenis by molgenis.
the class SemanticSearchServiceImpl method bestMatchingSynonym.
/**
 * Computes the best matching synonym which is closest to a set of search terms.<br/>
 * Every synonym of the {@link OntologyTerm} is scored against the search terms with
 * {@code distanceFrom}, sharing a single {@link Stemmer}, and the {@link Hit} that ranks highest
 * in the natural ordering of {@link Hit} is returned. For the score, 0 means disjunct and 1 means
 * identical.
 *
 * @param ontologyTerm the {@link OntologyTerm} whose synonyms are scored
 * @param searchTerms the search terms
 * @return a {@link Hit} holding the best matching synonym together with its score
 * @throws java.util.NoSuchElementException if the ontology term has no synonyms to score
 */
public Hit<String> bestMatchingSynonym(OntologyTerm ontologyTerm, Set<String> searchTerms) {
    Stemmer stemmer = new Stemmer();
    return ontologyTerm.getSynonyms().stream()
        .map(synonym -> Hit.create(synonym, distanceFrom(synonym, searchTerms, stemmer)))
        .max(Comparator.naturalOrder())
        // Same exception type an unchecked Optional.get() would raise, but with a message that
        // says which term was at fault.
        .orElseThrow(() -> new java.util.NoSuchElementException(
            "Ontology term has no synonyms to match against: " + ontologyTerm));
}
use of org.molgenis.semanticsearch.string.Stemmer in project molgenis by molgenis.
the class SemanticSearchServiceImpl method findTags.
/**
 * Finds the best ontology term tag (possibly a combination of terms) for an attribute.
 *
 * <p>The attribute's description (or its label when the description is {@code null}) is split
 * into search terms. Up to {@code MAX_NUM_TAGS} candidate terms are requested from the ontology
 * service, filtered with {@code filterOntologyTerm}, scored by their best matching synonym and
 * sorted from highest to lowest. Starting from the top hit, each following hit is combined with
 * the current result; the combination replaces the current result only when it compares greater.
 *
 * @param attribute the attribute to tag
 * @param ontologyIds the IDs of the ontologies to search in
 * @return the best hit if its score is at least {@code CUTOFF}; {@code null} when there are no
 *         hits or the best score is below {@code CUTOFF}
 */
@Override
public Hit<OntologyTerm> findTags(Attribute attribute, List<String> ontologyIds) {
    String description = attribute.getDescription() == null ? attribute.getLabel() : attribute.getDescription();
    Set<String> searchTerms = splitIntoTerms(description);
    Stemmer stemmer = new Stemmer();
    if (LOG.isDebugEnabled()) {
        LOG.debug("findOntologyTerms({},{},{})", ontologyIds, searchTerms, MAX_NUM_TAGS);
    }

    List<OntologyTerm> candidates = ontologyService.findOntologyTerms(ontologyIds, searchTerms, MAX_NUM_TAGS);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Candidates: {}", candidates);
    }

    // The stemmed search terms are the same for every candidate, so build them once instead of
    // once per candidate inside the filter.
    // NOTE(review): assumes filterOntologyTerm only reads this set - confirm it does not mutate it.
    Set<String> stemmedSearchTerms = splitIntoTerms(Stemmer.stemAndJoin(searchTerms));
    List<Hit<OntologyTerm>> hits = candidates.stream()
        .filter(ontologyTerm -> filterOntologyTerm(stemmedSearchTerms, ontologyTerm, stemmer))
        .map(ontologyTerm -> Hit.create(ontologyTerm, bestMatchingSynonym(ontologyTerm, searchTerms).getScore()))
        .sorted(Ordering.natural().reverse())
        .collect(Collectors.toList());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Hits: {}", hits);
    }

    Hit<OntologyTerm> result = null;
    // Synonyms of all terms that make up the current result, separated by spaces.
    String bestMatchingSynonym = null;
    for (Hit<OntologyTerm> hit : hits) {
        String bestMatchingSynonymForHit = bestMatchingSynonym(hit.getResult(), searchTerms).getResult();
        if (result == null) {
            // The first (highest ranked) hit is the starting point.
            result = hit;
            bestMatchingSynonym = bestMatchingSynonymForHit;
        } else {
            // Score the union of the terms collected so far and the terms of this hit.
            Set<String> jointTerms = Sets.union(splitIntoTerms(bestMatchingSynonym), splitIntoTerms(bestMatchingSynonymForHit));
            String joinedSynonyms = termJoiner.join(jointTerms);
            Hit<OntologyTerm> joinedHit = Hit.create(OntologyTerm.and(result.getResult(), hit.getResult()), distanceFrom(joinedSynonyms, searchTerms, stemmer));
            // Keep the combination only when it ranks above the current result.
            if (joinedHit.compareTo(result) > 0) {
                result = joinedHit;
                bestMatchingSynonym = bestMatchingSynonym + " " + bestMatchingSynonymForHit;
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("result: {}", result);
        }
    }

    if (result != null && result.getScore() >= CUTOFF) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Tag {} with {}", attribute, result);
        }
        return result;
    }
    return null;
}
Aggregations