Search in sources :

Example 71 with Explanation

use of org.apache.lucene.search.Explanation in project lucene-solr by apache.

the class CustomScoreProvider method customExplain.

/**
   * Builds the explanation for a custom score computed from a sub-query and
   * any number of value sources.
   * Subclasses that override {@link #customScore(int, float, float[])} should
   * override this method as well, so that the explanation matches the score
   * they actually compute.
   * <p>
   * With no value sources the sub-query explanation is returned unchanged; with
   * exactly one, the call is forwarded to the single-value-source overload;
   * otherwise the result is the product of the sub-query value and every
   * value source value.
   *
   * @param doc doc being explained.
   * @param subQueryExpl explanation for the sub-query part.
   * @param valSrcExpls explanation for the value source part.
   * @return an explanation for the custom score
   */
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation[] valSrcExpls) throws IOException {
    switch (valSrcExpls.length) {
        case 0:
            // Nothing to combine with: the sub-query explanation is the answer.
            return subQueryExpl;
        case 1:
            return customExplain(doc, subQueryExpl, valSrcExpls[0]);
        default:
            break;
    }
    // Sub-query explanation goes first, followed by each value source in order.
    List<Explanation> details = new ArrayList<>();
    details.add(subQueryExpl);
    float valSrcProduct = 1;
    for (Explanation valSrc : valSrcExpls) {
        valSrcProduct *= valSrc.getValue();
        details.add(valSrc);
    }
    return Explanation.match(valSrcProduct * subQueryExpl.getValue(), "custom score: product of:", details);
}
Also used : Explanation(org.apache.lucene.search.Explanation) ArrayList(java.util.ArrayList)

Example 72 with Explanation

use of org.apache.lucene.search.Explanation in project molgenis by molgenis.

the class SearchServiceIT method testSemanticSearch.

/**
 * Indexes six entities with free-text labels, runs a DIS_MAX fuzzy query built
 * from several query strings, and checks both the ranking of the hits and the
 * query strings recovered from each hit's explanation.
 */
@Test
public void testSemanticSearch() {
    String[] labels = {
        "High chance of pulmonary disease",
        "And now for something completely different...",
        "Are you taking hypertensive medication?",
        "Have you ever had high blood pressure? (Repeat) (1)",
        "Do you suffer from Ocular hypertension?",
        "Do you have a vascular disorder?"
    };
    List<Entity> attributes = createDynamic(6).collect(toList());
    for (int i = 0; i < labels.length; i++) {
        attributes.get(i).set(ATTR_STRING, labels[i]);
    }

    // Capture both referenced entities before any of the references are overwritten.
    Entity ontology1 = attributes.get(0).getEntity(ATTR_CATEGORICAL);
    Entity ontology2 = attributes.get(1).getEntity(ATTR_CATEGORICAL);
    attributes.forEach(attribute -> attribute.set(ATTR_CATEGORICAL, ontology1));
    attributes.get(5).set(ATTR_CATEGORICAL, ontology2);

    searchService.index(entityTypeDynamic, attributes.stream());
    searchService.refreshIndex();

    // Each query string is matched fuzzily against both the string and the script attribute.
    List<String> queryTerms = asList("hypertension", "disorder vascular hypertensive", "increased pressure blood", "high pressure blood", "ocular^0.5 hypertension^0.5", "hypertension^0.25 idiopathic^0.25 pulmonary^0.25");
    List<QueryRule> fuzzyRules = queryTerms.stream()
        .flatMap(term -> Stream.of(new QueryRule(ATTR_STRING, FUZZY_MATCH, term), new QueryRule(ATTR_SCRIPT, FUZZY_MATCH, term)))
        .collect(toList());
    QueryRule finalDisMaxQuery = new QueryRule(fuzzyRules);
    finalDisMaxQuery.setOperator(DIS_MAX);

    List<String> attributeIds = asList("0", "1", "2", "3", "4", "5");
    Query<Entity> query = new QueryImpl<>(asList(new QueryRule(ATTR_ID, IN, attributeIds), new QueryRule(AND), finalDisMaxQuery));

    List<Object> matchingAttributeIDs = searchService.search(entityTypeDynamic, query).collect(toList());
    assertEquals(matchingAttributeIDs.get(0), "3");
    assertEquals(matchingAttributeIDs.get(1), "5");
    assertFalse(matchingAttributeIDs.contains("1"));

    List<Explanation> explanations = attributeIds.stream()
        .map(id -> explainService.explain(query, entityTypeDynamic, id))
        .collect(toList());
    List<Float> scores = explanations.stream().map(Explanation::getValue).collect(toList());
    // FIXME these scores vary between runs
    // assertEquals(scores, asList(0.3463153, 0, 0.7889965, 1.7814579, 0.76421005, 1.0707202));

    // Every stemmed query string maps back to the same tag.
    Map<String, String> expandedQueryMap = new HashMap<>();
    asList("hypertens", "disord vascular hypertens", "increased pressur blood", "high pressur blood", "ocular hypertens", "hypertens idiopathic pulmonary")
        .forEach(stemmedQuery -> expandedQueryMap.put(stemmedQuery, "hypertension"));

    List<Set<ExplainedQueryString>> explanationStrings = explanations.stream()
        .map(explanation -> explainService.findQueriesFromExplanation(expandedQueryMap, explanation))
        .collect(toList());

    List<Set<ExplainedQueryString>> expectedExplanationStrings = asList(
        // High chance of pulmonary disease
        singleton(ExplainedQueryString.create("high", "high pressur blood", "hypertension", 41.66666666666667)),
        // And now for something completely different...
        emptySet(),
        // Are you taking hypertensive medication?
        singleton(ExplainedQueryString.create("hypertens", "hypertens", "hypertension", 100.0)),
        // Have you ever had high blood pressure? (Repeat) (1)
        singleton(ExplainedQueryString.create("high pressur blood", "high pressur blood", "hypertension", 100.0)),
        // Do you suffer from Ocular hypertension?
        singleton(ExplainedQueryString.create("ocular hypertens", "ocular hypertens", "hypertension", 100.0)),
        // Do you have a vascular disorder?
        singleton(ExplainedQueryString.create("disord vascular", "disord vascular hypertens", "hypertension", 78.04878048780488)));
    assertEquals(explanationStrings, expectedExplanationStrings);
}
Also used : DataProvider(org.testng.annotations.DataProvider) Operator(org.molgenis.data.QueryRule.Operator) MolgenisDateFormat.parseLocalDate(org.molgenis.data.util.MolgenisDateFormat.parseLocalDate) Autowired(org.springframework.beans.factory.annotation.Autowired) Test(org.testng.annotations.Test) HashMap(java.util.HashMap) QueryImpl(org.molgenis.data.support.QueryImpl) AfterMethod(org.testng.annotations.AfterMethod) Supplier(java.util.function.Supplier) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) ElasticsearchService(org.molgenis.data.elasticsearch.ElasticsearchService) AbstractTestNGSpringContextTests(org.springframework.test.context.testng.AbstractTestNGSpringContextTests) EntityTestHarness(org.molgenis.data.EntityTestHarness) Explanation(org.apache.lucene.search.Explanation) org.molgenis.data(org.molgenis.data) ElasticSearchExplainService(org.molgenis.semanticsearch.explain.service.ElasticSearchExplainService) MolgenisDateFormat.parseInstant(org.molgenis.data.util.MolgenisDateFormat.parseInstant) BeforeMethod(org.testng.annotations.BeforeMethod) Set(java.util.Set) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) EntityType(org.molgenis.data.meta.model.EntityType) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Stream(java.util.stream.Stream) ContextConfiguration(org.springframework.test.context.ContextConfiguration) UnknownIndexException(org.molgenis.data.index.exception.UnknownIndexException) Collections(java.util.Collections) Set(java.util.Set) HashMap(java.util.HashMap) Explanation(org.apache.lucene.search.Explanation) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) QueryImpl(org.molgenis.data.support.QueryImpl) Test(org.testng.annotations.Test)

Example 73 with Explanation

use of org.apache.lucene.search.Explanation in project molgenis by molgenis.

the class ElasticSearchExplainServiceImplTest method testDiscoverMatchedQueries.

/**
 * Builds a small explanation tree by hand (a "sum of" over a "sum of" node and
 * a "max of" node) and checks which matched words the helper extracts from it.
 */
@Test
public void testDiscoverMatchedQueries() {
    // Leaf term matches.
    Explanation highLeaf = Explanation.match(Float.valueOf("2.0587344"), "weight(label:high in 328) [PerFieldSimilarity], result of:");
    Explanation bloodLeaf = Explanation.match(Float.valueOf("1.5680285"), "weight(label:blood in 328) [PerFieldSimilarity], result of:");
    Explanation medicationLeaf = Explanation.match(Float.valueOf("1.754909"), "weight(label:medication in 328) [PerFieldSimilarity], result of:");

    // Intermediate nodes and the root.
    Explanation highBloodSum = Explanation.match(Float.valueOf("3.6267629"), "sum of:", highLeaf, bloodLeaf);
    Explanation medicationMax = Explanation.match(Float.valueOf("1.754909"), "max of:", medicationLeaf);
    Explanation root = Explanation.match(Float.valueOf("5.381672"), "sum of:", highBloodSum, medicationMax);

    Set<String> matchedWords = explainServiceHelper.findMatchedWords(root);

    assertEquals(matchedWords.size(), 2);
    assertTrue(matchedWords.contains("high blood"));
    assertTrue(matchedWords.contains("medication"));
}
Also used : Explanation(org.apache.lucene.search.Explanation) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) Test(org.testng.annotations.Test)

Example 74 with Explanation

use of org.apache.lucene.search.Explanation in project molgenis by molgenis.

the class SemanticSearchServiceImpl method isSingleMatchHighQuality.

/**
 * Decides whether a match is of high quality, judged by the tags found in its explanations.
 * <p>
 * The match qualifies when at least one query term passes {@code isGoodMatch}, or when
 * every remaining ontology term query passes it. An empty collection never qualifies.
 * <p>
 * Side effect: every element of {@code queryTerms} is removed from
 * {@code ontologyTermQueries} in place.
 *
 * @param queryTerms          the query terms to check
 * @param ontologyTermQueries the ontology term queries to check; modified by this call
 * @param explanations        explained query strings supplying tag names and scores
 * @return {@code true} if either condition above holds
 */
boolean isSingleMatchHighQuality(Collection<String> queryTerms, Collection<String> ontologyTermQueries, Iterable<ExplainedQueryString> explanations) {
    // Lower-cased tag name -> score; a later explanation overwrites an earlier one with the same tag.
    Map<String, Double> scoreByTag = new HashMap<>();
    explanations.forEach(explained -> scoreByTag.put(explained.getTagName().toLowerCase(), explained.getScore()));

    ontologyTermQueries.removeAll(queryTerms);

    boolean anyQueryTermIsGood = !queryTerms.isEmpty()
        && queryTerms.stream().anyMatch(token -> isGoodMatch(scoreByTag, token));
    return anyQueryTermIsGood
        || (!ontologyTermQueries.isEmpty()
            && ontologyTermQueries.stream().allMatch(token -> isGoodMatch(scoreByTag, token)));
}
Also used : NGramDistanceAlgorithm(org.molgenis.semanticsearch.string.NGramDistanceAlgorithm) java.util(java.util) StringDistance(org.apache.lucene.search.spell.StringDistance) Operator(org.molgenis.data.QueryRule.Operator) LoggerFactory(org.slf4j.LoggerFactory) SemanticSearchService(org.molgenis.semanticsearch.service.SemanticSearchService) QueryImpl(org.molgenis.data.support.QueryImpl) StringUtils(org.apache.commons.lang3.StringUtils) Attribute(org.molgenis.data.meta.model.Attribute) MetaDataService(org.molgenis.data.meta.MetaDataService) Lists(com.google.common.collect.Lists) FluentIterable(com.google.common.collect.FluentIterable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) OntologyTerm(org.molgenis.ontology.core.model.OntologyTerm) Objects.requireNonNull(java.util.Objects.requireNonNull) AttributeMetadata(org.molgenis.data.meta.model.AttributeMetadata) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) OntologyService(org.molgenis.ontology.core.service.OntologyService) Splitter(com.google.common.base.Splitter) ATTRIBUTE_META_DATA(org.molgenis.data.meta.model.AttributeMetadata.ATTRIBUTE_META_DATA) Hit(org.molgenis.semanticsearch.semantic.Hit) Stemmer(org.molgenis.semanticsearch.string.Stemmer) Explanation(org.apache.lucene.search.Explanation) Logger(org.slf4j.Logger) ElasticSearchExplainService(org.molgenis.semanticsearch.explain.service.ElasticSearchExplainService) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) EntityType(org.molgenis.data.meta.model.EntityType) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) Ontology(org.molgenis.ontology.core.model.Ontology) Stream(java.util.stream.Stream) Ordering(com.google.common.collect.Ordering) QueryParser(org.apache.lucene.queryparser.classic.QueryParser) DataService(org.molgenis.data.DataService) Query(org.molgenis.data.Query) QueryRule(org.molgenis.data.QueryRule) Joiner(com.google.common.base.Joiner) 
Entity(org.molgenis.data.Entity) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString)

Example 75 with Explanation

use of org.apache.lucene.search.Explanation in project molgenis by molgenis.

the class ElasticSearchExplainServiceImplTest method testReverseSearchQueryStrings.

/**
 * Feeds a hand-built explanation tree and a query-string-to-tag map into
 * {@code findQueriesFromExplanation} and checks the two explained query
 * strings that come back, in iteration order.
 */
@Test
public void testReverseSearchQueryStrings() {
    // Leaf term matches.
    Explanation highLeaf = Explanation.match(Float.valueOf("2.0587344"), "weight(label:high in 328) [PerFieldSimilarity], result of:");
    Explanation bloodLeaf = Explanation.match(Float.valueOf("1.5680285"), "weight(label:blood in 328) [PerFieldSimilarity], result of:");
    Explanation medicationLeaf = Explanation.match(Float.valueOf("1.754909"), "weight(label:medication in 328) [PerFieldSimilarity], result of:");

    // Intermediate nodes and the root.
    Explanation highBloodSum = Explanation.match(Float.valueOf("3.6267629"), "sum of:", highLeaf, bloodLeaf);
    Explanation medicationMax = Explanation.match(Float.valueOf("1.754909"), "max of:", medicationLeaf);
    Explanation root = Explanation.match(Float.valueOf("5.381672"), "sum of:", highBloodSum, medicationMax);

    // Expanded query string -> tag name.
    Map<String, String> expandedQueryMap = new HashMap<>();
    expandedQueryMap.put("hypertension", "hypertension");
    expandedQueryMap.put("hypertensive disorder", "hypertension");
    expandedQueryMap.put("high blood pressure", "hypertension");
    expandedQueryMap.put("medication", "medication");
    expandedQueryMap.put("drug", "medication");
    expandedQueryMap.put("pill", "medication");

    Set<ExplainedQueryString> explainedQueries = elasticSearchExplainService.findQueriesFromExplanation(expandedQueryMap, root);
    Iterator<ExplainedQueryString> it = explainedQueries.iterator();

    ExplainedQueryString highBlood = it.next();
    assertEquals(highBlood.getMatchedWords(), "high blood");
    assertEquals(highBlood.getQueryString(), "high blood pressure");
    assertEquals(highBlood.getTagName(), "hypertension");
    assertEquals((int) highBlood.getScore(), 73);

    ExplainedQueryString medication = it.next();
    assertEquals(medication.getMatchedWords(), "medication");
    assertEquals(medication.getQueryString(), "medication");
    assertEquals(medication.getTagName(), "medication");
    assertEquals((int) medication.getScore(), 100);
}
Also used : HashMap(java.util.HashMap) Explanation(org.apache.lucene.search.Explanation) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) Test(org.testng.annotations.Test)

Aggregations

Explanation (org.apache.lucene.search.Explanation)77 TermQuery (org.apache.lucene.search.TermQuery)16 ArrayList (java.util.ArrayList)13 Query (org.apache.lucene.search.Query)13 IndexSearcher (org.apache.lucene.search.IndexSearcher)12 Term (org.apache.lucene.index.Term)11 IOException (java.io.IOException)9 BooleanQuery (org.apache.lucene.search.BooleanQuery)8 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)8 Directory (org.apache.lucene.store.Directory)8 Document (org.apache.lucene.document.Document)7 TopDocs (org.apache.lucene.search.TopDocs)7 IndexReader (org.apache.lucene.index.IndexReader)6 IndexWriter (org.apache.lucene.index.IndexWriter)6 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)6 Collectors (java.util.stream.Collectors)5 Collections (java.util.Collections)4 DirectoryReader (org.apache.lucene.index.DirectoryReader)4 FunctionValues (org.apache.lucene.queries.function.FunctionValues)4 SearchResponse (org.elasticsearch.action.search.SearchResponse)4