Search in sources :

Example 1 with ExplainedQueryString

use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.

the class AlgorithmTemplateServiceImplTest method find.

/**
 * Checks that {@code algorithmTemplateServiceImpl.find} yields exactly one template for two explained source
 * attributes: the template built from {@code script0} whose model maps {@code param0Name} and
 * {@code param1Name} to the names of the two source attributes.
 */
@Test
public void find() {
    // Source entity type carrying the two attributes that the explanations refer to.
    EntityType sourceType = entityTypeFactory.create("source");
    Attribute firstAttr = attrMetaFactory.create().setName("sourceAttr0");
    Attribute secondAttr = attrMetaFactory.create().setName("sourceAttr1");
    sourceType.addAttribute(firstAttr);
    sourceType.addAttribute(secondAttr);

    // One explained query string per attribute; the third argument ties the attribute to a template parameter.
    ExplainedQueryString firstExplain = ExplainedQueryString.create("a", "b", param0Name, 1.0);
    ExplainedQueryString secondExplain = ExplainedQueryString.create("a", "b", param1Name, 0.5);

    Map<Attribute, ExplainedAttribute> explainedBySourceAttr = Maps.newHashMap();
    explainedBySourceAttr.put(firstAttr, ExplainedAttribute.create(firstAttr, singletonList(firstExplain), false));
    explainedBySourceAttr.put(secondAttr, ExplainedAttribute.create(secondAttr, singletonList(secondExplain), false));

    Stream<AlgorithmTemplate> actualTemplates = algorithmTemplateServiceImpl.find(explainedBySourceAttr);

    // Expected model: template parameter name -> source attribute name.
    Map<String, String> expectedModel = Maps.newHashMap();
    expectedModel.put(param0Name, "sourceAttr0");
    expectedModel.put(param1Name, "sourceAttr1");
    AlgorithmTemplate expectedTemplate = new AlgorithmTemplate(script0, expectedModel);

    assertEquals(
            actualTemplates.collect(Collectors.toList()),
            Stream.of(expectedTemplate).collect(Collectors.toList()));
}
Also used : EntityType(org.molgenis.data.meta.model.EntityType) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) Attribute(org.molgenis.data.meta.model.Attribute) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) Test(org.testng.annotations.Test) AbstractMolgenisSpringTest(org.molgenis.data.AbstractMolgenisSpringTest)

Example 2 with ExplainedQueryString

use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.

the class SemanticSearchServiceImpl method convertAttributeToExplainedAttribute.

/**
 * Explains a single matched attribute via the explain-API and extracts the query strings that matched it.
 *
 * <p>The explanation is requested for {@code query} against the entity type registered under
 * {@code ATTRIBUTE_META_DATA}, using the attribute's identifier as the document id.
 *
 * @param attribute               the attribute found
 * @param collectExpandedQueryMap map passed unchanged, together with the explanation, to
 *                                {@code findQueriesFromExplanation}; presumably maps expanded query strings to
 *                                the term they were derived from (confirm against the helper that builds it)
 * @param query                   the query used to find the attribute
 * @return set of explained query strings derived from the explanation
 */
public Set<ExplainedQueryString> convertAttributeToExplainedAttribute(Attribute attribute, Map<String, String> collectExpandedQueryMap, Query<Entity> query) {
    Explanation attributeExplanation = elasticSearchExplainService.explain(
            query,
            dataService.getEntityType(ATTRIBUTE_META_DATA),
            attribute.getIdentifier());
    return elasticSearchExplainService.findQueriesFromExplanation(collectExpandedQueryMap, attributeExplanation);
}
Also used : EntityType(org.molgenis.data.meta.model.EntityType) Explanation(org.apache.lucene.search.Explanation) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString)

Example 3 with ExplainedQueryString

use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.

the class SemanticSearchServiceImpl method findAttributes.

/**
 * Finds the attributes of {@code sourceEntityType} that match the given query terms and ontology terms, and
 * pairs each one with an {@link ExplainedAttribute}.
 *
 * <p>Only the first {@code MAX_NUMBER_EXPLAINED_ATTRIBUTES} attributes returned by the data service are run
 * through the explain-API; every later attribute gets an {@code ExplainedAttribute} without explanations.
 *
 * @param sourceEntityType entity type whose attributes are searched
 * @param queryTerms       query terms used to build the dis-max rule and the expanded query map
 * @param ontologyTerms    ontology terms used to build the dis-max rule and the expanded query map
 * @return matched attributes with their explanations, in the order the data service returned them
 */
@Override
public Map<Attribute, ExplainedAttribute> findAttributes(EntityType sourceEntityType, Set<String> queryTerms, Collection<OntologyTerm> ontologyTerms) {
    Iterable<String> attributeIdentifiers = semanticSearchServiceHelper.getAttributeIdentifiers(sourceEntityType);
    QueryRule disMaxQueryRule = semanticSearchServiceHelper.createDisMaxQueryRuleForAttribute(queryTerms, ontologyTerms);

    // Always restrict to the attributes of the source entity type; AND the dis-max rule only if it has content.
    List<QueryRule> finalQueryRules = Lists.newArrayList(new QueryRule(AttributeMetadata.ID, Operator.IN, attributeIdentifiers));
    if (!disMaxQueryRule.getNestedRules().isEmpty()) {
        finalQueryRules.add(new QueryRule(Operator.AND));
        finalQueryRules.add(disMaxQueryRule);
    }

    Stream<Entity> attributeEntities = dataService.findAll(ATTRIBUTE_META_DATA, new QueryImpl<>(finalQueryRules));
    Map<String, String> expandedQueryMap = semanticSearchServiceHelper.collectExpandedQueryMap(queryTerms, ontologyTerms);

    // Because the explain-API can be computationally expensive, only the first
    // MAX_NUMBER_EXPLAINED_ATTRIBUTES attributes are explained.
    Map<Attribute, ExplainedAttribute> result = new LinkedHashMap<>();
    AtomicInteger seen = new AtomicInteger(0);
    attributeEntities.forEach(attributeEntity -> {
        Attribute attribute = sourceEntityType.getAttribute(attributeEntity.getString(AttributeMetadata.NAME));
        ExplainedAttribute explained;
        if (seen.getAndIncrement() < MAX_NUMBER_EXPLAINED_ATTRIBUTES) {
            Set<ExplainedQueryString> explanations = convertAttributeToExplainedAttribute(attribute, expandedQueryMap, new QueryImpl<>(finalQueryRules));
            boolean singleMatchHighQuality = isSingleMatchHighQuality(queryTerms, Sets.newHashSet(expandedQueryMap.values()), explanations);
            explained = ExplainedAttribute.create(attribute, explanations, singleMatchHighQuality);
        } else {
            explained = ExplainedAttribute.create(attribute);
        }
        result.put(attribute, explained);
    });
    return result;
}
Also used : Entity(org.molgenis.data.Entity) Attribute(org.molgenis.data.meta.model.Attribute) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) ExplainedAttribute(org.molgenis.semanticsearch.explain.bean.ExplainedAttribute) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) QueryRule(org.molgenis.data.QueryRule) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString)

Example 4 with ExplainedQueryString

use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.

the class SearchServiceIT method testSemanticSearch.

/**
 * Indexes six dynamic entities with free-text labels, runs a dis-max fuzzy query built from several
 * hypertension-related query terms, and checks both the ranking of the matches and the explained query
 * strings derived from the explanation of each entity.
 */
@Test
public void testSemanticSearch() {
    String[] labels = {
            "High chance of pulmonary disease",
            "And now for something completely different...",
            "Are you taking hypertensive medication?",
            "Have you ever had high blood pressure? (Repeat) (1)",
            "Do you suffer from Ocular hypertension?",
            "Do you have a vascular disorder?"
    };
    List<Entity> attributes = createDynamic(6).collect(toList());
    for (int i = 0; i < labels.length; i++) {
        attributes.get(i).set(ATTR_STRING, labels[i]);
    }

    // All entities share the first ontology except the last one, which gets the second.
    Entity ontology1 = attributes.get(0).getEntity(ATTR_CATEGORICAL);
    Entity ontology2 = attributes.get(1).getEntity(ATTR_CATEGORICAL);
    attributes.forEach(term -> term.set(ATTR_CATEGORICAL, ontology1));
    attributes.get(5).set(ATTR_CATEGORICAL, ontology2);

    searchService.index(entityTypeDynamic, attributes.stream());
    searchService.refreshIndex();

    // Each query term is fuzzy-matched against both the string and the script attribute.
    List<String> queryTerms = asList(
            "hypertension",
            "disorder vascular hypertensive",
            "increased pressure blood",
            "high pressure blood",
            "ocular^0.5 hypertension^0.5",
            "hypertension^0.25 idiopathic^0.25 pulmonary^0.25");
    List<QueryRule> fuzzyRules = queryTerms.stream()
            .flatMap(term -> Stream.of(new QueryRule(ATTR_STRING, FUZZY_MATCH, term), new QueryRule(ATTR_SCRIPT, FUZZY_MATCH, term)))
            .collect(toList());
    QueryRule finalDisMaxQuery = new QueryRule(fuzzyRules);
    finalDisMaxQuery.setOperator(DIS_MAX);

    List<String> attributeIds = asList("0", "1", "2", "3", "4", "5");
    Query<Entity> query = new QueryImpl<>(asList(new QueryRule(ATTR_ID, IN, attributeIds), new QueryRule(AND), finalDisMaxQuery));

    List<Object> matchingAttributeIDs = searchService.search(entityTypeDynamic, query).collect(toList());
    assertEquals(matchingAttributeIDs.get(0), "3");
    assertEquals(matchingAttributeIDs.get(1), "5");
    assertFalse(matchingAttributeIDs.contains("1"));

    List<Explanation> explanations = attributeIds.stream()
            .map(id -> explainService.explain(query, entityTypeDynamic, id))
            .collect(toList());
    List<Float> scores = explanations.stream().map(Explanation::getValue).collect(toList());
    // FIXME these scores vary between runs
    // assertEquals(scores, asList(0.3463153, 0, 0.7889965, 1.7814579, 0.76421005, 1.0707202));

    // Every stemmed query string maps back to the same term.
    Map<String, String> expandedQueryMap = new HashMap<>();
    asList(
            "hypertens",
            "disord vascular hypertens",
            "increased pressur blood",
            "high pressur blood",
            "ocular hypertens",
            "hypertens idiopathic pulmonary")
            .forEach(term -> expandedQueryMap.put(term, "hypertension"));

    List<Set<ExplainedQueryString>> explanationStrings = explanations.stream()
            .map(explanation -> explainService.findQueriesFromExplanation(expandedQueryMap, explanation))
            .collect(toList());

    List<Set<ExplainedQueryString>> expectedExplanationStrings = asList(
            // High chance of pulmonary disease
            singleton(ExplainedQueryString.create("high", "high pressur blood", "hypertension", 41.66666666666667)),
            // And now for something completely different...
            emptySet(),
            // Are you taking hypertensive medication?
            singleton(ExplainedQueryString.create("hypertens", "hypertens", "hypertension", 100.0)),
            // Have you ever had high blood pressure? (Repeat) (1)
            singleton(ExplainedQueryString.create("high pressur blood", "high pressur blood", "hypertension", 100.0)),
            // Do you suffer from Ocular hypertension?
            singleton(ExplainedQueryString.create("ocular hypertens", "ocular hypertens", "hypertension", 100.0)),
            // Do you have a vascular disorder?
            singleton(ExplainedQueryString.create("disord vascular", "disord vascular hypertens", "hypertension", 78.04878048780488)));
    assertEquals(explanationStrings, expectedExplanationStrings);
}
Also used : DataProvider(org.testng.annotations.DataProvider) Operator(org.molgenis.data.QueryRule.Operator) MolgenisDateFormat.parseLocalDate(org.molgenis.data.util.MolgenisDateFormat.parseLocalDate) Autowired(org.springframework.beans.factory.annotation.Autowired) Test(org.testng.annotations.Test) HashMap(java.util.HashMap) QueryImpl(org.molgenis.data.support.QueryImpl) AfterMethod(org.testng.annotations.AfterMethod) Supplier(java.util.function.Supplier) Assert(org.testng.Assert) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) ElasticsearchService(org.molgenis.data.elasticsearch.ElasticsearchService) AbstractTestNGSpringContextTests(org.springframework.test.context.testng.AbstractTestNGSpringContextTests) EntityTestHarness(org.molgenis.data.EntityTestHarness) Explanation(org.apache.lucene.search.Explanation) org.molgenis.data(org.molgenis.data) ElasticSearchExplainService(org.molgenis.semanticsearch.explain.service.ElasticSearchExplainService) MolgenisDateFormat.parseInstant(org.molgenis.data.util.MolgenisDateFormat.parseInstant) BeforeMethod(org.testng.annotations.BeforeMethod) Set(java.util.Set) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) EntityType(org.molgenis.data.meta.model.EntityType) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) Stream(java.util.stream.Stream) ContextConfiguration(org.springframework.test.context.ContextConfiguration) UnknownIndexException(org.molgenis.data.index.exception.UnknownIndexException) Collections(java.util.Collections) Set(java.util.Set) HashMap(java.util.HashMap) Explanation(org.apache.lucene.search.Explanation) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) QueryImpl(org.molgenis.data.support.QueryImpl) Test(org.testng.annotations.Test)

Example 5 with ExplainedQueryString

use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.

the class ElasticSearchExplainServiceImpl method findQueriesFromExplanation.

/**
 * Turns an explanation into the set of query strings that account for it.
 *
 * <p>For every matched word reported by {@code explainServiceHelper.findMatchedWords}, the candidate queries
 * and their scores are looked up with {@code findMatchQueries}; the candidate with the highest score is kept.
 * Matched words without any candidate are skipped.
 *
 * @param originalQueryInMap map whose keys are candidate query strings; the value stored under the winning key
 *                           is passed on as the third argument of {@code ExplainedQueryString.create}
 * @param explanation        the explanation to extract matched words from
 * @return explained query strings in the iteration order of the matched words
 */
public Set<ExplainedQueryString> findQueriesFromExplanation(Map<String, String> originalQueryInMap, Explanation explanation) {
    Set<ExplainedQueryString> explained = new LinkedHashSet<>();
    for (String matchedWord : explainServiceHelper.findMatchedWords(explanation)) {
        Map<String, Double> scoredCandidates = explainServiceHelper.findMatchQueries(matchedWord, originalQueryInMap);
        // max() is empty when there are no candidates, so nothing is added for that word.
        scoredCandidates.entrySet().stream()
                .max(Comparator.comparingDouble(Entry::getValue))
                .ifPresent(best -> explained.add(ExplainedQueryString.create(matchedWord, best.getKey(), originalQueryInMap.get(best.getKey()), best.getValue())));
    }
    return explained;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Entry(java.util.Map.Entry) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString) ExplainedQueryString(org.molgenis.semanticsearch.explain.bean.ExplainedQueryString)

Aggregations

ExplainedQueryString (org.molgenis.semanticsearch.explain.bean.ExplainedQueryString)7 Explanation (org.apache.lucene.search.Explanation)4 EntityType (org.molgenis.data.meta.model.EntityType)4 Test (org.testng.annotations.Test)3 HashMap (java.util.HashMap)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 Stream (java.util.stream.Stream)2 Entity (org.molgenis.data.Entity)2 QueryRule (org.molgenis.data.QueryRule)2 Operator (org.molgenis.data.QueryRule.Operator)2 Attribute (org.molgenis.data.meta.model.Attribute)2 QueryImpl (org.molgenis.data.support.QueryImpl)2 ExplainedAttribute (org.molgenis.semanticsearch.explain.bean.ExplainedAttribute)2 ElasticSearchExplainService (org.molgenis.semanticsearch.explain.service.ElasticSearchExplainService)2 Joiner (com.google.common.base.Joiner)1 Splitter (com.google.common.base.Splitter)1 FluentIterable (com.google.common.collect.FluentIterable)1 Lists (com.google.common.collect.Lists)1 Ordering (com.google.common.collect.Ordering)1 Sets (com.google.common.collect.Sets)1