Use of org.molgenis.data.EntityTestHarness.ATTR_CATEGORICAL in project molgenis by molgenis.
From the class SearchServiceIT, method testSemanticSearch.
/**
 * Indexes six dynamic entities carrying free-text labels, runs a DIS_MAX query made of fuzzy
 * matches on {@code ATTR_STRING} and {@code ATTR_SCRIPT}, and verifies both the order of the
 * first two hits and the query strings recovered from each entity's explanation.
 */
@Test
void testSemanticSearch() {
  // --- fixture: six entities, one label each (index in this list == entity position) ---
  List<Entity> attributes = createDynamic(6).collect(toList());
  List<String> labels =
      asList(
          "High chance of pulmonary disease",
          "And now for something completely different...",
          "Are you taking hypertensive medication?",
          "Have you ever had high blood pressure? (Repeat) (1)",
          "Do you suffer from Ocular hypertension?",
          "Do you have a vascular disorder?");
  for (int i = 0; i < labels.size(); i++) {
    attributes.get(i).set(ATTR_STRING, labels.get(i));
  }

  // Capture both categorical references before they are overwritten below.
  Entity ontology1 = attributes.get(0).getEntity(ATTR_CATEGORICAL);
  Entity ontology2 = attributes.get(1).getEntity(ATTR_CATEGORICAL);
  // Every entity points at the first reference; the last one is then re-pointed at the second.
  attributes.forEach(attribute -> attribute.set(ATTR_CATEGORICAL, ontology1));
  attributes.get(5).set(ATTR_CATEGORICAL, ontology2);

  searchService.index(entityTypeDynamic, attributes.stream());
  searchService.refreshIndex();

  // --- query: for each term, one fuzzy rule on ATTR_STRING followed by one on ATTR_SCRIPT ---
  List<String> queryTerms =
      asList(
          "hypertension",
          "disorder vascular hypertensive",
          "increased pressure blood",
          "high pressure blood",
          "ocular^0.5 hypertension^0.5",
          "hypertension^0.25 idiopathic^0.25 pulmonary^0.25");
  List<QueryRule> fuzzyRules =
      queryTerms.stream()
          .flatMap(
              term ->
                  Stream.of(ATTR_STRING, ATTR_SCRIPT)
                      .map(field -> new QueryRule(field, FUZZY_MATCH, term)))
          .collect(toList());
  QueryRule finalDisMaxQuery = new QueryRule(fuzzyRules);
  finalDisMaxQuery.setOperator(DIS_MAX);

  // Restrict the search to the six indexed ids AND the dis-max rule.
  List<String> attributeIds = asList("0", "1", "2", "3", "4", "5");
  QueryRule idFilter = new QueryRule(ATTR_ID, IN, attributeIds);
  Query<Entity> query = new QueryImpl<>(asList(idFilter, new QueryRule(AND), finalDisMaxQuery));

  // --- ranking assertions ---
  List<Object> matchingAttributeIDs =
      searchService.search(entityTypeDynamic, query).collect(toList());
  assertEquals("3", matchingAttributeIDs.get(0));
  assertEquals("5", matchingAttributeIDs.get(1));
  assertFalse(matchingAttributeIDs.contains("1"));

  // --- explanation assertions: one explanation per id, in id order ---
  List<Explanation> explanations =
      attributeIds.stream()
          .map(id -> explainService.explain(query, entityTypeDynamic, id))
          .collect(toList());
  // FIXME these scores vary between runs, so they are not asserted. Values once observed via
  // Explanation::getValue, in id order: 0.3463153, 0, 0.7889965, 1.7814579, 0.76421005, 1.0707202

  // Every stemmed query string maps back to the same original term.
  Map<String, String> expandedQueryMap = new HashMap<>();
  asList(
          "hypertens",
          "disord vascular hypertens",
          "increased pressur blood",
          "high pressur blood",
          "ocular hypertens",
          "hypertens idiopathic pulmonary")
      .forEach(stemmed -> expandedQueryMap.put(stemmed, "hypertension"));

  List<Set<ExplainedQueryString>> explanationStrings =
      explanations.stream()
          .map(
              explanation ->
                  explainService.findQueriesFromExplanation(expandedQueryMap, explanation))
          .collect(toList());

  // Expected result per entity, named after the label it belongs to.
  // "High chance of pulmonary disease"
  Set<ExplainedQueryString> pulmonaryDisease =
      singleton(
          ExplainedQueryString.create(
              "high", "high pressur blood", "hypertension", 41.66666666666667));
  // "And now for something completely different..."
  Set<ExplainedQueryString> somethingDifferent = emptySet();
  // "Are you taking hypertensive medication?"
  Set<ExplainedQueryString> hypertensiveMedication =
      singleton(ExplainedQueryString.create("hypertens", "hypertens", "hypertension", 100.0));
  // "Have you ever had high blood pressure? (Repeat) (1)"
  Set<ExplainedQueryString> highBloodPressure =
      singleton(
          ExplainedQueryString.create(
              "high pressur blood", "high pressur blood", "hypertension", 100.0));
  // "Do you suffer from Ocular hypertension?"
  Set<ExplainedQueryString> ocularHypertension =
      singleton(
          ExplainedQueryString.create(
              "ocular hypertens", "ocular hypertens", "hypertension", 100.0));
  // "Do you have a vascular disorder?"
  Set<ExplainedQueryString> vascularDisorder =
      singleton(
          ExplainedQueryString.create(
              "disord vascular", "disord vascular hypertens", "hypertension", 78.04878048780488));

  List<Set<ExplainedQueryString>> expectedExplanationStrings =
      asList(
          pulmonaryDisease,
          somethingDifferent,
          hypertensiveMedication,
          highBloodPressure,
          ocularHypertension,
          vascularDisorder);
  assertEquals(expectedExplanationStrings, explanationStrings);
}
Aggregations