use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.
the class AlgorithmTemplateServiceImplTest method find.
/**
 * Checks that {@code find} returns exactly one template, built from {@code script0}, whose model
 * maps {@code param0Name} and {@code param1Name} to the names of the two source attributes.
 */
@Test
public void find() {
    String firstAttrName = "sourceAttr0";
    String secondAttrName = "sourceAttr1";

    // Source entity type holding the two attributes that the explanations refer to.
    EntityType sourceEntityMeta = entityTypeFactory.create("source");
    Attribute firstAttr = attrMetaFactory.create().setName(firstAttrName);
    Attribute secondAttr = attrMetaFactory.create().setName(secondAttrName);
    sourceEntityMeta.addAttribute(firstAttr);
    sourceEntityMeta.addAttribute(secondAttr);

    // One explained query string per attribute, each carrying one of the parameter names.
    ExplainedQueryString firstExplain = ExplainedQueryString.create("a", "b", param0Name, 1.0);
    ExplainedQueryString secondExplain = ExplainedQueryString.create("a", "b", param1Name, 0.5);
    Map<Attribute, ExplainedAttribute> attrResults = Maps.newHashMap();
    attrResults.put(firstAttr, ExplainedAttribute.create(firstAttr, singletonList(firstExplain), false));
    attrResults.put(secondAttr, ExplainedAttribute.create(secondAttr, singletonList(secondExplain), false));

    // Expected outcome: a single template whose model binds each parameter name to an attribute name.
    Map<String, String> expectedModel = Maps.newHashMap();
    expectedModel.put(param0Name, firstAttrName);
    expectedModel.put(param1Name, secondAttrName);
    AlgorithmTemplate expectedTemplate = new AlgorithmTemplate(script0, expectedModel);

    Stream<AlgorithmTemplate> foundTemplates = algorithmTemplateServiceImpl.find(attrResults);

    assertEquals(foundTemplates.collect(Collectors.toList()), singletonList(expectedTemplate));
}
use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.
the class SemanticSearchServiceImpl method convertAttributeToExplainedAttribute.
/**
 * Asks the explain service why the given attribute matched the query and converts the resulting
 * explanation into explained query strings.
 *
 * @param attribute the matched attribute; its identifier selects the document that is explained
 * @param collectExpandedQueryMap map passed unchanged to
 *        {@code findQueriesFromExplanation}; presumably maps each expanded query string to the
 *        term it was derived from (TODO confirm against the helper that builds it)
 * @param query the query used to find the attribute
 * @return the explained query strings extracted from the explanation
 */
public Set<ExplainedQueryString> convertAttributeToExplainedAttribute(Attribute attribute, Map<String, String> collectExpandedQueryMap, Query<Entity> query) {
    // The explanation is requested against the attribute metadata entity type.
    Explanation explanation = elasticSearchExplainService.explain(
            query,
            dataService.getEntityType(ATTRIBUTE_META_DATA),
            attribute.getIdentifier());
    return elasticSearchExplainService.findQueriesFromExplanation(collectExpandedQueryMap, explanation);
}
use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.
the class SemanticSearchServiceImpl method findAttributes.
/**
 * Searches the attributes of {@code sourceEntityType} using the given query terms and ontology
 * terms and returns them, in the order the search yields them, together with an explanation.
 *
 * <p>Only the first {@code MAX_NUMBER_EXPLAINED_ATTRIBUTES} results get a full explanation,
 * because the explain-API can be computationally expensive; later results are wrapped in an
 * {@code ExplainedAttribute} created from the attribute alone.
 *
 * @param sourceEntityType entity type whose attributes are searched
 * @param queryTerms query terms to match against
 * @param ontologyTerms ontology terms to match against
 * @return insertion-ordered map from matched attribute to its explanation
 */
@Override
public Map<Attribute, ExplainedAttribute> findAttributes(EntityType sourceEntityType, Set<String> queryTerms, Collection<OntologyTerm> ontologyTerms) {
    Iterable<String> identifiers = semanticSearchServiceHelper.getAttributeIdentifiers(sourceEntityType);
    QueryRule disMaxRule = semanticSearchServiceHelper.createDisMaxQueryRuleForAttribute(queryTerms, ontologyTerms);

    // Always restrict to the entity type's own attributes; add the dis-max rule only when it has content.
    List<QueryRule> rules = Lists.newArrayList(new QueryRule(AttributeMetadata.ID, Operator.IN, identifiers));
    if (!disMaxRule.getNestedRules().isEmpty()) {
        rules.addAll(Arrays.asList(new QueryRule(Operator.AND), disMaxRule));
    }

    Stream<Entity> matches = dataService.findAll(ATTRIBUTE_META_DATA, new QueryImpl<>(rules));
    Map<String, String> expandedQueryMap = semanticSearchServiceHelper.collectExpandedQueryMap(queryTerms, ontologyTerms);

    Map<Attribute, ExplainedAttribute> result = new LinkedHashMap<>();
    // Counts the results handled so far; a mutable holder is needed because the lambda cannot reassign a local.
    AtomicInteger handled = new AtomicInteger(0);
    matches.forEach(match -> {
        Attribute attribute = sourceEntityType.getAttribute(match.getString(AttributeMetadata.NAME));
        ExplainedAttribute explained;
        if (handled.get() >= MAX_NUMBER_EXPLAINED_ATTRIBUTES) {
            // Past the limit: skip the expensive explain call.
            explained = ExplainedAttribute.create(attribute);
        } else {
            Set<ExplainedQueryString> explanations = convertAttributeToExplainedAttribute(attribute, expandedQueryMap, new QueryImpl<>(rules));
            boolean highQuality = isSingleMatchHighQuality(queryTerms, Sets.newHashSet(expandedQueryMap.values()), explanations);
            explained = ExplainedAttribute.create(attribute, explanations, highQuality);
        }
        result.put(attribute, explained);
        handled.incrementAndGet();
    });
    return result;
}
use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.
the class SearchServiceIT method testSemanticSearch.
/**
 * Indexes six entities with different descriptions, runs a dis-max fuzzy query over several
 * query terms, and checks both the ranking of the hits and the explained query strings that the
 * explain service derives for each entity.
 */
@Test
public void testSemanticSearch() {
    String[] descriptions = {
            "High chance of pulmonary disease",
            "And now for something completely different...",
            "Are you taking hypertensive medication?",
            "Have you ever had high blood pressure? (Repeat) (1)",
            "Do you suffer from Ocular hypertension?",
            "Do you have a vascular disorder?"
    };
    List<Entity> attributes = createDynamic(6).collect(toList());
    for (int i = 0; i < descriptions.length; i++) {
        attributes.get(i).set(ATTR_STRING, descriptions[i]);
    }

    // All entities share the first categorical reference, except the last one which gets the second.
    Entity ontology1 = attributes.get(0).getEntity(ATTR_CATEGORICAL);
    Entity ontology2 = attributes.get(1).getEntity(ATTR_CATEGORICAL);
    for (Entity attribute : attributes) {
        attribute.set(ATTR_CATEGORICAL, ontology1);
    }
    attributes.get(5).set(ATTR_CATEGORICAL, ontology2);

    searchService.index(entityTypeDynamic, attributes.stream());
    searchService.refreshIndex();

    // Each query term is fuzzy-matched against both the string and the script attribute.
    List<String> queryTerms = asList(
            "hypertension",
            "disorder vascular hypertensive",
            "increased pressure blood",
            "high pressure blood",
            "ocular^0.5 hypertension^0.5",
            "hypertension^0.25 idiopathic^0.25 pulmonary^0.25");
    List<QueryRule> fuzzyRules = queryTerms.stream()
            .flatMap(term -> Stream.of(
                    new QueryRule(ATTR_STRING, FUZZY_MATCH, term),
                    new QueryRule(ATTR_SCRIPT, FUZZY_MATCH, term)))
            .collect(toList());
    QueryRule finalDisMaxQuery = new QueryRule(fuzzyRules);
    finalDisMaxQuery.setOperator(DIS_MAX);

    List<String> attributeIds = asList("0", "1", "2", "3", "4", "5");
    Query<Entity> query = new QueryImpl<>(asList(new QueryRule(ATTR_ID, IN, attributeIds), new QueryRule(AND), finalDisMaxQuery));

    List<Object> matchingAttributeIDs = searchService.search(entityTypeDynamic, query).collect(toList());
    assertEquals(matchingAttributeIDs.get(0), "3");
    assertEquals(matchingAttributeIDs.get(1), "5");
    assertFalse(matchingAttributeIDs.contains("1"));

    List<Explanation> explanations = attributeIds.stream()
            .map(id -> explainService.explain(query, entityTypeDynamic, id))
            .collect(toList());
    List<Float> scores = explanations.stream().map(Explanation::getValue).collect(toList());
    // FIXME these scores vary between runs
    // assertEquals(scores, asList(0.3463153, 0, 0.7889965, 1.7814579, 0.76421005, 1.0707202));

    // Every stemmed form of the query terms maps back to the same tag.
    Map<String, String> expandedQueryMap = new HashMap<>();
    asList(
            "hypertens",
            "disord vascular hypertens",
            "increased pressur blood",
            "high pressur blood",
            "ocular hypertens",
            "hypertens idiopathic pulmonary")
            .forEach(term -> expandedQueryMap.put(term, "hypertension"));

    List<Set<ExplainedQueryString>> explanationStrings = explanations.stream()
            .map(explanation -> explainService.findQueriesFromExplanation(expandedQueryMap, explanation))
            .collect(toList());

    List<Set<ExplainedQueryString>> expectedExplanationStrings = asList(
            // High chance of pulmonary disease
            singleton(ExplainedQueryString.create("high", "high pressur blood", "hypertension", 41.66666666666667)),
            // And now for something completely different...
            emptySet(),
            // Are you taking hypertensive medication?
            singleton(ExplainedQueryString.create("hypertens", "hypertens", "hypertension", 100.0)),
            // Have you ever had high blood pressure? (Repeat) (1)
            singleton(ExplainedQueryString.create("high pressur blood", "high pressur blood", "hypertension", 100.0)),
            // Do you suffer from Ocular hypertension?
            singleton(ExplainedQueryString.create("ocular hypertens", "ocular hypertens", "hypertension", 100.0)),
            // Do you have a vascular disorder?
            singleton(ExplainedQueryString.create("disord vascular", "disord vascular hypertens", "hypertension", 78.04878048780488)));
    assertEquals(explanationStrings, expectedExplanationStrings);
}
use of org.molgenis.semanticsearch.explain.bean.ExplainedQueryString in project molgenis by molgenis.
the class ElasticSearchExplainServiceImpl method findQueriesFromExplanation.
/**
 * Turns an explanation into a set of explained query strings.
 *
 * <p>For every matched term found in the explanation, the helper is asked for the candidate
 * queries in {@code originalQueryInMap} together with a numeric value per candidate. The
 * candidate with the largest value is kept; terms without any candidate are skipped.
 *
 * @param originalQueryInMap map whose keys are the candidate query strings; the value stored
 *        under the winning key is passed on to the created {@code ExplainedQueryString}
 * @param explanation the explanation to extract matched terms from
 * @return the explained query strings, in the order the matched terms were encountered
 */
public Set<ExplainedQueryString> findQueriesFromExplanation(Map<String, String> originalQueryInMap, Explanation explanation) {
    Set<ExplainedQueryString> explained = new LinkedHashSet<>();
    for (String matchedTerm : explainServiceHelper.findMatchedWords(explanation)) {
        Map<String, Double> candidates = explainServiceHelper.findMatchQueries(matchedTerm, originalQueryInMap);
        // An empty candidate map yields an empty Optional, so nothing is added for this term.
        candidates.entrySet().stream()
                .max(Comparator.comparingDouble(Entry::getValue))
                .ifPresent(best -> explained.add(ExplainedQueryString.create(
                        matchedTerm,
                        best.getKey(),
                        originalQueryInMap.get(best.getKey()),
                        best.getValue())));
    }
    return explained;
}
Aggregations