use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.
the class SignificantTermsIT method testStructuredAnalysisWithIncludeExclude.
public void testStructuredAnalysisWithIncludeExclude() throws Exception {
long[] excludeTerms = { MUSIC_CATEGORY };
SearchResponse response = client().prepareSearch("test").setSearchType(SearchType.QUERY_THEN_FETCH).setQuery(new TermQueryBuilder("description", "paul")).setFrom(0).setSize(60).setExplain(true).addAggregation(significantTerms("mySignificantTerms").field("fact_category").executionHint(randomExecutionHint()).minDocCount(1).includeExclude(new IncludeExclude(null, excludeTerms))).execute().actionGet();
assertSearchResponse(response);
SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
Number topCategory = (Number) topTerms.getBuckets().iterator().next().getKey();
assertTrue(topCategory.equals(Long.valueOf(OTHER_CATEGORY)));
}
use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.
the class SignificantTermsIT method testTextAnalysisGND.
public void testTextAnalysisGND() throws Exception {
SearchResponse response = client().prepareSearch("test").setSearchType(SearchType.QUERY_THEN_FETCH).setQuery(new TermQueryBuilder("description", "terje")).setFrom(0).setSize(60).setExplain(true).addAggregation(significantTerms("mySignificantTerms").field("description").executionHint(randomExecutionHint()).significanceHeuristic(new GND(true)).minDocCount(2)).execute().actionGet();
assertSearchResponse(response);
SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
checkExpectedStringTermsFound(topTerms);
}
use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.
the class SignificantTermsIT method testTextAnalysisPercentageScore.
public void testTextAnalysisPercentageScore() throws Exception {
SearchResponse response = client().prepareSearch("test").setSearchType(SearchType.QUERY_THEN_FETCH).setQuery(new TermQueryBuilder("description", "terje")).setFrom(0).setSize(60).setExplain(true).addAggregation(significantTerms("mySignificantTerms").field("description").executionHint(randomExecutionHint()).significanceHeuristic(new PercentageScore()).minDocCount(2)).execute().actionGet();
assertSearchResponse(response);
SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
checkExpectedStringTermsFound(topTerms);
}
use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.
the class SignificantTermsIT method testMutualInformation.
public void testMutualInformation() throws Exception {
SearchResponse response = client().prepareSearch("test").setSearchType(SearchType.QUERY_THEN_FETCH).setQuery(new TermQueryBuilder("description", "terje")).setFrom(0).setSize(60).setExplain(true).addAggregation(significantTerms("mySignificantTerms").field("description").executionHint(randomExecutionHint()).significanceHeuristic(new MutualInformation(false, true)).minDocCount(1)).execute().actionGet();
assertSearchResponse(response);
SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
checkExpectedStringTermsFound(topTerms);
}
use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.
the class SignificantTermsIT method testBadFilteredAnalysis.
public void testBadFilteredAnalysis() throws Exception {
// Deliberately using a bad choice of filter here for the background context in order
// to test robustness.
// We search for the name of a snowboarder but use music-related content (fact_category:1)
// as the background source of term statistics.
SearchResponse response = client().prepareSearch("test").setSearchType(SearchType.QUERY_THEN_FETCH).setQuery(new TermQueryBuilder("description", "terje")).setFrom(0).setSize(60).setExplain(true).addAggregation(significantTerms("mySignificantTerms").field("description").minDocCount(2).backgroundFilter(QueryBuilders.termQuery("fact_category", 1))).execute().actionGet();
assertSearchResponse(response);
SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
// We expect at least one of the significant terms to have been selected on the basis
// that it is present in the foreground selection but entirely missing from the filtered
// background used as context.
boolean hasMissingBackgroundTerms = false;
for (Bucket topTerm : topTerms) {
if (topTerm.getSupersetDf() == 0) {
hasMissingBackgroundTerms = true;
break;
}
}
assertTrue(hasMissingBackgroundTerms);
}
Aggregations