Use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket in the elasticsearch project by elastic.
From the class SignificantTermsIT, method testIncludeExcludeExactValues.
/**
 * Runs the same "weller" query twice with an exact-value term list handed to
 * {@code IncludeExclude}: once as the second constructor argument and once as the first.
 * The first pass must yield a fixed set of terms that does not contain "weller"; the
 * second pass must yield "weller" and nothing else.
 */
public void testIncludeExcludeExactValues() throws Exception {
    String[] exactTerms = { "weller", "nosuchterm" };

    // Pass 1: term list supplied as the second IncludeExclude argument.
    IncludeExclude listAsSecondArg = new IncludeExclude(null, exactTerms);
    SearchResponse firstResponse = client().prepareSearch("test")
        .setQuery(new TermQueryBuilder("description", "weller"))
        .addAggregation(
            significantTerms("mySignificantTerms")
                .field("description")
                .executionHint(randomExecutionHint())
                .includeExclude(listAsSecondArg)
        )
        .get();
    assertSearchResponse(firstResponse);

    SignificantTerms firstResult = firstResponse.getAggregations().get("mySignificantTerms");
    Set<String> firstKeys = new HashSet<>();
    for (Bucket bucket : firstResult) {
        firstKeys.add(bucket.getKeyAsString());
    }
    Set<String> expectedFirstKeys = new HashSet<String>(Arrays.asList("jam", "council", "style", "paul", "of", "the"));
    assertEquals(expectedFirstKeys, firstKeys);

    // Pass 2: the same term list supplied as the first IncludeExclude argument.
    IncludeExclude listAsFirstArg = new IncludeExclude(exactTerms, null);
    SearchResponse secondResponse = client().prepareSearch("test")
        .setQuery(new TermQueryBuilder("description", "weller"))
        .addAggregation(
            significantTerms("mySignificantTerms")
                .field("description")
                .executionHint(randomExecutionHint())
                .includeExclude(listAsFirstArg)
        )
        .get();
    assertSearchResponse(secondResponse);

    SignificantTerms secondResult = secondResponse.getAggregations().get("mySignificantTerms");
    Set<String> secondKeys = new HashSet<>();
    for (Bucket bucket : secondResult) {
        secondKeys.add(bucket.getKeyAsString());
    }
    // "nosuchterm" is in the list too, yet exactly one key is expected back.
    assertThat(secondKeys, hasSize(1));
    assertThat(secondKeys.contains("weller"), is(true));
}
Use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket in the elasticsearch project by elastic.
From the class SignificantTermsIT, method testNestedAggs.
/**
 * Nests a significant terms aggregation on "description" under a terms aggregation on
 * "fact_category" and checks that every category bucket contains the keywords listed
 * for it in the expectation table.
 */
public void testNestedAggs() throws Exception {
    // Row (k - 1) lists the keywords required for the bucket whose key parses to k.
    String[][] expectedKeywordsByCategory = {
        { "paul", "weller", "jam", "style", "council" },
        { "paul", "smith" },
        { "craig", "kelly", "terje", "haakonsen", "burton" }
    };

    SearchResponse response = client().prepareSearch("test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .addAggregation(
            terms("myCategories")
                .field("fact_category")
                .minDocCount(2)
                .subAggregation(
                    significantTerms("mySignificantTerms")
                        .field("description")
                        .executionHint(randomExecutionHint())
                        .minDocCount(2)
                )
        )
        .execute()
        .actionGet();
    assertSearchResponse(response);

    Terms categories = response.getAggregations().get("myCategories");
    for (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket category : categories.getBuckets()) {
        // Gather the significant term keys reported for this category.
        SignificantTerms significantForCategory = category.getAggregations().get("mySignificantTerms");
        Set<String> reportedKeys = new HashSet<>();
        for (Bucket significantBucket : significantForCategory) {
            reportedKeys.add(significantBucket.getKeyAsString());
        }

        // Bucket keys are parsed as 1-based category numbers; the table is 0-based.
        int row = Integer.parseInt(category.getKeyAsString()) - 1;
        for (String keyword : expectedKeywordsByCategory[row]) {
            assertTrue(keyword + " missing from category keywords", reportedKeys.contains(keyword));
        }
    }
}
Use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket in the elasticsearch project by elastic.
From the class SignificantTermsIT, method testBadFilteredAnalysis.
/**
 * Robustness check: runs a significant terms aggregation whose background filter is a
 * deliberately poor match for the foreground query, and verifies that at least one
 * returned bucket reports a superset document frequency of zero.
 */
public void testBadFilteredAnalysis() throws Exception {
    // The foreground query looks for a snowboarder's name ("terje"), while the background
    // statistics are restricted to fact_category:1 (music-related content). The mismatch
    // is intentional.
    SearchResponse response = client().prepareSearch("test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(new TermQueryBuilder("description", "terje"))
        .setFrom(0)
        .setSize(60)
        .setExplain(true)
        .addAggregation(
            significantTerms("mySignificantTerms")
                .field("description")
                .minDocCount(2)
                .backgroundFilter(QueryBuilders.termQuery("fact_category", 1))
        )
        .execute()
        .actionGet();
    assertSearchResponse(response);

    SignificantTerms significant = response.getAggregations().get("mySignificantTerms");

    // At least one significant term should occur in the foreground selection yet be
    // completely absent from the filtered background, i.e. have a superset df of 0.
    boolean foundTermAbsentFromBackground = false;
    for (Bucket bucket : significant) {
        foundTermAbsentFromBackground = bucket.getSupersetDf() == 0;
        if (foundTermAbsentFromBackground) {
            break;
        }
    }
    assertTrue(foundTermAbsentFromBackground);
}
Use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket in the elasticsearch project by elastic.
From the class SignificantTermsIT, method checkExpectedStringTermsFound.
/**
 * Asserts that the given significant terms result contains the five expected keys and
 * that the "kelly" bucket reports a subset df of 3 and a superset df of 4.
 *
 * @param topTerms the significant terms aggregation result to inspect
 */
private void checkExpectedStringTermsFound(SignificantTerms topTerms) {
    // Index the buckets by their string key so individual terms can be looked up.
    HashMap<String, Bucket> bucketsByKey = new HashMap<>();
    for (Bucket bucket : topTerms) {
        bucketsByKey.put(bucket.getKeyAsString(), bucket);
    }

    // Checked in this order; the first missing key fails the test.
    String[] requiredKeys = { "haakonsen", "craig", "kelly", "burton", "snowboards" };
    for (String requiredKey : requiredKeys) {
        assertTrue(bucketsByKey.containsKey(requiredKey));
    }

    Bucket kelly = bucketsByKey.get("kelly");
    assertEquals(3, kelly.getSubsetDf());
    assertEquals(4, kelly.getSupersetDf());
}
Use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket in the elasticsearch project by elastic.
From the class SignificantTermsIT, method testPartiallyUnmappedWithFormat.
/**
 * Searches the "idx_unmapped" and "test" indices together with a significant terms
 * aggregation on "fact_category" formatted as "0000", and checks that buckets can be
 * looked up by the zero-padded keys "0001" through "0003".
 */
public void testPartiallyUnmappedWithFormat() throws Exception {
    SearchResponse response = client().prepareSearch("idx_unmapped", "test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(boolQuery().should(termQuery("description", "the")).should(termQuery("description", "terje")))
        .setFrom(0)
        .setSize(60)
        .setExplain(true)
        .addAggregation(
            significantTerms("mySignificantTerms")
                .field("fact_category")
                .executionHint(randomExecutionHint())
                .minDocCount(1)
                .format("0000")
        )
        .execute()
        .actionGet();
    assertSearchResponse(response);

    SignificantTerms significant = response.getAggregations().get("mySignificantTerms");
    for (int category = 1; category < 4; category++) {
        // Build the four-digit, zero-padded key the bucket is expected to carry.
        String expectedKey = String.format(Locale.ROOT, "%04d", category);
        SignificantTerms.Bucket match = significant.getBucketByKey(expectedKey);
        assertThat(match, notNullValue());
        assertThat(match.getKeyAsString(), equalTo(expectedKey));
    }
}
Aggregations