use of org.opensearch.search.aggregations.bucket.terms.SignificantTerms in project OpenSearch by opensearch-project.
the class TermsShardMinDocCountIT method testShardMinDocCountSignificantTermsTest.
/**
 * Exercises {@code shardMinDocCount} on a significant terms aggregation against an index
 * created with one shard and no replicas. The same aggregation is run twice under an
 * "inclass" filter: without {@code shardMinDocCount} no buckets are expected, and with
 * {@code shardMinDocCount(2)} two buckets are expected.
 *
 * see https://github.com/elastic/elasticsearch/issues/5998
 */
public void testShardMinDocCountSignificantTermsTest() throws Exception {
    // The "text" field is randomly mapped either as a long or as a text field with fielddata.
    final String textMappings = randomBoolean() ? "type=long" : "type=text,fielddata=true";
    assertAcked(
        prepareCreate(index)
            .setSettings(Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0))
            .addMapping(MapperService.SINGLE_MAPPING_NAME, "text", textMappings)
    );

    List<IndexRequestBuilder> docs = new ArrayList<>();
    // high score but low doc freq
    for (String term : new String[] { "1", "2", "3", "4" }) {
        addTermsDocs(term, 1, 0, docs);
    }
    // low score but high doc freq
    for (String term : new String[] { "5", "6" }) {
        addTermsDocs(term, 3, 1, docs);
    }
    // make sure the terms all get score > 0 except for this one
    addTermsDocs("7", 0, 3, docs);
    indexRandom(true, false, docs);

    // first, check that indeed when not setting the shardMinDocCount parameter 0 terms are returned
    SearchResponse withoutShardMinDocCount = client().prepareSearch(index)
        .addAggregation(
            (filter("inclass", QueryBuilders.termQuery("class", true))).subAggregation(
                significantTerms("mySignificantTerms").field("text")
                    .minDocCount(2)
                    .size(2)
                    .shardSize(2)
                    .executionHint(randomExecutionHint())
            )
        )
        .get();
    assertSearchResponse(withoutShardMinDocCount);
    InternalFilter inClassWithout = withoutShardMinDocCount.getAggregations().get("inclass");
    SignificantTerms termsWithout = inClassWithout.getAggregations().get("mySignificantTerms");
    assertThat(termsWithout.getBuckets().size(), equalTo(0));

    // then repeat the request with shardMinDocCount set and expect both buckets to be returned
    SearchResponse withShardMinDocCount = client().prepareSearch(index)
        .addAggregation(
            (filter("inclass", QueryBuilders.termQuery("class", true))).subAggregation(
                significantTerms("mySignificantTerms").field("text")
                    .minDocCount(2)
                    .shardSize(2)
                    .shardMinDocCount(2)
                    .size(2)
                    .executionHint(randomExecutionHint())
            )
        )
        .get();
    assertSearchResponse(withShardMinDocCount);
    InternalFilter inClassWith = withShardMinDocCount.getAggregations().get("inclass");
    SignificantTerms termsWith = inClassWith.getAggregations().get("mySignificantTerms");
    assertThat(termsWith.getBuckets().size(), equalTo(2));
}
use of org.opensearch.search.aggregations.bucket.terms.SignificantTerms in project OpenSearch by opensearch-project.
the class SignificantTermsSignificanceScoreIT method testSubAggregations.
/**
 * Attaches a {@code terms} sub-aggregation on the class field to a significant terms
 * aggregation on the text field, as a basic check that sub-aggregation collection
 * works: two significant-term buckets are expected, each holding two class buckets.
 */
public void testSubAggregations() throws Exception {
    indexEqualTestData();

    QueryBuilder matchAOrB = QueryBuilders.termsQuery(TEXT_FIELD, "a", "b");
    AggregationBuilder significantWithClasses = significantTerms("significant_terms").field(TEXT_FIELD)
        .executionHint(randomExecutionHint())
        .significanceHeuristic(new ChiSquare(true, true))
        .minDocCount(1)
        .shardSize(1000)
        .size(1000)
        .subAggregation(terms("class").field(CLASS_FIELD));

    SearchResponse searchResponse = client().prepareSearch("test")
        .setQuery(matchAOrB)
        .addAggregation(significantWithClasses)
        .get();
    assertSearchResponse(searchResponse);

    SignificantTerms significant = searchResponse.getAggregations().get("significant_terms");
    assertThat(significant.getBuckets().size(), equalTo(2));

    // every significant term must carry the per-class breakdown
    for (SignificantTerms.Bucket significantBucket : significant) {
        StringTerms perClass = significantBucket.getAggregations().get("class");
        assertThat(perClass.getBuckets().size(), equalTo(2));
    }
}
use of org.opensearch.search.aggregations.bucket.terms.SignificantTerms in project OpenSearch by opensearch-project.
the class SignificantTermsSignificanceScoreIT method testScoresEqualForPositiveAndNegative.
/**
 * Runs a {@code terms} aggregation on "class" with a significant terms (or, randomly, a
 * significant text) sub-aggregation and verifies that the two class buckets report the
 * same terms with the same significance scores (within 1e-5).
 *
 * @param heuristic the significance heuristic applied to the sub-aggregation
 */
public void testScoresEqualForPositiveAndNegative(SignificanceHeuristic heuristic) throws Exception {
    // check that results for both classes are the same with exclude negatives = false and classes are routing ids
    SearchRequestBuilder request;
    if (randomBoolean()) {
        request = client().prepareSearch("test")
            .addAggregation(
                terms("class").field("class")
                    .subAggregation(
                        significantTerms("mySignificantTerms").field("text")
                            .executionHint(randomExecutionHint())
                            .significanceHeuristic(heuristic)
                            .minDocCount(1)
                            .shardSize(1000)
                            .size(1000)
                    )
            );
    } else {
        request = client().prepareSearch("test")
            .addAggregation(
                terms("class").field("class")
                    .subAggregation(
                        significantText("mySignificantTerms", "text").significanceHeuristic(heuristic)
                            .minDocCount(1)
                            .shardSize(1000)
                            .size(1000)
                    )
            );
    }
    SearchResponse response = request.get();
    assertSearchResponse(response);

    StringTerms classes = response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    Iterator<? extends Terms.Bucket> classBuckets = classes.getBuckets().iterator();

    // Read the significant terms of BOTH class buckets; comparing the first bucket's
    // terms against themselves would make the assertions below vacuous.
    SignificantTerms sigTermsA = classBuckets.next().getAggregations().get("mySignificantTerms");
    SignificantTerms sigTermsB = classBuckets.next().getAggregations().get("mySignificantTerms");

    List<? extends SignificantTerms.Bucket> classA = sigTermsA.getBuckets();
    assertThat(classA.size(), greaterThan(0));
    assertThat(sigTermsB.getBuckets().size(), equalTo(classA.size()));

    for (SignificantTerms.Bucket classABucket : classA) {
        // Match by key rather than by position so that equal scores ordered differently
        // in the two classes do not cause a spurious mismatch.
        SignificantTerms.Bucket classBBucket = sigTermsB.getBucketByKey(classABucket.getKeyAsString());
        assertNotNull("term [" + classABucket.getKeyAsString() + "] missing from second class", classBBucket);
        assertThat(classABucket.getKey(), equalTo(classBBucket.getKey()));
        assertThat(classABucket.getSignificanceScore(), closeTo(classBBucket.getSignificanceScore(), 1.e-5));
    }
}
use of org.opensearch.search.aggregations.bucket.terms.SignificantTerms in project OpenSearch by opensearch-project.
the class SignificantTermsSignificanceScoreIT method testBackgroundVsSeparateSet.
/**
 * Computes significance scores in two ways and checks that they are exactly equal:
 * <ol>
 *   <li>a terms aggregation on the class field with a significant terms sub-aggregation;</li>
 *   <li>one filter aggregation per class ("0" and "1"), each with a significant terms
 *       sub-aggregation whose background filter is the other class.</li>
 * </ol>
 *
 * @param significanceHeuristicExpectingSuperset     heuristic used for the terms-based request
 * @param significanceHeuristicExpectingSeparateSets heuristic used for the filter-based request
 * @param type                                       when equal to {@code "text"}, the significant text
 *                                                   aggregation may randomly be used instead of significant terms
 */
public void testBackgroundVsSeparateSet(
    SignificanceHeuristic significanceHeuristicExpectingSuperset,
    SignificanceHeuristic significanceHeuristicExpectingSeparateSets,
    String type
) throws Exception {
    final boolean useSigText = randomBoolean() && type.equals("text");

    // 1. terms agg on class and significant terms
    SearchRequestBuilder request1;
    if (useSigText) {
        request1 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                terms("class").field(CLASS_FIELD)
                    .subAggregation(
                        significantText("sig_terms", TEXT_FIELD).minDocCount(1)
                            .significanceHeuristic(significanceHeuristicExpectingSuperset)
                    )
            );
    } else {
        request1 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                terms("class").field(CLASS_FIELD)
                    .subAggregation(
                        significantTerms("sig_terms").field(TEXT_FIELD)
                            .minDocCount(1)
                            .significanceHeuristic(significanceHeuristicExpectingSuperset)
                    )
            );
    }
    SearchResponse response1 = request1.get();
    assertSearchResponse(response1);

    // 2. filter buckets and set the background to the other class
    SearchRequestBuilder request2;
    if (useSigText) {
        request2 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                filter("0", QueryBuilders.termQuery(CLASS_FIELD, "0")).subAggregation(
                    significantText("sig_terms", TEXT_FIELD).minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            )
            .addAggregation(
                filter("1", QueryBuilders.termQuery(CLASS_FIELD, "1")).subAggregation(
                    significantText("sig_terms", TEXT_FIELD).minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            );
    } else {
        request2 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                filter("0", QueryBuilders.termQuery(CLASS_FIELD, "0")).subAggregation(
                    significantTerms("sig_terms").field(TEXT_FIELD)
                        .minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            )
            .addAggregation(
                filter("1", QueryBuilders.termQuery(CLASS_FIELD, "1")).subAggregation(
                    significantTerms("sig_terms").field(TEXT_FIELD)
                        .minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            );
    }
    SearchResponse response2 = request2.get();
    // Validate the second response as well, so a failed search is reported as such
    // instead of surfacing later as a null bucket or a cast error.
    assertSearchResponse(response2);

    // Scores from the terms-based request.
    StringTerms classes = response1.getAggregations().get("class");
    SignificantTerms sigTerms0 = classes.getBucketByKey("0").getAggregations().get("sig_terms");
    assertThat(sigTerms0.getBuckets().size(), equalTo(2));
    double score00Background = sigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01Background = sigTerms0.getBucketByKey("1").getSignificanceScore();
    SignificantTerms sigTerms1 = classes.getBucketByKey("1").getAggregations().get("sig_terms");
    double score10Background = sigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11Background = sigTerms1.getBucketByKey("1").getSignificanceScore();

    // Scores from the filter-based request.
    Aggregations aggs = response2.getAggregations();
    InternalFilter filter0 = aggs.get("0");
    sigTerms0 = filter0.getAggregations().get("sig_terms");
    double score00SeparateSets = sigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01SeparateSets = sigTerms0.getBucketByKey("1").getSignificanceScore();
    InternalFilter filter1 = aggs.get("1");
    sigTerms1 = filter1.getAggregations().get("sig_terms");
    double score10SeparateSets = sigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11SeparateSets = sigTerms1.getBucketByKey("1").getSignificanceScore();

    // both should yield exact same result
    assertThat(score00Background, equalTo(score00SeparateSets));
    assertThat(score01Background, equalTo(score01SeparateSets));
    assertThat(score10Background, equalTo(score10SeparateSets));
    assertThat(score11Background, equalTo(score11SeparateSets));
}
use of org.opensearch.search.aggregations.bucket.terms.SignificantTerms in project OpenSearch by opensearch-project.
the class SharedSignificantTermsTestMethods method checkSignificantTermsAggregationCorrect.
/**
 * Runs a terms aggregation on the class field with a default significant terms
 * sub-aggregation on the text field and asserts that there are exactly two class
 * buckets, each with exactly one significant term whose key equals the class key.
 *
 * @param testCase the calling test case; not referenced by this method's body
 */
private static void checkSignificantTermsAggregationCorrect(OpenSearchIntegTestCase testCase) {
    SearchResponse searchResponse = client().prepareSearch(INDEX_NAME)
        .addAggregation(terms("class").field(CLASS_FIELD).subAggregation(significantTerms("sig_terms").field(TEXT_FIELD)))
        .execute()
        .actionGet();
    assertSearchResponse(searchResponse);

    StringTerms classTerms = searchResponse.getAggregations().get("class");
    Assert.assertThat(classTerms.getBuckets().size(), equalTo(2));

    for (Terms.Bucket classBucket : classTerms.getBuckets()) {
        Map<String, Aggregation> subAggs = classBucket.getAggregations().asMap();
        Assert.assertTrue(subAggs.containsKey("sig_terms"));

        SignificantTerms significant = (SignificantTerms) subAggs.get("sig_terms");
        Assert.assertThat(significant.getBuckets().size(), equalTo(1));

        // the single significant term of a class must be the class key itself
        String significantKey = significant.iterator().next().getKeyAsString();
        String classKey = classBucket.getKeyAsString();
        Assert.assertTrue(significantKey.equals(classKey));
    }
}
Aggregations