Example usage of org.opensearch.search.aggregations.bucket.terms.StringTerms in the OpenSearch project (opensearch-project), taken from the class SignificantTermsSignificanceScoreIT, method testSubAggregations.
/**
 * A simple test that adds a sub-aggregation to a significant terms aggregation,
 * to help check that sub-aggregation collection is handled correctly.
 */
public void testSubAggregations() throws Exception {
    indexEqualTestData();

    // Match documents containing either term so both classes contribute buckets.
    QueryBuilder query = QueryBuilders.termsQuery(TEXT_FIELD, "a", "b");

    // Terms sub-aggregation on the class field, nested under significant terms.
    AggregationBuilder classAgg = terms("class").field(CLASS_FIELD);
    AggregationBuilder significantAgg = significantTerms("significant_terms")
        .field(TEXT_FIELD)
        .executionHint(randomExecutionHint())
        .significanceHeuristic(new ChiSquare(true, true))
        .minDocCount(1)
        .shardSize(1000)
        .size(1000)
        .subAggregation(classAgg);

    SearchResponse response = client().prepareSearch("test")
        .setQuery(query)
        .addAggregation(significantAgg)
        .get();
    assertSearchResponse(response);

    SignificantTerms sigTerms = response.getAggregations().get("significant_terms");
    assertThat(sigTerms.getBuckets().size(), equalTo(2));

    // Every significant-terms bucket must carry a fully collected "class" sub-aggregation.
    for (SignificantTerms.Bucket bucket : sigTerms) {
        StringTerms classTerms = bucket.getAggregations().get("class");
        assertThat(classTerms.getBuckets().size(), equalTo(2));
    }
}
Example usage of org.opensearch.search.aggregations.bucket.terms.StringTerms in the OpenSearch project (opensearch-project), taken from the class SignificantTermsSignificanceScoreIT, method testScoresEqualForPositiveAndNegative.
/**
 * Checks that significance scores are symmetric between the two classes:
 * with exclude-negatives disabled and the classes used as routing ids, each
 * term's score computed from class A's perspective must equal the score
 * computed from class B's perspective (within a small tolerance).
 *
 * @param heuristic the significance heuristic under test
 */
public void testScoresEqualForPositiveAndNegative(SignificanceHeuristic heuristic) throws Exception {
    // check that results for both classes are the same with exclude negatives = false and classes are routing ids
    SearchRequestBuilder request;
    if (randomBoolean()) {
        request = client().prepareSearch("test")
            .addAggregation(
                terms("class").field("class")
                    .subAggregation(
                        significantTerms("mySignificantTerms").field("text")
                            .executionHint(randomExecutionHint())
                            .significanceHeuristic(heuristic)
                            .minDocCount(1)
                            .shardSize(1000)
                            .size(1000)));
    } else {
        request = client().prepareSearch("test")
            .addAggregation(
                terms("class").field("class")
                    .subAggregation(
                        significantText("mySignificantTerms", "text")
                            .significanceHeuristic(heuristic)
                            .minDocCount(1)
                            .shardSize(1000)
                            .size(1000)));
    }
    SearchResponse response = request.get();
    // FIX: assertSearchResponse was duplicated; one call suffices.
    assertSearchResponse(response);
    StringTerms classes = response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    Iterator<? extends Terms.Bucket> classBuckets = classes.getBuckets().iterator();

    // Significant terms computed from the first class's point of view.
    Aggregations aggregations = classBuckets.next().getAggregations();
    SignificantTerms sigTerms = aggregations.get("mySignificantTerms");
    List<? extends SignificantTerms.Bucket> classA = sigTerms.getBuckets();

    // FIX: the iterator was previously taken from class A's own aggregation, so
    // the loop below compared class A's scores against themselves and the test
    // was vacuous. Advance to the second class bucket so A is compared with B.
    aggregations = classBuckets.next().getAggregations();
    sigTerms = aggregations.get("mySignificantTerms");
    Iterator<SignificantTerms.Bucket> classBBucketIterator = sigTerms.iterator();

    assertThat(classA.size(), greaterThan(0));
    for (SignificantTerms.Bucket classABucket : classA) {
        SignificantTerms.Bucket classBBucket = classBBucketIterator.next();
        assertThat(classABucket.getKey(), equalTo(classBBucket.getKey()));
        assertThat(classABucket.getSignificanceScore(), closeTo(classBBucket.getSignificanceScore(), 1.e-5));
    }
}
Example usage of org.opensearch.search.aggregations.bucket.terms.StringTerms in the OpenSearch project (opensearch-project), taken from the class SignificantTermsSignificanceScoreIT, method testBackgroundVsSeparateSet.
// compute significance score by
// 1. terms agg on class and significant terms
// 2. filter buckets and set the background to the other class and set is_background false
// both should yield exact same result
/**
 * Computes significance scores in two equivalent ways and asserts the results
 * are byte-for-byte equal:
 * 1. a terms aggregation on the class field with a significant-terms sub-agg
 *    (background is the superset), and
 * 2. one filter aggregation per class whose significant-terms sub-agg uses the
 *    opposite class as an explicit background filter (separate sets).
 */
public void testBackgroundVsSeparateSet(
    SignificanceHeuristic significanceHeuristicExpectingSuperset,
    SignificanceHeuristic significanceHeuristicExpectingSeparateSets,
    String type
) throws Exception {
    // significant_text only applies to text fields; otherwise use significant_terms.
    final boolean useSigText = randomBoolean() && type.equals("text");

    // Way 1: terms agg on class, significance against the superset background.
    SearchRequestBuilder supersetRequest;
    if (useSigText) {
        supersetRequest = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                terms("class").field(CLASS_FIELD)
                    .subAggregation(
                        significantText("sig_terms", TEXT_FIELD)
                            .minDocCount(1)
                            .significanceHeuristic(significanceHeuristicExpectingSuperset)));
    } else {
        supersetRequest = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                terms("class").field(CLASS_FIELD)
                    .subAggregation(
                        significantTerms("sig_terms").field(TEXT_FIELD)
                            .minDocCount(1)
                            .significanceHeuristic(significanceHeuristicExpectingSuperset)));
    }
    SearchResponse supersetResponse = supersetRequest.get();
    assertSearchResponse(supersetResponse);

    // Way 2: filter per class, background explicitly set to the other class.
    SearchRequestBuilder separateSetsRequest;
    if (useSigText) {
        separateSetsRequest = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                filter("0", QueryBuilders.termQuery(CLASS_FIELD, "0"))
                    .subAggregation(
                        significantText("sig_terms", TEXT_FIELD)
                            .minDocCount(1)
                            .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1"))
                            .significanceHeuristic(significanceHeuristicExpectingSeparateSets)))
            .addAggregation(
                filter("1", QueryBuilders.termQuery(CLASS_FIELD, "1"))
                    .subAggregation(
                        significantText("sig_terms", TEXT_FIELD)
                            .minDocCount(1)
                            .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0"))
                            .significanceHeuristic(significanceHeuristicExpectingSeparateSets)));
    } else {
        separateSetsRequest = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                filter("0", QueryBuilders.termQuery(CLASS_FIELD, "0"))
                    .subAggregation(
                        significantTerms("sig_terms").field(TEXT_FIELD)
                            .minDocCount(1)
                            .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1"))
                            .significanceHeuristic(significanceHeuristicExpectingSeparateSets)))
            .addAggregation(
                filter("1", QueryBuilders.termQuery(CLASS_FIELD, "1"))
                    .subAggregation(
                        significantTerms("sig_terms").field(TEXT_FIELD)
                            .minDocCount(1)
                            .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0"))
                            .significanceHeuristic(significanceHeuristicExpectingSeparateSets)));
    }
    SearchResponse separateSetsResponse = separateSetsRequest.get();

    // Extract the four per-class, per-term scores from the superset response.
    StringTerms classes = supersetResponse.getAggregations().get("class");
    SignificantTerms sigTerms0 = (SignificantTerms) classes.getBucketByKey("0").getAggregations().asMap().get("sig_terms");
    assertThat(sigTerms0.getBuckets().size(), equalTo(2));
    double score00Background = sigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01Background = sigTerms0.getBucketByKey("1").getSignificanceScore();
    SignificantTerms sigTerms1 = (SignificantTerms) classes.getBucketByKey("1").getAggregations().asMap().get("sig_terms");
    double score10Background = sigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11Background = sigTerms1.getBucketByKey("1").getSignificanceScore();

    // Extract the same four scores from the separate-sets response.
    Aggregations separateSetsAggs = separateSetsResponse.getAggregations();
    sigTerms0 = (SignificantTerms) ((InternalFilter) separateSetsAggs.get("0")).getAggregations().getAsMap().get("sig_terms");
    double score00SeparateSets = sigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01SeparateSets = sigTerms0.getBucketByKey("1").getSignificanceScore();
    sigTerms1 = (SignificantTerms) ((InternalFilter) separateSetsAggs.get("1")).getAggregations().getAsMap().get("sig_terms");
    double score10SeparateSets = sigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11SeparateSets = sigTerms1.getBucketByKey("1").getSignificanceScore();

    // Both computation paths must yield exactly identical scores.
    assertThat(score00Background, equalTo(score00SeparateSets));
    assertThat(score01Background, equalTo(score01SeparateSets));
    assertThat(score10Background, equalTo(score10SeparateSets));
    assertThat(score11Background, equalTo(score11SeparateSets));
}
Example usage of org.opensearch.search.aggregations.bucket.terms.StringTerms in the OpenSearch project (opensearch-project), taken from the class NestedIT, method testNestNestedAggs.
/**
 * Verifies nested-within-nested aggregations: a nested agg on nested1, a terms
 * agg on nested1.a.keyword, and a second-level nested agg on nested1.nested2
 * with a sum over nested1.nested2.b. Both terms buckets ("a" and "b") are
 * expected to hold one doc with a nested sum of 2.
 */
public void testNestNestedAggs() throws Exception {
    SearchResponse response = client().prepareSearch("idx_nested_nested_aggs")
        .addAggregation(
            nested("level1", "nested1")
                .subAggregation(
                    terms("a").field("nested1.a.keyword")
                        .collectMode(aggCollectionMode)
                        .subAggregation(
                            nested("level2", "nested1.nested2")
                                .subAggregation(sum("sum").field("nested1.nested2.b")))))
        .get();
    assertSearchResponse(response);

    Nested level1 = response.getAggregations().get("level1");
    assertThat(level1, notNullValue());
    assertThat(level1.getName(), equalTo("level1"));
    assertThat(level1.getDocCount(), equalTo(2L));

    StringTerms termsAgg = level1.getAggregations().get("a");

    // Bucket "a": one doc, second-level nested sum of 2.
    Terms.Bucket bucketA = termsAgg.getBucketByKey("a");
    assertThat(bucketA.getDocCount(), equalTo(1L));
    Nested level2A = bucketA.getAggregations().get("level2");
    assertThat(level2A.getDocCount(), equalTo(1L));
    Sum sumA = level2A.getAggregations().get("sum");
    assertThat(sumA.getValue(), equalTo(2d));

    // Bucket "b": same expectations as bucket "a".
    Terms.Bucket bucketB = termsAgg.getBucketByKey("b");
    assertThat(bucketB.getDocCount(), equalTo(1L));
    Nested level2B = bucketB.getAggregations().get("level2");
    assertThat(level2B.getDocCount(), equalTo(1L));
    Sum sumB = level2B.getAggregations().get("sum");
    assertThat(sumB.getValue(), equalTo(2d));
}
Example usage of org.opensearch.search.aggregations.bucket.terms.StringTerms in the OpenSearch project (opensearch-project), taken from the class AutoDateHistogramAggregatorTests, method testAsSubAgg.
/**
 * Runs an auto_date_histogram (target 3 buckets, with a max sub-agg on "n") as
 * a sub-aggregation of a terms agg on k1, then wraps that again under a terms
 * agg on k2, asserting bucket doc counts and max values at each nesting level.
 */
public void testAsSubAgg() throws IOException {
    AggregationBuilder builder = new TermsAggregationBuilder("k1").field("k1")
        .subAggregation(
            new AutoDateHistogramAggregationBuilder("dh").field(AGGREGABLE_DATE)
                .setNumBuckets(3)
                .subAggregation(new MaxAggregationBuilder("max").field("n")));
    asSubAggTestCase(builder, (StringTerms terms) -> {
        // Terms bucket "a": two yearly histogram buckets.
        StringTerms.Bucket a = terms.getBucketByKey("a");
        InternalAutoDateHistogram adh = a.getAggregations().get("dh");
        Map<String, Integer> countsA = new TreeMap<>();
        countsA.put("2020-01-01T00:00:00.000Z", 2);
        countsA.put("2021-01-01T00:00:00.000Z", 2);
        assertThat(bucketCountsAsMap(adh), equalTo(countsA));
        Map<String, Double> maxA = new TreeMap<>();
        maxA.put("2020-01-01T00:00:00.000Z", 2.0);
        maxA.put("2021-01-01T00:00:00.000Z", 4.0);
        assertThat(maxAsMap(adh), equalTo(maxA));
        // Terms bucket "b": a single monthly histogram bucket.
        StringTerms.Bucket b = terms.getBucketByKey("b");
        InternalAutoDateHistogram bdh = b.getAggregations().get("dh");
        Map<String, Integer> countsB = new TreeMap<>();
        countsB.put("2020-02-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(bdh), equalTo(countsB));
        Map<String, Double> maxB = new TreeMap<>();
        maxB.put("2020-02-01T00:00:00.000Z", 5.0);
        assertThat(maxAsMap(bdh), equalTo(maxB));
    });

    // Wrap once more: terms on k2 -> terms on k1 -> auto date histogram.
    builder = new TermsAggregationBuilder("k2").field("k2").subAggregation(builder);
    asSubAggTestCase(builder, (StringTerms terms) -> {
        // Path k2="a" / k1="a".
        StringTerms.Bucket a = terms.getBucketByKey("a");
        StringTerms ak1 = a.getAggregations().get("k1");
        StringTerms.Bucket ak1a = ak1.getBucketByKey("a");
        InternalAutoDateHistogram ak1adh = ak1a.getAggregations().get("dh");
        Map<String, Integer> countsAA = new TreeMap<>();
        countsAA.put("2020-01-01T00:00:00.000Z", 2);
        countsAA.put("2021-01-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(ak1adh), equalTo(countsAA));
        Map<String, Double> maxAA = new TreeMap<>();
        maxAA.put("2020-01-01T00:00:00.000Z", 2.0);
        maxAA.put("2021-01-01T00:00:00.000Z", 3.0);
        assertThat(maxAsMap(ak1adh), equalTo(maxAA));
        // Path k2="b" / k1="a".
        StringTerms.Bucket b = terms.getBucketByKey("b");
        StringTerms bk1 = b.getAggregations().get("k1");
        StringTerms.Bucket bk1a = bk1.getBucketByKey("a");
        InternalAutoDateHistogram bk1adh = bk1a.getAggregations().get("dh");
        Map<String, Integer> countsBA = new TreeMap<>();
        countsBA.put("2021-03-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(bk1adh), equalTo(countsBA));
        Map<String, Double> maxBA = new TreeMap<>();
        maxBA.put("2021-03-01T00:00:00.000Z", 4.0);
        assertThat(maxAsMap(bk1adh), equalTo(maxBA));
        // Path k2="b" / k1="b".
        StringTerms.Bucket bk1b = bk1.getBucketByKey("b");
        InternalAutoDateHistogram bk1bdh = bk1b.getAggregations().get("dh");
        Map<String, Integer> countsBB = new TreeMap<>();
        countsBB.put("2020-02-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(bk1bdh), equalTo(countsBB));
        Map<String, Double> maxBB = new TreeMap<>();
        maxBB.put("2020-02-01T00:00:00.000Z", 5.0);
        assertThat(maxAsMap(bk1bdh), equalTo(maxBB));
    });
}
Aggregations