Search in sources :

Example 1 with StringTerms

use of org.opensearch.search.aggregations.bucket.terms.StringTerms in project OpenSearch by opensearch-project.

the class SignificantTermsSignificanceScoreIT method testSubAggregations.

/**
 * Runs a significant terms aggregation that carries a {@code terms} sub-aggregation and
 * verifies that the sub-aggregation results are present under every significant bucket.
 * <p>
 * Expects two significant buckets, each holding a {@code class} terms aggregation
 * with two buckets of its own.
 */
public void testSubAggregations() throws Exception {
    // NOTE(review): presumably indexes the documents queried below - confirm against the helper.
    indexEqualTestData();

    // The sub-aggregation that must be collected once per significant bucket.
    AggregationBuilder classTerms = terms("class").field(CLASS_FIELD);
    AggregationBuilder significantAgg = significantTerms("significant_terms")
        .field(TEXT_FIELD)
        .executionHint(randomExecutionHint())
        .significanceHeuristic(new ChiSquare(true, true))
        .minDocCount(1)
        .shardSize(1000)
        .size(1000)
        .subAggregation(classTerms);
    QueryBuilder textQuery = QueryBuilders.termsQuery(TEXT_FIELD, "a", "b");

    SearchResponse searchResponse = client().prepareSearch("test")
        .setQuery(textQuery)
        .addAggregation(significantAgg)
        .get();
    assertSearchResponse(searchResponse);

    SignificantTerms significant = searchResponse.getAggregations().get("significant_terms");
    assertThat(significant.getBuckets().size(), equalTo(2));
    for (SignificantTerms.Bucket significantBucket : significant) {
        StringTerms classesInBucket = significantBucket.getAggregations().get("class");
        assertThat(classesInBucket.getBuckets().size(), equalTo(2));
    }
}
Also used : SignificantTerms(org.opensearch.search.aggregations.bucket.terms.SignificantTerms) AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) ChiSquare(org.opensearch.search.aggregations.bucket.terms.heuristic.ChiSquare) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) QueryBuilder(org.opensearch.index.query.QueryBuilder) OpenSearchAssertions.assertSearchResponse(org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse) SearchResponse(org.opensearch.action.search.SearchResponse)

Example 2 with StringTerms

use of org.opensearch.search.aggregations.bucket.terms.StringTerms in project OpenSearch by opensearch-project.

the class SignificantTermsSignificanceScoreIT method testScoresEqualForPositiveAndNegative.

/**
 * Checks that, for the given heuristic, the significant terms computed under the two
 * {@code class} buckets have the same keys (in the same order) and the same significance
 * scores within a tolerance of {@code 1e-5}.
 * <p>
 * Randomly uses either a {@code significant_terms} or a {@code significant_text}
 * sub-aggregation named {@code mySignificantTerms}.
 *
 * @param heuristic the significance heuristic applied to the sub-aggregation
 * @throws Exception if the search request fails
 */
public void testScoresEqualForPositiveAndNegative(SignificanceHeuristic heuristic) throws Exception {
    // check that results for both classes are the same with exclude negatives = false and classes are routing ids
    SearchRequestBuilder request;
    if (randomBoolean()) {
        request = client().prepareSearch("test")
            .addAggregation(
                terms("class").field("class")
                    .subAggregation(
                        significantTerms("mySignificantTerms").field("text")
                            .executionHint(randomExecutionHint())
                            .significanceHeuristic(heuristic)
                            .minDocCount(1)
                            .shardSize(1000)
                            .size(1000)
                    )
            );
    } else {
        request = client().prepareSearch("test")
            .addAggregation(
                terms("class").field("class")
                    .subAggregation(
                        significantText("mySignificantTerms", "text").significanceHeuristic(heuristic)
                            .minDocCount(1)
                            .shardSize(1000)
                            .size(1000)
                    )
            );
    }
    SearchResponse response = request.get();
    assertSearchResponse(response);

    StringTerms classes = response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    Iterator<? extends Terms.Bucket> classBuckets = classes.getBuckets().iterator();

    // Read the significant terms of BOTH class buckets; comparing the first bucket
    // against itself would make every assertion below trivially true.
    SignificantTerms sigTermsA = classBuckets.next().getAggregations().get("mySignificantTerms");
    SignificantTerms sigTermsB = classBuckets.next().getAggregations().get("mySignificantTerms");

    List<? extends SignificantTerms.Bucket> classA = sigTermsA.getBuckets();
    Iterator<SignificantTerms.Bucket> classBBucketIterator = sigTermsB.iterator();
    assertThat(classA.size(), greaterThan(0));
    for (SignificantTerms.Bucket classABucket : classA) {
        // Fail with a readable assertion instead of a NoSuchElementException if class B has fewer terms.
        assertThat(classBBucketIterator.hasNext(), equalTo(true));
        SignificantTerms.Bucket classBBucket = classBBucketIterator.next();
        assertThat(classABucket.getKey(), equalTo(classBBucket.getKey()));
        assertThat(classABucket.getSignificanceScore(), closeTo(classBBucket.getSignificanceScore(), 1.e-5));
    }
}
Also used : SignificantTerms(org.opensearch.search.aggregations.bucket.terms.SignificantTerms) SearchRequestBuilder(org.opensearch.action.search.SearchRequestBuilder) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) Aggregations(org.opensearch.search.aggregations.Aggregations) Terms(org.opensearch.search.aggregations.bucket.terms.Terms) AggregationBuilders.significantTerms(org.opensearch.search.aggregations.AggregationBuilders.significantTerms) OpenSearchAssertions.assertSearchResponse(org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse) SearchResponse(org.opensearch.action.search.SearchResponse)

Example 3 with StringTerms

use of org.opensearch.search.aggregations.bucket.terms.StringTerms in project OpenSearch by opensearch-project.

the class SignificantTermsSignificanceScoreIT method testBackgroundVsSeparateSet.

/**
 * Computes significance scores in two ways and asserts that both yield exactly the same result:
 * <ol>
 *   <li>a terms aggregation on the class field with a significant terms (or significant text)
 *       sub-aggregation;</li>
 *   <li>one filter aggregation per class, with the background set to the other class
 *       (is_background false).</li>
 * </ol>
 *
 * @param significanceHeuristicExpectingSuperset     heuristic used for variant 1
 * @param significanceHeuristicExpectingSeparateSets heuristic used for variant 2
 * @param type                                       field type; only when it equals {@code "text"} may the
 *                                                   significant text variant be randomly chosen
 * @throws Exception if a search request fails
 */
public void testBackgroundVsSeparateSet(SignificanceHeuristic significanceHeuristicExpectingSuperset, SignificanceHeuristic significanceHeuristicExpectingSeparateSets, String type) throws Exception {
    final boolean useSigText = randomBoolean() && type.equals("text");

    // Variant 1: terms agg on the class field, significance computed per class bucket.
    SearchRequestBuilder request1;
    if (useSigText) {
        request1 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                terms("class").field(CLASS_FIELD)
                    .subAggregation(
                        significantText("sig_terms", TEXT_FIELD).minDocCount(1)
                            .significanceHeuristic(significanceHeuristicExpectingSuperset)
                    )
            );
    } else {
        request1 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                terms("class").field(CLASS_FIELD)
                    .subAggregation(
                        significantTerms("sig_terms").field(TEXT_FIELD)
                            .minDocCount(1)
                            .significanceHeuristic(significanceHeuristicExpectingSuperset)
                    )
            );
    }
    SearchResponse response1 = request1.get();
    assertSearchResponse(response1);

    // Variant 2: one filter per class, with the other class as the background set.
    SearchRequestBuilder request2;
    if (useSigText) {
        request2 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                filter("0", QueryBuilders.termQuery(CLASS_FIELD, "0")).subAggregation(
                    significantText("sig_terms", TEXT_FIELD).minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            )
            .addAggregation(
                filter("1", QueryBuilders.termQuery(CLASS_FIELD, "1")).subAggregation(
                    significantText("sig_terms", TEXT_FIELD).minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            );
    } else {
        request2 = client().prepareSearch(INDEX_NAME)
            .addAggregation(
                filter("0", QueryBuilders.termQuery(CLASS_FIELD, "0")).subAggregation(
                    significantTerms("sig_terms").field(TEXT_FIELD)
                        .minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            )
            .addAggregation(
                filter("1", QueryBuilders.termQuery(CLASS_FIELD, "1")).subAggregation(
                    significantTerms("sig_terms").field(TEXT_FIELD)
                        .minDocCount(1)
                        .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0"))
                        .significanceHeuristic(significanceHeuristicExpectingSeparateSets)
                )
            );
    }
    SearchResponse response2 = request2.get();
    // Validate the second response as well, so failures are reported before its aggregations are read.
    assertSearchResponse(response2);

    // Scores from variant 1 (background = superset).
    StringTerms classes = response1.getAggregations().get("class");
    SignificantTerms backgroundSigTerms0 = classes.getBucketByKey("0").getAggregations().get("sig_terms");
    assertThat(backgroundSigTerms0.getBuckets().size(), equalTo(2));
    double score00Background = backgroundSigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01Background = backgroundSigTerms0.getBucketByKey("1").getSignificanceScore();
    SignificantTerms backgroundSigTerms1 = classes.getBucketByKey("1").getAggregations().get("sig_terms");
    double score10Background = backgroundSigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11Background = backgroundSigTerms1.getBucketByKey("1").getSignificanceScore();

    // Scores from variant 2 (separate foreground/background sets).
    Aggregations aggs = response2.getAggregations();
    InternalFilter classFilter0 = aggs.get("0");
    SignificantTerms separateSigTerms0 = classFilter0.getAggregations().get("sig_terms");
    double score00SeparateSets = separateSigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01SeparateSets = separateSigTerms0.getBucketByKey("1").getSignificanceScore();
    InternalFilter classFilter1 = aggs.get("1");
    SignificantTerms separateSigTerms1 = classFilter1.getAggregations().get("sig_terms");
    double score10SeparateSets = separateSigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11SeparateSets = separateSigTerms1.getBucketByKey("1").getSignificanceScore();

    assertThat(score00Background, equalTo(score00SeparateSets));
    assertThat(score01Background, equalTo(score01SeparateSets));
    assertThat(score10Background, equalTo(score10SeparateSets));
    assertThat(score11Background, equalTo(score11SeparateSets));
}
Also used : SignificantTerms(org.opensearch.search.aggregations.bucket.terms.SignificantTerms) SearchRequestBuilder(org.opensearch.action.search.SearchRequestBuilder) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) InternalFilter(org.opensearch.search.aggregations.bucket.filter.InternalFilter) Aggregations(org.opensearch.search.aggregations.Aggregations) OpenSearchAssertions.assertSearchResponse(org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse) SearchResponse(org.opensearch.action.search.SearchResponse)

Example 4 with StringTerms

use of org.opensearch.search.aggregations.bucket.terms.StringTerms in project OpenSearch by opensearch-project.

the class NestedIT method testNestNestedAggs.

/**
 * Runs a nested aggregation ({@code level1}) containing a terms aggregation ({@code a}),
 * which in turn contains a second nested aggregation ({@code level2}) with a sum metric,
 * and checks the doc counts and sums reported for the term buckets "a" and "b".
 */
public void testNestNestedAggs() throws Exception {
    SearchResponse searchResponse = client().prepareSearch("idx_nested_nested_aggs")
        .addAggregation(
            nested("level1", "nested1").subAggregation(
                terms("a").field("nested1.a.keyword")
                    .collectMode(aggCollectionMode)
                    .subAggregation(
                        nested("level2", "nested1.nested2").subAggregation(sum("sum").field("nested1.nested2.b"))
                    )
            )
        )
        .get();
    assertSearchResponse(searchResponse);

    // Outer nested level.
    Nested outerNested = searchResponse.getAggregations().get("level1");
    assertThat(outerNested, notNullValue());
    assertThat(outerNested.getName(), equalTo("level1"));
    assertThat(outerNested.getDocCount(), equalTo(2L));

    // Term bucket "a": one document, inner nested sum of 2.
    StringTerms termsForA = outerNested.getAggregations().get("a");
    Terms.Bucket bucketA = termsForA.getBucketByKey("a");
    assertThat(bucketA.getDocCount(), equalTo(1L));
    Nested innerNestedA = bucketA.getAggregations().get("level2");
    assertThat(innerNestedA.getDocCount(), equalTo(1L));
    Sum sumA = innerNestedA.getAggregations().get("sum");
    assertThat(sumA.getValue(), equalTo(2d));

    // Term bucket "b": one document, inner nested sum of 2.
    StringTerms termsForB = outerNested.getAggregations().get("a");
    Terms.Bucket bucketB = termsForB.getBucketByKey("b");
    assertThat(bucketB.getDocCount(), equalTo(1L));
    Nested innerNestedB = bucketB.getAggregations().get("level2");
    assertThat(innerNestedB.getDocCount(), equalTo(1L));
    Sum sumB = innerNestedB.getAggregations().get("sum");
    assertThat(sumB.getValue(), equalTo(2d));
}
Also used : Bucket(org.opensearch.search.aggregations.bucket.terms.Terms.Bucket) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) Nested(org.opensearch.search.aggregations.bucket.nested.Nested) LongTerms(org.opensearch.search.aggregations.bucket.terms.LongTerms) Terms(org.opensearch.search.aggregations.bucket.terms.Terms) Sum(org.opensearch.search.aggregations.metrics.Sum) OpenSearchAssertions.assertSearchResponse(org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse) SearchResponse(org.opensearch.action.search.SearchResponse)

Example 5 with StringTerms

use of org.opensearch.search.aggregations.bucket.terms.StringTerms in project OpenSearch by opensearch-project.

the class AutoDateHistogramAggregatorTests method testAsSubAgg.

/**
 * Exercises the auto date histogram as a sub-aggregation, first directly under a terms
 * aggregation on {@code k1}, then with that whole tree nested under a second terms
 * aggregation on {@code k2}. For each inspected terms bucket the per-date doc counts and
 * {@code max} values of the histogram are compared against expected maps.
 */
public void testAsSubAgg() throws IOException {
    // Level 1: terms(k1) -> auto_date_histogram(dh) -> max(n)
    AggregationBuilder builder = new TermsAggregationBuilder("k1").field("k1")
        .subAggregation(
            new AutoDateHistogramAggregationBuilder("dh").field(AGGREGABLE_DATE)
                .setNumBuckets(3)
                .subAggregation(new MaxAggregationBuilder("max").field("n"))
        );
    asSubAggTestCase(builder, (StringTerms k1Terms) -> {
        // k1 = "a"
        StringTerms.Bucket bucketA = k1Terms.getBucketByKey("a");
        InternalAutoDateHistogram histogramA = bucketA.getAggregations().get("dh");
        Map<String, Integer> countsA = new TreeMap<>();
        countsA.put("2020-01-01T00:00:00.000Z", 2);
        countsA.put("2021-01-01T00:00:00.000Z", 2);
        assertThat(bucketCountsAsMap(histogramA), equalTo(countsA));
        Map<String, Double> maxA = new TreeMap<>();
        maxA.put("2020-01-01T00:00:00.000Z", 2.0);
        maxA.put("2021-01-01T00:00:00.000Z", 4.0);
        assertThat(maxAsMap(histogramA), equalTo(maxA));

        // k1 = "b"
        StringTerms.Bucket bucketB = k1Terms.getBucketByKey("b");
        InternalAutoDateHistogram histogramB = bucketB.getAggregations().get("dh");
        Map<String, Integer> countsB = new TreeMap<>();
        countsB.put("2020-02-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(histogramB), equalTo(countsB));
        Map<String, Double> maxB = new TreeMap<>();
        maxB.put("2020-02-01T00:00:00.000Z", 5.0);
        assertThat(maxAsMap(histogramB), equalTo(maxB));
    });

    // Level 2: terms(k2) -> terms(k1) -> auto_date_histogram(dh) -> max(n)
    builder = new TermsAggregationBuilder("k2").field("k2").subAggregation(builder);
    asSubAggTestCase(builder, (StringTerms k2Terms) -> {
        // k2 = "a", k1 = "a"
        StringTerms.Bucket k2A = k2Terms.getBucketByKey("a");
        StringTerms k1UnderA = k2A.getAggregations().get("k1");
        StringTerms.Bucket k2Ak1A = k1UnderA.getBucketByKey("a");
        InternalAutoDateHistogram histogramAA = k2Ak1A.getAggregations().get("dh");
        Map<String, Integer> countsAA = new TreeMap<>();
        countsAA.put("2020-01-01T00:00:00.000Z", 2);
        countsAA.put("2021-01-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(histogramAA), equalTo(countsAA));
        Map<String, Double> maxAA = new TreeMap<>();
        maxAA.put("2020-01-01T00:00:00.000Z", 2.0);
        maxAA.put("2021-01-01T00:00:00.000Z", 3.0);
        assertThat(maxAsMap(histogramAA), equalTo(maxAA));

        // k2 = "b", k1 = "a"
        StringTerms.Bucket k2B = k2Terms.getBucketByKey("b");
        StringTerms k1UnderB = k2B.getAggregations().get("k1");
        StringTerms.Bucket k2Bk1A = k1UnderB.getBucketByKey("a");
        InternalAutoDateHistogram histogramBA = k2Bk1A.getAggregations().get("dh");
        Map<String, Integer> countsBA = new TreeMap<>();
        countsBA.put("2021-03-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(histogramBA), equalTo(countsBA));
        Map<String, Double> maxBA = new TreeMap<>();
        maxBA.put("2021-03-01T00:00:00.000Z", 4.0);
        assertThat(maxAsMap(histogramBA), equalTo(maxBA));

        // k2 = "b", k1 = "b"
        StringTerms.Bucket k2Bk1B = k1UnderB.getBucketByKey("b");
        InternalAutoDateHistogram histogramBB = k2Bk1B.getAggregations().get("dh");
        Map<String, Integer> countsBB = new TreeMap<>();
        countsBB.put("2020-02-01T00:00:00.000Z", 1);
        assertThat(bucketCountsAsMap(histogramBB), equalTo(countsBB));
        Map<String, Double> maxBB = new TreeMap<>();
        maxBB.put("2020-02-01T00:00:00.000Z", 5.0);
        assertThat(maxAsMap(histogramBB), equalTo(maxBB));
    });
}
Also used : AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) DerivativePipelineAggregationBuilder(org.opensearch.search.aggregations.pipeline.DerivativePipelineAggregationBuilder) MaxAggregationBuilder(org.opensearch.search.aggregations.metrics.MaxAggregationBuilder) TermsAggregationBuilder(org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder) TreeMap(java.util.TreeMap) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms)

Aggregations

StringTerms (org.opensearch.search.aggregations.bucket.terms.StringTerms): 17; ArrayList (java.util.ArrayList): 8; TermsAggregationBuilder (org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder): 8; BytesRef (org.apache.lucene.util.BytesRef): 7; SearchResponse (org.opensearch.action.search.SearchResponse): 6; AggregationBuilder (org.opensearch.search.aggregations.AggregationBuilder): 6; OpenSearchAssertions.assertSearchResponse (org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse): 6; HashMap (java.util.HashMap): 5; Map (java.util.Map): 5; IndexReader (org.apache.lucene.index.IndexReader): 5; RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter): 5; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 5; Directory (org.apache.lucene.store.Directory): 5; SignificantTerms (org.opensearch.search.aggregations.bucket.terms.SignificantTerms): 5; Terms (org.opensearch.search.aggregations.bucket.terms.Terms): 5; SortedSetDocValuesField (org.apache.lucene.document.SortedSetDocValuesField): 4; IndexSearcher (org.apache.lucene.search.IndexSearcher): 4; MappedFieldType (org.opensearch.index.mapper.MappedFieldType): 4; Aggregation (org.opensearch.search.aggregations.Aggregation): 4; LongTerms (org.opensearch.search.aggregations.bucket.terms.LongTerms): 4