Search in sources :

Example 16 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsIT method testPartiallyUnmappedWithFormat.

/**
 * Searches the "idx_unmapped" and "test" indices with a significant-terms aggregation on
 * "fact_category" formatted as "0000", then checks that the buckets keyed "0001" through
 * "0003" exist and report that same zero-padded key as their string key.
 */
public void testPartiallyUnmappedWithFormat() throws Exception {
    SearchResponse response = client().prepareSearch("idx_unmapped", "test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(boolQuery()
            .should(termQuery("description", "the"))
            .should(termQuery("description", "terje")))
        .setFrom(0)
        .setSize(60)
        .setExplain(true)
        .addAggregation(significantTerms("mySignificantTerms")
            .field("fact_category")
            .executionHint(randomExecutionHint())
            .minDocCount(1)
            .format("0000"))
        .execute()
        .actionGet();
    assertSearchResponse(response);

    SignificantTerms significant = response.getAggregations().get("mySignificantTerms");

    // Keys are looked up in their formatted, zero-padded form ("0001".."0003").
    int category = 1;
    while (category <= 3) {
        String expectedKey = String.format(Locale.ROOT, "%04d", category);
        SignificantTerms.Bucket match = significant.getBucketByKey(expectedKey);
        assertThat(match, notNullValue());
        assertThat(match.getKeyAsString(), equalTo(expectedKey));
        category++;
    }
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) Matchers.containsString(org.hamcrest.Matchers.containsString) Bucket(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 17 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsIT method testTextAnalysisChiSquare.

/**
 * Runs a significant-terms aggregation over "description" using the {@code ChiSquare}
 * heuristic for documents matching "terje", and delegates verification of the resulting
 * buckets to {@code checkExpectedStringTermsFound}.
 */
public void testTextAnalysisChiSquare() throws Exception {
    SearchResponse response = client().prepareSearch("test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(new TermQueryBuilder("description", "terje"))
        .setFrom(0)
        .setSize(60)
        .setExplain(true)
        .addAggregation(significantTerms("mySignificantTerms")
            .field("description")
            .executionHint(randomExecutionHint())
            .significanceHeuristic(new ChiSquare(false, true))
            .minDocCount(2))
        .execute()
        .actionGet();
    assertSearchResponse(response);

    SignificantTerms significant = response.getAggregations().get("mySignificantTerms");
    checkExpectedStringTermsFound(significant);
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) ChiSquare(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare) TermQueryBuilder(org.elasticsearch.index.query.TermQueryBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 18 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsIT method testIncludeExclude.

/**
 * Exercises the include/exclude setting of the significant-terms aggregation on
 * "description" for documents matching "weller".
 *
 * <p>First pass: {@code new IncludeExclude(null, "weller")} is expected to yield exactly six
 * bucket keys (jam, council, style, paul, of, the). Second pass:
 * {@code new IncludeExclude("weller", null)} is expected to yield only "weller".
 */
public void testIncludeExclude() throws Exception {
    // Pass 1: "weller" supplied as the second IncludeExclude argument.
    SearchResponse excludeResponse = client().prepareSearch("test")
        .setQuery(new TermQueryBuilder("description", "weller"))
        .addAggregation(significantTerms("mySignificantTerms")
            .field("description")
            .executionHint(randomExecutionHint())
            .includeExclude(new IncludeExclude(null, "weller")))
        .get();
    assertSearchResponse(excludeResponse);
    SignificantTerms excludeResult = excludeResponse.getAggregations().get("mySignificantTerms");
    Set<String> remainingKeys = new HashSet<>();
    for (Bucket bucket : excludeResult) {
        remainingKeys.add(bucket.getKeyAsString());
    }
    assertThat(remainingKeys, hasSize(6));
    for (String expected : new String[] { "jam", "council", "style", "paul", "of", "the" }) {
        assertThat(remainingKeys.contains(expected), is(true));
    }

    // Pass 2: "weller" supplied as the first IncludeExclude argument.
    SearchResponse includeResponse = client().prepareSearch("test")
        .setQuery(new TermQueryBuilder("description", "weller"))
        .addAggregation(significantTerms("mySignificantTerms")
            .field("description")
            .executionHint(randomExecutionHint())
            .includeExclude(new IncludeExclude("weller", null)))
        .get();
    assertSearchResponse(includeResponse);
    SignificantTerms includeResult = includeResponse.getAggregations().get("mySignificantTerms");
    Set<String> includedKeys = new HashSet<>();
    for (Bucket bucket : includeResult) {
        includedKeys.add(bucket.getKeyAsString());
    }
    assertThat(includedKeys, hasSize(1));
    assertThat(includedKeys.contains("weller"), is(true));
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) Bucket(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) TermQueryBuilder(org.elasticsearch.index.query.TermQueryBuilder) Matchers.containsString(org.hamcrest.Matchers.containsString) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) HashSet(java.util.HashSet)

Example 19 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsIT method testFilteredAnalysis.

/**
 * Runs a significant-terms aggregation on "description" for documents matching "weller",
 * with the background restricted by a terms query on "paul", and checks which words end up
 * among the returned bucket keys.
 */
public void testFilteredAnalysis() throws Exception {
    SearchResponse response = client().prepareSearch("test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(new TermQueryBuilder("description", "weller"))
        .setFrom(0)
        .setSize(60)
        .setExplain(true)
        .addAggregation(significantTerms("mySignificantTerms")
            .field("description")
            .minDocCount(1)
            .backgroundFilter(QueryBuilders.termsQuery("description", "paul")))
        .execute()
        .actionGet();
    assertSearchResponse(response);

    SignificantTerms significant = response.getAggregations().get("mySignificantTerms");
    Set<String> significantWords = new HashSet<>();
    for (Bucket bucket : significant) {
        significantWords.add(bucket.getKeyAsString());
    }

    // "paul" occurs in every background document, so it should not stand out as significant.
    assertFalse(significantWords.contains("paul"));
    // Among all the Pauls in the background, only Weller was in The Jam, so "jam" should be
    // reported as what sets this result set apart.
    assertTrue(significantWords.contains("jam"));
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) Bucket(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket) TermQueryBuilder(org.elasticsearch.index.query.TermQueryBuilder) Matchers.containsString(org.hamcrest.Matchers.containsString) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) HashSet(java.util.HashSet)

Example 20 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsIT method testDefaultSignificanceHeuristic.

/**
 * Runs a significant-terms aggregation over "description" with an explicitly supplied
 * {@code JLHScore} heuristic for documents matching "terje", and delegates verification of
 * the resulting buckets to {@code checkExpectedStringTermsFound}.
 */
public void testDefaultSignificanceHeuristic() throws Exception {
    SearchResponse response = client().prepareSearch("test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(new TermQueryBuilder("description", "terje"))
        .setFrom(0)
        .setSize(60)
        .setExplain(true)
        .addAggregation(significantTerms("mySignificantTerms")
            .field("description")
            .executionHint(randomExecutionHint())
            .significanceHeuristic(new JLHScore())
            .minDocCount(2))
        .execute()
        .actionGet();
    assertSearchResponse(response);

    SignificantTerms significant = response.getAggregations().get("mySignificantTerms");
    checkExpectedStringTermsFound(significant);
}
Also used : JLHScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore) SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) TermQueryBuilder(org.elasticsearch.index.query.TermQueryBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Aggregations

SearchResponse (org.elasticsearch.action.search.SearchResponse)23 SignificantTerms (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms)23 ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)23 TermQueryBuilder (org.elasticsearch.index.query.TermQueryBuilder)14 Bucket (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket)6 StringTerms (org.elasticsearch.search.aggregations.bucket.terms.StringTerms)6 AggregationBuilders.significantTerms (org.elasticsearch.search.aggregations.AggregationBuilders.significantTerms)5 Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 HashSet (java.util.HashSet)4 Aggregation (org.elasticsearch.search.aggregations.Aggregation)3 IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude)3 Aggregations (org.elasticsearch.search.aggregations.Aggregations)2 InternalFilter (org.elasticsearch.search.aggregations.bucket.filter.InternalFilter)2 ArrayList (java.util.ArrayList)1 IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder)1 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)1 ChiSquare (org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare)1 GND (org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND)1 JLHScore (org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore)1