Search in sources :

Example 11 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsIT method testBadFilteredAnalysis.

/**
 * Runs a significant_terms aggregation whose background filter is a deliberately poor match
 * for the foreground query and verifies that at least one returned term has no occurrences
 * in that filtered background.
 */
public void testBadFilteredAnalysis() throws Exception {
    // The foreground query looks for the name of a snowboarder, while the background term
    // statistics are restricted to music-related content (fact_category:1). The mismatch is
    // intentional: it exercises robustness against a badly chosen background context.
    TermQueryBuilder musicOnlyBackground = QueryBuilders.termQuery("fact_category", 1);
    SearchResponse response = client().prepareSearch("test")
        .setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(new TermQueryBuilder("description", "terje"))
        .setFrom(0)
        .setSize(60)
        .setExplain(true)
        .addAggregation(
            significantTerms("mySignificantTerms")
                .field("description")
                .minDocCount(2)
                .backgroundFilter(musicOnlyBackground))
        .execute()
        .actionGet();
    assertSearchResponse(response);

    SignificantTerms termsAgg = response.getAggregations().get("mySignificantTerms");

    // At least one significant term is expected to have been picked because it occurs in the
    // foreground selection while being completely absent from the filtered background.
    boolean sawTermAbsentFromBackground = false;
    for (Bucket candidate : termsAgg) {
        sawTermAbsentFromBackground = candidate.getSupersetDf() == 0;
        if (sawTermAbsentFromBackground) {
            // one hit is enough, stop scanning
            break;
        }
    }
    assertTrue(sawTermAbsentFromBackground);
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) Bucket(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket) TermQueryBuilder(org.elasticsearch.index.query.TermQueryBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 12 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsSignificanceScoreIT method testBackgroundVsSeparateSet.

/**
 * Computes significance scores in two ways and checks that both give exactly the same result:
 * <ol>
 *   <li>a terms aggregation on the class field with a significant_terms sub-aggregation, scored
 *       with {@code significanceHeuristicExpectingSuperset};</li>
 *   <li>one filter aggregation per class, each with a significant_terms sub-aggregation whose
 *       background filter is the <em>other</em> class, scored with
 *       {@code significanceHeuristicExpectingSeparateSets} (per the original note, that heuristic
 *       is expected to be configured with is_background set to false - confirm against callers).</li>
 * </ol>
 *
 * @param significanceHeuristicExpectingSuperset     heuristic used for the terms/significant_terms request
 * @param significanceHeuristicExpectingSeparateSets heuristic used for the filter/background-filter request
 */
public void testBackgroundVsSeparateSet(SignificanceHeuristic significanceHeuristicExpectingSuperset, SignificanceHeuristic significanceHeuristicExpectingSeparateSets) throws Exception {
    SearchResponse response1 = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
        .addAggregation(terms("class").field(CLASS_FIELD)
            .subAggregation(significantTerms("sig_terms").field(TEXT_FIELD).minDocCount(1)
                .significanceHeuristic(significanceHeuristicExpectingSuperset)))
        .execute().actionGet();
    assertSearchResponse(response1);

    SearchResponse response2 = client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
        .addAggregation(filter("0", QueryBuilders.termQuery(CLASS_FIELD, "0"))
            .subAggregation(significantTerms("sig_terms").field(TEXT_FIELD).minDocCount(1)
                .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "1"))
                .significanceHeuristic(significanceHeuristicExpectingSeparateSets)))
        .addAggregation(filter("1", QueryBuilders.termQuery(CLASS_FIELD, "1"))
            .subAggregation(significantTerms("sig_terms").field(TEXT_FIELD).minDocCount(1)
                .backgroundFilter(QueryBuilders.termQuery(CLASS_FIELD, "0"))
                .significanceHeuristic(significanceHeuristicExpectingSeparateSets)))
        .execute().actionGet();
    // Validate the second response as well, so that a shard failure is reported as such
    // instead of surfacing later as a NullPointerException or a score mismatch.
    assertSearchResponse(response2);

    // Scores computed against the superset, one significant_terms result per class bucket.
    StringTerms classes = response1.getAggregations().get("class");
    SignificantTerms sigTerms0 = classes.getBucketByKey("0").getAggregations().get("sig_terms");
    assertThat(sigTerms0.getBuckets().size(), equalTo(2));
    double score00Background = sigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01Background = sigTerms0.getBucketByKey("1").getSignificanceScore();
    SignificantTerms sigTerms1 = classes.getBucketByKey("1").getAggregations().get("sig_terms");
    double score10Background = sigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11Background = sigTerms1.getBucketByKey("1").getSignificanceScore();

    // Scores computed against an explicit, separate background set (the other class).
    Aggregations aggs = response2.getAggregations();
    InternalFilter class0Filter = aggs.get("0");
    sigTerms0 = class0Filter.getAggregations().get("sig_terms");
    double score00SeparateSets = sigTerms0.getBucketByKey("0").getSignificanceScore();
    double score01SeparateSets = sigTerms0.getBucketByKey("1").getSignificanceScore();
    InternalFilter class1Filter = aggs.get("1");
    sigTerms1 = class1Filter.getAggregations().get("sig_terms");
    double score10SeparateSets = sigTerms1.getBucketByKey("0").getSignificanceScore();
    double score11SeparateSets = sigTerms1.getBucketByKey("1").getSignificanceScore();

    // Both approaches must agree exactly for every (class, term) pair.
    assertThat(score00Background, equalTo(score00SeparateSets));
    assertThat(score01Background, equalTo(score01SeparateSets));
    assertThat(score10Background, equalTo(score10SeparateSets));
    assertThat(score11Background, equalTo(score11SeparateSets));
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) StringTerms(org.elasticsearch.search.aggregations.bucket.terms.StringTerms) InternalFilter(org.elasticsearch.search.aggregations.bucket.filter.InternalFilter) Aggregations(org.elasticsearch.search.aggregations.Aggregations) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 13 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsSignificanceScoreIT method testPlugin.

/**
 * Indexes the shared 0/1 test documents and verifies the scores produced by the
 * {@code SimpleHeuristic} significance heuristic, twice: once after the response has been
 * validated with {@code assertSearchResponse} and once on a fresh response without it.
 */
public void testPlugin() throws Exception {
    String type = randomBoolean() ? "text" : "long";
    String settings = "{\"index.number_of_shards\": 1, \"index.number_of_replicas\": 0}";
    SharedSignificantTermsTestMethods.index01Docs(type, settings, this);

    SearchResponse response = searchClassesWithSimpleHeuristic();
    assertSearchResponse(response);
    assertSimpleHeuristicScores(response);

    // we run the same test again but this time we do not call assertSearchResponse() before the assertions
    // the reason is that this would trigger toXContent and we would like to check that this has no potential side effects
    response = searchClassesWithSimpleHeuristic();
    assertSimpleHeuristicScores(response);
}

/** Executes the terms-on-class search with a significant_terms sub-aggregation scored by {@code SimpleHeuristic}. */
private SearchResponse searchClassesWithSimpleHeuristic() {
    return client().prepareSearch(INDEX_NAME).setTypes(DOC_TYPE)
        .addAggregation(terms("class").field(CLASS_FIELD)
            .subAggregation((significantTerms("sig_terms")).field(TEXT_FIELD)
                .significanceHeuristic(new SimpleHeuristic())
                .minDocCount(1)))
        .execute().actionGet();
}

/**
 * Asserts that the response contains two class buckets, each holding two significant terms:
 * the first one equal to the class key with score 2.0, the second one with score 1.0.
 */
private void assertSimpleHeuristicScores(SearchResponse response) {
    StringTerms classes = response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    for (Terms.Bucket classBucket : classes.getBuckets()) {
        Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
        assertTrue(aggs.containsKey("sig_terms"));
        SignificantTerms agg = (SignificantTerms) aggs.get("sig_terms");
        assertThat(agg.getBuckets().size(), equalTo(2));
        Iterator<SignificantTerms.Bucket> bucketIterator = agg.iterator();
        // first bucket: the term matching the class itself
        SignificantTerms.Bucket sigBucket = bucketIterator.next();
        String term = sigBucket.getKeyAsString();
        String classTerm = classBucket.getKeyAsString();
        // assertThat/equalTo reports both values on failure, unlike assertTrue(a.equals(b))
        assertThat(term, equalTo(classTerm));
        assertThat(sigBucket.getSignificanceScore(), closeTo(2.0, 1.e-8));
        // second bucket: the other term
        sigBucket = bucketIterator.next();
        assertThat(sigBucket.getSignificanceScore(), closeTo(1.0, 1.e-8));
    }
}
Also used : Aggregation(org.elasticsearch.search.aggregations.Aggregation) SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) StringTerms(org.elasticsearch.search.aggregations.bucket.terms.StringTerms) SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) AggregationBuilders.significantTerms(org.elasticsearch.search.aggregations.AggregationBuilders.significantTerms) StringTerms(org.elasticsearch.search.aggregations.bucket.terms.StringTerms) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 14 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class SignificantTermsSignificanceScoreIT method testScoresEqualForPositiveAndNegative.

/**
 * Checks that, for the given heuristic, the significant terms of both class buckets carry the
 * same terms with (nearly) the same significance scores.
 *
 * <p>The comparison is made between the first and the second class bucket. Terms are matched
 * by key rather than by position, so the check does not depend on how equally-scored terms
 * happen to be ordered within each class.
 *
 * @param heuristic the significance heuristic under test
 */
public void testScoresEqualForPositiveAndNegative(SignificanceHeuristic heuristic) throws Exception {
    // check that results for both classes are the same with exclude negatives = false and classes are routing ids
    SearchResponse response = client().prepareSearch("test")
        .addAggregation(terms("class").field("class")
            .subAggregation(significantTerms("mySignificantTerms").field("text")
                .executionHint(randomExecutionHint())
                .significanceHeuristic(heuristic)
                .minDocCount(1).shardSize(1000).size(1000)))
        .execute().actionGet();
    assertSearchResponse(response);
    StringTerms classes = response.getAggregations().get("class");
    assertThat(classes.getBuckets().size(), equalTo(2));
    Iterator<Terms.Bucket> classBuckets = classes.getBuckets().iterator();

    // Read the significant terms of BOTH class buckets; comparing the first bucket with itself
    // would make every assertion below trivially true.
    SignificantTerms classATerms = classBuckets.next().getAggregations().get("mySignificantTerms");
    SignificantTerms classBTerms = classBuckets.next().getAggregations().get("mySignificantTerms");

    assertThat(classATerms.getBuckets().size(), greaterThan(0));
    assertThat(classBTerms.getBuckets().size(), equalTo(classATerms.getBuckets().size()));
    for (SignificantTerms.Bucket classABucket : classATerms) {
        SignificantTerms.Bucket classBBucket = classBTerms.getBucketByKey(classABucket.getKeyAsString());
        assertNotNull(classBBucket);
        assertThat(classABucket.getKey(), equalTo(classBBucket.getKey()));
        assertThat(classABucket.getSignificanceScore(), closeTo(classBBucket.getSignificanceScore(), 1.e-5));
    }
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) StringTerms(org.elasticsearch.search.aggregations.bucket.terms.StringTerms) Aggregations(org.elasticsearch.search.aggregations.Aggregations) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 15 with SignificantTerms

use of org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms in project elasticsearch by elastic.

the class TermsShardMinDocCountIT method testShardMinDocCountSignificantTermsTest.

// see https://github.com/elastic/elasticsearch/issues/5998
// Verifies that significant_terms returns no buckets when only minDocCount is set, and the
// expected two buckets once shardMinDocCount is set as well.
public void testShardMinDocCountSignificantTermsTest() throws Exception {
    final String textMappings = randomBoolean() ? "type=long" : "type=text,fielddata=true";
    assertAcked(
        prepareCreate(index)
            .setSettings(SETTING_NUMBER_OF_SHARDS, 1, SETTING_NUMBER_OF_REPLICAS, 0)
            .addMapping(type, "text", textMappings));

    List<IndexRequestBuilder> indexBuilders = new ArrayList<>();
    // high score but low doc freq
    for (String rareTerm : new String[] { "1", "2", "3", "4" }) {
        addTermsDocs(rareTerm, 1, 0, indexBuilders);
    }
    // low score but high doc freq
    for (String frequentTerm : new String[] { "5", "6" }) {
        addTermsDocs(frequentTerm, 3, 1, indexBuilders);
    }
    // make sure the terms all get score > 0 except for this one
    addTermsDocs("7", 0, 3, indexBuilders);
    indexRandom(true, false, indexBuilders);

    // first, check that indeed when not setting the shardMinDocCount parameter 0 terms are returned
    SearchResponse withoutShardMinDocCount = client().prepareSearch(index)
        .addAggregation(
            (filter("inclass", QueryBuilders.termQuery("class", true)))
                .subAggregation(
                    significantTerms("mySignificantTerms")
                        .field("text")
                        .minDocCount(2)
                        .size(2)
                        .executionHint(randomExecutionHint())))
        .execute()
        .actionGet();
    assertSearchResponse(withoutShardMinDocCount);
    InternalFilter inClass = withoutShardMinDocCount.getAggregations().get("inclass");
    SignificantTerms significant = inClass.getAggregations().get("mySignificantTerms");
    assertThat(significant.getBuckets().size(), equalTo(0));

    // then repeat the request with shardMinDocCount set and expect both requested buckets back
    SearchResponse withShardMinDocCount = client().prepareSearch(index)
        .addAggregation(
            (filter("inclass", QueryBuilders.termQuery("class", true)))
                .subAggregation(
                    significantTerms("mySignificantTerms")
                        .field("text")
                        .minDocCount(2)
                        .shardMinDocCount(2)
                        .size(2)
                        .executionHint(randomExecutionHint())))
        .execute()
        .actionGet();
    assertSearchResponse(withShardMinDocCount);
    inClass = withShardMinDocCount.getAggregations().get("inclass");
    significant = inClass.getAggregations().get("mySignificantTerms");
    assertThat(significant.getBuckets().size(), equalTo(2));
}
Also used : IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) InternalFilter(org.elasticsearch.search.aggregations.bucket.filter.InternalFilter) ArrayList(java.util.ArrayList) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Aggregations

SearchResponse (org.elasticsearch.action.search.SearchResponse)23 SignificantTerms (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms)23 ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)23 TermQueryBuilder (org.elasticsearch.index.query.TermQueryBuilder)14 Bucket (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket)6 StringTerms (org.elasticsearch.search.aggregations.bucket.terms.StringTerms)6 AggregationBuilders.significantTerms (org.elasticsearch.search.aggregations.AggregationBuilders.significantTerms)5 Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 HashSet (java.util.HashSet)4 Aggregation (org.elasticsearch.search.aggregations.Aggregation)3 IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude)3 Aggregations (org.elasticsearch.search.aggregations.Aggregations)2 InternalFilter (org.elasticsearch.search.aggregations.bucket.filter.InternalFilter)2 ArrayList (java.util.ArrayList)1 IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder)1 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)1 ChiSquare (org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare)1 GND (org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND)1 JLHScore (org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore)1