Search in sources :

Example 1 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class SignificantTermsAggregatorFactory method doCreateInternal.

@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    numberOfAggregatorsCreated++;
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (bucketCountThresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection .
        // Use default heuristic to avoid any wrong-ranking caused by
        // distributed counting
        // but request double the usual amount.
        // We typically need more than the number of "top" terms requested
        // by other aggregations
        // as the significance algorithm is in less of a position to
        // down-select at shard-level -
        // some of the things we want to find have only one occurrence on
        // each shard and as
        // such are impossible to differentiate from non-significant terms
        // at that early stage.
        bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    if (valuesSource instanceof ValuesSource.Bytes) {
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        if (execution == null) {
            if (Aggregator.descendsFromBucketAggregator(parent)) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                execution = ExecutionMode.GLOBAL_ORDINALS;
            }
        }
        assert execution != null;
        DocValueFormat format = config.format();
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic, this, pipelineAggregators, metaData);
    }
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            throw new UnsupportedOperationException("No support for examining floating point numerics");
        }
        IncludeExclude.LongFilter longFilter = null;
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators, metaData);
    }
    throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
Also used : BucketCountThresholds(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds) DocValueFormat(org.elasticsearch.search.DocValueFormat) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) ValuesSource(org.elasticsearch.search.aggregations.support.ValuesSource) AggregationExecutionException(org.elasticsearch.search.aggregations.AggregationExecutionException)

Example 2 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class DoubleTermsIT method testIncludeExcludeResults.

private void testIncludeExcludeResults(double[] includes, double[] excludes, double[] expecteds) {
    SearchResponse response = client().prepareSearch("idx").setTypes("type").addAggregation(terms("terms").field(SINGLE_VALUED_FIELD_NAME).includeExclude(new IncludeExclude(includes, excludes)).collectMode(randomFrom(SubAggCollectionMode.values()))).execute().actionGet();
    assertSearchResponse(response);
    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(expecteds.length));
    for (int i = 0; i < expecteds.length; i++) {
        Terms.Bucket bucket = terms.getBucketByKey("" + expecteds[i]);
        assertThat(bucket, notNullValue());
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
}
Also used : Bucket(org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 3 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class LongTermsIT method runTestFieldWithPartitionedFiltering.

private void runTestFieldWithPartitionedFiltering(String field) throws Exception {
    // Find total number of unique terms
    SearchResponse allResponse = client().prepareSearch("idx").setTypes("type").addAggregation(terms("terms").field(field).collectMode(randomFrom(SubAggCollectionMode.values()))).execute().actionGet();
    assertSearchResponse(allResponse);
    Terms terms = allResponse.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    int expectedCardinality = terms.getBuckets().size();
    // Gather terms using partitioned aggregations
    final int numPartitions = randomIntBetween(2, 4);
    Set<Number> foundTerms = new HashSet<>();
    for (int partition = 0; partition < numPartitions; partition++) {
        SearchResponse response = client().prepareSearch("idx").setTypes("type").addAggregation(terms("terms").field(field).includeExclude(new IncludeExclude(partition, numPartitions)).collectMode(randomFrom(SubAggCollectionMode.values()))).execute().actionGet();
        assertSearchResponse(response);
        terms = response.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        for (Bucket bucket : terms.getBuckets()) {
            assertFalse(foundTerms.contains(bucket.getKeyAsNumber()));
            foundTerms.add(bucket.getKeyAsNumber());
        }
    }
    assertEquals(expectedCardinality, foundTerms.size());
}
Also used : Bucket(org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) HashSet(java.util.HashSet)

Example 4 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class SignificantTermsIT method testIncludeExcludeExactValues.

public void testIncludeExcludeExactValues() throws Exception {
    String[] incExcTerms = { "weller", "nosuchterm" };
    SearchResponse response = client().prepareSearch("test").setQuery(new TermQueryBuilder("description", "weller")).addAggregation(significantTerms("mySignificantTerms").field("description").executionHint(randomExecutionHint()).includeExclude(new IncludeExclude(null, incExcTerms))).get();
    assertSearchResponse(response);
    SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
    Set<String> terms = new HashSet<>();
    for (Bucket topTerm : topTerms) {
        terms.add(topTerm.getKeyAsString());
    }
    assertEquals(new HashSet<String>(Arrays.asList("jam", "council", "style", "paul", "of", "the")), terms);
    response = client().prepareSearch("test").setQuery(new TermQueryBuilder("description", "weller")).addAggregation(significantTerms("mySignificantTerms").field("description").executionHint(randomExecutionHint()).includeExclude(new IncludeExclude(incExcTerms, null))).get();
    assertSearchResponse(response);
    topTerms = response.getAggregations().get("mySignificantTerms");
    terms = new HashSet<>();
    for (Bucket topTerm : topTerms) {
        terms.add(topTerm.getKeyAsString());
    }
    assertThat(terms, hasSize(1));
    assertThat(terms.contains("weller"), is(true));
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) Bucket(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) Matchers.containsString(org.hamcrest.Matchers.containsString) TermQueryBuilder(org.elasticsearch.index.query.TermQueryBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) HashSet(java.util.HashSet)

Example 5 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class SignificantTermsIT method testStructuredAnalysisWithIncludeExclude.

public void testStructuredAnalysisWithIncludeExclude() throws Exception {
    long[] excludeTerms = { MUSIC_CATEGORY };
    SearchResponse response = client().prepareSearch("test").setSearchType(SearchType.QUERY_THEN_FETCH).setQuery(new TermQueryBuilder("description", "paul")).setFrom(0).setSize(60).setExplain(true).addAggregation(significantTerms("mySignificantTerms").field("fact_category").executionHint(randomExecutionHint()).minDocCount(1).includeExclude(new IncludeExclude(null, excludeTerms))).execute().actionGet();
    assertSearchResponse(response);
    SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
    Number topCategory = (Number) topTerms.getBuckets().iterator().next().getKey();
    assertTrue(topCategory.equals(Long.valueOf(OTHER_CATEGORY)));
}
Also used : SignificantTerms(org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) TermQueryBuilder(org.elasticsearch.index.query.TermQueryBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Aggregations

IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude)33 SearchResponse (org.elasticsearch.action.search.SearchResponse)19 ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)18 Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms)16 Bucket (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket)7 Bucket (org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket)6 HashSet (java.util.HashSet)5 TreeSet (java.util.TreeSet)4 BytesRef (org.apache.lucene.util.BytesRef)4 Matchers.containsString (org.hamcrest.Matchers.containsString)4 RegExp (org.apache.lucene.util.automaton.RegExp)3 TermQueryBuilder (org.elasticsearch.index.query.TermQueryBuilder)3 SignificantTerms (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms)3 LongBitSet (org.apache.lucene.util.LongBitSet)2 QueryParseContext (org.elasticsearch.index.query.QueryParseContext)2 Script (org.elasticsearch.script.Script)2 DocValueFormat (org.elasticsearch.search.DocValueFormat)2 AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException)2 Bucket (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket)2 BucketCountThresholds (org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds)2