
Example 11 with IncludeExclude

Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

From class IncludeExcludeTests, method testPartitionedEquals.

public void testPartitionedEquals() throws IOException {
    IncludeExclude serialized = serialize(new IncludeExclude(3, 20), IncludeExclude.INCLUDE_FIELD);
    assertFalse(serialized.isRegexBased());
    assertTrue(serialized.isPartitionBased());
    IncludeExclude same = new IncludeExclude(3, 20);
    assertEquals(serialized, same);
    assertEquals(serialized.hashCode(), same.hashCode());
    IncludeExclude differentParam1 = new IncludeExclude(4, 20);
    assertFalse(serialized.equals(differentParam1));
    assertTrue(serialized.hashCode() != differentParam1.hashCode());
    IncludeExclude differentParam2 = new IncludeExclude(3, 21);
    assertFalse(serialized.equals(differentParam2));
    assertTrue(serialized.hashCode() != differentParam2.hashCode());
}
Also used: IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude)
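
The constructor exercised above, IncludeExclude(int partition, int numberOfPartitions), builds a partition-based filter rather than a regex- or value-based one. Below is a minimal sketch, not taken from the Elasticsearch sources, of how such a filter could be attached to a terms aggregation; the builder name "my_terms" and the field name are hypothetical, and only the IncludeExclude calls mirror the test.

import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;

// Hedged sketch: consume only partition 3 of 20, as in the test above.
static TermsAggregationBuilder partitionedTerms() {
    IncludeExclude partitionFilter = new IncludeExclude(3, 20);   // (partition, numberOfPartitions)
    assert partitionFilter.isPartitionBased();                    // partition form...
    assert partitionFilter.isRegexBased() == false;               // ...not a regex filter
    return new TermsAggregationBuilder("my_terms", null)          // name and field are hypothetical
            .field("some_keyword_field")
            .includeExclude(partitionFilter);
}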

Example 12 with IncludeExclude

Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

From class SignificantTermsAggregatorFactory, method doCreateInternal.

@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    numberOfAggregatorsCreated++;
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (bucketCountThresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection. Use the default heuristic to
        // avoid wrong ranking caused by distributed counting, but request double the
        // usual amount. We typically need more than the number of "top" terms
        // requested by other aggregations, because the significance algorithm is in
        // less of a position to down-select at shard level - some of the things we
        // want to find have only one occurrence on each shard and as such are
        // impossible to differentiate from non-significant terms at that early stage.
        bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    if (valuesSource instanceof ValuesSource.Bytes) {
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        if (execution == null) {
            if (Aggregator.descendsFromBucketAggregator(parent)) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                execution = ExecutionMode.GLOBAL_ORDINALS;
            }
        }
        assert execution != null;
        DocValueFormat format = config.format();
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude, context, parent, significanceHeuristic, this, pipelineAggregators, metaData);
    }
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            throw new UnsupportedOperationException("No support for examining floating point numerics");
        }
        IncludeExclude.LongFilter longFilter = null;
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter, pipelineAggregators, metaData);
    }
    throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
Also used: BucketCountThresholds (org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds), DocValueFormat (org.elasticsearch.search.DocValueFormat), IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude), ValuesSource (org.elasticsearch.search.aggregations.support.ValuesSource), AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException)
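
When the user has not set shardSize, the branch above doubles the queue size suggested by BucketUtils before distributed counting. A minimal sketch of that heuristic, assuming the two-argument suggestShardSideQueueSize(finalSize, numberOfShards) overload called above and the org.elasticsearch.search.aggregations.bucket.BucketUtils location; the inputs are illustrative, not values read from the factory.

import org.elasticsearch.search.aggregations.bucket.BucketUtils;

// Hedged sketch of the default shard-size heuristic used by significant_terms above.
static int significantTermsShardSize(int requiredSize, int numberOfShards) {
    // Ask for double the usual shard-side queue size, because the significance
    // heuristic cannot down-select as aggressively on each individual shard.
    return 2 * BucketUtils.suggestShardSideQueueSize(requiredSize, numberOfShards);
}

For example, significantTermsShardSize(10, 5) requests twice whatever shard-side queue size BucketUtils would normally suggest for a top-10 request over five shards.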

Example 13 with IncludeExclude

Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

From class TermsTests, method createTestAggregatorBuilder.

@Override
protected TermsAggregationBuilder createTestAggregatorBuilder() {
    String name = randomAsciiOfLengthBetween(3, 20);
    TermsAggregationBuilder factory = new TermsAggregationBuilder(name, null);
    String field = randomAsciiOfLengthBetween(3, 20);
    int randomFieldBranch = randomInt(2);
    switch(randomFieldBranch) {
        case 0:
            factory.field(field);
            break;
        case 1:
            factory.field(field);
            factory.script(new Script("_value + 1"));
            break;
        case 2:
            factory.script(new Script("doc[" + field + "] + 1"));
            break;
        default:
            fail();
    }
    if (randomBoolean()) {
        factory.missing("MISSING");
    }
    if (randomBoolean()) {
        factory.size(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        factory.shardSize(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        int minDocCount = randomInt(4);
        switch(minDocCount) {
            case 0:
                break;
            case 1:
            case 2:
            case 3:
            case 4:
                minDocCount = randomIntBetween(0, Integer.MAX_VALUE);
                break;
            default:
                fail();
        }
        factory.minDocCount(minDocCount);
    }
    if (randomBoolean()) {
        int shardMinDocCount = randomInt(4);
        switch(shardMinDocCount) {
            case 0:
                break;
            case 1:
            case 2:
            case 3:
            case 4:
                shardMinDocCount = randomIntBetween(0, Integer.MAX_VALUE);
                break;
            default:
                fail();
        }
        factory.shardMinDocCount(shardMinDocCount);
    }
    if (randomBoolean()) {
        factory.collectMode(randomFrom(SubAggCollectionMode.values()));
    }
    if (randomBoolean()) {
        factory.executionHint(randomFrom(executionHints));
    }
    if (randomBoolean()) {
        factory.format("###.##");
    }
    if (randomBoolean()) {
        IncludeExclude incExc = null;
        switch(randomInt(6)) {
            case 0:
                incExc = new IncludeExclude(new RegExp("foobar"), null);
                break;
            case 1:
                incExc = new IncludeExclude(null, new RegExp("foobaz"));
                break;
            case 2:
                incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz"));
                break;
            case 3:
                SortedSet<BytesRef> includeValues = new TreeSet<>();
                int numIncs = randomIntBetween(1, 20);
                for (int i = 0; i < numIncs; i++) {
                    includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                SortedSet<BytesRef> excludeValues = null;
                incExc = new IncludeExclude(includeValues, excludeValues);
                break;
            case 4:
                SortedSet<BytesRef> includeValues2 = null;
                SortedSet<BytesRef> excludeValues2 = new TreeSet<>();
                int numExcs2 = randomIntBetween(1, 20);
                for (int i = 0; i < numExcs2; i++) {
                    excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                incExc = new IncludeExclude(includeValues2, excludeValues2);
                break;
            case 5:
                SortedSet<BytesRef> includeValues3 = new TreeSet<>();
                int numIncs3 = randomIntBetween(1, 20);
                for (int i = 0; i < numIncs3; i++) {
                    includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                SortedSet<BytesRef> excludeValues3 = new TreeSet<>();
                int numExcs3 = randomIntBetween(1, 20);
                for (int i = 0; i < numExcs3; i++) {
                    excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                incExc = new IncludeExclude(includeValues3, excludeValues3);
                break;
            case 6:
                final int numPartitions = randomIntBetween(1, 100);
                final int partition = randomIntBetween(0, numPartitions - 1);
                incExc = new IncludeExclude(partition, numPartitions);
                break;
            default:
                fail();
        }
        factory.includeExclude(incExc);
    }
    if (randomBoolean()) {
        List<Terms.Order> order = randomOrder();
        factory.order(order);
    }
    if (randomBoolean()) {
        factory.showTermDocCountError(randomBoolean());
    }
    return factory;
}
Also used: Script (org.elasticsearch.script.Script), TermsAggregationBuilder (org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder), RegExp (org.apache.lucene.util.automaton.RegExp), TreeSet (java.util.TreeSet), IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude), BytesRef (org.apache.lucene.util.BytesRef)
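
The switch above cycles through the IncludeExclude constructor variants the builder test can emit. A compact, hedged recap of those forms using only constructors that appear in the test; every pattern and value below is a placeholder.

import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;

// Hedged recap of the constructor variants exercised by createTestAggregatorBuilder above.
static void includeExcludeVariants() {
    IncludeExclude regexInclude = new IncludeExclude(new RegExp("foo.*"), null);              // include by regex
    IncludeExclude regexExclude = new IncludeExclude(null, new RegExp("bar.*"));              // exclude by regex
    IncludeExclude regexBoth = new IncludeExclude(new RegExp("foo.*"), new RegExp("bar.*"));  // both

    SortedSet<BytesRef> exactTerms = new TreeSet<>();                                         // exact-value form
    exactTerms.add(new BytesRef("foo"));
    IncludeExclude exactInclude = new IncludeExclude(exactTerms, null);

    IncludeExclude partitioned = new IncludeExclude(0, 16);                                   // partition 0 of 16
}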

Example 14 with IncludeExclude

Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

From class StringTermsIT, method testSingleValueFieldWithRegexFiltering.

public void testSingleValueFieldWithRegexFiltering() throws Exception {
    // include without exclude
    // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(new IncludeExclude("val00.+", null)))
            .execute().actionGet();
    assertSearchResponse(response);
    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(10));
    for (int i = 0; i < 10; i++) {
        Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
        assertThat(bucket, notNullValue());
        assertThat(key(bucket), equalTo("val00" + i));
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
    // include and exclude
    // we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
    response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(new IncludeExclude("val00.+", "(val000|val001)")))
            .execute().actionGet();
    assertSearchResponse(response);
    terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(8));
    for (int i = 2; i < 10; i++) {
        Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
        assertThat(bucket, notNullValue());
        assertThat(key(bucket), equalTo("val00" + i));
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
    // exclude without include
    // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(new IncludeExclude(null, new RegExp("val0[1-9]+.+"))))
            .execute().actionGet();
    assertSearchResponse(response);
    terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(10));
    for (int i = 0; i < 10; i++) {
        Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
        assertThat(bucket, notNullValue());
        assertThat(key(bucket), equalTo("val00" + i));
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
}
Also used: Bucket (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket), RegExp (org.apache.lucene.util.automaton.RegExp), IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude), Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms), SearchResponse (org.elasticsearch.action.search.SearchResponse), ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)
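
Note that the test mixes two spellings of the same kind of filter: the first two requests pass plain pattern strings to IncludeExclude, while the last one passes Lucene RegExp objects. A small hedged sketch of the two forms, assuming (as the isRegexBased checks in Examples 11 and 12 suggest) that both count as regex-based filters; the patterns are copied from the test for illustration only.

import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;

// Hedged sketch: two spellings of a regex-style include/exclude filter.
static void regexConstructorForms() {
    IncludeExclude fromStrings = new IncludeExclude("val00.+", "(val000|val001)");
    IncludeExclude fromRegExps = new IncludeExclude(new RegExp("val00.+"), new RegExp("(val000|val001)"));
    assert fromStrings.isRegexBased();   // string patterns are treated as regular expressions
    assert fromRegExps.isRegexBased();
}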

Example 15 with IncludeExclude

Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

From class AvgBucketIT, method testNoBuckets.

public void testNoBuckets() throws Exception {
    SearchResponse response = client().prepareSearch("idx")
            .addAggregation(terms("terms")
                    .field("tag")
                    .includeExclude(new IncludeExclude(null, "tag.*"))
                    .subAggregation(sum("sum").field(SINGLE_VALUED_FIELD_NAME)))
            .addAggregation(avgBucket("avg_bucket", "terms>sum"))
            .execute().actionGet();
    assertSearchResponse(response);
    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    List<Terms.Bucket> buckets = terms.getBuckets();
    assertThat(buckets.size(), equalTo(0));
    InternalSimpleValue avgBucketValue = response.getAggregations().get("avg_bucket");
    assertThat(avgBucketValue, notNullValue());
    assertThat(avgBucketValue.getName(), equalTo("avg_bucket"));
    assertThat(avgBucketValue.value(), equalTo(Double.NaN));
}
Also used: PipelineAggregatorBuilders.avgBucket (org.elasticsearch.search.aggregations.pipeline.PipelineAggregatorBuilders.avgBucket), Bucket (org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket), IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude), Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms), SearchResponse (org.elasticsearch.action.search.SearchResponse), ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)
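
The exclude-everything filter above leaves the terms aggregation with zero buckets, so the avg_bucket pipeline has nothing to average over the "terms>sum" path and reports Double.NaN. A hedged, standalone sketch of the same aggregation tree built with the static builder helpers; the sum field name is a placeholder for SINGLE_VALUED_FIELD_NAME.

import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregatorBuilders;

// Hedged sketch of the aggregation tree used in testNoBuckets above. Excluding every
// "tag*" value produces zero term buckets, so avg_bucket over "terms>sum" is NaN.
static void noBucketsTree() {
    TermsAggregationBuilder terms = AggregationBuilders.terms("terms")
            .field("tag")
            .includeExclude(new IncludeExclude(null, "tag.*"))
            .subAggregation(AggregationBuilders.sum("sum").field("value"));   // "value" is a placeholder field
    PipelineAggregatorBuilders.avgBucket("avg_bucket", "terms>sum");          // sibling pipeline, bucket path "terms>sum"
}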

Aggregations

IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) 33
SearchResponse (org.elasticsearch.action.search.SearchResponse) 19
ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) 18
Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms) 16
Bucket (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket) 7
Bucket (org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket) 6
HashSet (java.util.HashSet) 5
TreeSet (java.util.TreeSet) 4
BytesRef (org.apache.lucene.util.BytesRef) 4
Matchers.containsString (org.hamcrest.Matchers.containsString) 4
RegExp (org.apache.lucene.util.automaton.RegExp) 3
TermQueryBuilder (org.elasticsearch.index.query.TermQueryBuilder) 3
SignificantTerms (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms) 3
LongBitSet (org.apache.lucene.util.LongBitSet) 2
QueryParseContext (org.elasticsearch.index.query.QueryParseContext) 2
Script (org.elasticsearch.script.Script) 2
DocValueFormat (org.elasticsearch.search.DocValueFormat) 2
AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException) 2
Bucket (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket) 2
BucketCountThresholds (org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds) 2