Search in sources :

Example 31 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class DoubleTermsIT method runTestFieldWithPartitionedFiltering.

/**
 * Checks that partition-based include/exclude filtering splits the terms of
 * {@code field} into disjoint partitions whose union has the same number of
 * terms as a single unpartitioned terms aggregation.
 *
 * @param field name of the field to aggregate on
 */
private void runTestFieldWithPartitionedFiltering(String field) throws Exception {
    // Baseline: one unpartitioned request tells us how many distinct terms to expect.
    SearchResponse baselineResponse = client().prepareSearch("idx")
            .setTypes("type")
            .addAggregation(terms("terms")
                    .field(field)
                    .size(10000)
                    .collectMode(randomFrom(SubAggCollectionMode.values())))
            .execute()
            .actionGet();
    assertSearchResponse(baselineResponse);
    Terms baselineTerms = baselineResponse.getAggregations().get("terms");
    assertThat(baselineTerms, notNullValue());
    assertThat(baselineTerms.getName(), equalTo("terms"));
    int expectedCardinality = baselineTerms.getBuckets().size();

    // Now request every partition in turn and pool the keys that come back.
    final int numPartitions = randomIntBetween(2, 4);
    Set<Number> seenKeys = new HashSet<>();
    for (int partitionId = 0; partitionId < numPartitions; partitionId++) {
        SearchResponse partitionResponse = client().prepareSearch("idx")
                .setTypes("type")
                .addAggregation(terms("terms")
                        .field(field)
                        .includeExclude(new IncludeExclude(partitionId, numPartitions))
                        .collectMode(randomFrom(SubAggCollectionMode.values())))
                .execute()
                .actionGet();
        assertSearchResponse(partitionResponse);
        Terms partitionTerms = partitionResponse.getAggregations().get("terms");
        assertThat(partitionTerms, notNullValue());
        assertThat(partitionTerms.getName(), equalTo("terms"));
        for (Bucket bucket : partitionTerms.getBuckets()) {
            // Set.add returns false for a duplicate, i.e. a key already returned by another partition.
            boolean firstSighting = seenKeys.add(bucket.getKeyAsNumber());
            assertTrue(firstSighting);
        }
    }
    assertEquals(expectedCardinality, seenKeys.size());
}
Also used : Bucket(org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) HashSet(java.util.HashSet)

Example 32 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class TermsAggregatorFactory method doCreateInternal.

/**
 * Creates the terms aggregator appropriate for the given values source.
 * <p>
 * When {@code collectsFromSingleBucket} is {@code false} the work is delegated to
 * {@code asMultiBucketAggregator}. Otherwise a copy of the configured bucket count
 * thresholds is adjusted and validated, and then:
 * <ul>
 * <li>for {@link ValuesSource.Bytes} sources an {@code ExecutionMode} is chosen (from the
 * execution hint if present, otherwise by the heuristics below) and asked to create the
 * aggregator;</li>
 * <li>for {@link ValuesSource.Numeric} sources a {@code DoubleTermsAggregator} or a
 * {@code LongTermsAggregator} is created depending on {@code isFloatingPoint()}.</li>
 * </ul>
 *
 * @param valuesSource             source of the values to aggregate
 * @param parent                   parent aggregator; consulted when picking the default execution mode
 * @param collectsFromSingleBucket when {@code false}, creation is delegated to {@code asMultiBucketAggregator}
 * @param pipelineAggregators      passed through unchanged to the created aggregator
 * @param metaData                 passed through unchanged to the created aggregator
 * @return the aggregator that was created
 * @throws AggregationExecutionException if a regex-based include/exclude is used where this method
 *                                       rejects it (non-RAW format on a bytes source, or any non-bytes
 *                                       source), or if the values source is neither bytes nor numeric
 * @throws IOException                   declared by the signature; NOTE(review): which callee raises it is
 *                                       not visible in this block
 */
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    // Work on a copy so the factory's own thresholds are not modified by the adjustments below.
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    // Only applies when the order is not by term (asc/desc) and the shard size still equals the default.
    if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) && bucketCountThresholds.getShardSize() == TermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection. Use default
        // heuristic to avoid any wrong-ranking caused by distributed
        // counting
        bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    bucketCountThresholds.ensureValidity();
    if (valuesSource instanceof ValuesSource.Bytes) {
        // null means "no explicit choice yet"; a default is picked further down.
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        // In some cases, using ordinals is just not supported: override it
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        final long maxOrd;
        final double ratio;
        // If execution is still null here, the source must be WithOrdinals (otherwise MAP was forced
        // above), so the cast below is safe in that case.
        // NOTE(review): for a non-null execution the cast relies on MAP.needsGlobalOrdinals()
        // returning false - confirm against ExecutionMode.
        if (execution == null || execution.needsGlobalOrdinals()) {
            ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
            IndexSearcher indexSearcher = context.searcher();
            maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
            // Global max ordinal relative to the number of documents in the reader.
            ratio = maxOrd / ((double) indexSearcher.getIndexReader().numDocs());
        } else {
            // Sentinel values: ordinals were not consulted for this execution mode.
            maxOrd = -1;
            ratio = -1;
        }
        // Let's try to use a good default
        if (execution == null) {
            // ordinals would be sparse so we opt for hash
            if (Aggregator.descendsFromBucketAggregator(parent) || (includeExclude != null && includeExclude.isPartitionBased())) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                // The low-cardinality mode is only considered when there are no sub-aggregations.
                if (factories == AggregatorFactories.EMPTY) {
                    if (ratio <= 0.5 && maxOrd <= 2048) {
                        // 0.5: At least we need reduce the number of global
                        // ordinals look-ups by half
                        // 2048: GLOBAL_ORDINALS_LOW_CARDINALITY has
                        // additional memory usage, which directly linked to
                        // maxOrd, so we need to limit.
                        execution = ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY;
                    } else {
                        execution = ExecutionMode.GLOBAL_ORDINALS;
                    }
                } else {
                    execution = ExecutionMode.GLOBAL_ORDINALS;
                }
            }
        }
        // Collect mode: an explicit setting wins; otherwise DEPTH_FIRST without sub-aggregations,
        // or whatever subAggCollectionMode(...) returns when sub-aggregations exist.
        SubAggCollectionMode cm = collectMode;
        if (cm == null) {
            cm = SubAggCollectionMode.DEPTH_FIRST;
            if (factories != AggregatorFactories.EMPTY) {
                cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), maxOrd);
            }
        }
        DocValueFormat format = config.format();
        // Regex-based include/exclude is rejected on a bytes source unless the format is RAW.
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, cm, showTermDocCountError, pipelineAggregators, metaData);
    }
    // From here on the source is not a bytes source, so regex-based include/exclude is always rejected.
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        // Stays null when no include/exclude was configured.
        IncludeExclude.LongFilter longFilter = null;
        SubAggCollectionMode cm = collectMode;
        if (cm == null) {
            if (factories != AggregatorFactories.EMPTY) {
                // -1 is passed where the bytes branch passes maxOrd.
                // NOTE(review): presumably means "cardinality unknown" - confirm in subAggCollectionMode.
                cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), -1);
            } else {
                cm = SubAggCollectionMode.DEPTH_FIRST;
            }
        }
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            // The double filter is held in the same LongFilter-typed variable as the long filter below.
            if (includeExclude != null) {
                longFilter = includeExclude.convertToDoubleFilter();
            }
            return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order, bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter, pipelineAggregators, metaData);
        }
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order, bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter, pipelineAggregators, metaData);
    }
    // Neither a bytes nor a numeric values source.
    throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) DocValueFormat(org.elasticsearch.search.DocValueFormat) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) ValuesSource(org.elasticsearch.search.aggregations.support.ValuesSource) BucketCountThresholds(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds) SubAggCollectionMode(org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode) AggregationExecutionException(org.elasticsearch.search.aggregations.AggregationExecutionException)

Example 33 with IncludeExclude

use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.

the class SignificantTermsAggregationBuilder method getParser.

/**
 * Builds the {@link Aggregator.Parser} for significant terms aggregations.
 * <p>
 * An {@link ObjectParser} is configured with the value-source fields, the sizing and
 * threshold options, the execution hint, the background filter, the include/exclude
 * clauses, and one object field per name in the supplied heuristic registry. The
 * returned parser runs that object parser against a fresh
 * {@link SignificantTermsAggregationBuilder}.
 *
 * @param significanceHeuristicParserRegistry registry whose names are each declared as an
 *                                            object field; the matching parser is looked up
 *                                            when that field is parsed
 * @return a parser that produces a {@link SignificantTermsAggregationBuilder}
 */
public static Aggregator.Parser getParser(ParseFieldRegistry<SignificanceHeuristicParser> significanceHeuristicParserRegistry) {
    final ObjectParser<SignificantTermsAggregationBuilder, QueryParseContext> objectParser =
            new ObjectParser<>(SignificantTermsAggregationBuilder.NAME);
    ValuesSourceParserHelper.declareAnyFields(objectParser, true, true);

    // Sizing / threshold options reuse the field names of the plain terms aggregation.
    objectParser.declareInt(
            SignificantTermsAggregationBuilder::shardSize,
            TermsAggregationBuilder.SHARD_SIZE_FIELD_NAME);
    objectParser.declareLong(
            SignificantTermsAggregationBuilder::minDocCount,
            TermsAggregationBuilder.MIN_DOC_COUNT_FIELD_NAME);
    objectParser.declareLong(
            SignificantTermsAggregationBuilder::shardMinDocCount,
            TermsAggregationBuilder.SHARD_MIN_DOC_COUNT_FIELD_NAME);
    objectParser.declareInt(
            SignificantTermsAggregationBuilder::size,
            TermsAggregationBuilder.REQUIRED_SIZE_FIELD_NAME);
    objectParser.declareString(
            SignificantTermsAggregationBuilder::executionHint,
            TermsAggregationBuilder.EXECUTION_HINT_FIELD_NAME);
    objectParser.declareObject(
            SignificantTermsAggregationBuilder::backgroundFilter,
            (xContent, parseContext) -> parseContext.parseInnerQueryBuilder(),
            SignificantTermsAggregationBuilder.BACKGROUND_FILTER);

    // Include and exclude are parsed separately and merged into whatever the builder
    // already holds; the include always goes first in the merge call.
    objectParser.declareField(
            (builder, include) -> builder.includeExclude(IncludeExclude.merge(include, builder.includeExclude())),
            IncludeExclude::parseInclude,
            IncludeExclude.INCLUDE_FIELD,
            ObjectParser.ValueType.OBJECT_ARRAY_OR_STRING);
    objectParser.declareField(
            (builder, exclude) -> builder.includeExclude(IncludeExclude.merge(builder.includeExclude(), exclude)),
            IncludeExclude::parseExclude,
            IncludeExclude.EXCLUDE_FIELD,
            ObjectParser.ValueType.STRING_ARRAY);

    // One object field per registered heuristic name; the registry lookup happens when the field is parsed.
    for (String heuristicName : significanceHeuristicParserRegistry.getNames()) {
        objectParser.declareObject(
                SignificantTermsAggregationBuilder::significanceHeuristic,
                (xContent, parseContext) ->
                        significanceHeuristicParserRegistry.lookupReturningNullIfNotFound(heuristicName).parse(parseContext),
                new ParseField(heuristicName));
    }

    return new Aggregator.Parser() {

        @Override
        public AggregationBuilder parse(String aggregationName, QueryParseContext context) throws IOException {
            return objectParser.parse(
                    context.parser(),
                    new SignificantTermsAggregationBuilder(aggregationName, null),
                    context);
        }
    };
}
Also used : ObjectParser(org.elasticsearch.common.xcontent.ObjectParser) QueryParseContext(org.elasticsearch.index.query.QueryParseContext) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) SignificanceHeuristicParser(org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser) ParseField(org.elasticsearch.common.ParseField)

Aggregations

IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude)33 SearchResponse (org.elasticsearch.action.search.SearchResponse)19 ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)18 Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms)16 Bucket (org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket)7 Bucket (org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket)6 HashSet (java.util.HashSet)5 TreeSet (java.util.TreeSet)4 BytesRef (org.apache.lucene.util.BytesRef)4 Matchers.containsString (org.hamcrest.Matchers.containsString)4 RegExp (org.apache.lucene.util.automaton.RegExp)3 TermQueryBuilder (org.elasticsearch.index.query.TermQueryBuilder)3 SignificantTerms (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms)3 LongBitSet (org.apache.lucene.util.LongBitSet)2 QueryParseContext (org.elasticsearch.index.query.QueryParseContext)2 Script (org.elasticsearch.script.Script)2 DocValueFormat (org.elasticsearch.search.DocValueFormat)2 AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException)2 Bucket (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms.Bucket)2 BucketCountThresholds (org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds)2