Search in sources :

Example 1 with SubAggCollectionMode

use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.

the class CopyToMapperIntegrationIT method testDynamicTemplateCopyTo.

/**
 * Indexes {@code 2 * recordCount} documents into an index created with the dynamic-template
 * mapping, searches for the documents flagged as "even", and checks both the hit count and the
 * number of terms buckets produced for {@code test_field} and {@code test_field_raw}.
 */
public void testDynamicTemplateCopyTo() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test-idx").addMapping("doc", createDynamicTemplateMapping()));

    final int recordCount = between(1, 200);
    final int totalDocs = recordCount * 2;

    // Ids run 0..totalDocs-1; every even id is flagged "even", so exactly recordCount docs match the query below.
    int docId = 0;
    while (docId < totalDocs) {
        final boolean even = docId % 2 == 0;
        client().prepareIndex("test-idx", "doc", Integer.toString(docId))
                .setSource("test_field", "test " + docId, "even", even)
                .get();
        docId++;
    }
    client().admin().indices().prepareRefresh("test-idx").execute().actionGet();

    // Both terms aggregations share the same randomly chosen collection mode.
    final SubAggCollectionMode mode = randomFrom(SubAggCollectionMode.values());
    final SearchResponse response = client().prepareSearch("test-idx")
            .setQuery(QueryBuilders.termQuery("even", true))
            .addAggregation(AggregationBuilders.terms("test").field("test_field").size(totalDocs).collectMode(mode))
            .addAggregation(AggregationBuilders.terms("test_raw").field("test_field_raw").size(totalDocs).collectMode(mode))
            .execute()
            .actionGet();

    assertThat(response.getHits().getTotalHits(), equalTo((long) recordCount));

    // NOTE(review): the one extra bucket on "test_field" is presumably the shared "test" token
    // produced by analysis -- confirm against createDynamicTemplateMapping().
    final Terms testTerms = response.getAggregations().get("test");
    assertThat(testTerms.getBuckets().size(), equalTo(recordCount + 1));

    final Terms rawTerms = response.getAggregations().get("test_raw");
    assertThat(rawTerms.getBuckets().size(), equalTo(recordCount));
}
Also used : SubAggCollectionMode(org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 2 with SubAggCollectionMode

use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.

the class CombiIT method testSubAggregationForTopAggregationOnUnmappedField.

/**
 * Regression test for sub-aggregations nested under a top-level aggregation on an unmapped field.
 * <p>
 * Some top-level aggregations (e.g. histogram / date_histogram) that run on an unmapped field
 * produce an estimated bucket count of zero. The sub-aggregator is created with that estimate
 * taken into account, which used to cause an ArrayIndexOutOfBoundsException.
 */
public void testSubAggregationForTopAggregationOnUnmappedField() throws Exception {
    // Only "name" and "value" are mapped; the histogram below deliberately targets "value1".
    prepareCreate("idx")
            .addMapping("type", jsonBuilder()
                    .startObject()
                    .startObject("type")
                    .startObject("properties")
                    .startObject("name").field("type", "keyword").endObject()
                    .startObject("value").field("type", "integer").endObject()
                    .endObject()
                    .endObject()
                    .endObject())
            .execute()
            .actionGet();
    ensureSearchable("idx");

    final SubAggCollectionMode mode = randomFrom(SubAggCollectionMode.values());
    final SearchResponse response = client().prepareSearch("idx")
            .addAggregation(
                    histogram("values").field("value1").interval(1)
                            .subAggregation(terms("names").field("name").collectMode(mode)))
            .execute()
            .actionGet();

    // Nothing was indexed, so there are no hits and the histogram must exist but be empty.
    assertThat(response.getHits().getTotalHits(), Matchers.equalTo(0L));
    final Histogram histo = response.getAggregations().get("values");
    assertThat(histo, notNullValue());
    assertThat(histo.getBuckets().isEmpty(), is(true));
}
Also used : Histogram(org.elasticsearch.search.aggregations.bucket.histogram.Histogram) SubAggCollectionMode(org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 3 with SubAggCollectionMode

use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.

the class CombiIT method testMultipleAggsOnSameField_WithDifferentRequiredValueSourceType.

/**
 * Verifies that several aggregations working on the same field, each requiring a different
 * value source type, can all run in one request.
 * <p>
 * This used to fail because the ValueSource was cached by field name alone: if the cached value
 * source was of type "bytes" and another aggregation on the field needed to see it as "numeric",
 * it didn't work. Value sources are now cached under a custom key (field name + ValueSource
 * type), so there is no conflict.
 */
public void testMultipleAggsOnSameField_WithDifferentRequiredValueSourceType() throws Exception {
    createIndex("idx");

    final IndexRequestBuilder[] requests = new IndexRequestBuilder[randomInt(30)];
    // Maps each indexed "value" to the number of documents carrying it.
    final IntIntMap expectedCounts = new IntIntHashMap();
    long docsWithoutValue = 0;

    for (int slot = 0; slot < requests.length; slot++) {
        final String name = "name_" + randomIntBetween(1, 10);
        if (rarely()) {
            // Occasionally index a document that has a name but no "value" field.
            docsWithoutValue++;
            requests[slot] = client().prepareIndex("idx", "type")
                    .setSource(jsonBuilder().startObject().field("name", name).endObject());
            continue;
        }
        final int value = randomIntBetween(1, 10);
        final int seenSoFar = expectedCounts.getOrDefault(value, 0);
        expectedCounts.put(value, seenSoFar + 1);
        requests[slot] = client().prepareIndex("idx", "type")
                .setSource(jsonBuilder().startObject().field("name", name).field("value", value).endObject());
    }
    indexRandom(true, requests);
    ensureSearchable();

    // Two aggregations on the same "value" field: a missing agg and a terms agg.
    final SubAggCollectionMode mode = randomFrom(SubAggCollectionMode.values());
    final SearchResponse response = client().prepareSearch("idx")
            .addAggregation(missing("missing_values").field("value"))
            .addAggregation(terms("values").field("value").collectMode(mode))
            .execute()
            .actionGet();
    assertSearchResponse(response);

    final Aggregations aggregations = response.getAggregations();

    final Missing missingAgg = aggregations.get("missing_values");
    assertNotNull(missingAgg);
    assertThat(missingAgg.getDocCount(), equalTo(docsWithoutValue));

    final Terms termsAgg = aggregations.get("values");
    assertNotNull(termsAgg);
    final Collection<Terms.Bucket> termBuckets = termsAgg.getBuckets();
    assertThat(termBuckets.size(), equalTo(expectedCounts.size()));

    // Remove each returned key from the expected map; nothing may be left over afterwards.
    for (Terms.Bucket termBucket : termBuckets) {
        final Number key = (Number) termBucket.getKey();
        expectedCounts.remove(key.intValue());
    }
    assertTrue(expectedCounts.isEmpty());
}
Also used : Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) IntIntMap(com.carrotsearch.hppc.IntIntMap) IntIntHashMap(com.carrotsearch.hppc.IntIntHashMap) Missing(org.elasticsearch.search.aggregations.bucket.missing.Missing) SubAggCollectionMode(org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode)

Example 4 with SubAggCollectionMode

use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.

the class TermsAggregatorFactory method doCreateInternal.

/**
 * Creates the concrete terms aggregator for the given values source.
 * <p>
 * When {@code collectsFromSingleBucket} is false the call is delegated to
 * {@code asMultiBucketAggregator}. Otherwise the method works on a local
 * {@code BucketCountThresholds} built from the configured one, optionally adjusts its shard
 * size, validates it, and then dispatches on the type of {@code valuesSource}:
 * <ul>
 * <li>{@code ValuesSource.Bytes}: an {@code ExecutionMode} is taken from the execution hint,
 * forced to {@code MAP} when the source has no ordinals, or otherwise chosen by the heuristic
 * below; the aggregator is created through {@code execution.create(...)}.</li>
 * <li>{@code ValuesSource.Numeric}: a {@code DoubleTermsAggregator} for floating point sources,
 * a {@code LongTermsAggregator} otherwise.</li>
 * </ul>
 *
 * @param valuesSource             the values source to aggregate on; must be a bytes or numeric source
 * @param parent                   the parent aggregator, passed on to the created aggregator
 * @param collectsFromSingleBucket whether the aggregator collects from a single bucket
 * @param pipelineAggregators      passed through unchanged to the created aggregator
 * @param metaData                 passed through unchanged to the created aggregator
 * @return the aggregator implementation selected for {@code valuesSource}
 * @throws AggregationExecutionException if regex-based include/exclude is combined with a non-RAW
 *         format or with a non-bytes source, or if the values source is neither bytes nor numeric
 * @throws IOException declared by the signature
 */
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    // Not collecting from a single bucket: delegate to the multi-bucket wrapper.
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    // Local thresholds built from the configured ones; the shard-size adjustment below is
    // applied to this local, which shadows the field of the same name.
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC) && bucketCountThresholds.getShardSize() == TermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection. Use default
        // heuristic to avoid any wrong-ranking caused by distributed
        // counting
        bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    bucketCountThresholds.ensureValidity();
    if (valuesSource instanceof ValuesSource.Bytes) {
        // Start from the user-supplied execution hint, if any; null means "choose a default below".
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        // In some cases, using ordinals is just not supported: override it
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        // Global-ordinal statistics; both stay at -1 when they are not computed.
        final long maxOrd;
        final double ratio;
        // NOTE(review): the cast below relies on MAP (forced above for sources without ordinals)
        // not reporting needsGlobalOrdinals() -- confirm in ExecutionMode.
        if (execution == null || execution.needsGlobalOrdinals()) {
            ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
            IndexSearcher indexSearcher = context.searcher();
            maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
            // Global max ordinal relative to the number of documents in the reader.
            ratio = maxOrd / ((double) indexSearcher.getIndexReader().numDocs());
        } else {
            maxOrd = -1;
            ratio = -1;
        }
        // Let's try to use a good default
        if (execution == null) {
            // When this aggregator descends from a bucket aggregator, or partition-based
            // include/exclude is used, ordinals would be sparse so we opt for hash
            if (Aggregator.descendsFromBucketAggregator(parent) || (includeExclude != null && includeExclude.isPartitionBased())) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                // The low-cardinality mode is only considered when factories is EMPTY.
                if (factories == AggregatorFactories.EMPTY) {
                    if (ratio <= 0.5 && maxOrd <= 2048) {
                        // 0.5: At least we need reduce the number of global
                        // ordinals look-ups by half
                        // 2048: GLOBAL_ORDINALS_LOW_CARDINALITY has
                        // additional memory usage, which directly linked to
                        // maxOrd, so we need to limit.
                        execution = ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY;
                    } else {
                        execution = ExecutionMode.GLOBAL_ORDINALS;
                    }
                } else {
                    execution = ExecutionMode.GLOBAL_ORDINALS;
                }
            }
        }
        // Collection mode: an explicit collectMode wins; otherwise DEPTH_FIRST, unless factories
        // is not EMPTY, in which case subAggCollectionMode(...) decides.
        SubAggCollectionMode cm = collectMode;
        if (cm == null) {
            cm = SubAggCollectionMode.DEPTH_FIRST;
            if (factories != AggregatorFactories.EMPTY) {
                cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), maxOrd);
            }
        }
        DocValueFormat format = config.format();
        // Regex-based include/exclude is rejected for any format other than RAW.
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds, includeExclude, context, parent, cm, showTermDocCountError, pipelineAggregators, metaData);
    }
    // From here on the source is not a bytes source, so regex-based include/exclude is never allowed.
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        IncludeExclude.LongFilter longFilter = null;
        // Same collection-mode defaulting as in the bytes branch; -1 is passed as maxOrd,
        // matching the "not computed" value used there.
        SubAggCollectionMode cm = collectMode;
        if (cm == null) {
            if (factories != AggregatorFactories.EMPTY) {
                cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), -1);
            } else {
                cm = SubAggCollectionMode.DEPTH_FIRST;
            }
        }
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            // Despite its name, longFilter also holds the filter converted for double values.
            if (includeExclude != null) {
                longFilter = includeExclude.convertToDoubleFilter();
            }
            return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order, bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter, pipelineAggregators, metaData);
        }
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), order, bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter, pipelineAggregators, metaData);
    }
    // Neither a bytes nor a numeric source: unsupported.
    throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) DocValueFormat(org.elasticsearch.search.DocValueFormat) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) ValuesSource(org.elasticsearch.search.aggregations.support.ValuesSource) BucketCountThresholds(org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds) SubAggCollectionMode(org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode) AggregationExecutionException(org.elasticsearch.search.aggregations.AggregationExecutionException)

Aggregations

SubAggCollectionMode (org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode)4 SearchResponse (org.elasticsearch.action.search.SearchResponse)3 Terms (org.elasticsearch.search.aggregations.bucket.terms.Terms)2 ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)2 IntIntHashMap (com.carrotsearch.hppc.IntIntHashMap)1 IntIntMap (com.carrotsearch.hppc.IntIntMap)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder)1 DocValueFormat (org.elasticsearch.search.DocValueFormat)1 AggregationExecutionException (org.elasticsearch.search.aggregations.AggregationExecutionException)1 Histogram (org.elasticsearch.search.aggregations.bucket.histogram.Histogram)1 Missing (org.elasticsearch.search.aggregations.bucket.missing.Missing)1 BucketCountThresholds (org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds)1 IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude)1 ValuesSource (org.elasticsearch.search.aggregations.support.ValuesSource)1