Use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.
The class CopyToMapperIntegrationIT, method testDynamicTemplateCopyTo.
public void testDynamicTemplateCopyTo() throws Exception {
    assertAcked(client().admin().indices().prepareCreate("test-idx")
        .addMapping("doc", createDynamicTemplateMapping()));
    int recordCount = between(1, 200);
    for (int i = 0; i < recordCount * 2; i++) {
        client().prepareIndex("test-idx", "doc", Integer.toString(i))
            .setSource("test_field", "test " + i, "even", i % 2 == 0)
            .get();
    }
    client().admin().indices().prepareRefresh("test-idx").execute().actionGet();
    SubAggCollectionMode aggCollectionMode = randomFrom(SubAggCollectionMode.values());
    SearchResponse response = client().prepareSearch("test-idx")
        .setQuery(QueryBuilders.termQuery("even", true))
        .addAggregation(AggregationBuilders.terms("test").field("test_field")
            .size(recordCount * 2).collectMode(aggCollectionMode))
        .addAggregation(AggregationBuilders.terms("test_raw").field("test_field_raw")
            .size(recordCount * 2).collectMode(aggCollectionMode))
        .execute().actionGet();
    assertThat(response.getHits().getTotalHits(), equalTo((long) recordCount));
    assertThat(((Terms) response.getAggregations().get("test")).getBuckets().size(), equalTo(recordCount + 1));
    assertThat(((Terms) response.getAggregations().get("test_raw")).getBuckets().size(), equalTo(recordCount));
}
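The createDynamicTemplateMapping() helper is not shown on this page. A minimal sketch of what such a mapping could look like, assuming a dynamic template that maps strings as analyzed text and copies each one into a not-analyzed *_raw sibling. That assumption would explain the two assertions above: each source value "test i" analyzes into the tokens "test" and "i", so the aggregation on the analyzed field sees recordCount distinct numbers plus the shared "test" token (recordCount + 1 buckets), while the raw field keeps exactly recordCount distinct values. Names and details here are hypothetical, not the actual helper:

    // Hypothetical sketch of the elided helper; the real mapping in the
    // Elasticsearch test suite may differ in details.
    private XContentBuilder createDynamicTemplateMapping() throws IOException {
        return XContentFactory.jsonBuilder().startObject().startObject("doc")
            .startArray("dynamic_templates")
                // *_raw fields become not-analyzed keyword fields
                .startObject().startObject("template_raw")
                    .field("match", "*_raw")
                    .field("match_mapping_type", "string")
                    .startObject("mapping").field("type", "keyword").endObject()
                .endObject().endObject()
                // every other string field is analyzed text, copied into a *_raw sibling
                .startObject().startObject("template_all")
                    .field("match", "*")
                    .field("match_mapping_type", "string")
                    .startObject("mapping")
                        .field("type", "text")
                        .field("fielddata", true)       // a terms agg on an analyzed text field needs fielddata
                        .field("copy_to", "{name}_raw") // {name} expands to the concrete field name
                    .endObject()
                .endObject().endObject()
            .endArray()
            .endObject().endObject();
    }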
Use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.
The class CombiIT, method testSubAggregationForTopAggregationOnUnmappedField.
/**
 * Some top aggs (e.g. date_histogram or histogram) that are executed on unmapped fields will
 * generate an estimated bucket count of zero. When the sub aggregator is then created, it takes
 * this estimation into account. This used to cause an ArrayIndexOutOfBoundsException.
 */
public void testSubAggregationForTopAggregationOnUnmappedField() throws Exception {
    prepareCreate("idx").addMapping("type", jsonBuilder()
        .startObject().startObject("type").startObject("properties")
            .startObject("name").field("type", "keyword").endObject()
            .startObject("value").field("type", "integer").endObject()
        .endObject().endObject().endObject())
        .execute().actionGet();
    ensureSearchable("idx");
    SubAggCollectionMode aggCollectionMode = randomFrom(SubAggCollectionMode.values());
    SearchResponse searchResponse = client().prepareSearch("idx")
        .addAggregation(histogram("values").field("value1").interval(1)
            .subAggregation(terms("names").field("name").collectMode(aggCollectionMode)))
        .execute().actionGet();
    assertThat(searchResponse.getHits().getTotalHits(), Matchers.equalTo(0L));
    Histogram values = searchResponse.getAggregations().get("values");
    assertThat(values, notNullValue());
    assertThat(values.getBuckets().isEmpty(), is(true));
}
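A toy illustration of the failure mode the Javadoc describes; this is not the actual Elasticsearch internals, just the general pattern of pre-sizing per-bucket storage from an estimated bucket count:

    // Hypothetical sketch: a sub aggregator that naively sizes its per-bucket
    // storage from the parent's estimated bucket count. On an unmapped field
    // the estimate is zero, so the first collected document used to walk off
    // the end of the array.
    long estimatedBucketCount = 0;                        // top agg on an unmapped field
    long[] docCounts = new long[(int) estimatedBucketCount];
    // docCounts[0]++;                                    // ArrayIndexOutOfBoundsException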
Use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.
The class CombiIT, method testMultipleAggsOnSameField_WithDifferentRequiredValueSourceType.
/**
 * Making sure that if there are multiple aggregations working on the same field, yet requiring
 * different value source types, they can all still work. It used to fail because we cached the
 * ValuesSource by field name alone: if the cached value source was of type "bytes" and another
 * aggregation on the field required to see it as "numeric", it didn't work. Now we cache the
 * value sources by a custom key (field name + ValuesSource type), so there is no conflict.
 */
public void testMultipleAggsOnSameField_WithDifferentRequiredValueSourceType() throws Exception {
    createIndex("idx");
    IndexRequestBuilder[] builders = new IndexRequestBuilder[randomInt(30)];
    IntIntMap values = new IntIntHashMap();
    long missingValues = 0;
    for (int i = 0; i < builders.length; i++) {
        String name = "name_" + randomIntBetween(1, 10);
        if (rarely()) {
            missingValues++;
            builders[i] = client().prepareIndex("idx", "type")
                .setSource(jsonBuilder().startObject().field("name", name).endObject());
        } else {
            int value = randomIntBetween(1, 10);
            values.put(value, values.getOrDefault(value, 0) + 1);
            builders[i] = client().prepareIndex("idx", "type")
                .setSource(jsonBuilder().startObject().field("name", name).field("value", value).endObject());
        }
    }
    indexRandom(true, builders);
    ensureSearchable();
    SubAggCollectionMode aggCollectionMode = randomFrom(SubAggCollectionMode.values());
    SearchResponse response = client().prepareSearch("idx")
        .addAggregation(missing("missing_values").field("value"))
        .addAggregation(terms("values").field("value").collectMode(aggCollectionMode))
        .execute().actionGet();
    assertSearchResponse(response);
    Aggregations aggs = response.getAggregations();
    Missing missing = aggs.get("missing_values");
    assertNotNull(missing);
    assertThat(missing.getDocCount(), equalTo(missingValues));
    Terms terms = aggs.get("values");
    assertNotNull(terms);
    Collection<Terms.Bucket> buckets = terms.getBuckets();
    assertThat(buckets.size(), equalTo(values.size()));
    for (Terms.Bucket bucket : buckets) {
        values.remove(((Number) bucket.getKey()).intValue());
    }
    assertTrue(values.isEmpty());
}
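A minimal sketch of the composite cache key idea the Javadoc describes. The class and field names here are hypothetical; the real Elasticsearch code is structured differently:

    // Hypothetical sketch: keying the cache by (field name, ValuesSource type)
    // instead of field name alone, so a "bytes" view and a "numeric" view of
    // the same field no longer collide in the cache.
    final class ValuesSourceCacheKey {
        final String fieldName;
        final Class<?> valuesSourceType;

        ValuesSourceCacheKey(String fieldName, Class<?> valuesSourceType) {
            this.fieldName = fieldName;
            this.valuesSourceType = valuesSourceType;
        }

        @Override
        public boolean equals(Object o) {
            if (!(o instanceof ValuesSourceCacheKey)) {
                return false;
            }
            ValuesSourceCacheKey other = (ValuesSourceCacheKey) o;
            return fieldName.equals(other.fieldName) && valuesSourceType.equals(other.valuesSourceType);
        }

        @Override
        public int hashCode() {
            return java.util.Objects.hash(fieldName, valuesSourceType);
        }
    }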
Use of org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode in project elasticsearch by elastic.
The class TermsAggregatorFactory, method doCreateInternal.
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent,
        boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators,
        Map<String, Object> metaData) throws IOException {
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (!(order == InternalOrder.TERM_ASC || order == InternalOrder.TERM_DESC)
            && bucketCountThresholds.getShardSize() == TermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection. Use the default
        // heuristic to avoid any wrong-ranking caused by distributed counting.
        bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(
            bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    bucketCountThresholds.ensureValidity();
    if (valuesSource instanceof ValuesSource.Bytes) {
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        // In some cases, using ordinals is just not supported: override it
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        final long maxOrd;
        final double ratio;
        if (execution == null || execution.needsGlobalOrdinals()) {
            ValuesSource.Bytes.WithOrdinals valueSourceWithOrdinals = (ValuesSource.Bytes.WithOrdinals) valuesSource;
            IndexSearcher indexSearcher = context.searcher();
            maxOrd = valueSourceWithOrdinals.globalMaxOrd(indexSearcher);
            ratio = maxOrd / ((double) indexSearcher.getIndexReader().numDocs());
        } else {
            maxOrd = -1;
            ratio = -1;
        }
        // Let's try to use a good default
        if (execution == null) {
            // ordinals would be sparse, so we opt for hash
            if (Aggregator.descendsFromBucketAggregator(parent)
                    || (includeExclude != null && includeExclude.isPartitionBased())) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                if (factories == AggregatorFactories.EMPTY) {
                    if (ratio <= 0.5 && maxOrd <= 2048) {
                        // 0.5: we want to reduce the number of global ordinal
                        // look-ups by at least half.
                        // 2048: GLOBAL_ORDINALS_LOW_CARDINALITY has additional
                        // memory overhead that grows with maxOrd, so cap it.
                        execution = ExecutionMode.GLOBAL_ORDINALS_LOW_CARDINALITY;
                    } else {
                        execution = ExecutionMode.GLOBAL_ORDINALS;
                    }
                } else {
                    execution = ExecutionMode.GLOBAL_ORDINALS;
                }
            }
        }
        SubAggCollectionMode cm = collectMode;
        if (cm == null) {
            cm = SubAggCollectionMode.DEPTH_FIRST;
            if (factories != AggregatorFactories.EMPTY) {
                cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), maxOrd);
            }
        }
        DocValueFormat format = config.format();
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression "
                + "style include/exclude settings as they can only be applied to string fields. "
                + "Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, order, format, bucketCountThresholds,
            includeExclude, context, parent, cm, showTermDocCountError, pipelineAggregators, metaData);
    }
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression "
            + "style include/exclude settings as they can only be applied to string fields. "
            + "Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        IncludeExclude.LongFilter longFilter = null;
        SubAggCollectionMode cm = collectMode;
        if (cm == null) {
            if (factories != AggregatorFactories.EMPTY) {
                cm = subAggCollectionMode(bucketCountThresholds.getShardSize(), -1);
            } else {
                cm = SubAggCollectionMode.DEPTH_FIRST;
            }
        }
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            if (includeExclude != null) {
                longFilter = includeExclude.convertToDoubleFilter();
            }
            return new DoubleTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
                order, bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter,
                pipelineAggregators, metaData);
        }
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new LongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
            order, bucketCountThresholds, context, parent, cm, showTermDocCountError, longFilter,
            pipelineAggregators, metaData);
    }
    throw new AggregationExecutionException("terms aggregation cannot be applied to field ["
        + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
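The execution-mode and collect-mode heuristics above only apply when the caller leaves those choices unset; both can be forced through the Java client via TermsAggregationBuilder. A short usage sketch (the index and field names here are made up):

    // Force a specific execution mode and sub-aggregation collection mode
    // instead of relying on the defaults chosen in doCreateInternal above.
    SearchResponse resp = client().prepareSearch("my-idx")              // hypothetical index
        .addAggregation(AggregationBuilders.terms("tags")
            .field("tag")                                               // hypothetical field
            .executionHint("map")                                       // skip global ordinals entirely
            .collectMode(SubAggCollectionMode.BREADTH_FIRST)            // defer sub-agg collection to top buckets
            .subAggregation(AggregationBuilders.terms("codes").field("code")))
        .get();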