Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
The class IncludeExcludeTests, method testPartitionedEquals.
public void testPartitionedEquals() throws IOException {
    IncludeExclude serialized = serialize(new IncludeExclude(3, 20), IncludeExclude.INCLUDE_FIELD);
    assertFalse(serialized.isRegexBased());
    assertTrue(serialized.isPartitionBased());
    IncludeExclude same = new IncludeExclude(3, 20);
    assertEquals(serialized, same);
    assertEquals(serialized.hashCode(), same.hashCode());
    IncludeExclude differentParam1 = new IncludeExclude(4, 20);
    assertFalse(serialized.equals(differentParam1));
    assertTrue(serialized.hashCode() != differentParam1.hashCode());
    IncludeExclude differentParam2 = new IncludeExclude(3, 21);
    assertFalse(serialized.equals(differentParam2));
    assertTrue(serialized.hashCode() != differentParam2.hashCode());
}
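The partition-based constructor exercised by this test is the same one a caller would hand to a terms aggregation. Below is a minimal sketch, not taken from the project, of wiring such a filter into a TermsAggregationBuilder (the builder and its includeExclude(...) setter appear in the TermsTests snippet further down this page; the aggregation name "partitioned_terms" and the field name "user_id" are made-up values):

// Minimal sketch (assumed names): request partition 3 of 20, so that 20
// successive requests each see a disjoint slice of the terms in "user_id".
IncludeExclude partitionFilter = new IncludeExclude(3, 20); // (partition, numPartitions)
TermsAggregationBuilder partitionedTerms =
        new TermsAggregationBuilder("partitioned_terms", null) // null value type, as in TermsTests below
                .field("user_id")
                .includeExclude(partitionFilter);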
Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
The class SignificantTermsAggregatorFactory, method doCreateInternal.
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket,
        List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (collectsFromSingleBucket == false) {
        return asMultiBucketAggregator(this, context, parent);
    }
    numberOfAggregatorsCreated++;
    BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (bucketCountThresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // The user has not made a shardSize selection.
        // Use default heuristic to avoid any wrong-ranking caused by distributed counting,
        // but request double the usual amount.
        // We typically need more than the number of "top" terms requested by other aggregations
        // as the significance algorithm is in less of a position to down-select at shard level -
        // some of the things we want to find have only one occurrence on each shard and as such
        // are impossible to differentiate from non-significant terms at that early stage.
        bucketCountThresholds.setShardSize(2 * BucketUtils.suggestShardSideQueueSize(
                bucketCountThresholds.getRequiredSize(), context.numberOfShards()));
    }
    if (valuesSource instanceof ValuesSource.Bytes) {
        ExecutionMode execution = null;
        if (executionHint != null) {
            execution = ExecutionMode.fromString(executionHint);
        }
        if (!(valuesSource instanceof ValuesSource.Bytes.WithOrdinals)) {
            execution = ExecutionMode.MAP;
        }
        if (execution == null) {
            if (Aggregator.descendsFromBucketAggregator(parent)) {
                execution = ExecutionMode.GLOBAL_ORDINALS_HASH;
            } else {
                execution = ExecutionMode.GLOBAL_ORDINALS;
            }
        }
        assert execution != null;
        DocValueFormat format = config.format();
        if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression "
                    + "style include/exclude settings as they can only be applied to string fields. "
                    + "Use an array of values for include/exclude clauses");
        }
        return execution.create(name, factories, valuesSource, format, bucketCountThresholds, includeExclude,
                context, parent, significanceHeuristic, this, pipelineAggregators, metaData);
    }
    if ((includeExclude != null) && (includeExclude.isRegexBased())) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression "
                + "style include/exclude settings as they can only be applied to string fields. "
                + "Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }
    if (valuesSource instanceof ValuesSource.Numeric) {
        if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) {
            throw new UnsupportedOperationException("No support for examining floating point numerics");
        }
        IncludeExclude.LongFilter longFilter = null;
        if (includeExclude != null) {
            longFilter = includeExclude.convertToLongFilter(config.format());
        }
        return new SignificantLongTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(),
                bucketCountThresholds, context, parent, significanceHeuristic, this, longFilter,
                pipelineAggregators, metaData);
    }
    throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field ["
            + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
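The shard-size heuristic in the method above is easy to check in isolation. The following is a minimal sketch of that arithmetic, assuming only the BucketUtils.suggestShardSideQueueSize(...) helper already referenced in the method; the literal values 10 and 5 are illustrative, not taken from the project:

// When the user leaves shardSize at its default, significant_terms asks each
// shard for double the usual shard-side queue size derived from the requested
// size and the number of shards.
int requiredSize = 10;   // hypothetical "size" from the request
int numberOfShards = 5;  // hypothetical shard count of the target index
int shardSize = 2 * BucketUtils.suggestShardSideQueueSize(requiredSize, numberOfShards);
// The method above then applies it via bucketCountThresholds.setShardSize(shardSize).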
Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
The class TermsTests, method createTestAggregatorBuilder.
@Override
protected TermsAggregationBuilder createTestAggregatorBuilder() {
    String name = randomAsciiOfLengthBetween(3, 20);
    TermsAggregationBuilder factory = new TermsAggregationBuilder(name, null);
    String field = randomAsciiOfLengthBetween(3, 20);
    int randomFieldBranch = randomInt(2);
    switch (randomFieldBranch) {
        case 0:
            factory.field(field);
            break;
        case 1:
            factory.field(field);
            factory.script(new Script("_value + 1"));
            break;
        case 2:
            factory.script(new Script("doc[" + field + "] + 1"));
            break;
        default:
            fail();
    }
    if (randomBoolean()) {
        factory.missing("MISSING");
    }
    if (randomBoolean()) {
        factory.size(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        factory.shardSize(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        int minDocCount = randomInt(4);
        switch (minDocCount) {
            case 0:
                break;
            case 1:
            case 2:
            case 3:
            case 4:
                minDocCount = randomIntBetween(0, Integer.MAX_VALUE);
                break;
            default:
                fail();
        }
        factory.minDocCount(minDocCount);
    }
    if (randomBoolean()) {
        int shardMinDocCount = randomInt(4);
        switch (shardMinDocCount) {
            case 0:
                break;
            case 1:
            case 2:
            case 3:
            case 4:
                shardMinDocCount = randomIntBetween(0, Integer.MAX_VALUE);
                break;
            default:
                fail();
        }
        factory.shardMinDocCount(shardMinDocCount);
    }
    if (randomBoolean()) {
        factory.collectMode(randomFrom(SubAggCollectionMode.values()));
    }
    if (randomBoolean()) {
        factory.executionHint(randomFrom(executionHints));
    }
    if (randomBoolean()) {
        factory.format("###.##");
    }
    if (randomBoolean()) {
        IncludeExclude incExc = null;
        switch (randomInt(6)) {
            case 0:
                incExc = new IncludeExclude(new RegExp("foobar"), null);
                break;
            case 1:
                incExc = new IncludeExclude(null, new RegExp("foobaz"));
                break;
            case 2:
                incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz"));
                break;
            case 3:
                SortedSet<BytesRef> includeValues = new TreeSet<>();
                int numIncs = randomIntBetween(1, 20);
                for (int i = 0; i < numIncs; i++) {
                    includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                SortedSet<BytesRef> excludeValues = null;
                incExc = new IncludeExclude(includeValues, excludeValues);
                break;
            case 4:
                SortedSet<BytesRef> includeValues2 = null;
                SortedSet<BytesRef> excludeValues2 = new TreeSet<>();
                int numExcs2 = randomIntBetween(1, 20);
                for (int i = 0; i < numExcs2; i++) {
                    excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                incExc = new IncludeExclude(includeValues2, excludeValues2);
                break;
            case 5:
                SortedSet<BytesRef> includeValues3 = new TreeSet<>();
                int numIncs3 = randomIntBetween(1, 20);
                for (int i = 0; i < numIncs3; i++) {
                    includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                SortedSet<BytesRef> excludeValues3 = new TreeSet<>();
                int numExcs3 = randomIntBetween(1, 20);
                for (int i = 0; i < numExcs3; i++) {
                    excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
                }
                incExc = new IncludeExclude(includeValues3, excludeValues3);
                break;
            case 6:
                final int numPartitions = randomIntBetween(1, 100);
                final int partition = randomIntBetween(0, numPartitions - 1);
                incExc = new IncludeExclude(partition, numPartitions);
                break;
            default:
                fail();
        }
        factory.includeExclude(incExc);
    }
    if (randomBoolean()) {
        List<Terms.Order> order = randomOrder();
        factory.order(order);
    }
    if (randomBoolean()) {
        factory.showTermDocCountError(randomBoolean());
    }
    return factory;
}
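Cases 3-5 above build exact-value filters from sorted sets of BytesRef terms rather than regexes or partitions. A minimal sketch of that flavor outside the randomized test, using made-up terms ("alice", "bob"), a made-up aggregation and field name, and the same builder wiring shown above:

// Exact-value include list (no regex, no partitioning), mirroring case 3.
SortedSet<BytesRef> includeValues = new TreeSet<>();
includeValues.add(new BytesRef("alice"));
includeValues.add(new BytesRef("bob"));
SortedSet<BytesRef> excludeValues = null; // no exclude set, as in case 3
IncludeExclude exactFilter = new IncludeExclude(includeValues, excludeValues);
TermsAggregationBuilder builder =
        new TermsAggregationBuilder("users", null) // hypothetical aggregation name
                .field("user")                     // hypothetical field name
                .includeExclude(exactFilter);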
Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
The class StringTermsIT, method testSingleValueFieldWithRegexFiltering.
public void testSingleValueFieldWithRegexFiltering() throws Exception {
    // include without exclude
    // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(new IncludeExclude("val00.+", null)))
            .execute().actionGet();
    assertSearchResponse(response);
    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(10));
    for (int i = 0; i < 10; i++) {
        Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
        assertThat(bucket, notNullValue());
        assertThat(key(bucket), equalTo("val00" + i));
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
    // include and exclude
    // we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
    response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(new IncludeExclude("val00.+", "(val000|val001)")))
            .execute().actionGet();
    assertSearchResponse(response);
    terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(8));
    for (int i = 2; i < 10; i++) {
        Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
        assertThat(bucket, notNullValue());
        assertThat(key(bucket), equalTo("val00" + i));
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
    // exclude without include
    // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(new IncludeExclude(null, new RegExp("val0[1-9]+.+"))))
            .execute().actionGet();
    assertSearchResponse(response);
    terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(10));
    for (int i = 0; i < 10; i++) {
        Terms.Bucket bucket = terms.getBucketByKey("val00" + i);
        assertThat(bucket, notNullValue());
        assertThat(key(bucket), equalTo("val00" + i));
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
}
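The test mixes the two regex-accepting constructors: one takes the pattern as a String, the other an already-constructed RegExp (as imported by these tests). A minimal sketch, reusing the "val00.+" pattern from the test, showing that both flavors report themselves as regex-based via the predicates from IncludeExcludeTests at the top of this page:

// Both constructors yield a regex-based (not partition-based) filter.
IncludeExclude fromString = new IncludeExclude("val00.+", null);
IncludeExclude fromRegExp = new IncludeExclude(new RegExp("val00.+"), null);
assert fromString.isRegexBased() && fromRegExp.isRegexBased();
assert fromString.isPartitionBased() == false;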
Use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
The class AvgBucketIT, method testNoBuckets.
public void testNoBuckets() throws Exception {
    SearchResponse response = client().prepareSearch("idx")
            .addAggregation(terms("terms")
                    .field("tag")
                    .includeExclude(new IncludeExclude(null, "tag.*"))
                    .subAggregation(sum("sum").field(SINGLE_VALUED_FIELD_NAME)))
            .addAggregation(avgBucket("avg_bucket", "terms>sum"))
            .execute().actionGet();
    assertSearchResponse(response);
    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    List<Terms.Bucket> buckets = terms.getBuckets();
    assertThat(buckets.size(), equalTo(0));
    InternalSimpleValue avgBucketValue = response.getAggregations().get("avg_bucket");
    assertThat(avgBucketValue, notNullValue());
    assertThat(avgBucketValue.getName(), equalTo("avg_bucket"));
    assertThat(avgBucketValue.value(), equalTo(Double.NaN));
}
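Because the exclude-only pattern "tag.*" matches every term in the "tag" field, the terms aggregation produces zero buckets and the avg_bucket pipeline reports NaN. A minimal sketch, assuming nothing beyond the constructor and predicates already shown on this page, confirming the flavor of that filter:

// The exclude-only filter used above is regex-based, not partition-based.
IncludeExclude excludeOnly = new IncludeExclude(null, "tag.*");
assert excludeOnly.isRegexBased();
assert excludeOnly.isPartitionBased() == false;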