use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
the class SignificantTermsAggregatorFactory method doCreateInternal.
/**
 * Creates the aggregator that collects significant terms for the given values source.
 *
 * <p>When the aggregator would not be collecting from a single bucket, the work is delegated to
 * {@code asMultiBucketAggregator}. Otherwise a private copy of the bucket count thresholds is made,
 * the shard size is enlarged if it still equals the builder default, and an aggregator is selected
 * according to the kind of values source: bytes sources go through an {@link ExecutionMode}, integer
 * numeric sources get a {@link SignificantLongTermsAggregator}.
 *
 * @param valuesSource            the source of values to aggregate on
 * @param parent                  the parent aggregator, forwarded to the created aggregator
 * @param collectsFromSingleBucket whether the aggregator collects from a single bucket only
 * @param pipelineAggregators     pipeline aggregators forwarded to the created aggregator
 * @param metaData                metadata forwarded to the created aggregator
 * @return the aggregator for this request
 * @throws AggregationExecutionException if a regex-based include/exclude is combined with a non-raw
 *         format or a non-bytes values source, or if the values source is neither bytes nor numeric
 * @throws UnsupportedOperationException if the numeric values source is floating point
 */
@Override
protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket,
        List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
    if (!collectsFromSingleBucket) {
        return asMultiBucketAggregator(this, context, parent);
    }

    numberOfAggregatorsCreated++;

    // Work on a copy so that the factory-level thresholds are never modified.
    final BucketCountThresholds thresholds = new BucketCountThresholds(this.bucketCountThresholds);
    if (thresholds.getShardSize() == SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) {
        // No explicit shardSize was chosen by the user. Start from the default heuristic, which guards
        // against wrong ranking caused by distributed counting, but ask for twice as much: the
        // significance algorithm can do little down-selection at shard level, because some of the terms
        // we are looking for occur only once per shard and cannot yet be told apart from
        // non-significant terms at that early stage.
        thresholds.setShardSize(
                2 * BucketUtils.suggestShardSideQueueSize(thresholds.getRequiredSize(), context.numberOfShards()));
    }

    if (valuesSource instanceof ValuesSource.Bytes) {
        // The hint (if any) is parsed first, before the ordinals check may override the result.
        ExecutionMode mode = executionHint == null ? null : ExecutionMode.fromString(executionHint);
        if ((valuesSource instanceof ValuesSource.Bytes.WithOrdinals) == false) {
            // Without ordinals only the map-based execution is possible, whatever the hint said.
            mode = ExecutionMode.MAP;
        } else if (mode == null) {
            mode = Aggregator.descendsFromBucketAggregator(parent)
                    ? ExecutionMode.GLOBAL_ORDINALS_HASH
                    : ExecutionMode.GLOBAL_ORDINALS;
        }
        assert mode != null;

        final DocValueFormat bytesFormat = config.format();
        final boolean regexFilter = includeExclude != null && includeExclude.isRegexBased();
        if (regexFilter && bytesFormat != DocValueFormat.RAW) {
            throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
                    + "settings as they can only be applied to string fields. Use an array of values for include/exclude clauses");
        }
        return mode.create(name, factories, valuesSource, bytesFormat, thresholds, includeExclude, context, parent,
                significanceHeuristic, this, pipelineAggregators, metaData);
    }

    // Anything below is not a bytes source, so regular expressions can never apply.
    if (includeExclude != null && includeExclude.isRegexBased()) {
        throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude "
                + "settings as they can only be applied to string fields. Use an array of numeric values for include/exclude clauses used to filter numeric fields");
    }

    if (valuesSource instanceof ValuesSource.Numeric) {
        final ValuesSource.Numeric numericSource = (ValuesSource.Numeric) valuesSource;
        if (numericSource.isFloatingPoint()) {
            throw new UnsupportedOperationException("No support for examining floating point numerics");
        }
        final DocValueFormat numericFormat = config.format();
        final IncludeExclude.LongFilter longFilter =
                includeExclude == null ? null : includeExclude.convertToLongFilter(numericFormat);
        return new SignificantLongTermsAggregator(name, factories, numericSource, numericFormat, thresholds, context, parent,
                significanceHeuristic, this, longFilter, pipelineAggregators, metaData);
    }

    throw new AggregationExecutionException("significant_terms aggregation cannot be applied to field [" + config.fieldContext().field() + "]. It can only be applied to numeric or string fields.");
}
use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
the class DoubleTermsIT method testIncludeExcludeResults.
/**
 * Runs a terms aggregation over the single-valued field of index {@code idx}, filtered by the given
 * include/exclude values, and checks that the response holds exactly one bucket per expected key,
 * each with a document count of one.
 *
 * @param includes  values handed to the {@link IncludeExclude} filter as includes
 * @param excludes  values handed to the {@link IncludeExclude} filter as excludes
 * @param expecteds the keys that must be present in the response
 */
private void testIncludeExcludeResults(double[] includes, double[] excludes, double[] expecteds) {
    SearchResponse response = client().prepareSearch("idx")
            .setTypes("type")
            .addAggregation(
                    terms("terms")
                            .field(SINGLE_VALUED_FIELD_NAME)
                            .includeExclude(new IncludeExclude(includes, excludes))
                            .collectMode(randomFrom(SubAggCollectionMode.values())))
            .execute()
            .actionGet();
    assertSearchResponse(response);

    Terms result = response.getAggregations().get("terms");
    assertThat(result, notNullValue());
    assertThat(result.getName(), equalTo("terms"));
    assertThat(result.getBuckets().size(), equalTo(expecteds.length));

    // Buckets are looked up by the string form of each expected double.
    for (double expectedKey : expecteds) {
        Terms.Bucket match = result.getBucketByKey(String.valueOf(expectedKey));
        assertThat(match, notNullValue());
        assertThat(match.getDocCount(), equalTo(1L));
    }
}
use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
the class LongTermsIT method runTestFieldWithPartitionedFiltering.
/**
 * Checks partition-based filtering of a terms aggregation on the given field.
 *
 * <p>An unfiltered aggregation first establishes how many buckets are returned. The same aggregation
 * is then run once per partition; no key may show up in more than one partition, and the number of
 * distinct keys collected over all partitions must equal the unfiltered bucket count.
 *
 * @param field name of the field to aggregate on
 */
private void runTestFieldWithPartitionedFiltering(String field) throws Exception {
    // Baseline: bucket count of the unfiltered aggregation.
    SearchResponse unfiltered = client().prepareSearch("idx")
            .setTypes("type")
            .addAggregation(
                    terms("terms")
                            .field(field)
                            .collectMode(randomFrom(SubAggCollectionMode.values())))
            .execute()
            .actionGet();
    assertSearchResponse(unfiltered);
    Terms terms = unfiltered.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    int expectedCardinality = terms.getBuckets().size();

    // Collect the keys partition by partition.
    final int numPartitions = randomIntBetween(2, 4);
    Set<Number> seenKeys = new HashSet<>();
    for (int p = 0; p < numPartitions; p++) {
        SearchResponse partitioned = client().prepareSearch("idx")
                .setTypes("type")
                .addAggregation(
                        terms("terms")
                                .field(field)
                                .includeExclude(new IncludeExclude(p, numPartitions))
                                .collectMode(randomFrom(SubAggCollectionMode.values())))
                .execute()
                .actionGet();
        assertSearchResponse(partitioned);
        terms = partitioned.getAggregations().get("terms");
        assertThat(terms, notNullValue());
        assertThat(terms.getName(), equalTo("terms"));
        for (Bucket bucket : terms.getBuckets()) {
            // A key seen in an earlier partition must not be returned again.
            Number key = bucket.getKeyAsNumber();
            assertFalse(seenKeys.contains(key));
            seenKeys.add(key);
        }
    }

    assertEquals(expectedCardinality, seenKeys.size());
}
use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
the class SignificantTermsIT method testIncludeExcludeExactValues.
/**
 * Exercises exact-value include/exclude filtering on a significant_terms aggregation over the
 * {@code description} field for documents matching the term "weller".
 *
 * <p>The same two values are first passed as the second {@link IncludeExclude} constructor argument
 * and then as the first one; the expected bucket keys are asserted for each case.
 */
public void testIncludeExcludeExactValues() throws Exception {
    String[] incExcTerms = { "weller", "nosuchterm" };

    // Pass 1: values supplied as the second constructor argument.
    SearchResponse response = client().prepareSearch("test")
            .setQuery(new TermQueryBuilder("description", "weller"))
            .addAggregation(
                    significantTerms("mySignificantTerms")
                            .field("description")
                            .executionHint(randomExecutionHint())
                            .includeExclude(new IncludeExclude(null, incExcTerms)))
            .get();
    assertSearchResponse(response);
    SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
    Set<String> keys = new HashSet<>();
    for (Bucket bucket : topTerms) {
        keys.add(bucket.getKeyAsString());
    }
    Set<String> expectedKeys = new HashSet<>(Arrays.asList("jam", "council", "style", "paul", "of", "the"));
    assertEquals(expectedKeys, keys);

    // Pass 2: values supplied as the first constructor argument.
    response = client().prepareSearch("test")
            .setQuery(new TermQueryBuilder("description", "weller"))
            .addAggregation(
                    significantTerms("mySignificantTerms")
                            .field("description")
                            .executionHint(randomExecutionHint())
                            .includeExclude(new IncludeExclude(incExcTerms, null)))
            .get();
    assertSearchResponse(response);
    topTerms = response.getAggregations().get("mySignificantTerms");
    keys = new HashSet<>();
    for (Bucket bucket : topTerms) {
        keys.add(bucket.getKeyAsString());
    }
    assertThat(keys, hasSize(1));
    assertThat(keys.contains("weller"), is(true));
}
use of org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude in project elasticsearch by elastic.
the class SignificantTermsIT method testStructuredAnalysisWithIncludeExclude.
/**
 * Runs a significant_terms aggregation on {@code fact_category} for documents matching the term
 * "paul", with {@code MUSIC_CATEGORY} passed as the second {@link IncludeExclude} constructor
 * argument, and asserts that the first returned bucket has the key {@code OTHER_CATEGORY}.
 */
public void testStructuredAnalysisWithIncludeExclude() throws Exception {
    long[] excludeTerms = { MUSIC_CATEGORY };

    SearchResponse response = client().prepareSearch("test")
            .setSearchType(SearchType.QUERY_THEN_FETCH)
            .setQuery(new TermQueryBuilder("description", "paul"))
            .setFrom(0)
            .setSize(60)
            .setExplain(true)
            .addAggregation(
                    significantTerms("mySignificantTerms")
                            .field("fact_category")
                            .executionHint(randomExecutionHint())
                            .minDocCount(1)
                            .includeExclude(new IncludeExclude(null, excludeTerms)))
            .execute()
            .actionGet();
    assertSearchResponse(response);

    SignificantTerms topTerms = response.getAggregations().get("mySignificantTerms");
    // Only the first bucket in iteration order is inspected.
    Number topCategory = (Number) topTerms.getBuckets().iterator().next().getKey();
    Long expectedCategory = Long.valueOf(OTHER_CATEGORY);
    assertTrue(topCategory.equals(expectedCategory));
}
Aggregations