use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic in project elasticsearch by elastic.
the class SignificantTermsTests method createTestAggregatorBuilder.
@Override
protected SignificantTermsAggregationBuilder createTestAggregatorBuilder() {
String name = randomAsciiOfLengthBetween(3, 20);
SignificantTermsAggregationBuilder factory = new SignificantTermsAggregationBuilder(name, null);
String field = randomAsciiOfLengthBetween(3, 20);
int randomFieldBranch = randomInt(2);
switch(randomFieldBranch) {
case 0:
factory.field(field);
break;
case 1:
factory.field(field);
factory.script(new Script("_value + 1"));
break;
case 2:
factory.script(new Script("doc[" + field + "] + 1"));
break;
default:
fail();
}
if (randomBoolean()) {
factory.missing("MISSING");
}
if (randomBoolean()) {
factory.bucketCountThresholds().setRequiredSize(randomIntBetween(1, Integer.MAX_VALUE));
}
if (randomBoolean()) {
factory.bucketCountThresholds().setShardSize(randomIntBetween(1, Integer.MAX_VALUE));
}
if (randomBoolean()) {
int minDocCount = randomInt(4);
switch(minDocCount) {
case 0:
break;
case 1:
case 2:
case 3:
case 4:
minDocCount = randomIntBetween(0, Integer.MAX_VALUE);
break;
}
factory.bucketCountThresholds().setMinDocCount(minDocCount);
}
if (randomBoolean()) {
int shardMinDocCount = randomInt(4);
switch(shardMinDocCount) {
case 0:
break;
case 1:
case 2:
case 3:
case 4:
shardMinDocCount = randomIntBetween(0, Integer.MAX_VALUE);
break;
default:
fail();
}
factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount);
}
if (randomBoolean()) {
factory.executionHint(randomFrom(executionHints));
}
if (randomBoolean()) {
factory.format("###.##");
}
if (randomBoolean()) {
IncludeExclude incExc = null;
switch(randomInt(5)) {
case 0:
incExc = new IncludeExclude(new RegExp("foobar"), null);
break;
case 1:
incExc = new IncludeExclude(null, new RegExp("foobaz"));
break;
case 2:
incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz"));
break;
case 3:
SortedSet<BytesRef> includeValues = new TreeSet<>();
int numIncs = randomIntBetween(1, 20);
for (int i = 0; i < numIncs; i++) {
includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
SortedSet<BytesRef> excludeValues = null;
incExc = new IncludeExclude(includeValues, excludeValues);
break;
case 4:
SortedSet<BytesRef> includeValues2 = null;
SortedSet<BytesRef> excludeValues2 = new TreeSet<>();
int numExcs2 = randomIntBetween(1, 20);
for (int i = 0; i < numExcs2; i++) {
excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
incExc = new IncludeExclude(includeValues2, excludeValues2);
break;
case 5:
SortedSet<BytesRef> includeValues3 = new TreeSet<>();
int numIncs3 = randomIntBetween(1, 20);
for (int i = 0; i < numIncs3; i++) {
includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
SortedSet<BytesRef> excludeValues3 = new TreeSet<>();
int numExcs3 = randomIntBetween(1, 20);
for (int i = 0; i < numExcs3; i++) {
excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
incExc = new IncludeExclude(includeValues3, excludeValues3);
break;
default:
fail();
}
factory.includeExclude(incExc);
}
if (randomBoolean()) {
SignificanceHeuristic significanceHeuristic = null;
switch(randomInt(5)) {
case 0:
significanceHeuristic = new PercentageScore();
break;
case 1:
significanceHeuristic = new ChiSquare(randomBoolean(), randomBoolean());
break;
case 2:
significanceHeuristic = new GND(randomBoolean());
break;
case 3:
significanceHeuristic = new MutualInformation(randomBoolean(), randomBoolean());
break;
case 4:
significanceHeuristic = new ScriptHeuristic(new Script("foo"));
break;
case 5:
significanceHeuristic = new JLHScore();
break;
default:
fail();
}
factory.significanceHeuristic(significanceHeuristic);
}
if (randomBoolean()) {
factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar"));
}
return factory;
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic in project elasticsearch by elastic.
the class SignificanceHeuristicTests method getRandomSignificanceheuristic.
SignificanceHeuristic getRandomSignificanceheuristic() {
List<SignificanceHeuristic> heuristics = new ArrayList<>();
heuristics.add(new JLHScore());
heuristics.add(new MutualInformation(randomBoolean(), randomBoolean()));
heuristics.add(new GND(randomBoolean()));
heuristics.add(new ChiSquare(randomBoolean(), randomBoolean()));
return heuristics.get(randomInt(3));
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic in project elasticsearch by elastic.
the class SignificanceHeuristicTests method createInternalAggregations.
// Create aggregations as they might come from three different shards and return as list.
private List<InternalAggregation> createInternalAggregations() {
SignificanceHeuristic significanceHeuristic = getRandomSignificanceheuristic();
TestAggFactory<?, ?> factory = randomBoolean() ? new StringTestAggFactory() : new LongTestAggFactory();
List<InternalAggregation> aggs = new ArrayList<>();
aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0)));
aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 1)));
aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 4, 5, 10, i)));
return aggs;
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic in project elasticsearch by elastic.
the class SearchModule method registerSignificanceHeuristic.
private void registerSignificanceHeuristic(SearchExtensionSpec<SignificanceHeuristic, SignificanceHeuristicParser> heuristic) {
significanceHeuristicParserRegistry.register(heuristic.getParser(), heuristic.getName());
namedWriteables.add(new NamedWriteableRegistry.Entry(SignificanceHeuristic.class, heuristic.getName().getPreferredName(), heuristic.getReader()));
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic in project elasticsearch by elastic.
the class InternalSignificantTerms method doReduce.
@Override
public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
long globalSubsetSize = 0;
long globalSupersetSize = 0;
// top-level Aggregations from each shard
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked") InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
globalSubsetSize += terms.getSubsetSize();
globalSupersetSize += terms.getSupersetSize();
}
Map<String, List<B>> buckets = new HashMap<>();
for (InternalAggregation aggregation : aggregations) {
@SuppressWarnings("unchecked") InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
for (B bucket : terms.getBucketsInternal()) {
List<B> existingBuckets = buckets.get(bucket.getKeyAsString());
if (existingBuckets == null) {
existingBuckets = new ArrayList<>(aggregations.size());
buckets.put(bucket.getKeyAsString(), existingBuckets);
}
// Adjust the buckets with the global stats representing the
// total size of the pots from which the stats are drawn
existingBuckets.add(bucket.newBucket(bucket.getSubsetDf(), globalSubsetSize, bucket.getSupersetDf(), globalSupersetSize, bucket.aggregations));
}
}
SignificanceHeuristic heuristic = getSignificanceHeuristic().rewrite(reduceContext);
final int size = reduceContext.isFinalReduce() == false ? buckets.size() : Math.min(requiredSize, buckets.size());
BucketSignificancePriorityQueue<B> ordered = new BucketSignificancePriorityQueue<>(size);
for (Map.Entry<String, List<B>> entry : buckets.entrySet()) {
List<B> sameTermBuckets = entry.getValue();
final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext);
b.updateScore(heuristic);
if (((b.score > 0) && (b.subsetDf >= minDocCount)) || reduceContext.isFinalReduce() == false) {
ordered.insertWithOverflow(b);
}
}
B[] list = createBucketsArray(ordered.size());
for (int i = ordered.size() - 1; i >= 0; i--) {
list[i] = ordered.pop();
}
return create(globalSubsetSize, globalSupersetSize, Arrays.asList(list));
}
Aggregations