use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.
the class SignificantTermsSignificanceScoreIT method testScoresEqualForPositiveAndNegative.
public void testScoresEqualForPositiveAndNegative() throws Exception {
indexEqualTestData();
testScoresEqualForPositiveAndNegative(new MutualInformation(true, true));
testScoresEqualForPositiveAndNegative(new ChiSquare(true, true));
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.
the class SignificantTermsTests method createTestAggregatorBuilder.
@Override
protected SignificantTermsAggregationBuilder createTestAggregatorBuilder() {
String name = randomAsciiOfLengthBetween(3, 20);
SignificantTermsAggregationBuilder factory = new SignificantTermsAggregationBuilder(name, null);
String field = randomAsciiOfLengthBetween(3, 20);
int randomFieldBranch = randomInt(2);
switch(randomFieldBranch) {
case 0:
factory.field(field);
break;
case 1:
factory.field(field);
factory.script(new Script("_value + 1"));
break;
case 2:
factory.script(new Script("doc[" + field + "] + 1"));
break;
default:
fail();
}
if (randomBoolean()) {
factory.missing("MISSING");
}
if (randomBoolean()) {
factory.bucketCountThresholds().setRequiredSize(randomIntBetween(1, Integer.MAX_VALUE));
}
if (randomBoolean()) {
factory.bucketCountThresholds().setShardSize(randomIntBetween(1, Integer.MAX_VALUE));
}
if (randomBoolean()) {
int minDocCount = randomInt(4);
switch(minDocCount) {
case 0:
break;
case 1:
case 2:
case 3:
case 4:
minDocCount = randomIntBetween(0, Integer.MAX_VALUE);
break;
}
factory.bucketCountThresholds().setMinDocCount(minDocCount);
}
if (randomBoolean()) {
int shardMinDocCount = randomInt(4);
switch(shardMinDocCount) {
case 0:
break;
case 1:
case 2:
case 3:
case 4:
shardMinDocCount = randomIntBetween(0, Integer.MAX_VALUE);
break;
default:
fail();
}
factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount);
}
if (randomBoolean()) {
factory.executionHint(randomFrom(executionHints));
}
if (randomBoolean()) {
factory.format("###.##");
}
if (randomBoolean()) {
IncludeExclude incExc = null;
switch(randomInt(5)) {
case 0:
incExc = new IncludeExclude(new RegExp("foobar"), null);
break;
case 1:
incExc = new IncludeExclude(null, new RegExp("foobaz"));
break;
case 2:
incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz"));
break;
case 3:
SortedSet<BytesRef> includeValues = new TreeSet<>();
int numIncs = randomIntBetween(1, 20);
for (int i = 0; i < numIncs; i++) {
includeValues.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
SortedSet<BytesRef> excludeValues = null;
incExc = new IncludeExclude(includeValues, excludeValues);
break;
case 4:
SortedSet<BytesRef> includeValues2 = null;
SortedSet<BytesRef> excludeValues2 = new TreeSet<>();
int numExcs2 = randomIntBetween(1, 20);
for (int i = 0; i < numExcs2; i++) {
excludeValues2.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
incExc = new IncludeExclude(includeValues2, excludeValues2);
break;
case 5:
SortedSet<BytesRef> includeValues3 = new TreeSet<>();
int numIncs3 = randomIntBetween(1, 20);
for (int i = 0; i < numIncs3; i++) {
includeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
SortedSet<BytesRef> excludeValues3 = new TreeSet<>();
int numExcs3 = randomIntBetween(1, 20);
for (int i = 0; i < numExcs3; i++) {
excludeValues3.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
}
incExc = new IncludeExclude(includeValues3, excludeValues3);
break;
default:
fail();
}
factory.includeExclude(incExc);
}
if (randomBoolean()) {
SignificanceHeuristic significanceHeuristic = null;
switch(randomInt(5)) {
case 0:
significanceHeuristic = new PercentageScore();
break;
case 1:
significanceHeuristic = new ChiSquare(randomBoolean(), randomBoolean());
break;
case 2:
significanceHeuristic = new GND(randomBoolean());
break;
case 3:
significanceHeuristic = new MutualInformation(randomBoolean(), randomBoolean());
break;
case 4:
significanceHeuristic = new ScriptHeuristic(new Script("foo"));
break;
case 5:
significanceHeuristic = new JLHScore();
break;
default:
fail();
}
factory.significanceHeuristic(significanceHeuristic);
}
if (randomBoolean()) {
factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar"));
}
return factory;
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.
the class SignificanceHeuristicTests method testBasicScoreProperties.
public void testBasicScoreProperties() {
basicScoreProperties(new JLHScore(), true);
basicScoreProperties(new GND(true), true);
basicScoreProperties(new PercentageScore(), true);
basicScoreProperties(new MutualInformation(true, true), false);
basicScoreProperties(new ChiSquare(true, true), false);
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.
the class SignificanceHeuristicTests method getRandomSignificanceheuristic.
SignificanceHeuristic getRandomSignificanceheuristic() {
List<SignificanceHeuristic> heuristics = new ArrayList<>();
heuristics.add(new JLHScore());
heuristics.add(new MutualInformation(randomBoolean(), randomBoolean()));
heuristics.add(new GND(randomBoolean()));
heuristics.add(new ChiSquare(randomBoolean(), randomBoolean()));
return heuristics.get(randomInt(3));
}
use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.
the class SignificanceHeuristicTests method testBuilderAndParser.
// test that
// 1. The output of the builders can actually be parsed
// 2. The parser does not swallow parameters after a significance heuristic was defined
public void testBuilderAndParser() throws Exception {
SearchModule searchModule = new SearchModule(Settings.EMPTY, false, emptyList());
ParseFieldRegistry<SignificanceHeuristicParser> heuristicParserMapper = searchModule.getSignificanceHeuristicParserRegistry();
// test jlh with string
assertTrue(parseFromString(heuristicParserMapper, "\"jlh\":{}") instanceof JLHScore);
// test gnd with string
assertTrue(parseFromString(heuristicParserMapper, "\"gnd\":{}") instanceof GND);
// test mutual information with string
boolean includeNegatives = randomBoolean();
boolean backgroundIsSuperset = randomBoolean();
String mutual = "\"mutual_information\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":" + backgroundIsSuperset + "}";
assertEquals(new MutualInformation(includeNegatives, backgroundIsSuperset), parseFromString(heuristicParserMapper, mutual));
String chiSquare = "\"chi_square\":{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":" + backgroundIsSuperset + "}";
assertEquals(new ChiSquare(includeNegatives, backgroundIsSuperset), parseFromString(heuristicParserMapper, chiSquare));
// test with builders
assertThat(parseFromBuilder(heuristicParserMapper, new JLHScore()), instanceOf(JLHScore.class));
assertThat(parseFromBuilder(heuristicParserMapper, new GND(backgroundIsSuperset)), instanceOf(GND.class));
assertEquals(new MutualInformation(includeNegatives, backgroundIsSuperset), parseFromBuilder(heuristicParserMapper, new MutualInformation(includeNegatives, backgroundIsSuperset)));
assertEquals(new ChiSquare(includeNegatives, backgroundIsSuperset), parseFromBuilder(heuristicParserMapper, new ChiSquare(includeNegatives, backgroundIsSuperset)));
// test exceptions
String faultyHeuristicdefinition = "\"mutual_information\":{\"include_negatives\": false, \"some_unknown_field\": false}";
String expectedError = "unknown field [some_unknown_field]";
checkParseException(heuristicParserMapper, faultyHeuristicdefinition, expectedError);
faultyHeuristicdefinition = "\"chi_square\":{\"unknown_field\": true}";
expectedError = "unknown field [unknown_field]";
checkParseException(heuristicParserMapper, faultyHeuristicdefinition, expectedError);
faultyHeuristicdefinition = "\"jlh\":{\"unknown_field\": true}";
expectedError = "expected an empty object, but found ";
checkParseException(heuristicParserMapper, faultyHeuristicdefinition, expectedError);
faultyHeuristicdefinition = "\"gnd\":{\"unknown_field\": true}";
expectedError = "unknown field [unknown_field]";
checkParseException(heuristicParserMapper, faultyHeuristicdefinition, expectedError);
}
Aggregations