Search in sources :

Example 1 with ChiSquare

use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.

the class SignificantTermsSignificanceScoreIT method testScoresEqualForPositiveAndNegative.

/**
 * Indexes the shared test data once, then runs the parameterised
 * {@code testScoresEqualForPositiveAndNegative} check for a {@link MutualInformation}
 * and a {@link ChiSquare} heuristic, each constructed with both flags set to {@code true}.
 *
 * @throws Exception if indexing or either delegated check fails
 */
public void testScoresEqualForPositiveAndNegative() throws Exception {
    indexEqualTestData();

    // Mutual information first ...
    final MutualInformation mutualInformation = new MutualInformation(true, true);
    testScoresEqualForPositiveAndNegative(mutualInformation);

    // ... then chi square, against the same indexed data.
    final ChiSquare chiSquare = new ChiSquare(true, true);
    testScoresEqualForPositiveAndNegative(chiSquare);
}
Also used : ChiSquare(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare) MutualInformation(org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation)

Example 2 with ChiSquare

use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.

the class SignificantTermsTests method createTestAggregatorBuilder.

/**
 * Builds a randomly configured {@link SignificantTermsAggregationBuilder}.
 * <p>
 * A field and/or a script is always configured. Every other option (missing value, bucket count
 * thresholds, execution hint, format, include/exclude, significance heuristic, background filter)
 * is applied only when its own {@code randomBoolean()} draw is {@code true}.
 * <p>
 * The order of random draws is significant for reproducing a run from a seed, so the helpers
 * below are invoked in the same sequence in which the inlined code used to draw its values.
 *
 * @return the randomly configured builder
 */
@Override
protected SignificantTermsAggregationBuilder createTestAggregatorBuilder() {
    String name = randomAsciiOfLengthBetween(3, 20);
    SignificantTermsAggregationBuilder factory = new SignificantTermsAggregationBuilder(name, null);
    String field = randomAsciiOfLengthBetween(3, 20);
    int randomFieldBranch = randomInt(2);
    switch (randomFieldBranch) {
        case 0:
            // field only
            factory.field(field);
            break;
        case 1:
            // field plus a value script
            factory.field(field);
            factory.script(new Script("_value + 1"));
            break;
        case 2:
            // script only
            factory.script(new Script("doc[" + field + "] + 1"));
            break;
        default:
            fail();
    }
    if (randomBoolean()) {
        factory.missing("MISSING");
    }
    if (randomBoolean()) {
        factory.bucketCountThresholds().setRequiredSize(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        factory.bucketCountThresholds().setShardSize(randomIntBetween(1, Integer.MAX_VALUE));
    }
    if (randomBoolean()) {
        int minDocCount = randomDocCountThreshold();
        factory.bucketCountThresholds().setMinDocCount(minDocCount);
    }
    if (randomBoolean()) {
        int shardMinDocCount = randomDocCountThreshold();
        factory.bucketCountThresholds().setShardMinDocCount(shardMinDocCount);
    }
    if (randomBoolean()) {
        factory.executionHint(randomFrom(executionHints));
    }
    if (randomBoolean()) {
        factory.format("###.##");
    }
    if (randomBoolean()) {
        IncludeExclude incExc = randomIncludeExclude();
        factory.includeExclude(incExc);
    }
    if (randomBoolean()) {
        SignificanceHeuristic significanceHeuristic = randomSignificanceHeuristic();
        factory.significanceHeuristic(significanceHeuristic);
    }
    if (randomBoolean()) {
        factory.backgroundFilter(QueryBuilders.termsQuery("foo", "bar"));
    }
    return factory;
}

/**
 * Draws a doc-count threshold: {@code 0} when the first draw is {@code 0}, otherwise a second
 * draw from {@code randomIntBetween(0, Integer.MAX_VALUE)}.
 * <p>
 * Shared by the min-doc-count and shard-min-doc-count options, which previously carried two
 * copies of the same switch (only one of which guarded against an unexpected draw).
 */
private int randomDocCountThreshold() {
    int branch = randomInt(4);
    switch (branch) {
        case 0:
            return 0;
        case 1:
        case 2:
        case 3:
        case 4:
            return randomIntBetween(0, Integer.MAX_VALUE);
        default:
            // presumably unreachable: randomInt(4) is expected to stay within 0..4
            throw new AssertionError("unexpected doc count branch [" + branch + "]");
    }
}

/**
 * Builds a sorted set from random ASCII strings. The number of strings is drawn with
 * {@code randomIntBetween(1, 20)}; duplicates, if any, collapse in the set.
 */
private SortedSet<BytesRef> randomBytesRefSet() {
    SortedSet<BytesRef> values = new TreeSet<>();
    int count = randomIntBetween(1, 20);
    for (int i = 0; i < count; i++) {
        values.add(new BytesRef(randomAsciiOfLengthBetween(1, 30)));
    }
    return values;
}

/**
 * Picks one of six include/exclude shapes: regex include, regex exclude, both regexes,
 * exact-value include, exact-value exclude, or both exact-value sets.
 */
private IncludeExclude randomIncludeExclude() {
    int branch = randomInt(5);
    switch (branch) {
        case 0:
            return new IncludeExclude(new RegExp("foobar"), null);
        case 1:
            return new IncludeExclude(null, new RegExp("foobaz"));
        case 2:
            return new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz"));
        case 3: {
            SortedSet<BytesRef> includeValues = randomBytesRefSet();
            // typed null keeps the set-based constructor overload unambiguous
            SortedSet<BytesRef> excludeValues = null;
            return new IncludeExclude(includeValues, excludeValues);
        }
        case 4: {
            // typed null keeps the set-based constructor overload unambiguous
            SortedSet<BytesRef> includeValues = null;
            SortedSet<BytesRef> excludeValues = randomBytesRefSet();
            return new IncludeExclude(includeValues, excludeValues);
        }
        case 5: {
            // includes are drawn before excludes, matching the original draw order
            SortedSet<BytesRef> includeValues = randomBytesRefSet();
            SortedSet<BytesRef> excludeValues = randomBytesRefSet();
            return new IncludeExclude(includeValues, excludeValues);
        }
        default:
            // presumably unreachable: randomInt(5) is expected to stay within 0..5
            throw new AssertionError("unexpected include/exclude branch [" + branch + "]");
    }
}

/**
 * Picks one of the six significance heuristics exercised by this test, with randomised
 * constructor flags where the heuristic takes any.
 */
private SignificanceHeuristic randomSignificanceHeuristic() {
    int branch = randomInt(5);
    switch (branch) {
        case 0:
            return new PercentageScore();
        case 1:
            return new ChiSquare(randomBoolean(), randomBoolean());
        case 2:
            return new GND(randomBoolean());
        case 3:
            return new MutualInformation(randomBoolean(), randomBoolean());
        case 4:
            return new ScriptHeuristic(new Script("foo"));
        case 5:
            return new JLHScore();
        default:
            // presumably unreachable: randomInt(5) is expected to stay within 0..5
            throw new AssertionError("unexpected significance heuristic branch [" + branch + "]");
    }
}
Also used : Script(org.elasticsearch.script.Script) JLHScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore) ChiSquare(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare) RegExp(org.apache.lucene.util.automaton.RegExp) IncludeExclude(org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude) PercentageScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore) TreeSet(java.util.TreeSet) ScriptHeuristic(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ScriptHeuristic) SignificanceHeuristic(org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic) MutualInformation(org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation) SignificantTermsAggregationBuilder(org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder) GND(org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND) BytesRef(org.apache.lucene.util.BytesRef)

Example 3 with ChiSquare

use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.

the class SignificanceHeuristicTests method testBasicScoreProperties.

/**
 * Runs the shared {@code basicScoreProperties} check against each heuristic.
 * <p>
 * JLH, GND and percentage score are checked with the boolean argument {@code true};
 * mutual information and chi square with {@code false}. The meaning of that argument is
 * defined by {@code basicScoreProperties}, which is not part of this snippet.
 */
public void testBasicScoreProperties() {
    // Heuristics checked with the flag set.
    final JLHScore jlh = new JLHScore();
    basicScoreProperties(jlh, true);
    final GND gnd = new GND(true);
    basicScoreProperties(gnd, true);
    final PercentageScore percentage = new PercentageScore();
    basicScoreProperties(percentage, true);

    // Heuristics checked with the flag cleared.
    final MutualInformation mutualInformation = new MutualInformation(true, true);
    basicScoreProperties(mutualInformation, false);
    final ChiSquare chiSquare = new ChiSquare(true, true);
    basicScoreProperties(chiSquare, false);
}
Also used : JLHScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore) ChiSquare(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare) MutualInformation(org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation) GND(org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND) PercentageScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore)

Example 4 with ChiSquare

use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.

the class SignificanceHeuristicTests method getRandomSignificanceheuristic.

/**
 * Returns one randomly chosen significance heuristic out of JLH, mutual information, GND and
 * chi square. Heuristics that take flags are constructed with random flag values.
 * <p>
 * All four candidates are constructed (consuming their random draws) before one is picked.
 *
 * @return the selected heuristic
 */
SignificanceHeuristic getRandomSignificanceheuristic() {
    List<SignificanceHeuristic> heuristics = new ArrayList<>();
    heuristics.add(new JLHScore());
    heuristics.add(new MutualInformation(randomBoolean(), randomBoolean()));
    heuristics.add(new GND(randomBoolean()));
    heuristics.add(new ChiSquare(randomBoolean(), randomBoolean()));
    // Derive the upper index from the list instead of hard-coding 3, so a heuristic added
    // above can never be silently excluded from selection.
    // (assumes randomInt's bound is inclusive, as the original randomInt(3) over 4 entries implies)
    return heuristics.get(randomInt(heuristics.size() - 1));
}
Also used : JLHScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore) ChiSquare(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare) ArrayList(java.util.ArrayList) SignificanceHeuristic(org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic) MutualInformation(org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation) GND(org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND)

Example 5 with ChiSquare

use of org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare in project elasticsearch by elastic.

the class SignificanceHeuristicTests method testBuilderAndParser.

/**
 * Tests that
 * <ol>
 *   <li>the output of the builders can actually be parsed, and</li>
 *   <li>the parser does not swallow parameters after a significance heuristic was defined.</li>
 * </ol>
 *
 * @throws Exception if parsing fails unexpectedly
 */
public void testBuilderAndParser() throws Exception {
    final SearchModule module = new SearchModule(Settings.EMPTY, false, emptyList());
    final ParseFieldRegistry<SignificanceHeuristicParser> registry = module.getSignificanceHeuristicParserRegistry();

    // Parameterless definitions supplied as raw strings: jlh and gnd.
    assertTrue(parseFromString(registry, "\"jlh\":{}") instanceof JLHScore);
    assertTrue(parseFromString(registry, "\"gnd\":{}") instanceof GND);

    // mutual_information and chi_square accept the same two flags, so the JSON object
    // carrying them is built once and appended to each heuristic name.
    final boolean includeNegatives = randomBoolean();
    final boolean backgroundIsSuperset = randomBoolean();
    final String flags = "{\"include_negatives\": " + includeNegatives + ", \"background_is_superset\":" + backgroundIsSuperset + "}";
    assertEquals(
        new MutualInformation(includeNegatives, backgroundIsSuperset),
        parseFromString(registry, "\"mutual_information\":" + flags));
    assertEquals(
        new ChiSquare(includeNegatives, backgroundIsSuperset),
        parseFromString(registry, "\"chi_square\":" + flags));

    // The same heuristics, this time serialised through their builders.
    assertThat(parseFromBuilder(registry, new JLHScore()), instanceOf(JLHScore.class));
    assertThat(parseFromBuilder(registry, new GND(backgroundIsSuperset)), instanceOf(GND.class));
    assertEquals(
        new MutualInformation(includeNegatives, backgroundIsSuperset),
        parseFromBuilder(registry, new MutualInformation(includeNegatives, backgroundIsSuperset)));
    assertEquals(
        new ChiSquare(includeNegatives, backgroundIsSuperset),
        parseFromBuilder(registry, new ChiSquare(includeNegatives, backgroundIsSuperset)));

    // Malformed definitions: each must be rejected with the expected message fragment.
    checkParseException(
        registry,
        "\"mutual_information\":{\"include_negatives\": false, \"some_unknown_field\": false}",
        "unknown field [some_unknown_field]");
    checkParseException(
        registry,
        "\"chi_square\":{\"unknown_field\": true}",
        "unknown field [unknown_field]");
    checkParseException(
        registry,
        "\"jlh\":{\"unknown_field\": true}",
        "expected an empty object, but found ");
    checkParseException(
        registry,
        "\"gnd\":{\"unknown_field\": true}",
        "unknown field [unknown_field]");
}
Also used : JLHScore(org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore) ChiSquare(org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare) SearchModule(org.elasticsearch.search.SearchModule) MutualInformation(org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation) Matchers.containsString(org.hamcrest.Matchers.containsString) SignificanceHeuristicParser(org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser) GND(org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND)

Aggregations

ChiSquare (org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare): 8; MutualInformation (org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation): 7; GND (org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND): 6; JLHScore (org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore): 5; PercentageScore (org.elasticsearch.search.aggregations.bucket.significant.heuristics.PercentageScore): 3; SignificanceHeuristic (org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic): 2; ArrayList (java.util.ArrayList): 1; TreeSet (java.util.TreeSet): 1; BytesRef (org.apache.lucene.util.BytesRef): 1; RegExp (org.apache.lucene.util.automaton.RegExp): 1; SearchResponse (org.elasticsearch.action.search.SearchResponse): 1; TermQueryBuilder (org.elasticsearch.index.query.TermQueryBuilder): 1; Script (org.elasticsearch.script.Script): 1; SearchModule (org.elasticsearch.search.SearchModule): 1; SignificantTerms (org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms): 1; SignificantTermsAggregationBuilder (org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregationBuilder): 1; ScriptHeuristic (org.elasticsearch.search.aggregations.bucket.significant.heuristics.ScriptHeuristic): 1; SignificanceHeuristicParser (org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser): 1; IncludeExclude (org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude): 1; ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse): 1