Search in sources :

Example 11 with CharacterRunAutomaton

use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project lucene-solr by apache.

the class TestQPHelper method testBoost.

/**
 * Exercises boost suffixes ({@code ^N}) in {@link StandardQueryParser}: on plain terms, on quoted
 * phrases, and on terms matched by the stop set handed to the analyzer.
 */
public void testBoost() throws Exception {
    // Analyzer configured with a stop set that matches only the string "on".
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
    StandardQueryParser qp = new StandardQueryParser();
    qp.setAnalyzer(oneStopAnalyzer);

    // A boosted term from the stop set must still parse to a non-null query.
    Query q = qp.parse("on^1.0", "field");
    assertNotNull(q);

    q = qp.parse("\"hello\"^2.0", "field");
    assertNotNull(q);
    // assertEquals takes (expected, actual, delta); keeping that order makes failure messages read correctly.
    assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);

    q = qp.parse("hello^2.0", "field");
    assertNotNull(q);
    assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);

    q = qp.parse("\"on\"^1.0", "field");
    assertNotNull(q);

    StandardQueryParser qp2 = new StandardQueryParser();
    qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
    q = qp2.parse("the^3", "field");
    // "the" is a stop word so the result is an empty query:
    assertNotNull(q);
    assertMatchNoDocsQuery(q);
    // The boost must not be wrapped around the empty query.
    assertFalse(q instanceof BoostQuery);
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 12 with CharacterRunAutomaton

use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project lucene-solr by apache.

the class QueryParserTestBase method testPhraseQueryPositionIncrements.

/**
 * Parses the phrase {@code "1 stop 2"} with position increments enabled and an analyzer whose stop
 * set matches "stop" in any letter case, and expects a phrase query with "1" followed by "2" at
 * position 2.
 */
public void testPhraseQueryPositionIncrements() throws Exception {
    // Stop set: the word "stop", each letter in either case.
    RegExp stopPattern = new RegExp("[sS][tT][oO][pP]");
    CharacterRunAutomaton stopWords = new CharacterRunAutomaton(stopPattern.toAutomaton());
    MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopWords);

    CommonQueryParserConfiguration parserConfig = getParserConfig(analyzer);
    parserConfig.setEnablePositionIncrements(true);

    // Expected query: "1" added without an explicit position, "2" explicitly at position 2.
    PhraseQuery expected = new PhraseQuery.Builder()
        .add(new Term("field", "1"))
        .add(new Term("field", "2"), 2)
        .build();

    assertEquals(expected, getQuery("\"1 stop 2\"", parserConfig));
}
Also used : RegExp(org.apache.lucene.util.automaton.RegExp) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Term(org.apache.lucene.index.Term) CommonQueryParserConfiguration(org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration)

Example 13 with CharacterRunAutomaton

use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project lucene-solr by apache.

the class Stemmer method checkCondition.

/**
 * Checks whether the concatenation of two character ranges is accepted by the pattern stored
 * under {@code condition} in {@code dictionary.patterns}.
 *
 * @param condition key of the pattern to apply; {@code 0} makes the method return {@code true} without any check
 * @param c1 array holding the first piece
 * @param c1off index of the first character of the first piece
 * @param c1len number of characters in the first piece
 * @param c2 array holding the second piece
 * @param c2off index of the first character of the second piece
 * @param c2len number of characters in the second piece
 * @return {@code true} if {@code condition} is 0 or the pattern accepts the first piece followed by the second
 */
// NOTE: stepping through both pieces is a naive approach; ideally the strip would be subtracted
// from the condition up front so that only the stem has to be checked, but doing that is
// somewhat more complicated.
private boolean checkCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len) {
    if (condition == 0) {
        return true;
    }

    final CharacterRunAutomaton automaton = dictionary.patterns.get(condition);
    int current = 0;

    // Feed the first piece through the automaton; a state of -1 ends the check with a mismatch.
    final int firstEnd = c1off + c1len;
    int pos = c1off;
    while (pos < firstEnd) {
        current = automaton.step(current, c1[pos]);
        if (current == -1) {
            return false;
        }
        pos++;
    }

    // Continue from the reached state with the second piece.
    final int secondEnd = c2off + c2len;
    pos = c2off;
    while (pos < secondEnd) {
        current = automaton.step(current, c2[pos]);
        if (current == -1) {
            return false;
        }
        pos++;
    }

    return automaton.isAccept(current);
}
Also used : CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton)

Example 14 with CharacterRunAutomaton

use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project crate by crate.

the class XContentMapValues method filter.

/**
 * Builds a function that filters a document map according to the supplied include and exclude rules.
 * A {@code null} or empty {@code includes} array uses the match-any-string automaton as the include
 * rule; a {@code null} or empty {@code excludes} array uses the empty automaton as the exclude rule.
 * @see #filter(Map, String[], String[]) for details
 */
public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) {
    final CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());

    // Include rule: the shared match-all automaton unless explicit patterns were given.
    final boolean hasIncludes = includes != null && includes.length != 0;
    final CharacterRunAutomaton include = hasIncludes
        ? new CharacterRunAutomaton(makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(includes)))
        : matchAllAutomaton;

    // Exclude rule: the empty automaton unless explicit patterns were given.
    final boolean hasExcludes = excludes != null && excludes.length != 0;
    final Automaton excludeA = hasExcludes
        ? makeMatchDotsInFieldNames(Regex.simpleMatchToAutomaton(excludes))
        : Automata.makeEmpty();
    final CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA);

    // Both automata start from state 0 when the returned function is applied.
    return document -> filter(document, include, 0, exclude, 0, matchAllAutomaton);
}
Also used : Arrays(java.util.Arrays) Numbers(org.elasticsearch.common.Numbers) Automaton(org.apache.lucene.util.automaton.Automaton) HashMap(java.util.HashMap) Function(java.util.function.Function) Strings(org.elasticsearch.common.Strings) ArrayList(java.util.ArrayList) Booleans(io.crate.common.Booleans) List(java.util.List) Operations(org.apache.lucene.util.automaton.Operations) Map(java.util.Map) Regex(org.elasticsearch.common.regex.Regex) ElasticsearchParseException(org.elasticsearch.ElasticsearchParseException) Automata(org.apache.lucene.util.automaton.Automata) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton)

Example 15 with CharacterRunAutomaton

use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project elasticsearch by elastic.

the class TransportReindexAction method buildRemoteWhitelist.

/**
 * Compiles the reindex-from-remote whitelist into a {@link CharacterRunAutomaton} and refuses a
 * whitelist that would match everything.
 *
 * @param whitelist the whitelist patterns; an empty list yields an automaton built from {@code Automata.makeEmpty()}
 * @return the automaton built from the whitelist patterns
 * @throws IllegalArgumentException if the minimized automaton is total
 */
static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) {
    if (!whitelist.isEmpty()) {
        final String[] patterns = whitelist.toArray(Strings.EMPTY_ARRAY);
        final Automaton minimized = MinimizationOperations.minimize(
            Regex.simpleMatchToAutomaton(patterns), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
        if (!Operations.isTotal(minimized)) {
            return new CharacterRunAutomaton(minimized);
        }
        // A total automaton would whitelist every address, so reject it.
        final String message = "Refusing to start because whitelist " + whitelist
            + " accepts all addresses. This would allow users to reindex-from-remote any URL they like"
            + " effectively having Elasticsearch make HTTP GETs for them.";
        throw new IllegalArgumentException(message);
    }
    // Empty whitelist: no minimization or totality check is performed.
    return new CharacterRunAutomaton(Automata.makeEmpty());
}
Also used : Automaton(org.apache.lucene.util.automaton.Automaton) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton)

Aggregations

CharacterRunAutomaton (org.apache.lucene.util.automaton.CharacterRunAutomaton): 36; RegExp (org.apache.lucene.util.automaton.RegExp): 15; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 10; ArrayList (java.util.ArrayList): 7; Term (org.apache.lucene.index.Term): 7; PhraseQuery (org.apache.lucene.search.PhraseQuery): 6; Query (org.apache.lucene.search.Query): 6; TermQuery (org.apache.lucene.search.TermQuery): 6; List (java.util.List): 5; Analyzer (org.apache.lucene.analysis.Analyzer): 5; BooleanQuery (org.apache.lucene.search.BooleanQuery): 5; BoostQuery (org.apache.lucene.search.BoostQuery): 5; Automaton (org.apache.lucene.util.automaton.Automaton): 5; HashMap (java.util.HashMap): 4; Map (java.util.Map): 4; Document (org.apache.lucene.document.Document): 4; FuzzyQuery (org.apache.lucene.search.FuzzyQuery): 4; PrefixQuery (org.apache.lucene.search.PrefixQuery): 4; TermRangeQuery (org.apache.lucene.search.TermRangeQuery): 4; MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 3