use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project lucene-solr by apache.
the class TestQPHelper method testBoost.
public void testBoost() throws Exception {
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
StandardQueryParser qp = new StandardQueryParser();
qp.setAnalyzer(oneStopAnalyzer);
Query q = qp.parse("on^1.0", "field");
assertNotNull(q);
q = qp.parse("\"hello\"^2.0", "field");
assertNotNull(q);
assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("hello^2.0", "field");
assertNotNull(q);
assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("\"on\"^1.0", "field");
assertNotNull(q);
StandardQueryParser qp2 = new StandardQueryParser();
qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
q = qp2.parse("the^3", "field");
// "the" is a stop word so the result is an empty query:
assertNotNull(q);
assertMatchNoDocsQuery(q);
assertFalse(q instanceof BoostQuery);
}
use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project lucene-solr by apache.
the class QueryParserTestBase method testPhraseQueryPositionIncrements.
public void testPhraseQueryPositionIncrements() throws Exception {
CharacterRunAutomaton stopStopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
qp.setEnablePositionIncrements(true);
PhraseQuery.Builder phraseQuery = new PhraseQuery.Builder();
phraseQuery.add(new Term("field", "1"));
phraseQuery.add(new Term("field", "2"), 2);
assertEquals(phraseQuery.build(), getQuery("\"1 stop 2\"", qp));
}
use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project lucene-solr by apache.
the class Stemmer method checkCondition.
/** checks condition of the concatenation of two strings */
// note: this is pretty stupid, we really should subtract strip from the condition up front and just check the stem
// but this is a little bit more complicated.
private boolean checkCondition(int condition, char[] c1, int c1off, int c1len, char[] c2, int c2off, int c2len) {
if (condition != 0) {
CharacterRunAutomaton pattern = dictionary.patterns.get(condition);
int state = 0;
for (int i = c1off; i < c1off + c1len; i++) {
state = pattern.step(state, c1[i]);
if (state == -1) {
return false;
}
}
for (int i = c2off; i < c2off + c2len; i++) {
state = pattern.step(state, c2[i]);
if (state == -1) {
return false;
}
}
return pattern.isAccept(state);
}
return true;
}
use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project crate by crate.
the class XContentMapValues method filter.
/**
* Returns a function that filters a document map based on the given include and exclude rules.
* @see #filter(Map, String[], String[]) for details
*/
public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) {
CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());
CharacterRunAutomaton include;
if (includes == null || includes.length == 0) {
include = matchAllAutomaton;
} else {
Automaton includeA = Regex.simpleMatchToAutomaton(includes);
includeA = makeMatchDotsInFieldNames(includeA);
include = new CharacterRunAutomaton(includeA);
}
Automaton excludeA;
if (excludes == null || excludes.length == 0) {
excludeA = Automata.makeEmpty();
} else {
excludeA = Regex.simpleMatchToAutomaton(excludes);
excludeA = makeMatchDotsInFieldNames(excludeA);
}
CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA);
return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton);
}
use of org.apache.lucene.util.automaton.CharacterRunAutomaton in project elasticsearch by elastic.
the class TransportReindexAction method buildRemoteWhitelist.
/**
* Build the {@link CharacterRunAutomaton} that represents the reindex-from-remote whitelist and make sure that it doesn't whitelist
* the world.
*/
static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) {
if (whitelist.isEmpty()) {
return new CharacterRunAutomaton(Automata.makeEmpty());
}
Automaton automaton = Regex.simpleMatchToAutomaton(whitelist.toArray(Strings.EMPTY_ARRAY));
automaton = MinimizationOperations.minimize(automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
if (Operations.isTotal(automaton)) {
throw new IllegalArgumentException("Refusing to start because whitelist " + whitelist + " accepts all addresses. " + "This would allow users to reindex-from-remote any URL they like effectively having Elasticsearch make HTTP GETs " + "for them.");
}
return new CharacterRunAutomaton(automaton);
}
Aggregations