use of org.apache.lucene.util.automaton.RegExp in project lucene-solr by apache.
the class TestMockAnalyzer method testTwoChars.
/** Test a configuration where two characters makes a term */
public void testTwoChars() throws Exception {
CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("..").toAutomaton());
Analyzer a = new MockAnalyzer(random(), single, false);
assertAnalyzesTo(a, "foobar", new String[] { "fo", "ob", "ar" }, new int[] { 0, 2, 4 }, new int[] { 2, 4, 6 });
// make sure when last term is a "partial" match that end() is correct
assertTokenStreamContents(a.tokenStream("bogus", "fooba"), new String[] { "fo", "ob" }, new int[] { 0, 2 }, new int[] { 2, 4 }, new int[] { 1, 1 }, new Integer(5));
checkRandomData(random(), a, 100);
}
use of org.apache.lucene.util.automaton.RegExp in project lucene-solr by apache.
the class TestMockAnalyzer method testUppercase.
/** Test a configuration where word starts with one uppercase */
public void testUppercase() throws Exception {
CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
Analyzer a = new MockAnalyzer(random(), single, false);
assertAnalyzesTo(a, "FooBarBAZ", new String[] { "Foo", "Bar", "B", "A", "Z" }, new int[] { 0, 3, 6, 7, 8 }, new int[] { 3, 6, 7, 8, 9 });
assertAnalyzesTo(a, "aFooBar", new String[] { "Foo", "Bar" }, new int[] { 1, 4 }, new int[] { 4, 7 });
checkRandomData(random(), a, 100);
}
use of org.apache.lucene.util.automaton.RegExp in project lucene-solr by apache.
the class QueryParserTestBase method testStopwords.
public void testStopwords() throws Exception {
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
Query result = getQuery("field:the OR field:foo", qp);
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
if (result instanceof BooleanQuery) {
assertEquals(0, ((BooleanQuery) result).clauses().size());
}
result = getQuery("field:woo OR field:the", qp);
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a TermQuery", result instanceof TermQuery);
result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
result = ((BoostQuery) result).getQuery();
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
if (VERBOSE)
System.out.println("Result: " + result);
assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
}
use of org.apache.lucene.util.automaton.RegExp in project SearchServices by Alfresco.
the class MinHashFilterTest method createMockShingleTokenizer.
public static Tokenizer createMockShingleTokenizer(int shingleSize, String shingles) {
MockTokenizer tokenizer = new MockTokenizer(new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+([ \t\r\n]+[^ \t\r\n]+){4}").toAutomaton()), true);
tokenizer.setEnableChecks(true);
if (shingles != null) {
tokenizer.setReader(new StringReader(shingles));
}
return tokenizer;
}
use of org.apache.lucene.util.automaton.RegExp in project crate by crate.
the class RegexpMatchOperator method evaluate.
@Override
public Boolean evaluate(TransactionContext txnCtx, NodeContext nodeCtx, Input<String>[] args) {
assert args.length == 2 : "invalid number of arguments";
String source = args[0].value();
if (source == null) {
return null;
}
String pattern = args[1].value();
if (pattern == null) {
return null;
}
if (isPcrePattern(pattern)) {
return source.matches(pattern);
} else {
RegExp regexp = new RegExp(pattern);
ByteRunAutomaton regexpRunAutomaton = new ByteRunAutomaton(regexp.toAutomaton());
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
return regexpRunAutomaton.run(bytes, 0, bytes.length);
}
}
Aggregations