Use of org.apache.lucene.analysis.Tokenizer in the Apache lucene-solr project.
Class TestSimplePatternTokenizer, method testEndLookahead.
// The "(ab)+" pattern must not greedily consume the trailing partial match:
// for input "aba" only the first "ab" (offsets 0..2) is emitted, and the
// final reported offset is the full input length, 3.
public void testEndLookahead() throws Exception {
  Tokenizer tokenizer = new SimplePatternTokenizer("(ab)+");
  tokenizer.setReader(new StringReader("aba"));
  assertTokenStreamContents(tokenizer, new String[] { "ab" }, new int[] { 0 }, new int[] { 2 }, 3);
}
Use of org.apache.lucene.analysis.Tokenizer in the Apache lucene-solr project.
Class TestSimplePatternTokenizer, method testBasic.
// Verifies basic pattern tokenization: each regex match in the input becomes
// a token; tsToString() joins the emitted tokens for comparison.
public void testBasic() throws Exception {
  // get stuff between "'"
  String qpattern = "\\'([^\\']+)\\'";
  String[][] tests = { // pattern input output
      { ":", "boo:and:foo", ": :" }, { qpattern, "aaa 'bbb' 'ccc'", "'bbb' 'ccc'" } };
  for (String[] test : tests) {
    // Declare as Tokenizer so setReader() can be called without a cast.
    Tokenizer stream = new SimplePatternTokenizer(test[0]);
    stream.setReader(new StringReader(test[1]));
    String out = tsToString(stream);
    assertEquals("pattern: " + test[0] + " with input: " + test[1], test[2], out);
  }
}
Use of org.apache.lucene.analysis.Tokenizer in the Apache lucene-solr project.
Class TestSimplePatternTokenizer, method testEmptyStringPatternOneMatch.
// A pattern that can match the empty string ("a*") must still emit only
// non-empty tokens: for input "bbab" the single token is "a".
public void testEmptyStringPatternOneMatch() throws Exception {
  Tokenizer t = new SimplePatternTokenizer("a*");
  CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
  t.setReader(new StringReader("bbab"));
  t.reset();
  assertTrue(t.incrementToken());
  assertEquals("a", termAtt.toString());
  assertFalse(t.incrementToken());
  // Complete the TokenStream consumer contract: end() after exhaustion,
  // then close() to release the tokenizer's resources.
  t.end();
  t.close();
}
Use of org.apache.lucene.analysis.Tokenizer in the Apache lucene-solr project.
Class TestSimplePatternTokenizer, method testOffsetCorrection.
// Verifies that the offsets reported by SimplePatternTokenizer are corrected
// back to positions in the original (pre-CharFilter) input. The unused
// mappingRules list from the original was dead code and has been removed.
public void testOffsetCorrection() throws Exception {
  final String INPUT = "G&uuml;nther G&uuml;nther is here";
  // create MappingCharFilter that rewrites the HTML entity to the real character
  NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
  builder.add("&uuml;", "ü");
  NormalizeCharMap normMap = builder.build();
  CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));
  // create SimplePatternTokenizer over the filtered character stream
  Tokenizer stream = new SimplePatternTokenizer("Günther");
  stream.setReader(charStream);
  // Offsets (0..12 and 13..25) refer to the original INPUT, not the mapped text.
  assertTokenStreamContents(stream, new String[] { "Günther", "Günther" }, new int[] { 0, 13 }, new int[] { 12, 25 }, INPUT.length());
}
Use of org.apache.lucene.analysis.Tokenizer in the Apache lucene-solr project.
Class TestPortugueseStemFilter, method testEmptyTerm.
// A stemming filter must pass an empty term through unchanged (and not throw).
public void testEmptyTerm() throws IOException {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      return new TokenStreamComponents(source, new PortugueseStemFilter(source));
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
Aggregations