Use of org.apache.lucene.analysis.core.KeywordTokenizer in the project lucene-solr by Apache.
From the class TestStemmerOverrideFilter, method testIgnoreCase.
/**
 * Registers an override under the key "boOkEd" on a builder constructed with
 * {@code true}, feeds the differently-cased input "BooKeD" through a
 * {@code StemmerOverrideFilter} wrapped in a {@code PorterStemFilter}, and asserts
 * that the only token produced is "books".
 */
public void testIgnoreCase() throws IOException {
  // Make "booked" stem to "books". The override filter rewrites the term and,
  // per the original author's note, also marks it with KeywordAttribute so the
  // Porter stemmer does not change the replacement.
  final StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(true);
  overrides.add("boOkEd", "books");

  // Build the chain step by step: tokenizer -> override filter -> Porter stemmer.
  final Tokenizer source = keywordTokenizer("BooKeD");
  final TokenStream overridden = new StemmerOverrideFilter(source, overrides.build());
  final TokenStream stemmed = new PorterStemFilter(overridden);

  final String[] expected = { "books" };
  assertTokenStreamContents(stemmed, expected);
}
Use of org.apache.lucene.analysis.core.KeywordTokenizer in the project lucene-solr by Apache.
From the class TestStemmerOverrideFilter, method testNoOverrides.
/**
 * Builds a {@code StemmerOverrideFilter} from a builder to which no entries were
 * added, wraps it in a {@code PorterStemFilter}, and asserts that the input "book"
 * yields the single token "book".
 */
public void testNoOverrides() throws IOException {
  // Intentionally empty: nothing is added to the builder before build() is called.
  final StemmerOverrideFilter.Builder emptyOverrides = new StemmerOverrideFilter.Builder(true);

  // Build the chain step by step: tokenizer -> override filter -> Porter stemmer.
  final Tokenizer source = keywordTokenizer("book");
  final TokenStream overridden = new StemmerOverrideFilter(source, emptyOverrides.build());
  final TokenStream stemmed = new PorterStemFilter(overridden);

  final String[] expected = { "book" };
  assertTokenStreamContents(stemmed, expected);
}
Use of org.apache.lucene.analysis.core.KeywordTokenizer in the project lucene-solr by Apache.
From the class TestStemmerOverrideFilter, method keywordTokenizer.
/**
 * Creates a {@code KeywordTokenizer} and sets its reader to the supplied text.
 *
 * @param data text wrapped in a {@code StringReader} and handed to the tokenizer
 * @return the newly created tokenizer, with its reader already set
 * @throws IOException declared by the signature for callers; nothing in this body
 *     throws it explicitly
 */
private KeywordTokenizer keywordTokenizer(String data) throws IOException {
  final KeywordTokenizer result = new KeywordTokenizer();
  final StringReader input = new StringReader(data);
  result.setReader(input);
  return result;
}
Use of org.apache.lucene.analysis.core.KeywordTokenizer in the project lucene-solr by Apache.
From the class TestTrimFilter, method testEmptyTerm.
/**
 * Runs the empty string through an analyzer made of a {@code KeywordTokenizer}
 * followed by a {@code TrimFilter}, checking it with
 * {@code checkOneTerm(a, "", "")} (input "" and expected term "").
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources closes the analyzer even when the check below fails;
  // a trailing a.close() would be skipped on an assertion error or exception.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new TrimFilter(tokenizer));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
Use of org.apache.lucene.analysis.core.KeywordTokenizer in the project lucene-solr by Apache.
From the class TestWordDelimiterGraphFilter, method testEmptyTerm.
/**
 * Feeds the empty string through a {@code KeywordTokenizer} +
 * {@code WordDelimiterGraphFilter} analyzer for every flag value from 0 to 511,
 * randomly choosing per iteration between a small protected-words set
 * ("a", "b", "cd") and {@code null}, and verifies each configuration with
 * {@code checkAnalysisConsistency}.
 */
public void testEmptyTerm() throws IOException {
  Random random = random();
  // NOTE(review): 0..511 presumably enumerates all combinations of the filter's
  // nine low flag bits -- confirm against WordDelimiterGraphFilter's constants.
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }
    // try-with-resources closes each analyzer even if the consistency check
    // fails; a trailing a.close() would be skipped on an assertion error.
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new WordDelimiterGraphFilter(tokenizer, flags, protectedWords));
      }
    }) {
      // depending upon options, this thing may or may not preserve the empty term
      checkAnalysisConsistency(random, a, random.nextBoolean(), "");
    }
  }
}
Aggregations