use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-solr by apache.
the class TestEnglishMinimalStemFilter method testEmptyTerm.
/**
 * Runs the empty string through a {@link KeywordTokenizer} followed by an
 * {@link EnglishMinimalStemFilter} and hands it to {@code checkOneTerm} with the empty string
 * as the expected term.
 *
 * @throws IOException propagated from {@code checkOneTerm}
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources guarantees the analyzer is closed even when the check below throws;
  // a trailing a.close() would be skipped on a failing assertion.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new EnglishMinimalStemFilter(tokenizer));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-solr by apache.
the class TestGalicianMinimalStemFilter method testEmptyTerm.
/**
 * Runs the empty string through a {@link KeywordTokenizer} followed by a
 * {@link GalicianMinimalStemFilter} and hands it to {@code checkOneTerm} with the empty string
 * as the expected term.
 *
 * @throws IOException propagated from {@code checkOneTerm}
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources guarantees the analyzer is closed even when the check below throws;
  // a trailing a.close() would be skipped on a failing assertion.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GalicianMinimalStemFilter(tokenizer));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-solr by apache.
the class TestKStemmer method testEmptyTerm.
/**
 * Runs the empty string through a {@link KeywordTokenizer} followed by a {@link KStemFilter}
 * and hands it to {@code checkOneTerm} with the empty string as the expected term.
 *
 * @throws IOException propagated from {@code checkOneTerm}
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources guarantees the analyzer is closed even when the check below throws;
  // a trailing a.close() would be skipped on a failing assertion.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new KStemFilter(tokenizer));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-solr by apache.
the class EdgeNGramTokenFilterTest method testEmptyTerm.
/**
 * Feeds the empty string to an analyzer built from a {@link KeywordTokenizer} and an
 * {@link EdgeNGramTokenFilter} (min gram 2, max gram 15) via {@code checkAnalysisConsistency}.
 *
 * @throws Exception propagated from {@code checkAnalysisConsistency}
 */
public void testEmptyTerm() throws Exception {
  // Obtain the test's Random first, exactly as before, so the randomness sequence is unchanged.
  Random random = random();
  // try-with-resources guarantees the analyzer is closed even when the check below throws;
  // a trailing a.close() would be skipped on a failing assertion.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new EdgeNGramTokenFilter(tokenizer, 2, 15));
    }
  }) {
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
use of org.apache.lucene.analysis.core.KeywordTokenizer in project lucene-solr by apache.
the class EdgeNGramTokenFilterTest method testSupplementaryCharacters.
/**
 * Checks the edge n-grams produced for a random Unicode string that may contain supplementary
 * characters: gram {@code i} must equal the first {@code i} code points of the input, and
 * every gram must report offsets spanning the whole input ({@code 0} to {@code s.length()}).
 *
 * @throws IOException if the token stream fails while being consumed or closed
 */
public void testSupplementaryCharacters() throws IOException {
  final String s = TestUtil.randomUnicodeString(random(), 10);
  final int codePointCount = s.codePointCount(0, s.length());
  final int minGram = TestUtil.nextInt(random(), 1, 3);
  final int maxGram = TestUtil.nextInt(random(), minGram, 10);

  // Keep the tokenizer in its own variable so setReader needs no downcast.
  final Tokenizer tokenizer = new KeywordTokenizer();
  tokenizer.setReader(new StringReader(s));

  // Closing the filter releases the stream (and the reader behind it) even if an assertion
  // fails part-way through.
  try (TokenStream tk = new EdgeNGramTokenFilter(tokenizer, minGram, maxGram)) {
    final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
    final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
    tk.reset();
    for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
      assertTrue(tk.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      // Convert a length in code points to a char index so the expected prefix never
      // splits a surrogate pair.
      final int end = Character.offsetByCodePoints(s, 0, i);
      assertEquals(s.substring(0, end), termAtt.toString());
    }
    assertFalse(tk.incrementToken());
    // Complete the consumer workflow: end() after the last incrementToken(), then close().
    tk.end();
  }
}
Aggregations