Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in project lucene-solr by apache.
Class TestBulgarianStemmer, method testWithKeywordAttribute.
public void testWithKeywordAttribute() throws IOException {
  // "строеве" is marked as a keyword, so BulgarianStemFilter leaves it unchanged
  // while the unmarked "строевете" is stemmed.
  CharArraySet set = new CharArraySet(1, true);
  set.add("строеве");
  MockTokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenStream.setReader(new StringReader("строевете строеве"));
  BulgarianStemFilter filter = new BulgarianStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
  assertTokenStreamContents(filter, new String[] { "строй", "строеве" });
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in project lucene-solr by apache.
Class TestBrazilianAnalyzer, method testWithKeywordAttribute.
public void testWithKeywordAttribute() throws IOException {
  // The set is case-insensitive, so the lowercased token "brasília" still matches
  // the keyword "Brasília" and is not stemmed; "Brasilia" is stemmed to "brasil".
  CharArraySet set = new CharArraySet(1, true);
  set.add("Brasília");
  Tokenizer tokenizer = new LowerCaseTokenizer();
  tokenizer.setReader(new StringReader("Brasília Brasilia"));
  BrazilianStemFilter filter = new BrazilianStemFilter(new SetKeywordMarkerFilter(tokenizer, set));
  assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in project lucene-solr by apache.
Class TestArabicStemFilter, method testWithKeywordAttribute.
public void testWithKeywordAttribute() throws IOException {
  // The single token is in the keyword set, so ArabicStemFilter passes it through unchanged.
  CharArraySet set = new CharArraySet(1, true);
  set.add("ساهدهات");
  MockTokenizer tokenStream = whitespaceMockTokenizer("ساهدهات");
  ArabicStemFilter filter = new ArabicStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
  assertTokenStreamContents(filter, new String[] { "ساهدهات" });
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in project lucene-solr by apache.
Class TestJapaneseKatakanaStemFilter, method testKeyword.
public void testKeyword() throws IOException {
  // "コーヒー" is excluded from katakana stemming, so its trailing prolonged sound mark is kept.
  final CharArraySet exclusionSet = new CharArraySet(asSet("コーヒー"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink));
    }
  };
  checkOneTerm(a, "コーヒー", "コーヒー");
  a.close();
}
Use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in project lucene-solr by apache.
Class TestJapaneseBaseFormFilter, method testKeyword.
public void testKeyword() throws IOException {
  // "あり" is marked as a keyword, so JapaneseBaseFormFilter does not rewrite it to its base form "ある".
  final CharArraySet exclusionSet = new CharArraySet(asSet("あり"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.DEFAULT_MODE);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
    }
  };
  assertAnalyzesTo(a, "それはまだ実験段階にあります",
      new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" });
  a.close();
}
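All five tests follow the same pattern: SetKeywordMarkerFilter sets the KeywordAttribute on any token found in the supplied CharArraySet, and the keyword-aware stem filters further down the chain leave marked tokens untouched. Below is a minimal sketch of that pattern outside a test, assuming a Lucene 7.x-era package layout (CharArraySet in org.apache.lucene.analysis); the class name ProtectedTermsAnalyzer and the protected terms are illustrative, and PorterStemFilter simply stands in for the language-specific stemmers shown above.

import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;

public class ProtectedTermsAnalyzer extends Analyzer {
  // Illustrative protected terms; tokens matching these get KeywordAttribute set.
  private final CharArraySet protectedTerms =
      new CharArraySet(Arrays.asList("lucene", "solr"), true);

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new WhitespaceTokenizer();
    // Mark protected terms before stemming; PorterStemFilter consults KeywordAttribute
    // and passes marked tokens through unchanged.
    TokenStream sink = new SetKeywordMarkerFilter(source, protectedTerms);
    return new TokenStreamComponents(source, new PorterStemFilter(sink));
  }
}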