use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in project lucene-solr by apache.
the class TestJapaneseKatakanaStemFilter method testKeyword.
public void testKeyword() throws IOException {
final CharArraySet exclusionSet = new CharArraySet(asSet("コーヒー"), false);
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
return new TokenStreamComponents(source, new JapaneseKatakanaStemFilter(sink));
}
};
checkOneTerm(a, "コーヒー", "コーヒー");
a.close();
}
use of org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter in project lucene-solr by apache.
the class TestJapaneseBaseFormFilter method testKeyword.
public void testKeyword() throws IOException {
final CharArraySet exclusionSet = new CharArraySet(asSet("あり"), false);
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new JapaneseTokenizer(newAttributeFactory(), null, true, JapaneseTokenizer.DEFAULT_MODE);
TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
return new TokenStreamComponents(source, new JapaneseBaseFormFilter(sink));
}
};
assertAnalyzesTo(a, "それはまだ実験段階にあります", new String[] { "それ", "は", "まだ", "実験", "段階", "に", "あり", "ます" });
a.close();
}
Aggregations