Example use of org.apache.lucene.analysis.CharFilter in the Apache lucene-solr project.
Taken from the class TestJapaneseIterationMarkCharFilterFactory, method testIterationMarksWithJapaneseTokenizer.
/**
 * Runs text containing iteration marks through a char filter created by
 * {@link JapaneseIterationMarkCharFilterFactory}, feeds the filtered reader to a tokenizer
 * created by {@link JapaneseTokenizerFactory}, and asserts the exact token sequence produced.
 *
 * @throws IOException if one of the analysis calls fails with an I/O error
 */
public void testIterationMarksWithJapaneseTokenizer() throws IOException {
  // Raw text (with iteration marks) and the tokens expected after filtering + tokenizing.
  final String input = "時々馬鹿々々しいところゞゝゝミスヾ";
  final String[] expectedTokens = {"時時", "馬鹿馬鹿しい", "ところどころ", "ミ", "スズ"};

  // The tokenizer factory gets an empty argument map and a resource loader
  // backed by an empty string.
  JapaneseTokenizerFactory tokenizers =
      new JapaneseTokenizerFactory(new HashMap<String, String>());
  tokenizers.inform(new StringMockResourceLoader(""));

  // The char filter factory is likewise built from an empty argument map.
  JapaneseIterationMarkCharFilterFactory markFilters =
      new JapaneseIterationMarkCharFilterFactory(new HashMap<String, String>());
  CharFilter markFilter = markFilters.create(new StringReader(input));

  // Create the stream first, then point it at the filtered reader; setReader is
  // reached through the Tokenizer type, hence the cast.
  TokenStream stream = tokenizers.create(newAttributeFactory());
  Tokenizer tokenizer = (Tokenizer) stream;
  tokenizer.setReader(markFilter);

  assertTokenStreamContents(stream, expectedTokens);
}
Aggregations