Use of org.apache.commons.codec.language.bm.Languages.LanguageSet in the lucene-solr project by Apache.
Class TestBeiderMorseFilter, method testLanguageSet.
/**
 * Restricts the output to a set of possible origin languages.
 *
 * <p>Builds a {@link BeiderMorseFilter} limited to Italian, Greek and Spanish and checks
 * that the single input token "Angelo" is expanded to exactly three tokens.
 *
 * @throws Exception if analysis fails
 */
public void testLanguageSet() throws Exception {
  // A plain HashSet instead of double-brace initialization: no anonymous subclass and
  // no hidden reference to the enclosing test instance.
  HashSet<String> languageNames = new HashSet<>();
  languageNames.add("italian");
  languageNames.add("greek");
  languageNames.add("spanish");
  final LanguageSet languages = LanguageSet.from(languageNames);

  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
      return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, engine, languages));
    }
  };
  try {
    // Argument order follows assertAnalyzesTo(analyzer, input, output, startOffsets,
    // endOffsets, posIncrements) -- NOTE(review): confirm against the test base class.
    assertAnalyzesTo(
        analyzer,
        "Angelo",
        new String[] { "andZelo", "angelo", "anxelo" },
        new int[] { 0, 0, 0 },
        new int[] { 6, 6, 6 },
        new int[] { 1, 0, 0 });
  } finally {
    // Close even when the assertion fails so the analyzer is not leaked.
    analyzer.close();
  }
}
Aggregations