Use of org.apache.commons.codec.language.bm.PhoneticEngine in the Apache lucene-solr project.
Class TestBeiderMorseFilter, method testCustomAttribute.
/**
 * Checks that a {@link KeywordAttribute} set upstream is still reported as a keyword on
 * every token produced by {@link BeiderMorseFilter}, and that exactly 12 tokens are
 * produced for the input {@code "D'Angelo"}.
 *
 * @throws IOException if the token stream fails while being consumed
 */
public void testCustomAttribute() throws IOException {
  // Declare the source as a Tokenizer so the reader can be set without a cast.
  Tokenizer source = new MockTokenizer(MockTokenizer.KEYWORD, false);
  source.setReader(new StringReader("D'Angelo"));

  // Chain: tokenizer -> keyword marker (pattern ".*") -> Beider-Morse phonetic filter.
  TokenStream marked = new PatternKeywordMarkerFilter(source, Pattern.compile(".*"));
  PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
  TokenStream stream = new BeiderMorseFilter(marked, engine);

  KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
  stream.reset();

  int emitted = 0;
  for (; stream.incrementToken(); emitted++) {
    // The keyword flag must be set on every emitted token.
    assertTrue(keyAtt.isKeyword());
  }
  assertEquals(12, emitted);

  stream.end();
  stream.close();
}
Use of org.apache.commons.codec.language.bm.PhoneticEngine in the Apache lucene-solr project.
Class TestBeiderMorseFilter, method testLanguageSet.
/**
 * Restricts the output to a set of possible origin languages.
 *
 * <p>Builds a {@link BeiderMorseFilter} limited to Italian, Greek and Spanish and checks
 * the tokens emitted for the input {@code "Angelo"}.
 *
 * @throws Exception if analysis fails
 */
public void testLanguageSet() throws Exception {
  // A plain HashSet replaces double-brace initialization, which would create an anonymous
  // HashSet subclass holding a reference to this test instance.
  final LanguageSet languages = LanguageSet.from(
      new HashSet<String>(java.util.Arrays.asList("italian", "greek", "spanish")));

  // try-with-resources closes the analyzer even when the assertion below throws.
  try (Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true), languages));
    }
  }) {
    // Expected terms followed by three int arrays; presumably start offsets, end offsets
    // and position increments -- confirm against assertAnalyzesTo's signature.
    assertAnalyzesTo(analyzer, "Angelo", new String[] { "andZelo", "angelo", "anxelo" }, new int[] { 0, 0, 0 }, new int[] { 6, 6, 6 }, new int[] { 1, 0, 0 });
  }
}
Use of org.apache.commons.codec.language.bm.PhoneticEngine in the Apache lucene-solr project.
Class TestBeiderMorseFilter, method testEmptyTerm.
/**
 * Checks that an empty input term passes through {@link BeiderMorseFilter} as a single
 * empty term.
 *
 * @throws IOException if analysis fails
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources closes the analyzer even when checkOneTerm throws.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
Use of org.apache.commons.codec.language.bm.PhoneticEngine in the Apache lucene-solr project.
Class TestBeiderMorseFilter, method setUp.
/**
 * Runs the superclass setup, then assigns {@code analyzer} an {@link Analyzer} whose
 * components are a whitespace-mode {@link MockTokenizer} feeding a
 * {@link BeiderMorseFilter} configured with a generic, exact-rule {@link PhoneticEngine}.
 *
 * @throws Exception if the superclass setup fails
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      final PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true);
      final BeiderMorseFilter phonetic = new BeiderMorseFilter(source, engine);
      return new TokenStreamComponents(source, phonetic);
    }
  };
}
Aggregations