Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.
Class HTMLStripCharFilterTest, method testUTF16Surrogates.
/**
 * Feeds inputs containing UTF-16 surrogate code units through the analyzer returned by
 * {@code newTestAnalyzer()} and checks the resulting tokens. The cases are grouped as
 * paired surrogates, improperly paired surrogates, unpaired high surrogates and unpaired
 * low surrogates; some inputs end in {@code <br>}, which is expected to be absent from
 * the tokens.
 *
 * <p>NOTE(review): many literals below contain U+FFFD replacement characters and several
 * consecutive assertions are textually identical. This looks like the original escapes or
 * character references were lost when the snippet was transcribed — confirm against the
 * upstream test before relying on these values.
 *
 * @throws Exception propagated from the analysis/assertion helpers
 */
public void testUTF16Surrogates() throws Exception {
  // try-with-resources closes the analyzer even when an assertion fails part-way through.
  try (Analyzer analyzer = newTestAnalyzer()) {
    // Paired surrogates
    assertAnalyzesTo(analyzer, " one two ��three", new String[] { "one", "two", "𫀁three" });
    assertAnalyzesTo(analyzer, " ��", new String[] { "𫀁" });
    assertAnalyzesTo(analyzer, " ��", new String[] { "𫀁" });
    assertAnalyzesTo(analyzer, " ��", new String[] { "𫀁" });
    // Improperly paired surrogates
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " 훚�", new String[] { "훚�" });
    assertAnalyzesTo(analyzer, " 훚�", new String[] { "훚�" });
    // Unpaired high surrogates
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �<br>", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �<br>", new String[] { "�" });
    // Unpaired low surrogates
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �<br>", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �", new String[] { "�" });
    assertAnalyzesTo(analyzer, " �<br>", new String[] { "�" });
  }
}
Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.
Class TestArabicNormalizationFilter, method testEmptyTerm.
/**
 * Builds an ad-hoc analyzer that chains a {@code KeywordTokenizer} into an
 * {@code ArabicNormalizationFilter} and passes it to {@code checkOneTerm} with an empty
 * input and an empty expected term.
 *
 * @throws IOException propagated from {@code checkOneTerm}
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources closes the analyzer even if the check below throws.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ArabicNormalizationFilter(tokenizer));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.
Class TestBulgarianAnalyzer, method testBasicExamples.
/**
 * Tests some examples from the paper (the paper itself is not identified in this snippet).
 *
 * <p>Each Bulgarian input is run through a default-constructed {@code BulgarianAnalyzer}
 * and compared against the expected token array.
 *
 * @throws IOException propagated from {@code assertAnalyzesTo}
 */
public void testBasicExamples() throws IOException {
  // try-with-resources closes the analyzer even when one of the assertions fails.
  try (Analyzer a = new BulgarianAnalyzer()) {
    assertAnalyzesTo(a, "енергийни кризи", new String[] { "енергийн", "криз" });
    assertAnalyzesTo(a, "Атомната енергия", new String[] { "атомн", "енерг" });
    assertAnalyzesTo(a, "компютри", new String[] { "компютр" });
    assertAnalyzesTo(a, "компютър", new String[] { "компютр" });
    assertAnalyzesTo(a, "градове", new String[] { "град" });
  }
}
Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.
Class TestBrazilianAnalyzer, method testReusableTokenStream.
/**
 * Runs several inputs in sequence through one default-constructed
 * {@code BrazilianAnalyzer} instance via {@code checkReuse}, comparing each against its
 * expected output.
 *
 * @throws Exception propagated from {@code checkReuse}
 */
public void testReusableTokenStream() throws Exception {
  // try-with-resources closes the analyzer even when one of the checks fails.
  try (Analyzer a = new BrazilianAnalyzer()) {
    checkReuse(a, "boa", "boa");
    checkReuse(a, "boainain", "boainain");
    checkReuse(a, "boas", "boas");
    // removes diacritic: different from Snowball Portuguese
    checkReuse(a, "bôas", "boas");
  }
}
Use of org.apache.lucene.analysis.Analyzer in the Apache lucene-solr project.
Class TestBrazilianAnalyzer, method testEmptyTerm.
/**
 * Builds an ad-hoc analyzer that chains a {@code KeywordTokenizer} into a
 * {@code BrazilianStemFilter} and passes it to {@code checkOneTerm} with an empty input
 * and an empty expected term.
 *
 * @throws IOException propagated from {@code checkOneTerm}
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources closes the analyzer even if the check below throws.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new BrazilianStemFilter(tokenizer));
    }
  }) {
    checkOneTerm(a, "", "");
  }
}
Aggregations