Use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
From the class TestICUNormalizer2CharFilter, method testMassiveLigature.
/**
 * Runs the single character U+FDFA through an ICU normalizing char filter configured with the
 * {@code nfkc_cf} data in COMPOSE mode, tokenizes the filtered text on whitespace, and checks
 * that exactly four Arabic terms come out with the asserted offsets.
 *
 * <p>NOTE(review): the two {@code int[]} arguments and the trailing integer passed to
 * {@code assertTokenStreamContents} are presumably the expected start offsets, end offsets and
 * final offset respectively; confirm against the helper's declaration.
 *
 * @throws IOException declared by the method; which call raises it is not visible in this snippet
 */
public void testMassiveLigature() throws IOException {
  final String input = "ﷺ";

  // Build the filter chain: raw text -> nfkc_cf normalization.
  final StringReader rawText = new StringReader(input);
  final Normalizer2 nfkcCaseFold =
      Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE);
  final CharFilter normalized = new ICUNormalizer2CharFilter(rawText, nfkcCaseFold);

  final Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(normalized);

  // All four terms originate from the one input character, hence the near-identical offsets.
  final String[] expectedTerms = { "صلى", "الله", "عليه", "وسلم" };
  final int[] expectedStarts = { 0, 0, 0, 0 };
  final int[] expectedEnds = { 0, 0, 0, 1 };

  assertTokenStreamContents(
      tokenizer, expectedTerms, expectedStarts, expectedEnds, input.length());
}
Use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
From the class TestMappingCharFilter, method test1to1.
/**
 * Filters the one-character input {@code "h"} through a {@code MappingCharFilter} built on the
 * shared {@code normMap} and expects a single token {@code "i"}.
 *
 * <p>NOTE(review): {@code normMap} is defined outside this snippet; the expectation implies it
 * rewrites "h" to "i". The numeric arguments are presumably start offset 0, end offset 1 and
 * final offset 1; confirm against {@code assertTokenStreamContents}.
 */
public void test1to1() throws Exception {
  final String[] expectedTerms = { "i" };
  final int[] expectedStarts = { 0 };
  final int[] expectedEnds = { 1 };

  final CharFilter mapped = new MappingCharFilter(normMap, new StringReader("h"));
  final TokenStream tokens = whitespaceMockTokenizer(mapped);

  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
}
Use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
From the class TestMappingCharFilter, method test5to0.
/**
 * Filters the five-character input {@code "empty"} through a {@code MappingCharFilter} built on
 * the shared {@code normMap} and expects the tokenizer to emit no tokens at all.
 *
 * <p>NOTE(review): {@code normMap} is defined outside this snippet; the expectation implies it
 * maps "empty" to nothing. The trailing 5 is presumably the expected final offset (the original
 * input length); confirm against {@code assertTokenStreamContents}.
 */
public void test5to0() throws Exception {
  final CharFilter mapped = new MappingCharFilter(normMap, new StringReader("empty"));
  final TokenStream tokens = whitespaceMockTokenizer(mapped);

  // No terms are expected, so every expectation array is empty.
  final String[] noTerms = new String[0];
  final int[] noStarts = new int[0];
  final int[] noEnds = new int[0];

  assertTokenStreamContents(tokens, noTerms, noStarts, noEnds, 5);
}
Use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
From the class TestMappingCharFilter, method testNothingChange.
/**
 * Filters the input {@code "x"} through a {@code MappingCharFilter} built on the shared
 * {@code normMap} and expects it to come out as the unchanged single token {@code "x"}.
 *
 * <p>NOTE(review): {@code normMap} is defined outside this snippet; the expectation implies it
 * has no rule affecting "x". The numeric arguments are presumably start offset 0, end offset 1
 * and final offset 1; confirm against {@code assertTokenStreamContents}.
 */
public void testNothingChange() throws Exception {
  final String text = "x";
  final CharFilter mapped = new MappingCharFilter(normMap, new StringReader(text));

  assertTokenStreamContents(
      whitespaceMockTokenizer(mapped),
      new String[] { "x" },
      new int[] { 0 },
      new int[] { 1 },
      1);
}
Use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
From the class TestMappingCharFilter, method test2to4.
/**
 * Filters the two-character input {@code "ll"} through a {@code MappingCharFilter} built on the
 * shared {@code normMap} and expects the single four-character token {@code "llll"}.
 *
 * <p>NOTE(review): {@code normMap} is defined outside this snippet; the expectation implies it
 * expands "ll" to "llll". The offsets (0, 2) and trailing 2 presumably refer to positions in the
 * original, unexpanded input; confirm against {@code assertTokenStreamContents}.
 */
public void test2to4() throws Exception {
  final StringReader source = new StringReader("ll");
  final CharFilter mapped = new MappingCharFilter(normMap, source);
  final TokenStream tokens = whitespaceMockTokenizer(mapped);

  final String[] expectedTerms = { "llll" };
  final int[] expectedStarts = { 0 };
  final int[] expectedEnds = { 2 };

  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 2);
}
Aggregations