use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestMappingCharFilter method testFullWidthChar.
/**
 * Feeds a one-character input through a {@link MappingCharFilter} built on
 * {@code normMap} and checks that exactly one token,
 * {@code "full-width-exclamation"}, is produced with start offset 0 and
 * end offset 1, the last assertion argument being 1.
 */
public void testFullWidthChar() throws Exception {
  // NOTE(review): the literal below reads as ASCII '!' in this copy, while the
  // test name suggests a full-width exclamation mark (U+FF01). normMap is not
  // visible here -- confirm the literal matches the key registered in normMap.
  CharFilter mapped = new MappingCharFilter(normMap, new StringReader("!"));
  TokenStream tokens = whitespaceMockTokenizer(mapped);

  String[] expectedTerms = { "full-width-exclamation" };
  int[] expectedStarts = { 0 };
  int[] expectedEnds = { 1 };
  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestMappingCharFilter method test1to3.
/**
 * One input character mapped to three: the input {@code "k"} is expected to
 * come out as the single token {@code "kkk"}, while the asserted offsets
 * (start 0, end 1) still describe the one-character input.
 */
public void test1to3() throws Exception {
  CharFilter mapped = new MappingCharFilter(normMap, new StringReader("k"));
  TokenStream tokens = whitespaceMockTokenizer(mapped);

  String[] expectedTerms = { "kkk" };
  int[] expectedStarts = { 0 };
  int[] expectedEnds = { 1 };
  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestMappingCharFilter method testTokenStream.
//
//                1111111111222
//      01234567890123456789012
//(in)  h i j k ll cccc bbb aa
//
//                1111111111222
//      01234567890123456789012
//(out) i i jj kkk llll cc b a
//
//    h, 0, 1 =>    i, 0, 1
//    i, 2, 3 =>    i, 2, 3
//    j, 4, 5 =>   jj, 4, 5
//    k, 6, 7 =>  kkk, 6, 7
//   ll, 8,10 => llll, 8,10
// cccc,11,15 =>   cc,11,15
//  bbb,16,19 =>    b,16,19
//   aa,20,22 =>    a,20,22
//
/**
 * Runs a multi-token input through a {@link MappingCharFilter} built on
 * {@code normMap} and checks every resulting term together with its start
 * and end offset. The asserted offsets are positions in the original input
 * string, not in the mapped text; the last assertion argument is the input
 * length.
 */
public void testTokenStream() throws Exception {
  String testString = "h i j k ll cccc bbb aa";

  // Expected values, one column per whitespace-separated input token.
  String[] expectedTerms = { "i", "i", "jj", "kkk", "llll", "cc", "b", "a" };
  int[] expectedStarts = { 0, 2, 4, 6, 8, 11, 16, 20 };
  int[] expectedEnds = { 1, 3, 5, 7, 10, 15, 19, 22 };

  CharFilter mapped = new MappingCharFilter(normMap, new StringReader(testString));
  TokenStream tokens = whitespaceMockTokenizer(mapped);
  assertTokenStreamContents(
      tokens, expectedTerms, expectedStarts, expectedEnds, testString.length());
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestMappingCharFilter method testChained.
//
//
//        0123456789
//(in)    aaaa ll h
//(out-1) aa llll i
//(out-2) a llllllll i
//
// aaaa,0,4 => a,0,4
//   ll,5,7 => llllllll,5,7
//    h,8,9 => i,8,9
/**
 * Stacks two {@link MappingCharFilter}s, both built on {@code normMap}, so the
 * mapping is applied twice, and checks the resulting terms. The asserted
 * offsets are positions in the original, unfiltered input string; the last
 * assertion argument is the input length.
 */
public void testChained() throws Exception {
  String testString = "aaaa ll h";

  // The inner filter reads the raw text; the outer filter reads the inner one's output.
  CharFilter firstPass = new MappingCharFilter(normMap, new StringReader(testString));
  CharFilter secondPass = new MappingCharFilter(normMap, firstPass);
  TokenStream tokens = whitespaceMockTokenizer(secondPass);

  String[] expectedTerms = { "a", "llllllll", "i" };
  int[] expectedStarts = { 0, 5, 8 };
  int[] expectedEnds = { 4, 7, 9 };
  assertTokenStreamContents(
      tokens, expectedTerms, expectedStarts, expectedEnds, testString.length());
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestMappingCharFilter method test1to2.
/**
 * One input character mapped to two: the input {@code "j"} is expected to
 * come out as the single token {@code "jj"}, while the asserted offsets
 * (start 0, end 1) still describe the one-character input.
 */
public void test1to2() throws Exception {
  CharFilter mapped = new MappingCharFilter(normMap, new StringReader("j"));
  TokenStream tokens = whitespaceMockTokenizer(mapped);

  String[] expectedTerms = { "jj" };
  int[] expectedStarts = { 0 };
  int[] expectedEnds = { 1 };
  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, 1);
}
Aggregations