use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestPatternReplaceCharFilter method test2blocksMultiMatches.
// Character offsets (tens digit on the first ruler line, units on the second),
// followed by the input text and the text after replacement:
//
//           11111111112222222222333333333
// 012345678901234567890123456789012345678
//   aa bb cc --- aa bb aa. bb aa   bb cc
//   aa##bb cc --- aa##bb aa. bb aa##bb cc
/**
 * Runs {@code (aa)\s+(bb)} with replacement {@code $1##$2} over an input that
 * contains three matches, one of which is shortened by the replacement
 * ({@code "aa   bb"}) and two of which are lengthened ({@code "aa bb"}).
 *
 * <p>The tokens produced from the filtered text are checked together with their
 * start offsets, end offsets and the final offset, all expressed as positions
 * in the unfiltered input.
 *
 * @throws IOException if reading from the filtered stream fails
 */
public void test2blocksMultiMatches() throws IOException {
  // The whitespace in this literal is significant: the expected offsets below
  // only line up with two leading spaces and a three-space gap in the last
  // "aa   bb" (start 28, end 35).
  final String BLOCK = "  aa bb cc --- aa bb aa. bb aa   bb cc";

  CharFilter cs = new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)"), "$1##$2", new StringReader(BLOCK));
  TokenStream ts = whitespaceMockTokenizer(cs);
  assertTokenStreamContents(ts,
      new String[] { "aa##bb", "cc", "---", "aa##bb", "aa.", "bb", "aa##bb", "cc" },
      new int[] { 2, 8, 11, 15, 21, 25, 28, 36 },
      new int[] { 7, 10, 14, 20, 24, 27, 35, 38 },
      BLOCK.length());
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestPatternReplaceCharFilter method test1blockMultiMatches.
// Character offsets (tens digit on the first ruler line, units on the second),
// followed by the input text and the text after replacement:
//
//           111111111122222222223333
// 0123456789012345678901234567890123
//   aa bb cc --- aa bb aa   bb   cc
//   aa  bb  cc --- aa bb aa  bb  cc
/**
 * Runs {@code (aa)\s+(bb)\s+(cc)} with replacement {@code $1  $2  $3} (two
 * spaces between groups) over an input with two matches: the first is
 * lengthened by the replacement, the second is shortened.
 *
 * <p>The tokens produced from the filtered text are checked together with their
 * start offsets, end offsets and the final offset, all expressed as positions
 * in the unfiltered input.
 *
 * @throws IOException if reading from the filtered stream fails
 */
public void test1blockMultiMatches() throws IOException {
  // Whitespace is significant in both literals. The expected starts 6 and 9
  // for the first "bb"/"cc" need the two-space replacement, and the final end
  // offset 33 needs the widened gaps in the last "aa   bb   cc".
  final String BLOCK = "  aa bb cc --- aa bb aa   bb   cc";

  CharFilter cs = new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)\\s+(cc)"), "$1  $2  $3", new StringReader(BLOCK));
  TokenStream ts = whitespaceMockTokenizer(cs);
  assertTokenStreamContents(ts,
      new String[] { "aa", "bb", "cc", "---", "aa", "bb", "aa", "bb", "cc" },
      new int[] { 2, 6, 9, 11, 15, 18, 21, 25, 29 },
      new int[] { 4, 8, 10, 14, 17, 20, 23, 27, 33 },
      BLOCK.length());
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestPatternReplaceCharFilter method test1block1matchSameLength.
// 012345678
// aa bb cc
// aa#bb#cc
/**
 * A single match whose replacement has the same length as the matched text:
 * the two whitespace separators are each swapped for {@code #}, leaving one
 * token that spans the whole input.
 *
 * @throws IOException if reading from the filtered stream fails
 */
public void test1block1matchSameLength() throws IOException {
  final String input = "aa bb cc";
  final String[] expectedTerms = { "aa#bb#cc" };
  final int[] expectedStarts = { 0 };
  final int[] expectedEnds = { 8 };

  CharFilter filtered =
      new PatternReplaceCharFilter(pattern("(aa)\\s+(bb)\\s+(cc)"), "$1#$2#$3", new StringReader(input));
  TokenStream tokens = whitespaceMockTokenizer(filtered);

  assertTokenStreamContents(tokens, expectedTerms, expectedStarts, expectedEnds, input.length());
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestPatternReplaceCharFilter method checkOutput.
/**
 * Filters {@code input} through a {@code PatternReplaceCharFilter} and checks
 * both the filtered text and the offset mapping back to the input.
 *
 * <p>The "index-matched" string is built by taking, for every position in the
 * filtered output, the input character found at the corrected offset for that
 * position, or {@code -} when the corrected offset is negative.
 *
 * <p>On a mismatch the pattern, replacement, input, and actual/expected values
 * are printed to standard output before the assertions fail.
 *
 * @param input the text fed to the filter
 * @param pattern the regular expression source, compiled via {@code pattern(String)}
 * @param replacement the replacement string passed to the filter
 * @param expectedOutput the expected filtered text
 * @param expectedIndexMatchedOutput the expected index-matched string described above
 * @throws IOException if reading from the filter fails
 */
private void checkOutput(String input, String pattern, String replacement, String expectedOutput, String expectedIndexMatchedOutput) throws IOException {
  CharFilter cs = new PatternReplaceCharFilter(pattern(pattern), replacement, new StringReader(input));

  // Drain the filter. Reader.read() reports end of stream as -1; comparing
  // against -1 (rather than "> 0") keeps a NUL character from being mistaken
  // for end of input.
  StringBuilder output = new StringBuilder();
  for (int chr = cs.read(); chr != -1; chr = cs.read()) {
    output.append((char) chr);
  }

  // Map every output position back to the input character it is attributed to.
  StringBuilder indexMatched = new StringBuilder();
  for (int i = 0; i < output.length(); i++) {
    int corrected = cs.correctOffset(i);
    if (corrected < 0) {
      indexMatched.append("-");
    } else {
      indexMatched.append(input.charAt(corrected));
    }
  }

  boolean outputGood = expectedOutput.equals(output.toString());
  boolean indexMatchedGood = expectedIndexMatchedOutput.equals(indexMatched.toString());

  // Dump the full context only when something is wrong, so the failure can be
  // diagnosed from the test log.
  if (!outputGood || !indexMatchedGood) {
    System.out.println("Pattern : " + pattern);
    System.out.println("Replac. : " + replacement);
    System.out.println("Input : " + input);
    System.out.println("Output : " + output);
    System.out.println("Expected: " + expectedOutput);
    System.out.println("Output/i: " + indexMatched);
    System.out.println("Expected: " + expectedIndexMatchedOutput);
    System.out.println();
  }

  assertTrue("Output doesn't match.", outputGood);
  assertTrue("Index-matched output doesn't match.", indexMatchedGood);
}
use of org.apache.lucene.analysis.CharFilter in project lucene-solr by apache.
the class TestPatternReplaceCharFilter method test1block2matchLonger.
// Character offsets, the input text, and the text after replacement:
//
// 01234567
//  a  a
//  aa  aa
/**
 * Two matches in one input, each replaced by something longer than the match
 * ({@code a} becomes {@code aa}).
 *
 * <p>The tokens produced from the filtered text are checked together with their
 * start offsets, end offsets and the final offset, all expressed as positions
 * in the unfiltered input.
 *
 * @throws IOException if reading from the filtered stream fails
 */
public void test1block2matchLonger() throws IOException {
  // Whitespace is significant: the second "a" must sit at offset 4 (two spaces
  // after the first one) for the expected offsets { 1, 4 } / { 2, 5 } to hold.
  final String BLOCK = " a  a";

  CharFilter cs = new PatternReplaceCharFilter(pattern("a"), "aa", new StringReader(BLOCK));
  TokenStream ts = whitespaceMockTokenizer(cs);
  assertTokenStreamContents(ts,
      new String[] { "aa", "aa" },
      new int[] { 1, 4 },
      new int[] { 2, 5 },
      BLOCK.length());
}
Aggregations