Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by Apache.
Class TestSynonymMapFilter, method testDoKeepOrig.
/**
 * Analyzes {@code "a b c"} with a single rule {@code "a b" -> "foo"} that was added
 * with keepOrig set to {@code true}, and expects the output tokens
 * {@code a, foo, b, c}.
 *
 * <p>The analyzer is held in a try-with-resources statement so that it is closed even
 * when one of the checks throws.
 */
public void testDoKeepOrig() throws Exception {
  b = new SynonymMap.Builder(true);
  add("a b", "foo", true);
  final SynonymMap map = b.build();

  try (Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      // Third SynonymFilter argument is ignoreCase; matching is case-sensitive here.
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
    }
  }) {
    // NOTE(review): the per-argument labels below follow the
    // BaseTokenStreamTestCase.assertAnalyzesTo overload with this arity; confirm there.
    assertAnalyzesTo(
        analyzer,
        "a b c",
        new String[] { "a", "foo", "b", "c" }, // expected terms
        new int[] { 0, 0, 2, 4 },              // presumably start offsets
        new int[] { 1, 3, 3, 5 },              // presumably end offsets
        null,                                  // presumably token types (not checked)
        new int[] { 1, 0, 1, 1 },              // presumably position increments
        new int[] { 1, 2, 1, 1 },              // presumably position lengths
        true);
    checkAnalysisConsistency(random(), analyzer, false, "a b c");
  }
}
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by Apache.
Class TestSynonymMapFilter, method testRecursion4.
/**
 * Registers two rules that refer to each other ({@code "zoo zoo" -> "zoo"} and
 * {@code "zoo" -> "zoo zoo"}, both with keepOrig enabled) and checks the tokens
 * produced for the input {@code "zoo zoo $ zoo"}.
 *
 * <p>The analyzer is held in a try-with-resources statement so that it is closed even
 * when the assertion throws.
 */
public void testRecursion4() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = true;
  add("zoo zoo", "zoo", keepOrig);
  add("zoo", "zoo zoo", keepOrig);
  final SynonymMap map = b.build();

  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // Third SynonymFilter argument is ignoreCase; enabled for this test.
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    }
  }) {
    // NOTE(review): the int[] is presumably the expected position increments, per the
    // four-argument BaseTokenStreamTestCase.assertAnalyzesTo overload; confirm there.
    assertAnalyzesTo(
        a,
        "zoo zoo $ zoo",
        new String[] { "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" },
        new int[] { 1, 0, 1, 1, 1, 0, 1 });
  }
}
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by Apache.
Class TestSynonymMapFilter, method testRandomHuge.
/**
 * Simple random test like testRandom2, but for larger docs.
 *
 * <p>Each iteration builds a fresh synonym map from random non-empty input/output
 * strings with a random keepOrig flag, wraps it in an analyzer with a random
 * ignoreCase setting, and runs {@code checkRandomData} against it. The analyzer is
 * held in a try-with-resources statement so that it is closed on every iteration,
 * including when {@code checkRandomData} throws.
 */
public void testRandomHuge() throws Exception {
  Random random = random();
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    if (VERBOSE) {
      System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
    }
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    try (Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    }) {
      checkRandomData(random, analyzer, 100, 1024);
    }
  }
}
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by Apache.
Class TestSynonymMapFilter, method testBasic2.
/**
 * Checks two single-token-to-multi-token rules ({@code aaa -> aaaa1 aaaa2 aaaa3} and
 * {@code bbb -> bbbb1 bbbb2}) through the shared {@code tokensIn}/{@code tokensOut}
 * fields and the {@code verify} helper.
 *
 * <p>{@code keepOrig} is a local constant; the expectations for the other setting are
 * kept in the alternate branch so the flag can be flipped by hand.
 */
public void testBasic2() throws Exception {
  final boolean keepOrig = false;

  b = new SynonymMap.Builder(true);
  add("aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
  add("bbb", "bbbb1 bbbb2", keepOrig);

  // Run the tokenizer through one complete setReader/reset/incrementToken/end/close
  // cycle over the one-token input "a" before it is wrapped by the filter.
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  // Wrap the tokenizer and publish the attributes through the test's fields.
  final SynonymMap synonyms = b.build();
  tokensOut = new SynonymFilter(tokensIn, synonyms, true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);

  if (!keepOrig) {
    // Active branch: the matched input token does not appear in the expected output.
    verify("xyzzy bbb pot of gold", "xyzzy bbbb1 pot/bbbb2 of gold");
    verify("xyzzy aaa pot of gold", "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold");
  } else {
    // Expectations for keepOrig == true: the matched token stays next to its synonym.
    verify("xyzzy bbb pot of gold", "xyzzy bbb/bbbb1 pot/bbbb2 of gold");
    verify("xyzzy aaa pot of gold", "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold");
  }
}
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by Apache.
Class TestSynonymMapFilter, method testOutputHangsOffEnd.
/**
 * Checks the rule {@code a -> a b} on the single-token input {@code "a"}, where the
 * second output token has no input token underneath it.
 */
public void testOutputHangsOffEnd() throws Exception {
  final boolean keepOrig = false;

  b = new SynonymMap.Builder(true);
  // The "b" of the replacement hangs off the end (no input token under it).
  add("a", "a b", keepOrig);

  // Run the tokenizer through one complete setReader/reset/incrementToken/end/close
  // cycle over the one-token input "a" before it is wrapped by the filter.
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();

  // Wrap the tokenizer and publish the attributes through the test's fields.
  final SynonymMap synonyms = b.build();
  tokensOut = new SynonymFilter(tokensIn, synonyms, true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);

  // The end offset of the trailing token must be inherited from the previous input token.
  verify("a", "a b:1");
}
Aggregations