Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr (Apache), from class TestSynonymMapFilter, method testOutputHangsOffEnd:
public void testOutputHangsOffEnd() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = false;
  // b hangs off the end (no input token under it):
  add("a", "a b", keepOrig);
  // whitespace tokenizer, lowercasing enabled
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();
  tokensOut = new SynonymFilter(tokensIn, b.build(), true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  // Make sure endOffset inherits from previous input token:
  verify("a", "a b:1");
}
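The add(...) calls above go through a private helper of the test class that is not reproduced on this page. The following is a minimal sketch of what such a helper likely looks like, assuming the shared SynonymMap.Builder field b seen above and the classes org.apache.lucene.util.CharsRefBuilder and SynonymMap.Builder.join; the exact body is an assumption for illustration, not copied from the source.

// Hypothetical reconstruction of the test's add(input, output, keepOrig) helper.
// Assumes the shared SynonymMap.Builder field `b` used by the test methods.
private void add(String input, String output, boolean keepOrig) {
  CharsRefBuilder inputCharsRef = new CharsRefBuilder();
  SynonymMap.Builder.join(input.split(" +"), inputCharsRef);   // e.g. "a b" becomes a multi-word input
  CharsRefBuilder outputCharsRef = new CharsRefBuilder();
  SynonymMap.Builder.join(output.split(" +"), outputCharsRef); // e.g. "bar fee" becomes a multi-word output
  b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig);  // keepOrig maps to includeOrig
}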
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr (Apache), from class TestSynonymMapFilter, method testMultiwordOffsets:
public void testMultiwordOffsets() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = true;
  add("national hockey league", "nhl", keepOrig);
  final SynonymMap map = b.build();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    }
  };
  // expected terms, startOffsets, endOffsets, posIncrements:
  assertAnalyzesTo(a, "national hockey league",
      new String[] { "national", "nhl", "hockey", "league" },
      new int[] { 0, 0, 9, 16 },
      new int[] { 8, 22, 15, 22 },
      new int[] { 1, 0, 1, 1 });
  a.close();
}
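assertAnalyzesTo comes from BaseTokenStreamTestCase and consumes the analyzer for you. For readers who want to see the raw tokens, here is an illustrative, hand-rolled equivalent, assuming the analyzer a built above and run before a.close(); the field name "field" and the println are placeholders, not part of the test.

// Illustrative only: consume the analyzer directly and print what
// assertAnalyzesTo checks (term, offsets, position increment).
try (TokenStream ts = a.tokenStream("field", "national hockey league")) {
  CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
  OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
  PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    // "nhl" arrives with posInc=0 (stacked on "national") and offsets 0..22,
    // i.e. it spans the whole matched phrase.
    System.out.println(term + " " + offset.startOffset() + ".." + offset.endOffset()
        + " +" + posInc.getPositionIncrement());
  }
  ts.end();
}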
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr (Apache), from class TestSynonymMapFilter, method testRecursion3:
public void testRecursion3() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = true;
  add("zoo zoo", "zoo", keepOrig);
  final SynonymMap map = b.build();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    }
  };
  assertAnalyzesTo(a, "zoo zoo $ zoo",
      new String[] { "zoo", "zoo", "zoo", "$", "zoo" },
      new int[] { 1, 0, 1, 1, 1 });
  a.close();
}
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr (Apache), from class TestSynonymMapFilter, method testBasic:
public void testBasic() throws Exception {
  b = new SynonymMap.Builder(true);
  add("a", "foo", true);
  add("a b", "bar fee", true);
  add("b c", "dog collar", true);
  add("c d", "dog harness holder extras", true);
  add("m c e", "dog barks loudly", false);
  add("i j k", "feep", true);
  add("e f", "foo bar", false);
  add("e f", "baz bee", false);
  add("z", "boo", false);
  add("y", "bee", true);
  tokensIn = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokensIn.setReader(new StringReader("a"));
  tokensIn.reset();
  assertTrue(tokensIn.incrementToken());
  assertFalse(tokensIn.incrementToken());
  tokensIn.end();
  tokensIn.close();
  tokensOut = new SynonymFilter(tokensIn, b.build(), true);
  termAtt = tokensOut.addAttribute(CharTermAttribute.class);
  posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
  posLenAtt = tokensOut.addAttribute(PositionLengthAttribute.class);
  offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
  verify("a b c", "a/bar b/fee c");
  // syn output extends beyond input tokens
  verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");
  verify("a b a", "a/bar b/fee a/foo");
  // outputs that add to one another:
  verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");
  // two outputs for same input
  verify("e f", "foo/baz bar/bee");
  // verify multi-word / single-output offsets:
  verify("g i j k g", "g i/feep:7_3 j k g");
  // mixed keepOrig true/false:
  verify("a m c e x", "a/foo dog barks loudly x");
  verify("c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x");
  assertTrue(tokensOut.getCaptureCount() > 0);
  // no captureStates when no syns matched
  verify("p q r s t", "p q r s t");
  assertEquals(0, tokensOut.getCaptureCount());
  // no captureStates when only single-input syns, w/ no lookahead needed, matched
  verify("p q z y t", "p q boo y/bee t");
  assertEquals(0, tokensOut.getCaptureCount());
}
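The verify(input, expected) helper used throughout testBasic is also part of the test class and is not shown on this page. Its expected strings stack synonyms with '/' at the same position, and the ':' and '_' suffixes (as in "feep:7_3") appear to carry the expected end offset and position length. The following is a rough sketch of how that stacking corresponds to position increments, assuming the tokensIn/tokensOut/termAtt/posIncrAtt fields set up above; it is illustrative only, not the helper's actual code.

// Rough sketch: re-feed the tokenizer and print tokens grouped by position,
// mirroring the "a/bar b/fee c" notation (posInc == 0 means "stacked").
tokensIn.setReader(new StringReader("a b c"));
tokensOut.reset();
StringBuilder sb = new StringBuilder();
while (tokensOut.incrementToken()) {
  if (sb.length() > 0) {
    sb.append(posIncrAtt.getPositionIncrement() > 0 ? ' ' : '/');
  }
  sb.append(termAtt);
}
tokensOut.end();
tokensOut.close();
System.out.println(sb); // something like "a/bar b/fee c" (token order within a position may vary)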
Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr (Apache), from class TestSynonymMapFilter, method testRecursion2:
public void testRecursion2() throws Exception {
  b = new SynonymMap.Builder(true);
  final boolean keepOrig = false;
  add("zoo", "zoo", keepOrig);
  add("zoo", "zoo zoo", keepOrig);
  final SynonymMap map = b.build();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
    }
  };
  // verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo");
  assertAnalyzesTo(a, "zoo zoo $ zoo",
      new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo", "zoo" },
      new int[] { 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 });
  a.close();
}