Use of org.apache.lucene.analysis.MockTokenizer in the lucene-solr project by Apache.
From the class TestFingerprintFilterFactory, method test.
public void test() throws Exception {
  // Exercise the Fingerprint factory both with and without full-consumption checks.
  for (final boolean exhaustChecks : new boolean[] { true, false }) {
    // Whitespace-split the sample input; duplicates ("A1") should collapse in the fingerprint.
    MockTokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    source.setReader(new StringReader("A1 B2 A1 D4 C3"));
    source.setEnableChecks(exhaustChecks);
    // Build the filter via its factory, overriding max size and separator.
    TokenStream filtered =
        tokenFilterFactory(
                "Fingerprint",
                FingerprintFilterFactory.MAX_OUTPUT_TOKEN_SIZE_KEY, "256",
                FingerprintFilterFactory.SEPARATOR_KEY, "_")
            .create(source);
    // Tokens are deduplicated, sorted, and joined with the configured separator.
    assertTokenStreamContents(filtered, new String[] { "A1_B2_C3_D4" });
  }
}
Use of org.apache.lucene.analysis.MockTokenizer in the lucene-solr project by Apache.
From the class TestHyphenatedWordsFilter, method testRandomString.
/** Blast some random strings through the analyzer to shake out edge-case bugs. */
public void testRandomString() throws Exception {
  // Anonymous analyzer: whitespace tokenization feeding HyphenatedWordsFilter.
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      TokenStream sink = new HyphenatedWordsFilter(source);
      return new TokenStreamComponents(source, sink);
    }
  };
  // Standard random-data stress test from the Lucene test framework.
  checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
  analyzer.close();
}
Use of org.apache.lucene.analysis.MockTokenizer in the lucene-solr project by Apache.
From the class TestHyphenatedWordsFilter, method testOffsets.
public void testOffsets() throws Exception {
  // Rejoined hyphenated words must keep offsets spanning the original fragments.
  Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  source.setReader(new StringReader("abc- def geh 1234- 5678-"));
  TokenStream filtered = new HyphenatedWordsFilter(source);
  // "abc-" + "def" -> "abcdef" (offsets 0..8); trailing hyphen on the last token is kept.
  assertTokenStreamContents(
      filtered,
      new String[] { "abcdef", "geh", "12345678-" },
      new int[] { 0, 9, 13 },
      new int[] { 8, 12, 24 });
}
Use of org.apache.lucene.analysis.MockTokenizer in the lucene-solr project by Apache.
From the class TestHyphenatedWordsFilter, method testHyphenAtEnd.
/**
 * Test that HyphenatedWordsFilter behaves correctly with a final hyphen
 */
public void testHyphenAtEnd() throws Exception {
  // Mixes CR/LF line-break hyphenation, a blank line, and a dangling final hyphen.
  Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  source.setReader(
      new StringReader("ecologi-\r\ncal devel-\r\n\r\nop compre- hensive-hands-on and ecology-"));
  TokenStream filtered = new HyphenatedWordsFilter(source);
  // Hyphenated fragments are rejoined; the trailing "ecology-" keeps its hyphen.
  assertTokenStreamContents(
      filtered,
      new String[] { "ecological", "develop", "comprehensive-hands-on", "and", "ecology-" });
}
Use of org.apache.lucene.analysis.MockTokenizer in the lucene-solr project by Apache.
From the class TestKeepWordFilter, method testRandomStrings.
/** Blast some random strings through the analyzer to shake out edge-case bugs. */
public void testRandomStrings() throws Exception {
  // Keep-list containing only "a" and "b"; everything else is dropped.
  final Set<String> keepList = new HashSet<>();
  keepList.add("a");
  keepList.add("b");
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      // Whitespace tokenization feeding a case-insensitive KeepWordFilter.
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      TokenStream sink = new KeepWordFilter(source, new CharArraySet(keepList, true));
      return new TokenStreamComponents(source, sink);
    }
  };
  // Standard random-data stress test from the Lucene test framework.
  checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
  analyzer.close();
}
Aggregations