Example usage of org.apache.lucene.analysis.MockTokenizer from the Apache lucene-solr project: class TestHunspellStemFilter, method testRandomStrings.
/** Blast some random strings through the Hunspell stemming analyzer to shake out crashes. */
public void testRandomStrings() throws Exception {
Analyzer a = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // whitespace tokenizer (non-lowercasing) feeding the Hunspell stemmer
    Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    TokenStream sink = new HunspellStemFilter(source, dictionary);
    return new TokenStreamComponents(source, sink);
  }
};
checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
a.close();
}
Example usage of org.apache.lucene.analysis.MockTokenizer from the Apache lucene-solr project: class TestHunspellStemFilter, method testLongestOnly.
/** Simple test for the longestOnly option (see the two boolean ctor args — verify their order against HunspellStemFilter). */
public void testLongestOnly() throws IOException {
final MockTokenizer source = whitespaceMockTokenizer("lucene is awesome");
source.setEnableChecks(true);
final TokenStream stemmed = new HunspellStemFilter(source, dictionary, true, true);
// expect each input token back unchanged, all with position increment 1
assertTokenStreamContents(stemmed, new String[] { "lucene", "is", "awesome" }, new int[] { 1, 1, 1 });
}
Example usage of org.apache.lucene.analysis.MockTokenizer from the Apache lucene-solr project: class TestArabicNormalizationFilter, method check.
/**
 * Runs {@code input} through a whitespace MockTokenizer plus ArabicNormalizationFilter
 * and asserts the stream yields exactly the single token {@code expected}.
 */
private void check(final String input, final String expected) throws IOException {
final MockTokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
source.setReader(new StringReader(input));
final ArabicNormalizationFilter normalized = new ArabicNormalizationFilter(source);
assertTokenStreamContents(normalized, new String[] { expected });
}
Example usage of org.apache.lucene.analysis.MockTokenizer from the Apache lucene-solr project: class TestBrazilianStemFilterFactory, method testStemming.
/**
 * Ensure the filter actually stems and normalizes text:
 * "Brasília" should be lowercased, de-accented, and stemmed to "brasil".
 */
public void testStemming() throws Exception {
// Fixed mojibake: the literal previously read "BrasÃlia" (UTF-8 "Brasília"
// mis-decoded as Latin-1); the stemmer is expected to reduce "Brasília" to "brasil".
Reader reader = new StringReader("Brasília");
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tokenizer.setReader(reader);
TokenStream stream = tokenFilterFactory("BrazilianStem").create(tokenizer);
assertTokenStreamContents(stream, new String[] { "brasil" });
}
Example usage of org.apache.lucene.analysis.MockTokenizer from the Apache lucene-solr project: class TestLimitTokenOffsetFilter, method test.
/** Checks that LimitTokenOffsetFilter cuts the stream at offset 3, with and without consuming all input. */
public void test() throws Exception {
for (final boolean consumeAll : new boolean[] { true, false }) {
  final MockTokenizer source = whitespaceMockTokenizer("A1 B2 C3 D4 E5 F6");
  source.setEnableChecks(consumeAll);
  // note with '3', this test would fail if erroneously the filter used endOffset instead
  final TokenStream limited = new LimitTokenOffsetFilter(source, 3, consumeAll);
  assertTokenStreamContents(limited, new String[] { "A1", "B2" });
}
}
Aggregations