Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by Apache.
Class TestRussianLightStemFilter, method testKeyword.
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet(asSet("энергии"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new RussianLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "энергии", "энергии");
  a.close();
}
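Because SetKeywordMarkerFilter flags every token found in the exclusion set with KeywordAttribute, RussianLightStemFilter leaves "энергии" untouched, which is exactly what checkOneTerm asserts. A minimal sketch of the same check done by consuming the TokenStream directly (hypothetical field name "field"; it would have to run before a.close()):
try (TokenStream ts = a.tokenStream("field", "энергии")) {
  CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
  ts.reset();
  assertTrue(ts.incrementToken());
  assertEquals("энергии", term.toString()); // keyword-marked, so not stemmed
  assertFalse(ts.incrementToken());
  ts.end();
}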
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by Apache.
Class ShingleFilterTest, method testRandomHugeStrings.
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
    }
  };
  checkRandomData(random, a, 100 * RANDOM_MULTIPLIER, 8192);
  a.close();
}
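With its default settings (minimum and maximum shingle size 2, unigrams kept), ShingleFilter emits each original token followed by the two-word shingle that starts at it. A rough sketch of what the analyzer above would produce, using the assertAnalyzesTo helper from BaseTokenStreamTestCase (it would have to run before a.close()):
assertAnalyzesTo(a, "please divide this",
    new String[] { "please", "please divide", "divide", "divide this", "this" });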
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by Apache.
Class TestDaitchMokotoffSoundexFilter, method testRandomStrings.
/** blast some random strings through the analyzer */
public void testRandomStrings() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // inject = true: keep the original token and add the soundex codes
      return new TokenStreamComponents(tokenizer, new DaitchMokotoffSoundexFilter(tokenizer, true));
    }
  };
  checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
  a.close();
  Analyzer b = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      // inject = false: replace the original token with the soundex codes
      return new TokenStreamComponents(tokenizer, new DaitchMokotoffSoundexFilter(tokenizer, false));
    }
  };
  checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
  b.close();
}
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by Apache.
Class TestDaitchMokotoffSoundexFilterFactory, method testSettingInject.
public void testSettingInject() throws Exception {
  Map<String, String> parameters = new HashMap<>();
  parameters.put("inject", "false");
  DaitchMokotoffSoundexFilterFactory factory = new DaitchMokotoffSoundexFilterFactory(parameters);
  Tokenizer inputStream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  inputStream.setReader(new StringReader("international"));
  TokenStream filteredStream = factory.create(inputStream);
  assertEquals(DaitchMokotoffSoundexFilter.class, filteredStream.getClass());
  assertTokenStreamContents(filteredStream, new String[] { "063963" });
}
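Because the factory is configured with inject=false, only the Daitch-Mokotoff code "063963" replaces the input token. A hedged sketch of the default behaviour (inject=true), where the original token is expected to be kept alongside its code; the exact expected terms and their order are an assumption here:
DaitchMokotoffSoundexFilterFactory defaults = new DaitchMokotoffSoundexFilterFactory(new HashMap<String, String>());
Tokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
in.setReader(new StringReader("international"));
// assumed output: original token first, then its soundex code at the same position
assertTokenStreamContents(defaults.create(in), new String[] { "international", "063963" });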
Use of org.apache.lucene.analysis.Tokenizer in project lucene-solr by Apache.
Class TestPhoneticFilter, method assertAlgorithm.
static void assertAlgorithm(Encoder encoder, boolean inject, String input, String[] expected) throws Exception {
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader(input));
  PhoneticFilter filter = new PhoneticFilter(tokenizer, encoder, inject);
  assertTokenStreamContents(filter, expected);
}
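The helper wires a throwaway MockTokenizer into a PhoneticFilter so each test case only has to supply an encoder, the inject flag, and the expected terms. A hypothetical call (not taken from the test class) using commons-codec's Metaphone encoder with inject=false, so each token is replaced by its Metaphone code:
assertAlgorithm(new Metaphone(), false, "aaa bbb", new String[] { "A", "B" });
With inject=true the filter would additionally keep the original tokens at the same positions, so both the codes and "aaa"/"bbb" would appear in the stream.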