use of org.apache.lucene.analysis.snowball.SnowballFilter in project elasticsearch by elastic.
the class SnowballAnalyzer method createComponents.
/**
 * Builds the analysis chain for this analyzer: a {@link StandardTokenizer}
 * followed, in order, by
 * <ul>
 *   <li>an {@link EnglishPossessiveFilter} when {@code name} is
 *       {@code "English"}, {@code "Porter"} or {@code "Lovins"},</li>
 *   <li>a {@link TurkishLowerCaseFilter} when {@code name} is
 *       {@code "Turkish"}, otherwise a {@link LowerCaseFilter},</li>
 *   <li>a {@link StopFilter} when {@code stopSet} is non-null, and</li>
 *   <li>a {@link SnowballFilter} configured with {@code name}.</li>
 * </ul>
 *
 * @param fieldName name of the field being analyzed; not consulted here
 * @return the tokenizer together with the end of the filter chain
 */
@Override
public TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();

    // The English-family stemmers want the possessive 's stripped first.
    final boolean englishFamily =
        name.equals("English") || name.equals("Porter") || name.equals("Lovins");

    TokenStream chain = source;
    if (englishFamily) {
        chain = new EnglishPossessiveFilter(chain);
    }

    // The Turkish stemmer expects Turkish-specific lowercasing.
    if (name.equals("Turkish")) {
        chain = new TurkishLowerCaseFilter(chain);
    } else {
        chain = new LowerCaseFilter(chain);
    }

    // Stop-word removal is optional: skipped when no stop set was supplied.
    if (stopSet != null) {
        chain = new StopFilter(chain, stopSet);
    }

    return new TokenStreamComponents(source, new SnowballFilter(chain, name));
}
use of org.apache.lucene.analysis.snowball.SnowballFilter in project lucene-solr by apache.
the class SwedishAnalyzer method createComponents.
/**
 * Builds the analysis chain for Swedish text.
 *
 * @param fieldName name of the field being analyzed; not consulted here
 * @return A
 *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         wrapping a {@link StandardTokenizer} filtered, in order, by
 *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter},
 *         {@link SetKeywordMarkerFilter} (only when the stem exclusion set is
 *         non-empty) and a {@link SnowballFilter} driven by a
 *         {@link SwedishStemmer}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();

    TokenStream chain = new LowerCaseFilter(new StandardFilter(tokenizer));
    chain = new StopFilter(chain, stopwords);

    // Only add the keyword marker when there is something to exclude from stemming.
    final boolean hasStemExclusions = !stemExclusionSet.isEmpty();
    if (hasStemExclusions) {
        chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
    }

    final TokenStream stemmed = new SnowballFilter(chain, new SwedishStemmer());
    return new TokenStreamComponents(tokenizer, stemmed);
}
use of org.apache.lucene.analysis.snowball.SnowballFilter in project lucene-solr by apache.
the class TestKeywordRepeatFilter method testBasic.
/**
 * Runs "the birds are flying" through {@code KeywordRepeatFilter}, an English
 * {@code SnowballFilter} and {@code RemoveDuplicatesTokenFilter}, then checks
 * the resulting terms.
 */
public void testBasic() throws IOException {
    final String[] expectedTerms = { "the", "birds", "bird", "are", "flying", "fli" };
    // Presumably position increments (0 = stacked on the previous term) -- confirm
    // against the assertTokenStreamContents overload in the test base class.
    final int[] expectedIncrements = { 1, 1, 0, 1, 1, 0 };

    final TokenStream repeated = new KeywordRepeatFilter(whitespaceMockTokenizer("the birds are flying"));
    final TokenStream stemmed = new SnowballFilter(repeated, "English");
    final TokenStream ts = new RemoveDuplicatesTokenFilter(stemmed);

    assertTokenStreamContents(ts, expectedTerms, expectedIncrements);
}
use of org.apache.lucene.analysis.snowball.SnowballFilter in project lucene-solr by apache.
the class TestKeywordRepeatFilter method testComposition.
/**
 * Stacks two {@code KeywordRepeatFilter}s in front of an English
 * {@code SnowballFilter} and {@code RemoveDuplicatesTokenFilter}, then checks
 * the resulting terms for "the birds are flying".
 */
public void testComposition() throws IOException {
    final String[] expectedTerms = { "the", "birds", "bird", "are", "flying", "fli" };
    // Presumably position increments (0 = stacked on the previous term) -- confirm
    // against the assertTokenStreamContents overload in the test base class.
    final int[] expectedIncrements = { 1, 1, 0, 1, 1, 0 };

    final TokenStream repeatedOnce = new KeywordRepeatFilter(whitespaceMockTokenizer("the birds are flying"));
    final TokenStream repeatedTwice = new KeywordRepeatFilter(repeatedOnce);
    final TokenStream stemmed = new SnowballFilter(repeatedTwice, "English");
    final TokenStream ts = new RemoveDuplicatesTokenFilter(stemmed);

    assertTokenStreamContents(ts, expectedTerms, expectedIncrements);
}
use of org.apache.lucene.analysis.snowball.SnowballFilter in project lucene-solr by apache.
the class IrishAnalyzer method createComponents.
/**
 * Builds the analysis chain for Irish text.
 *
 * @param fieldName name of the field being analyzed; not consulted here
 * @return A
 *         {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         wrapping a {@link StandardTokenizer} filtered, in order, by
 *         {@link StandardFilter}, a {@link StopFilter} over
 *         {@code HYPHENATIONS}, an {@link ElisionFilter} over
 *         {@code DEFAULT_ARTICLES}, {@link IrishLowerCaseFilter}, a
 *         {@link StopFilter} over the configured stopwords,
 *         {@link SetKeywordMarkerFilter} (only when the stem exclusion set is
 *         non-empty) and a {@link SnowballFilter} driven by an
 *         {@link IrishStemmer}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();

    // These two steps run before lowercasing, so they see the original casing.
    TokenStream chain = new StopFilter(new StandardFilter(tokenizer), HYPHENATIONS);
    chain = new ElisionFilter(chain, DEFAULT_ARTICLES);

    chain = new IrishLowerCaseFilter(chain);
    chain = new StopFilter(chain, stopwords);

    // Only add the keyword marker when there is something to exclude from stemming.
    final boolean hasStemExclusions = !stemExclusionSet.isEmpty();
    if (hasStemExclusions) {
        chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
    }

    final TokenStream stemmed = new SnowballFilter(chain, new IrishStemmer());
    return new TokenStreamComponents(tokenizer, stemmed);
}
Aggregations