Use of org.apache.lucene.analysis.en.EnglishPossessiveFilter in project cogcomp-nlp by CogComp.
Class ASCIIEnglishAnalyzer, method createComponents.
/**
 * Builds the analysis chain for this analyzer.
 *
 * <p>The chain is, in order: {@link StandardTokenizer}, {@link StandardFilter},
 * {@link ASCIIFoldingFilter}, {@link EnglishPossessiveFilter},
 * {@link WordDelimiterFilter}, {@link LowerCaseFilter}, {@link StopFilter}
 * (using {@code EnglishAnalyzer.getDefaultStopSet()}) and
 * {@link PorterStemFilter}.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @return components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new ASCIIFoldingFilter(result);
    result = new EnglishPossessiveFilter(result);
    // Spell out the configuration flags explicitly. WordDelimiterFilter.ALPHA is a
    // character-type mask (0x03), not a configuration flag; it only works as one
    // because its value coincides with these two flags, so behavior is unchanged.
    final int delimiterFlags =
            WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS;
    // No protected-words set is supplied.
    result = new WordDelimiterFilter(result, delimiterFlags, null);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, EnglishAnalyzer.getDefaultStopSet());
    result = new PorterStemFilter(result);
    return new TokenStreamComponents(source, result);
}
Use of org.apache.lucene.analysis.en.EnglishPossessiveFilter in project cogcomp-nlp by CogComp.
Class MinimalAnalyzer, method createComponents.
/**
 * Builds the analysis chain for this analyzer.
 *
 * <p>The chain is, in order: {@link StandardTokenizer}, {@link StandardFilter},
 * {@link ASCIIFoldingFilter}, {@link LowerCaseFilter},
 * {@link EnglishPossessiveFilter}, {@link StopFilter} (using this analyzer's
 * {@code stopwords}), {@link WordDelimiterFilter} and {@link PorterStemFilter}.
 * Note that stop words are removed before word-delimiter splitting, so parts
 * produced by the split are not checked against the stop set.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @return components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new ASCIIFoldingFilter(result);
    result = new LowerCaseFilter(result);
    result = new EnglishPossessiveFilter(result);
    result = new StopFilter(result, stopwords);
    // Spell out the configuration flags explicitly. WordDelimiterFilter.ALPHA is a
    // character-type mask (0x03), not a configuration flag; it only works as one
    // because its value coincides with these two flags, so behavior is unchanged.
    final int delimiterFlags =
            WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS;
    // No protected-words set is supplied.
    result = new WordDelimiterFilter(result, delimiterFlags, null);
    result = new PorterStemFilter(result);
    return new TokenStreamComponents(source, result);
}
Use of org.apache.lucene.analysis.en.EnglishPossessiveFilter in project elasticsearch by elastic.
Class SnowballAnalyzer, method createComponents.
/**
 * Assembles the analysis chain: a {@link StandardTokenizer}, optionally an
 * {@link EnglishPossessiveFilter} (for the "English", "Porter" and "Lovins"
 * stemmers), a lowercase filter ({@link TurkishLowerCaseFilter} for "Turkish",
 * {@link LowerCaseFilter} otherwise), a {@link StopFilter} when a stop set is
 * present, and finally a {@link SnowballFilter} for the configured stemmer name.
 *
 * @param fieldName name of the field being analyzed; not used by this method
 * @return components pairing the tokenizer with the end of the filter chain
 */
@Override
public TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new StandardTokenizer();
    TokenStream chain = tokenizer;

    // The English stemmers need the possessive 's stripped beforehand.
    final boolean englishStemmer =
            name.equals("English") || name.equals("Porter") || name.equals("Lovins");
    if (englishStemmer) {
        chain = new EnglishPossessiveFilter(chain);
    }

    // The Turkish stemmer expects Turkish-specific lowercasing.
    chain = name.equals("Turkish")
            ? new TurkishLowerCaseFilter(chain)
            : new LowerCaseFilter(chain);

    if (stopSet != null) {
        chain = new StopFilter(chain, stopSet);
    }

    return new TokenStreamComponents(tokenizer, new SnowballFilter(chain, name));
}
Aggregations