use of org.apache.lucene.analysis.core.StopFilter in project cogcomp-nlp by CogComp.
the class MinimalAnalyzer method createComponents.
/**
 * Builds the analysis chain for a field.
 *
 * <p>Tokens come from a {@link StandardTokenizer} and then pass, in order, through:
 * {@link StandardFilter}, {@link ASCIIFoldingFilter}, {@link LowerCaseFilter},
 * {@link EnglishPossessiveFilter}, {@link StopFilter} (using this analyzer's
 * {@code stopwords}), {@link WordDelimiterFilter} and finally {@link PorterStemFilter}.
 *
 * @param fieldName name of the field being analyzed; not consulted by this implementation
 * @return components pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();

    // Each stage wraps the previous one; the order below is the order tokens are processed in.
    final TokenStream standardized = new StandardFilter(source);
    final TokenStream asciiFolded = new ASCIIFoldingFilter(standardized);
    final TokenStream lowerCased = new LowerCaseFilter(asciiFolded);
    final TokenStream possessiveStripped = new EnglishPossessiveFilter(lowerCased);
    final TokenStream stopped = new StopFilter(possessiveStripped, stopwords);
    // NOTE(review): ALPHA is passed as the second argument and null as the third;
    // confirm against the WordDelimiterFilter constructor of the Lucene version in use.
    final TokenStream delimited = new WordDelimiterFilter(stopped, WordDelimiterFilter.ALPHA, null);
    final TokenStream stemmed = new PorterStemFilter(delimited);

    return new TokenStreamComponents(source, stemmed);
}
use of org.apache.lucene.analysis.core.StopFilter in project legato by DOREMUS-ANR.
the class Stemmer method stemTerms.
// static String stemTerm (String term) {
// PorterStemmer stemmer = new PorterStemmer();
// System.out.println(term + " --> "+stemmer.stem(term));
// return stemmer.stem(term);
// }
/**
 * Analyzes {@code term} with a {@link StandardAnalyzer}, stems every token with a
 * {@link PorterStemFilter}, drops tokens contained in
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}, and joins the survivors with single spaces.
 *
 * <p>The accumulator used to start as {@code null}, which made every non-empty result
 * begin with the literal text {@code "null"} and made an input producing no tokens fail
 * with a {@link NullPointerException}. The result is now built from an empty buffer, so
 * an input with no surviving tokens yields the empty string.
 *
 * @param term text to tokenize and stem
 * @return the stemmed, stop-word-filtered tokens separated by single spaces; empty if none remain
 * @throws Exception if the underlying token stream fails
 */
public static String stemTerms(String term) throws Exception {
    StringBuilder tokens = new StringBuilder();
    // try-with-resources closes the stream chain first, then the analyzer,
    // even when tokenization throws.
    try (Analyzer analyzer = new StandardAnalyzer();
         TokenStream result = new StopFilter(
                 // Filter order (stem, then stop) is kept exactly as before.
                 new PorterStemFilter(analyzer.tokenStream(null, term)),
                 StopAnalyzer.ENGLISH_STOP_WORDS_SET)) {
        CharTermAttribute resultAttr = result.addAttribute(CharTermAttribute.class);
        result.reset();
        while (result.incrementToken()) {
            if (tokens.length() > 0) {
                tokens.append(' ');
            }
            tokens.append(resultAttr.toString());
        }
        // Complete the consumer workflow: reset -> incrementToken* -> end -> close.
        result.end();
    }
    return tokens.toString();
}
use of org.apache.lucene.analysis.core.StopFilter in project nutch by apache.
the class LuceneAnalyzerUtil method createComponents.
/**
 * Builds the analysis chain for a field: a {@link ClassicTokenizer} followed by a
 * {@link LowerCaseFilter}, an optional {@link StopFilter} (only when {@code stopSet}
 * is non-null), and an optional stemming filter chosen by {@code stemFilterType}.
 *
 * @param fieldName name of the field being analyzed; not consulted by this implementation
 * @return components pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new ClassicTokenizer();
    TokenStream lowerCased = new LowerCaseFilter(tokenizer);

    // Stop-word removal is skipped entirely when no stop set was configured.
    TokenStream chain = (stopSet == null) ? lowerCased : new StopFilter(lowerCased, stopSet);

    // Any stem filter type other than the two handled here leaves the chain unstemmed.
    switch (stemFilterType) {
        case PORTERSTEM_FILTER:
            chain = new PorterStemFilter(chain);
            break;
        case ENGLISHMINIMALSTEM_FILTER:
            chain = new EnglishMinimalStemFilter(chain);
            break;
        default:
            break;
    }

    return new TokenStreamComponents(tokenizer, chain);
}
use of org.apache.lucene.analysis.core.StopFilter in project Vidyavana by borsosl.
the class QueryAnalyzer method createComponents.
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new QueryTokenizer();
TokenStream filter = new SeparateQueryOperatorFilter(tokenizer);
filter = new StopFilter(filter, new CharArraySet(Arrays.asList("a", "az", "és"), false));
return new TokenStreamComponents(tokenizer, filter);
}
Aggregations