Use of org.apache.lucene.analysis.core.LowerCaseFilter in project cogcomp-nlp by CogComp.
From the class ASCIIEnglishAnalyzer, method createComponents.
/**
 * Builds the token-stream pipeline used by this analyzer.
 *
 * <p>Tokens produced by a {@code StandardTokenizer} pass, in this order, through
 * {@code StandardFilter}, {@code ASCIIFoldingFilter}, {@code EnglishPossessiveFilter},
 * {@code WordDelimiterFilter}, {@code LowerCaseFilter}, {@code StopFilter} (with
 * {@code EnglishAnalyzer.getDefaultStopSet()}) and finally {@code PorterStemFilter}.
 *
 * @param fieldName name of the field being analyzed; not consulted here, so every
 *                  field receives the same chain
 * @return components pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // The second WordDelimiterFilter argument is a bitmask of configuration flags.
    // Spell the flags out explicitly: WordDelimiterFilter.ALPHA has the same numeric
    // value (0x03) but is a character-type constant, not a configuration flag.
    final int wordDelimiterFlags =
            WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS;

    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new ASCIIFoldingFilter(result);
    result = new EnglishPossessiveFilter(result);
    // null: no protected-words set is supplied.
    result = new WordDelimiterFilter(result, wordDelimiterFlags, null);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, EnglishAnalyzer.getDefaultStopSet());
    result = new PorterStemFilter(result);
    return new TokenStreamComponents(source, result);
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project cogcomp-nlp by CogComp.
From the class MinimalAnalyzer, method createComponents.
/**
 * Builds the token-stream pipeline used by this analyzer.
 *
 * <p>Tokens produced by a {@code StandardTokenizer} pass, in this order, through
 * {@code StandardFilter}, {@code ASCIIFoldingFilter}, {@code LowerCaseFilter},
 * {@code EnglishPossessiveFilter}, {@code StopFilter} (with this analyzer's
 * {@code stopwords}), {@code WordDelimiterFilter} and finally {@code PorterStemFilter}.
 *
 * @param fieldName name of the field being analyzed; not consulted here, so every
 *                  field receives the same chain
 * @return components pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // The second WordDelimiterFilter argument is a bitmask of configuration flags.
    // Spell the flags out explicitly: WordDelimiterFilter.ALPHA has the same numeric
    // value (0x03) but is a character-type constant, not a configuration flag.
    final int wordDelimiterFlags =
            WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS;

    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new ASCIIFoldingFilter(result);
    result = new LowerCaseFilter(result);
    result = new EnglishPossessiveFilter(result);
    result = new StopFilter(result, stopwords);
    // null: no protected-words set is supplied.
    result = new WordDelimiterFilter(result, wordDelimiterFlags, null);
    result = new PorterStemFilter(result);
    return new TokenStreamComponents(source, result);
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project neo4j by neo4j.
From the class CustomAnalyzer, method createComponents.
/**
 * Creates a whitespace-tokenizing, lower-casing pipeline and records that this
 * method was invoked.
 *
 * @param fieldName name of the field being analyzed; not consulted here
 * @return components pairing a {@code WhitespaceTokenizer} with a
 *         {@code LowerCaseFilter} wrapped around it
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    // Flag the invocation before building anything, so the flag is set on every call.
    called = true;

    final Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
    final LowerCaseFilter lowerCased = new LowerCaseFilter(whitespaceTokenizer);
    return new TokenStreamComponents(whitespaceTokenizer, lowerCased);
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project cogcomp-nlp by CogComp.
From the class CharacterShingleAnalyzer, method createComponents.
/**
 * Builds the token-stream pipeline used by this analyzer.
 *
 * <p>Tokens produced by a {@code CharacterShingleTokenizer} pass, in this order,
 * through {@code StandardFilter}, {@code ASCIIFoldingFilter}, {@code LowerCaseFilter}
 * and a {@code ShingleFilter} constructed with the argument {@code 3}.
 *
 * @param fieldName name of the field being analyzed; not consulted here
 * @return components pairing the tokenizer with the shingle filter that ends the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new CharacterShingleTokenizer();

    // Normalisation stages: standard filter, then ASCII folding, then lower-casing.
    final TokenStream folded = new ASCIIFoldingFilter(new StandardFilter(tokenizer));
    final TokenStream lowerCased = new LowerCaseFilter(folded);

    // Shingling is applied last, on the normalised tokens.
    final TokenStream shingled = new ShingleFilter(lowerCased, 3);
    return new TokenStreamComponents(tokenizer, shingled);
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project cogcomp-nlp by CogComp.
From the class WikiURLAnalyzer, method createComponents.
/**
 * Builds the token-stream pipeline used by this analyzer.
 *
 * <p>Output of a {@code KeywordTokenizer} passes, in this order, through
 * {@code StandardFilter}, {@code CharacterFilter}, {@code ASCIIFoldingFilter} and
 * {@code LowerCaseFilter}.
 *
 * @param fieldName name of the field being analyzed; not consulted here
 * @return components pairing the tokenizer with the lower-casing filter that ends the chain
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer keywordSource = new KeywordTokenizer();

    // CharacterFilter is applied before ASCII folding and lower-casing.
    final TokenStream cleaned = new CharacterFilter(new StandardFilter(keywordSource));
    final TokenStream normalized = new LowerCaseFilter(new ASCIIFoldingFilter(cleaned));

    return new TokenStreamComponents(keywordSource, normalized);
}
Aggregations