Use of org.apache.lucene.analysis.core.LowerCaseFilter in project jena by apache.
The method createComponents of the class LowerCaseKeywordAnalyzer.
/**
 * Builds the analysis chain for this analyzer: a {@link KeywordTokenizer}
 * whose output is passed through a {@link LowerCaseFilter}.
 *
 * @param fieldName name of the field being analyzed; not used by this implementation
 * @return components pairing the tokenizer with the lower-casing filter
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final KeywordTokenizer keywordTokenizer = new KeywordTokenizer();
    return new TokenStreamComponents(keywordTokenizer, new LowerCaseFilter(keywordTokenizer));
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project nutch by apache.
The method createNGramTokenStream of the class LuceneTokenizer.
/**
 * Builds a shingle (word n-gram) token stream over the given text.
 *
 * <p>The text is tokenized with a {@link StandardTokenizer}, lower-cased, passed
 * through {@code applyStemmer}, and finally wrapped in a {@link ShingleFilter}
 * with unigram output switched off. Each stage is stored in the
 * {@code tokenStream} member declared outside this method.
 *
 * @param content text to tokenize
 * @param mingram second argument handed to the {@link ShingleFilter} constructor
 *                (the minimum shingle size)
 * @param maxgram third argument handed to the {@link ShingleFilter} constructor
 *                (the maximum shingle size)
 * @return the shingle-producing token stream, which is also left in {@code tokenStream}
 */
private TokenStream createNGramTokenStream(String content, int mingram, int maxgram) {
    final Tokenizer standardTokenizer = new StandardTokenizer();
    standardTokenizer.setReader(new StringReader(content));

    // Keep this order: tokenStream is assigned before applyStemmer runs.
    // NOTE(review): applyStemmer presumably wraps the current tokenStream - confirm
    // against its definition before reordering.
    tokenStream = new LowerCaseFilter(standardTokenizer);
    tokenStream = applyStemmer(stemFilterType);

    final ShingleFilter shingles = new ShingleFilter(tokenStream, mingram, maxgram);
    shingles.setOutputUnigrams(false);
    tokenStream = shingles;
    return tokenStream;
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project nutch by apache.
The method createTokenStream of the class LuceneTokenizer.
/**
 * Builds the token stream for the given text: tokenization via
 * {@code generateTokenStreamFromText}, lower-casing, optional stop-word
 * filtering (only when {@code stopSet} is non-null), then {@code applyStemmer}.
 *
 * <p>Each stage is written back to the {@code tokenStream} member declared
 * outside this method.
 *
 * @param content text to tokenize
 * @return the stream returned by {@code applyStemmer}, which is also left in {@code tokenStream}
 */
private TokenStream createTokenStream(String content) {
    tokenStream = new LowerCaseFilter(generateTokenStreamFromText(content, tokenizer));

    // NOTE(review): the helpers below presumably wrap the current tokenStream,
    // so the field must be up to date before each call - confirm against their definitions.
    if (stopSet != null) {
        tokenStream = applyStopFilter(stopSet);
    }

    final TokenStream stemmed = applyStemmer(stemFilterType);
    tokenStream = stemmed;
    return stemmed;
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project nutch by apache.
The method createComponents of the class LuceneAnalyzerUtil.
/**
 * Builds the analysis chain: a {@link ClassicTokenizer} followed by a
 * {@link LowerCaseFilter}, a {@link StopFilter} when {@code stopSet} is
 * non-null, and the stem filter selected by {@code stemFilterType}.
 *
 * @param fieldName name of the field being analyzed; not used by this implementation
 * @return components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer classicTokenizer = new ClassicTokenizer();
    final TokenStream lowerCased = new LowerCaseFilter(classicTokenizer);

    // Stop-word removal is optional and applied only when a stop set is configured.
    TokenStream chain = (stopSet != null) ? new StopFilter(lowerCased, stopSet) : lowerCased;

    switch (stemFilterType) {
        case PORTERSTEM_FILTER:
            chain = new PorterStemFilter(chain);
            break;
        case ENGLISHMINIMALSTEM_FILTER:
            chain = new EnglishMinimalStemFilter(chain);
            break;
        default:
            // Any other value: leave the chain without a stem filter.
            break;
    }

    return new TokenStreamComponents(classicTokenizer, chain);
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project cogcomp-nlp by CogComp.
The method createComponents of the class CharacterShingleAnalyzer.
/**
 * Builds the analysis chain: a {@code CharacterShingleTokenizer} followed by
 * {@link StandardFilter}, {@link ASCIIFoldingFilter}, {@link LowerCaseFilter}
 * and a {@link ShingleFilter} constructed with the argument {@code 3}.
 *
 * @param fieldName name of the field being analyzed; not used by this implementation
 * @return components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer shingleTokenizer = new CharacterShingleTokenizer();

    // One named stage per filter, wrapped in the same order as before.
    final TokenStream standardized = new StandardFilter(shingleTokenizer);
    final TokenStream folded = new ASCIIFoldingFilter(standardized);
    final TokenStream lowered = new LowerCaseFilter(folded);
    final TokenStream shingled = new ShingleFilter(lowered, 3);

    return new TokenStreamComponents(shingleTokenizer, shingled);
}
Aggregations