Use of org.apache.lucene.analysis.core.LowerCaseFilter in project che by eclipse:
class LuceneSearcher, method makeAnalyzer.
/**
 * Builds the analyzer used for indexing/searching: whitespace tokenization
 * followed by lower-casing of every token.
 */
protected Analyzer makeAnalyzer() {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            // Split on whitespace, then normalize each token to lower case.
            final Tokenizer source = new WhitespaceTokenizer();
            final TokenStream lowercased = new LowerCaseFilter(source);
            return new TokenStreamComponents(source, lowercased);
        }
    };
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project lucene-solr by apache:
class SynonymGraphFilterFactory, method inform.
@Override
public void inform(ResourceLoader loader) throws IOException {
    // Resolve the tokenizer factory up front so the anonymous Analyzer below
    // can capture it (the local must be effectively final).
    final TokenizerFactory tokFactory =
            tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);

    final Analyzer analyzer;
    if (analyzerName == null) {
        // No explicit analyzer configured: build one from the tokenizer factory
        // (falling back to whitespace tokenization), with optional lower-casing.
        analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer;
                if (tokFactory != null) {
                    tokenizer = tokFactory.create();
                } else {
                    tokenizer = new WhitespaceTokenizer();
                }
                if (ignoreCase) {
                    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
                }
                return new TokenStreamComponents(tokenizer, tokenizer);
            }
        };
    } else {
        analyzer = loadAnalyzer(loader, analyzerName);
    }

    // Close the analyzer once the synonym map has been loaded.
    try (Analyzer a = analyzer) {
        // Map the configured format name to the parser class; "solr" is the default.
        String formatClass = format;
        if (format == null || "solr".equals(format)) {
            formatClass = SolrSynonymParser.class.getName();
        } else if ("wordnet".equals(format)) {
            formatClass = WordnetSynonymParser.class.getName();
        }
        // TODO: expose dedup as a parameter?
        map = loadSynonyms(loader, formatClass, true, a);
    } catch (ParseException e) {
        // Preserve the parse failure as the cause of the IOException.
        throw new IOException("Error parsing synonyms file:", e);
    }
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project jackrabbit-oak by apache:
class OakAnalyzer, method createComponents.
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    // Standard tokenization, lower-casing, then word-delimiter splitting:
    // keep word parts and number parts, stem English possessives, and
    // optionally index the original term (this.INDEX_ORIGINAL_TERM).
    final StandardTokenizer source = new StandardTokenizer(matchVersion, reader);
    final int delimiterFlags = WordDelimiterFilter.GENERATE_WORD_PARTS
            | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE
            | this.INDEX_ORIGINAL_TERM
            | WordDelimiterFilter.GENERATE_NUMBER_PARTS;
    TokenStream chain = new LowerCaseFilter(matchVersion, source);
    chain = new WordDelimiterFilter(chain, delimiterFlags, null);
    return new TokenStreamComponents(source, chain);
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project Krill by KorAP:
class TextAnalyzer, method createComponents.
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    // Grammar-based standard tokenization followed by lower-casing.
    final Tokenizer tokenizer = new StandardTokenizer();
    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
}
Use of org.apache.lucene.analysis.core.LowerCaseFilter in project vertigo by KleeGroup:
class DefaultAnalyzer, method createComponents.
/**
 * Creates a TokenStream which tokenizes all the text in the provided Reader.
 *
 * @return A TokenStream built from a StandardTokenizer filtered with
 *         ElisionFilter, StopFilter, ASCIIFoldingFilter and LowerCaseFilter
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    /* Base tokenizer: grammar-based standard tokenization. */
    final Tokenizer source = new StandardTokenizer();
    // -----
    /* Strip French elisions (l', d', ...) as configured in LuceneConstants. */
    final CharArraySet elisionSet = new CharArraySet(Arrays.asList(LuceneConstants.ELISION_ARTICLES), true);
    TokenStream filter = new ElisionFilter(source, elisionSet);
    /* Remove stop words (articles, adjectives, ...). */
    filter = new StopFilter(filter, stopWords);
    /* Fold accented characters to their ASCII equivalents. */
    filter = new ASCIIFoldingFilter(filter);
    /* Finally, lower-case every token. */
    filter = new LowerCaseFilter(filter);
    return new TokenStreamComponents(source, filter);
}
Aggregations