Search in sources :

Example 61 with StopFilter

use of org.apache.lucene.analysis.StopFilter in project ddf by codice.

the class ContextualAnalyzer method reusableTokenStream.

/**
 * Returns a token stream over {@code reader}, reusing the filter chain saved by an earlier call
 * when one is available.
 *
 * <p>When {@code overridesTokenStreamMethod} is set, the call is delegated to
 * {@code tokenStream(fieldName, reader)} and nothing is saved or reused. Otherwise the chain is
 * a {@code ContextualTokenizer} wrapped by a {@code StandardFilter}, a {@code LowerCaseFilter}
 * and a {@code StopFilter}; it is built the first time and afterwards only the tokenizer is reset
 * onto the new reader.
 *
 * @param fieldName field name; only forwarded to {@code tokenStream} on the delegating path
 * @param reader source of the text to tokenize
 * @return the outermost stream of the (possibly reused) chain
 * @throws IOException declared by the signature; presumably raised when the saved tokenizer is
 *     reset onto {@code reader} (TODO confirm against ContextualTokenizer)
 */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    if (overridesTokenStreamMethod) {
        // Presumably set when a subclass overrides tokenStream but not reusableTokenStream;
        // in that case defer to tokenStream so the subclass's chain is honoured.
        return tokenStream(fieldName, reader);
    }
    final SavedStreams saved = (SavedStreams) getPreviousTokenStream();
    if (saved != null) {
        // Reuse path: point the existing tokenizer at the new input, keep the filters.
        saved.tokenStream.reset(reader);
        return saved.filteredTokenStream;
    }
    // Build the whole chain BEFORE registering it. If any constructor throws, nothing
    // half-initialised is left behind for the next call to dereference on the reuse path.
    final SavedStreams fresh = new SavedStreams();
    fresh.tokenStream = new ContextualTokenizer(reader);
    TokenStream chain = new StandardFilter(fresh.tokenStream);
    chain = new LowerCaseFilter(chain);
    fresh.filteredTokenStream = new StopFilter(enableStopPositionIncrements, chain, stopSet);
    setPreviousTokenStream(fresh);
    return fresh.filteredTokenStream;
}
Also used : StopFilter(org.apache.lucene.analysis.StopFilter) StandardFilter(org.apache.lucene.analysis.standard.StandardFilter) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter)

Example 62 with StopFilter

use of org.apache.lucene.analysis.StopFilter in project ddf by codice.

the class ContextualAnalyzer method tokenStream.

/**
 * Builds a new analysis chain over {@code reader}: a {@link ContextualTokenizer} whose output is
 * passed through a {@link StandardFilter}, a {@link LowerCaseFilter} and finally a
 * {@link StopFilter} configured with {@code enableStopPositionIncrements} and {@code stopSet}.
 *
 * @param fieldName not consulted by this implementation
 * @param reader source of the text to tokenize
 * @return the outermost filter of the chain
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    final ContextualTokenizer tokenizer = new ContextualTokenizer(reader);
    final TokenStream lowerCased = new LowerCaseFilter(new StandardFilter(tokenizer));
    return new StopFilter(enableStopPositionIncrements, lowerCased, stopSet);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StopFilter(org.apache.lucene.analysis.StopFilter) StandardFilter(org.apache.lucene.analysis.standard.StandardFilter) LowerCaseFilter(org.apache.lucene.analysis.LowerCaseFilter)

Example 63 with StopFilter

use of org.apache.lucene.analysis.StopFilter in project ddf by codice.

the class CaseSensitiveContextualAnalyzer method tokenStream.

/**
 * Builds a new, case-preserving analysis chain over {@code reader}: a
 * {@link ContextualTokenizer} whose output is passed through a {@link StandardFilter} and then a
 * {@link StopFilter} configured with {@code enableStopPositionIncrements} and {@code stopSet}.
 * No lower-casing step is applied in this method.
 *
 * @param fieldName not consulted by this implementation
 * @param reader source of the text to tokenize
 * @return the outermost filter of the chain
 */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    final ContextualTokenizer tokenizer = new ContextualTokenizer(reader);
    final TokenStream standardized = new StandardFilter(tokenizer);
    return new StopFilter(enableStopPositionIncrements, standardized, stopSet);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) StopFilter(org.apache.lucene.analysis.StopFilter) StandardFilter(org.apache.lucene.analysis.standard.StandardFilter)

Example 64 with StopFilter

use of org.apache.lucene.analysis.StopFilter in project zm-mailbox by Zimbra.

the class UniversalAnalyzer method createTokenStream.

/**
 * Wraps {@code tokenizer} in a {@code UniversalTokenFilter} followed by a {@code StopFilter}.
 *
 * <p>The stop words are read from the provisioning config attribute
 * {@code Provisioning.A_zimbraDefaultAnalyzerStopWords}. If that lookup throws a
 * {@code ServiceException}, the failure is logged and
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} is used instead.
 *
 * @param tokenizer the token source to wrap
 * @return the stop-filtered stream
 */
private TokenStream createTokenStream(Tokenizer tokenizer) {
    final TokenStream universal = new UniversalTokenFilter(tokenizer);
    final Set englishDefaults = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    Set effectiveStopWords;
    try {
        effectiveStopWords = Provisioning.getInstance().getConfig().getMultiAttrSet(Provisioning.A_zimbraDefaultAnalyzerStopWords);
    } catch (ServiceException e) {
        ZimbraLog.index.error("Failed to retrieve stop words from LDAP", e);
        // Best effort: keep indexing with the built-in English list.
        effectiveStopWords = englishDefaults;
    }
    // NOTE(review): the original comment here says position increments are disabled for
    // backward compatibility; presumably that follows from the value of LuceneIndex.VERSION.
    // Confirm before changing that constant.
    return new StopFilter(LuceneIndex.VERSION, universal, effectiveStopWords);
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Set(java.util.Set) ServiceException(com.zimbra.common.service.ServiceException) StopFilter(org.apache.lucene.analysis.StopFilter)

Example 65 with StopFilter

use of org.apache.lucene.analysis.StopFilter in project vertigo by KleeGroup.

the class DefaultAnalyzer method createComponents.

/**
 * Builds the analysis chain returned for a field.
 *
 * <p>Tokens come from a {@code StandardTokenizer} and then pass, in this order, through an
 * {@code ElisionFilter}, a {@code StopFilter}, an {@code ASCIIFoldingFilter} and a
 * {@code LowerCaseFilter}.
 *
 * @param fieldName not consulted by this implementation
 * @return the components pairing the tokenizer with the outermost filter
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    /* Token source. */
    final Tokenizer tokenizer = new StandardTokenizer();
    /* Strip elided articles (set built from LuceneConstants.ELISION_ARTICLES). */
    final CharArraySet elisions = new CharArraySet(Arrays.asList(LuceneConstants.ELISION_ARTICLES), true);
    final TokenStream withoutElisions = new ElisionFilter(tokenizer, elisions);
    /*
     * Drop stop words (articles, adjectives).
     * NOTE(review): this runs before lower-casing, so whether capitalised stop words are
     * removed depends on how stopWords was built; verify.
     */
    final TokenStream withoutStopWords = new StopFilter(withoutElisions, stopWords);
    /* Remove accents. */
    final TokenStream unaccented = new ASCIIFoldingFilter(withoutStopWords);
    /* Convert to lower case. */
    final TokenStream lowerCased = new LowerCaseFilter(unaccented);
    return new TokenStreamComponents(tokenizer, lowerCased);
}
Also used : CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) ElisionFilter(org.apache.lucene.analysis.util.ElisionFilter) StandardTokenizer(org.apache.lucene.analysis.standard.StandardTokenizer) StopFilter(org.apache.lucene.analysis.StopFilter) ASCIIFoldingFilter(org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter) Tokenizer(org.apache.lucene.analysis.Tokenizer) LowerCaseFilter(org.apache.lucene.analysis.core.LowerCaseFilter)

Aggregations

StopFilter (org.apache.lucene.analysis.StopFilter): 68, TokenStream (org.apache.lucene.analysis.TokenStream): 57, Tokenizer (org.apache.lucene.analysis.Tokenizer): 55, LowerCaseFilter (org.apache.lucene.analysis.LowerCaseFilter): 49, StandardTokenizer (org.apache.lucene.analysis.standard.StandardTokenizer): 43, StandardFilter (org.apache.lucene.analysis.standard.StandardFilter): 36, SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter): 33, SnowballFilter (org.apache.lucene.analysis.snowball.SnowballFilter): 17, CharArraySet (org.apache.lucene.analysis.CharArraySet): 9, Analyzer (org.apache.lucene.analysis.Analyzer): 7, DecimalDigitFilter (org.apache.lucene.analysis.core.DecimalDigitFilter): 6, ElisionFilter (org.apache.lucene.analysis.util.ElisionFilter): 6, MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 5, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 4, KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer): 4, EnglishPossessiveFilter (org.apache.lucene.analysis.en.EnglishPossessiveFilter): 4, PorterStemFilter (org.apache.lucene.analysis.en.PorterStemFilter): 4, ASCIIFoldingFilter (org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter): 4, Reader (java.io.Reader): 3, TokenFilter (org.apache.lucene.analysis.TokenFilter): 3