Search in sources :

Example 1 with StandardTokenizerFactory

Use of org.opensearch.index.analysis.StandardTokenizerFactory in the OpenSearch project by opensearch-project.

From the class AnnotatedTextFieldMapperTests, method createIndexAnalyzers.

/**
 * Builds the fixed set of index analyzers used by this test class.
 *
 * <p>Five analyzers are registered, all with {@link AnalyzerScope#INDEX}:
 * <ul>
 *   <li>{@code "default"} - a {@link StandardAnalyzer} created with
 *       {@code TextFieldMapper.Defaults.POSITION_INCREMENT_GAP}</li>
 *   <li>{@code "standard"} - a {@link StandardAnalyzer}</li>
 *   <li>{@code "keyword"} - a {@link KeywordAnalyzer}</li>
 *   <li>{@code "whitespace"} - a {@link WhitespaceAnalyzer}</li>
 *   <li>{@code "my_stop_analyzer"} - a {@link CustomAnalyzer} made of a standard tokenizer,
 *       no char filters, and one token filter named {@code "stop"} that wraps the stream in a
 *       {@link StopFilter} over {@code EnglishAnalyzer.ENGLISH_STOP_WORDS_SET}</li>
 * </ul>
 *
 * @param indexSettings index settings handed to the standard tokenizer factory
 * @return an {@link IndexAnalyzers} holding the analyzers above; the two remaining constructor
 *         arguments are empty maps
 */
@Override
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
    NamedAnalyzer defaultAnalyzer = new NamedAnalyzer(
        "default",
        AnalyzerScope.INDEX,
        new StandardAnalyzer(),
        TextFieldMapper.Defaults.POSITION_INCREMENT_GAP
    );
    NamedAnalyzer standardAnalyzer = new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer());
    NamedAnalyzer keywordAnalyzer = new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer());
    NamedAnalyzer whitespaceAnalyzer = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());

    // Pieces of the custom stop-word analyzer, spelled out one at a time.
    StandardTokenizerFactory tokenizerFactory = new StandardTokenizerFactory(
        indexSettings,
        null,
        "standard",
        indexSettings.getSettings()
    );
    CharFilterFactory[] noCharFilters = new CharFilterFactory[0];
    TokenFilterFactory englishStopFilter = new TokenFilterFactory() {

        @Override
        public String name() {
            return "stop";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new StopFilter(tokenStream, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
        }
    };
    CustomAnalyzer stopWordChain = new CustomAnalyzer(
        tokenizerFactory,
        noCharFilters,
        new TokenFilterFactory[] { englishStopFilter }
    );
    NamedAnalyzer stopAnalyzer = new NamedAnalyzer("my_stop_analyzer", AnalyzerScope.INDEX, stopWordChain);

    // Registry keyed by analyzer name.
    Map<String, NamedAnalyzer> registry = new HashMap<>();
    registry.put("default", defaultAnalyzer);
    registry.put("standard", standardAnalyzer);
    registry.put("keyword", keywordAnalyzer);
    registry.put("whitespace", whitespaceAnalyzer);
    registry.put("my_stop_analyzer", stopAnalyzer);

    return new IndexAnalyzers(registry, Collections.emptyMap(), Collections.emptyMap());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) HashMap(java.util.HashMap) StopFilter(org.apache.lucene.analysis.StopFilter) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) Matchers.containsString(org.hamcrest.Matchers.containsString) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers)

Example 2 with StandardTokenizerFactory

Use of org.opensearch.index.analysis.StandardTokenizerFactory in the OpenSearch project by opensearch-project.

From the class TextFieldMapperTests, method createIndexAnalyzers.

/**
 * Supplies the analyzer registry for this test class.
 *
 * <p>Registers {@code "default"} (a {@link StandardAnalyzer} created with
 * {@code TextFieldMapper.Defaults.POSITION_INCREMENT_GAP}), {@code "standard"},
 * {@code "keyword"}, {@code "whitespace"} and {@code "my_stop_analyzer"}, all with
 * {@link AnalyzerScope#INDEX}. The last one is a {@link CustomAnalyzer} that combines a standard
 * tokenizer, an empty char-filter array and a single token filter named {@code "stop"}, which
 * wraps the stream in a {@link StopFilter} over {@code EnglishAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * @param indexSettings index settings handed to the standard tokenizer factory
 * @return an {@link IndexAnalyzers} built from the five analyzers and two empty maps
 */
@Override
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
    NamedAnalyzer defaultNamed = new NamedAnalyzer(
        "default",
        AnalyzerScope.INDEX,
        new StandardAnalyzer(),
        TextFieldMapper.Defaults.POSITION_INCREMENT_GAP
    );
    NamedAnalyzer standardNamed = new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer());
    NamedAnalyzer keywordNamed = new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer());
    NamedAnalyzer whitespaceNamed = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());

    // Custom analyzer: standard tokenizer -> (no char filters) -> English stop-word filter.
    StandardTokenizerFactory standardTokenizer = new StandardTokenizerFactory(
        indexSettings,
        null,
        "standard",
        indexSettings.getSettings()
    );
    CharFilterFactory[] charFilters = new CharFilterFactory[0];
    TokenFilterFactory[] tokenFilters = new TokenFilterFactory[] { new TokenFilterFactory() {

        @Override
        public String name() {
            return "stop";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new StopFilter(tokenStream, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
        }
    } };
    NamedAnalyzer stopNamed = new NamedAnalyzer(
        "my_stop_analyzer",
        AnalyzerScope.INDEX,
        new CustomAnalyzer(standardTokenizer, charFilters, tokenFilters)
    );

    return new IndexAnalyzers(
        org.opensearch.common.collect.Map.of(
            "default", defaultNamed,
            "standard", standardNamed,
            "keyword", keywordNamed,
            "whitespace", whitespaceNamed,
            "my_stop_analyzer", stopNamed
        ),
        org.opensearch.common.collect.Map.of(),
        org.opensearch.common.collect.Map.of()
    );
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) StopFilter(org.apache.lucene.analysis.StopFilter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory)

Aggregations

StopFilter (org.apache.lucene.analysis.StopFilter)2 TokenStream (org.apache.lucene.analysis.TokenStream)2 KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer)2 WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer)2 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)2 CharFilterFactory (org.opensearch.index.analysis.CharFilterFactory)2 CustomAnalyzer (org.opensearch.index.analysis.CustomAnalyzer)2 IndexAnalyzers (org.opensearch.index.analysis.IndexAnalyzers)2 NamedAnalyzer (org.opensearch.index.analysis.NamedAnalyzer)2 StandardTokenizerFactory (org.opensearch.index.analysis.StandardTokenizerFactory)2 TokenFilterFactory (org.opensearch.index.analysis.TokenFilterFactory)2 HashMap (java.util.HashMap)1 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)1 Matchers.containsString (org.hamcrest.Matchers.containsString)1