Search in sources :

Example 1 with CustomAnalyzer

use of org.opensearch.index.analysis.CustomAnalyzer in project OpenSearch by opensearch-project.

From the class AnalysisModuleTests, method testSimpleConfiguration:

/**
 * Verifies the analyzers registered as {@code custom1}, {@code custom6} and {@code custom4}
 * in the {@code IndexAnalyzers} obtained from the supplied settings.
 *
 * @param settings settings passed to {@code getIndexAnalyzers} to build the analyzers under test
 * @throws IOException declared by this method; presumably raised by {@code getIndexAnalyzers}
 */
private void testSimpleConfiguration(Settings settings) throws IOException {
    final IndexAnalyzers registry = getIndexAnalyzers(settings);

    // custom1: standard tokenizer, two token filters, the first one a stop filter holding one stop word
    final Analyzer firstAnalyzer = registry.get("custom1").analyzer();
    assertThat(firstAnalyzer, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer custom1 = (CustomAnalyzer) firstAnalyzer;
    assertThat(custom1.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    assertThat(custom1.tokenFilters().length, equalTo(2));
    final StopTokenFilterFactory stopFilter = (StopTokenFilterFactory) custom1.tokenFilters()[0];
    assertThat(stopFilter.stopWords().size(), equalTo(1));

    // custom6: the position increment gap must be 256
    final Analyzer gapAnalyzer = registry.get("custom6").analyzer();
    assertThat(gapAnalyzer, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer custom6 = (CustomAnalyzer) gapAnalyzer;
    assertThat(custom6.getPositionIncrementGap("any_string"), equalTo(256));

    // custom4: the first token filter must come from the custom MyFilterTokenFilterFactory class
    final Analyzer customClassAnalyzer = registry.get("custom4").analyzer();
    assertThat(customClassAnalyzer, instanceOf(CustomAnalyzer.class));
    final CustomAnalyzer custom4 = (CustomAnalyzer) customClassAnalyzer;
    assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));
}
Also used : StopTokenFilterFactory(org.opensearch.index.analysis.StopTokenFilterFactory) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) MyFilterTokenFilterFactory(org.opensearch.index.analysis.MyFilterTokenFilterFactory)

Example 2 with CustomAnalyzer

use of org.opensearch.index.analysis.CustomAnalyzer in project OpenSearch by opensearch-project.

From the class AnnotatedTextFieldMapperTests, method createIndexAnalyzers:

/**
 * Builds the {@code IndexAnalyzers} used by this test: {@code default}, {@code standard},
 * {@code keyword}, {@code whitespace} and a custom {@code my_stop_analyzer} that combines a
 * standard tokenizer with an English stop-word filter.
 *
 * @param indexSettings index settings used to construct the standard tokenizer factory
 * @return the analyzers keyed by name, with the two remaining constructor maps left empty
 */
@Override
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
    Map<String, NamedAnalyzer> byName = new HashMap<>();

    // Only the default analyzer is given an explicit position increment gap.
    byName.put("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer(), TextFieldMapper.Defaults.POSITION_INCREMENT_GAP));
    byName.put("standard", new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer()));
    byName.put("keyword", new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer()));
    byName.put("whitespace", new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer()));

    StandardTokenizerFactory tokenizer = new StandardTokenizerFactory(indexSettings, null, "standard", indexSettings.getSettings());
    // Token filter named "stop" that wraps the incoming stream in a StopFilter over the English stop words.
    TokenFilterFactory englishStop = new TokenFilterFactory() {

        @Override
        public String name() {
            return "stop";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new StopFilter(tokenStream, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
        }
    };
    CustomAnalyzer stopAnalyzer = new CustomAnalyzer(tokenizer, new CharFilterFactory[0], new TokenFilterFactory[] { englishStop });
    byName.put("my_stop_analyzer", new NamedAnalyzer("my_stop_analyzer", AnalyzerScope.INDEX, stopAnalyzer));

    return new IndexAnalyzers(byName, Collections.emptyMap(), Collections.emptyMap());
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) HashMap(java.util.HashMap) StopFilter(org.apache.lucene.analysis.StopFilter) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) Matchers.containsString(org.hamcrest.Matchers.containsString) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers)

Example 3 with CustomAnalyzer

use of org.opensearch.index.analysis.CustomAnalyzer in project OpenSearch by opensearch-project.

From the class TextFieldMapperTests, method createIndexAnalyzers:

/**
 * Supplies the analyzers for this test: {@code default}, {@code standard}, {@code keyword},
 * {@code whitespace} and {@code my_stop_analyzer}, the last being a custom analyzer made of a
 * standard tokenizer and an English stop-word filter.
 *
 * @param indexSettings index settings handed to the standard tokenizer factory
 * @return an {@code IndexAnalyzers} holding the five named analyzers; the other two maps are empty
 */
@Override
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
    // Only the default analyzer is created with an explicit position increment gap.
    NamedAnalyzer defaultAnalyzer = new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer(), TextFieldMapper.Defaults.POSITION_INCREMENT_GAP);
    NamedAnalyzer standardAnalyzer = new NamedAnalyzer("standard", AnalyzerScope.INDEX, new StandardAnalyzer());
    NamedAnalyzer keywordAnalyzer = new NamedAnalyzer("keyword", AnalyzerScope.INDEX, new KeywordAnalyzer());
    NamedAnalyzer whitespaceAnalyzer = new NamedAnalyzer("whitespace", AnalyzerScope.INDEX, new WhitespaceAnalyzer());

    StandardTokenizerFactory standardTokenizer = new StandardTokenizerFactory(indexSettings, null, "standard", indexSettings.getSettings());
    // Single-element filter chain: a factory named "stop" that applies the English stop-word set.
    TokenFilterFactory[] filterChain = new TokenFilterFactory[] { new TokenFilterFactory() {

        @Override
        public String name() {
            return "stop";
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            return new StopFilter(tokenStream, EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
        }
    } };
    NamedAnalyzer stopAnalyzer = new NamedAnalyzer(
        "my_stop_analyzer",
        AnalyzerScope.INDEX,
        new CustomAnalyzer(standardTokenizer, new CharFilterFactory[0], filterChain)
    );

    return new IndexAnalyzers(
        org.opensearch.common.collect.Map.of(
            "default", defaultAnalyzer,
            "standard", standardAnalyzer,
            "keyword", keywordAnalyzer,
            "whitespace", whitespaceAnalyzer,
            "my_stop_analyzer", stopAnalyzer
        ),
        org.opensearch.common.collect.Map.of(),
        org.opensearch.common.collect.Map.of()
    );
}
Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) CustomAnalyzer(org.opensearch.index.analysis.CustomAnalyzer) StopFilter(org.apache.lucene.analysis.StopFilter) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) CharFilterFactory(org.opensearch.index.analysis.CharFilterFactory) StandardTokenizerFactory(org.opensearch.index.analysis.StandardTokenizerFactory) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory)

Aggregations

CustomAnalyzer (org.opensearch.index.analysis.CustomAnalyzer): 3; IndexAnalyzers (org.opensearch.index.analysis.IndexAnalyzers): 3; StandardTokenizerFactory (org.opensearch.index.analysis.StandardTokenizerFactory): 3; StopFilter (org.apache.lucene.analysis.StopFilter): 2; TokenStream (org.apache.lucene.analysis.TokenStream): 2; KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer): 2; WhitespaceAnalyzer (org.apache.lucene.analysis.core.WhitespaceAnalyzer): 2; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 2; CharFilterFactory (org.opensearch.index.analysis.CharFilterFactory): 2; NamedAnalyzer (org.opensearch.index.analysis.NamedAnalyzer): 2; TokenFilterFactory (org.opensearch.index.analysis.TokenFilterFactory): 2; HashMap (java.util.HashMap): 1; Analyzer (org.apache.lucene.analysis.Analyzer): 1; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 1; Matchers.containsString (org.hamcrest.Matchers.containsString): 1; MyFilterTokenFilterFactory (org.opensearch.index.analysis.MyFilterTokenFilterFactory): 1; StopTokenFilterFactory (org.opensearch.index.analysis.StopTokenFilterFactory): 1