
Example 1 with CustomAnalyzer

Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.

Class FragmentBuilderHelper, method containsBrokenAnalysis.

private static boolean containsBrokenAnalysis(Analyzer analyzer) {
    // TODO maybe we need a getter on NamedAnalyzer that tells if this uses broken analysis
    if (analyzer instanceof NamedAnalyzer) {
        analyzer = ((NamedAnalyzer) analyzer).analyzer();
    }
    if (analyzer instanceof CustomAnalyzer) {
        final CustomAnalyzer a = (CustomAnalyzer) analyzer;
        TokenFilterFactory[] tokenFilters = a.tokenFilters();
        for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
            if (tokenFilterFactory instanceof WordDelimiterTokenFilterFactory || tokenFilterFactory instanceof EdgeNGramTokenFilterFactory) {
                return true;
            }
        }
    }
    return false;
}
Also used: WordDelimiterTokenFilterFactory (org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory), EdgeNGramTokenFilterFactory (org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory), NGramTokenFilterFactory (org.elasticsearch.index.analysis.NGramTokenFilterFactory), NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer), CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer), TokenFilterFactory (org.elasticsearch.index.analysis.TokenFilterFactory)
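
The unwrap-then-scan pattern above generalizes easily. The following is a minimal sketch of a hypothetical helper (not part of elasticsearch) that checks whether an analyzer's chain contains a token filter factory of a given type, using only the NamedAnalyzer.analyzer() and CustomAnalyzer.tokenFilters() accessors that appear in the example.

import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;

final class AnalyzerChainInspector {
    // Hypothetical utility: unwrap a NamedAnalyzer and scan the custom chain for a filter type.
    static boolean hasTokenFilterOfType(Analyzer analyzer, Class<? extends TokenFilterFactory> type) {
        if (analyzer instanceof NamedAnalyzer) {
            // a NamedAnalyzer only wraps the analyzer registered under that name
            analyzer = ((NamedAnalyzer) analyzer).analyzer();
        }
        if (analyzer instanceof CustomAnalyzer) {
            for (TokenFilterFactory factory : ((CustomAnalyzer) analyzer).tokenFilters()) {
                if (type.isInstance(factory)) {
                    return true;
                }
            }
        }
        return false;
    }
}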

Example 2 with CustomAnalyzer

Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.

Class PhraseSuggestionBuilder, method getShingleFilterFactory.

private static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
    if (analyzer instanceof NamedAnalyzer) {
        analyzer = ((NamedAnalyzer) analyzer).analyzer();
    }
    if (analyzer instanceof CustomAnalyzer) {
        final CustomAnalyzer a = (CustomAnalyzer) analyzer;
        final TokenFilterFactory[] tokenFilters = a.tokenFilters();
        for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
            if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
                return ((ShingleTokenFilterFactory) tokenFilterFactory).getInnerFactory();
            } else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
                return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
            }
        }
    }
    return null;
}
Also used: NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer), CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer), ShingleTokenFilterFactory (org.elasticsearch.index.analysis.ShingleTokenFilterFactory), TokenFilterFactory (org.elasticsearch.index.analysis.TokenFilterFactory)
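
A caller inside the same class would typically null-check the result and read the shingle settings off the returned factory. The fragment below is a sketch only: the helper name defaultGramSize is made up for illustration, and getMaxShingleSize() is an assumed accessor on the inner factory, not something shown in the excerpt above.

import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;

// Sketch: derive a default gram size from the analyzer's shingle filter, if any.
private static int defaultGramSize(Analyzer analyzer) {
    ShingleTokenFilterFactory.Factory shingleFactory = getShingleFilterFactory(analyzer);
    if (shingleFactory == null) {
        return 1; // no shingle filter in the chain: fall back to unigrams
    }
    return shingleFactory.getMaxShingleSize(); // assumed accessor
}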

Example 3 with CustomAnalyzer

Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.

Class TransportAnalyzeAction, method detailAnalyze.

private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    DetailAnalyzeResponse detailResponse;
    final Set<String> includeAttributes = new HashSet<>();
    if (request.attributes() != null) {
        for (String attribute : request.attributes()) {
            includeAttributes.add(attribute.toLowerCase(Locale.ROOT));
        }
    }
    CustomAnalyzer customAnalyzer = null;
    if (analyzer instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) analyzer;
    } else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
    }
    if (customAnalyzer != null) {
        // split the custom analyzer into its char filters, tokenizer and token filters
        CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
        TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
        TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
        String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
        TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? tokenFilterFactories.length : 0];
        TokenListCreator tokenizerTokenListCreator = new TokenListCreator();
        for (int textIndex = 0; textIndex < request.text().length; textIndex++) {
            String charFilteredSource = request.text()[textIndex];
            Reader reader = new FastStringReader(charFilteredSource);
            if (charFilterFactories != null) {
                for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) {
                    reader = charFilterFactories[charFilterIndex].create(reader);
                    Reader readerForWriteOut = new FastStringReader(charFilteredSource);
                    readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut);
                    charFilteredSource = writeCharStream(readerForWriteOut);
                    charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource;
                }
            }
            // analyze the output of the tokenizer on its own
            Tokenizer tokenizer = tokenizerFactory.create();
            tokenizer.setReader(reader);
            tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes);
            // analyze the stream after each token filter in the chain
            if (tokenFilterFactories != null) {
                for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) {
                    if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) {
                        tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator();
                    }
                    TokenStream stream = createStackedTokenStream(request.text()[textIndex], charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
                    tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, field, includeAttributes);
                }
            }
        }
        DetailAnalyzeResponse.CharFilteredText[] charFilteredLists = new DetailAnalyzeResponse.CharFilteredText[charFiltersTexts.length];
        if (charFilterFactories != null) {
            for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) {
                charFilteredLists[charFilterIndex] = new DetailAnalyzeResponse.CharFilteredText(charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]);
            }
        }
        DetailAnalyzeResponse.AnalyzeTokenList[] tokenFilterLists = new DetailAnalyzeResponse.AnalyzeTokenList[tokenFiltersTokenListCreator.length];
        if (tokenFilterFactories != null) {
            for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) {
                tokenFilterLists[tokenFilterIndex] = new DetailAnalyzeResponse.AnalyzeTokenList(tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
            }
        }
        detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
    } else {
        String name;
        if (analyzer instanceof NamedAnalyzer) {
            name = ((NamedAnalyzer) analyzer).name();
        } else {
            name = analyzer.getClass().getName();
        }
        TokenListCreator tokenListCreator = new TokenListCreator();
        for (String text : request.text()) {
            tokenListCreator.analyze(analyzer.tokenStream(field, text), analyzer, field, includeAttributes);
        }
        detailResponse = new DetailAnalyzeResponse(new DetailAnalyzeResponse.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
    }
    return detailResponse;
}
Also used: TokenStream (org.apache.lucene.analysis.TokenStream), Tokenizer (org.apache.lucene.analysis.Tokenizer), Reader (java.io.Reader), FastStringReader (org.elasticsearch.common.io.FastStringReader), HashSet (java.util.HashSet), NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer), CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer), TokenizerFactory (org.elasticsearch.index.analysis.TokenizerFactory), CharFilterFactory (org.elasticsearch.index.analysis.CharFilterFactory), TokenFilterFactory (org.elasticsearch.index.analysis.TokenFilterFactory)
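
The helper createStackedTokenStream referenced above is not shown in this excerpt; the idea it implements is to re-run the analysis chain up to a given token filter so the response can report the tokens as they look after each stage. A minimal sketch of that idea, using only the factory methods that appear in the example (CharFilterFactory.create(Reader), TokenizerFactory.create(), TokenFilterFactory.create(TokenStream)), might look like this. It is an illustration, not the actual elasticsearch implementation.

import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;

final class StackedStreamSketch {
    // Build a token stream that applies all char filters, the tokenizer, and only the
    // first `upTo` token filters of the chain (illustrative sketch, not the real helper).
    static TokenStream stackUpTo(Reader source, CharFilterFactory[] charFilters,
                                 TokenizerFactory tokenizerFactory,
                                 TokenFilterFactory[] tokenFilters, int upTo) {
        Reader reader = source;
        if (charFilters != null) {
            for (CharFilterFactory charFilter : charFilters) {
                reader = charFilter.create(reader);
            }
        }
        Tokenizer tokenizer = tokenizerFactory.create();
        tokenizer.setReader(reader);
        TokenStream stream = tokenizer;
        for (int i = 0; tokenFilters != null && i < upTo && i < tokenFilters.length; i++) {
            stream = tokenFilters[i].create(stream);
        }
        return stream;
    }
}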

Example 4 with CustomAnalyzer

Use of org.elasticsearch.index.analysis.CustomAnalyzer in project elasticsearch by elastic.

Class AnalysisModuleTests, method testSimpleConfiguration.

private void testSimpleConfiguration(Settings settings) throws IOException {
    IndexAnalyzers indexAnalyzers = getIndexAnalyzers(settings);
    Analyzer analyzer = indexAnalyzers.get("custom1").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom1 = (CustomAnalyzer) analyzer;
    assertThat(custom1.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    assertThat(custom1.tokenFilters().length, equalTo(2));
    StopTokenFilterFactory stop1 = (StopTokenFilterFactory) custom1.tokenFilters()[0];
    assertThat(stop1.stopWords().size(), equalTo(1));
    analyzer = indexAnalyzers.get("custom2").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    // verify position increment gap
    analyzer = indexAnalyzers.get("custom6").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom6 = (CustomAnalyzer) analyzer;
    assertThat(custom6.getPositionIncrementGap("any_string"), equalTo(256));
    // verify character mapping
    analyzer = indexAnalyzers.get("custom5").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom5 = (CustomAnalyzer) analyzer;
    assertThat(custom5.charFilters()[0], instanceOf(MappingCharFilterFactory.class));
    // check custom pattern replace filter
    analyzer = indexAnalyzers.get("custom3").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom3 = (CustomAnalyzer) analyzer;
    PatternReplaceCharFilterFactory patternReplaceCharFilterFactory = (PatternReplaceCharFilterFactory) custom3.charFilters()[0];
    assertThat(patternReplaceCharFilterFactory.getPattern().pattern(), equalTo("sample(.*)"));
    assertThat(patternReplaceCharFilterFactory.getReplacement(), equalTo("replacedSample $1"));
    // check custom class name (my)
    analyzer = indexAnalyzers.get("custom4").analyzer();
    assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    CustomAnalyzer custom4 = (CustomAnalyzer) analyzer;
    assertThat(custom4.tokenFilters()[0], instanceOf(MyFilterTokenFilterFactory.class));
    //        // verify Czech stemmer
    //        analyzer = analysisService.analyzer("czechAnalyzerWithStemmer").analyzer();
    //        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    //        CustomAnalyzer czechstemmeranalyzer = (CustomAnalyzer) analyzer;
    //        assertThat(czechstemmeranalyzer.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    //        assertThat(czechstemmeranalyzer.tokenFilters().length, equalTo(4));
    //        assertThat(czechstemmeranalyzer.tokenFilters()[3], instanceOf(CzechStemTokenFilterFactory.class));
    //
    //        // check dictionary decompounder
    //        analyzer = analysisService.analyzer("decompoundingAnalyzer").analyzer();
    //        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
    //        CustomAnalyzer dictionaryDecompounderAnalyze = (CustomAnalyzer) analyzer;
    //        assertThat(dictionaryDecompounderAnalyze.tokenizerFactory(), instanceOf(StandardTokenizerFactory.class));
    //        assertThat(dictionaryDecompounderAnalyze.tokenFilters().length, equalTo(1));
    //        assertThat(dictionaryDecompounderAnalyze.tokenFilters()[0], instanceOf(DictionaryCompoundWordTokenFilterFactory.class));
    Set<?> wordList = Analysis.getWordSet(null, Version.CURRENT, settings, "index.analysis.filter.dict_dec.word_list");
    MatcherAssert.assertThat(wordList.size(), equalTo(6));
//        MatcherAssert.assertThat(wordList, hasItems("donau", "dampf", "schiff", "spargel", "creme", "suppe"));
}
Also used: StopTokenFilterFactory (org.elasticsearch.index.analysis.StopTokenFilterFactory), CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer), StandardTokenizerFactory (org.elasticsearch.index.analysis.StandardTokenizerFactory), IndexAnalyzers (org.elasticsearch.index.analysis.IndexAnalyzers), MappingCharFilterFactory (org.elasticsearch.index.analysis.MappingCharFilterFactory), PatternReplaceCharFilterFactory (org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory), MyFilterTokenFilterFactory (org.elasticsearch.index.analysis.filter1.MyFilterTokenFilterFactory), NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer), Analyzer (org.apache.lucene.analysis.Analyzer), KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer), StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer), EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer), GermanAnalyzer (org.apache.lucene.analysis.de.GermanAnalyzer)
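
For orientation, a settings block along the following lines would satisfy the custom1 assertions (standard tokenizer, two token filters, the first a stop filter with a single stop word). The filter name "my_stop" and the stop word are hypothetical rather than taken from the test fixture, and the getIndexAnalyzers helper is assumed to add the remaining required index settings.

import org.elasticsearch.common.settings.Settings;

// Illustrative only: filter name and stop word are hypothetical.
Settings settings = Settings.builder()
    .put("index.analysis.filter.my_stop.type", "stop")
    .putArray("index.analysis.filter.my_stop.stopwords", "the")
    .put("index.analysis.analyzer.custom1.type", "custom")
    .put("index.analysis.analyzer.custom1.tokenizer", "standard")
    .putArray("index.analysis.analyzer.custom1.filter", "my_stop", "lowercase")
    .build();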

Aggregations

CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer) 4
NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer) 4
TokenFilterFactory (org.elasticsearch.index.analysis.TokenFilterFactory) 3
Reader (java.io.Reader) 1
HashSet (java.util.HashSet) 1
Analyzer (org.apache.lucene.analysis.Analyzer) 1
TokenStream (org.apache.lucene.analysis.TokenStream) 1
Tokenizer (org.apache.lucene.analysis.Tokenizer) 1
KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer) 1
GermanAnalyzer (org.apache.lucene.analysis.de.GermanAnalyzer) 1
EnglishAnalyzer (org.apache.lucene.analysis.en.EnglishAnalyzer) 1
StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer) 1
FastStringReader (org.elasticsearch.common.io.FastStringReader) 1
CharFilterFactory (org.elasticsearch.index.analysis.CharFilterFactory) 1
EdgeNGramTokenFilterFactory (org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory) 1
IndexAnalyzers (org.elasticsearch.index.analysis.IndexAnalyzers) 1
MappingCharFilterFactory (org.elasticsearch.index.analysis.MappingCharFilterFactory) 1
NGramTokenFilterFactory (org.elasticsearch.index.analysis.NGramTokenFilterFactory) 1
PatternReplaceCharFilterFactory (org.elasticsearch.index.analysis.PatternReplaceCharFilterFactory) 1
ShingleTokenFilterFactory (org.elasticsearch.index.analysis.ShingleTokenFilterFactory) 1