Search in sources :

Example 6 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

In the class PhraseSuggestionBuilder, the method getShingleFilterFactory:

/**
 * Resolves the shingle filter factory configured on the given analyzer, if any.
 * Unwraps a {@code NamedAnalyzer} to its delegate, then scans the token filter
 * chain of a {@code CustomAnalyzer} for a shingle filter.
 *
 * @param analyzer the analyzer to inspect
 * @return the inner shingle factory, or {@code null} when the analyzer is not a
 *         custom analyzer or carries no shingle filter
 */
private static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
    // Registered analyzers are wrapped; inspect the actual delegate.
    Analyzer candidate = analyzer instanceof NamedAnalyzer ? ((NamedAnalyzer) analyzer).analyzer() : analyzer;
    if (!(candidate instanceof CustomAnalyzer)) {
        return null;
    }
    for (TokenFilterFactory filterFactory : ((CustomAnalyzer) candidate).tokenFilters()) {
        // Check the wrapper type first, then the bare factory type, matching the
        // original lookup order.
        if (filterFactory instanceof ShingleTokenFilterFactory) {
            return ((ShingleTokenFilterFactory) filterFactory).getInnerFactory();
        }
        if (filterFactory instanceof ShingleTokenFilterFactory.Factory) {
            return (ShingleTokenFilterFactory.Factory) filterFactory;
        }
    }
    return null;
}
Also used : NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) CustomAnalyzer(org.elasticsearch.index.analysis.CustomAnalyzer) ShingleTokenFilterFactory(org.elasticsearch.index.analysis.ShingleTokenFilterFactory) ShingleTokenFilterFactory(org.elasticsearch.index.analysis.ShingleTokenFilterFactory) TokenFilterFactory(org.elasticsearch.index.analysis.TokenFilterFactory)

Example 7 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch-suggest-plugin by spinscale.

In the class AbstractCacheLoaderSuggester, the method load:

/**
 * Loads a suggester for the given field descriptor, resolving query- and index-time
 * analyzers from the field mapping with optional per-request overrides; falls back
 * to {@code StandardAnalyzer} when nothing is mapped.
 *
 * @param fieldType field name, types, and optional analyzer overrides
 * @return the suggester built from the resolved analyzers
 * @throws ElasticsearchException if a requested analyzer name does not exist
 */
@Override
public T load(ShardSuggestService.FieldType fieldType) throws Exception {
    MapperService.SmartNameFieldMappers fieldMappers = mapperService.smartName(fieldType.field(), fieldType.types());
    Analyzer queryAnalyzer = null;
    Analyzer indexAnalyzer = null;
    if (fieldMappers != null) {
        // Reuse the lookup done above instead of repeating smartName().
        FieldMapper fieldMapper = fieldMappers.mapper();
        queryAnalyzer = fieldMapper.searchAnalyzer();
        // BUG FIX: this guard previously checked fieldType.indexAnalyzer() while the
        // body resolved fieldType.queryAnalyzer(), so an explicit query analyzer was
        // only honored when an index analyzer happened to be set as well.
        if (Strings.hasLength(fieldType.queryAnalyzer())) {
            NamedAnalyzer namedAnalyzer = analysisService.analyzer(fieldType.queryAnalyzer());
            if (namedAnalyzer == null) {
                throw new ElasticsearchException("Query analyzer[" + fieldType.queryAnalyzer() + "] does not exist.");
            }
            queryAnalyzer = namedAnalyzer.analyzer();
        }
        // NOTE(review): searchAnalyzer() is also used as the index-time default here;
        // confirm whether fieldMapper.indexAnalyzer() was intended instead.
        indexAnalyzer = fieldMapper.searchAnalyzer();
        if (Strings.hasLength(fieldType.indexAnalyzer())) {
            NamedAnalyzer namedAnalyzer = analysisService.analyzer(fieldType.indexAnalyzer());
            if (namedAnalyzer == null) {
                throw new ElasticsearchException("Index analyzer[" + fieldType.indexAnalyzer() + "] does not exist.");
            }
            indexAnalyzer = namedAnalyzer.analyzer();
        }
    }
    // Defaults when the field is unmapped and no override was given.
    if (queryAnalyzer == null) {
        queryAnalyzer = new StandardAnalyzer(org.elasticsearch.Version.CURRENT.luceneVersion);
    }
    if (indexAnalyzer == null) {
        indexAnalyzer = new StandardAnalyzer(org.elasticsearch.Version.CURRENT.luceneVersion);
    }
    return getSuggester(indexAnalyzer, queryAnalyzer, fieldType);
}
Also used : NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) ElasticsearchException(org.elasticsearch.ElasticsearchException) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) FieldMapper(org.elasticsearch.index.mapper.FieldMapper) MapperService(org.elasticsearch.index.mapper.MapperService)

Example 8 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

In the class QueryStringQueryBuilder, the method doToQuery:

/**
 * Builds the Lucene {@code Query} for this query_string builder against the given
 * shard context: resolves fields and analyzers, transfers all builder settings into
 * {@code QueryParserSettings}, parses the query string with the classic parser, and
 * post-processes the result (negative-query fix, minimum_should_match) while
 * preserving any wrapping {@code BoostQuery} layers.
 */
@Override
protected Query doToQuery(QueryShardContext context) throws IOException {
    //e.g. field names get expanded to concrete names, defaults get resolved sometimes to settings values etc.
    // Reject the unsupported combination up front: phrase auto-generation requires
    // whitespace splitting in the parser.
    if (splitOnWhitespace == false && autoGeneratePhraseQueries) {
        throw new IllegalArgumentException("it is disallowed to disable [split_on_whitespace] " + "if [auto_generate_phrase_queries] is activated");
    }
    // Optionally escape the raw query string before handing it to the classic parser.
    QueryParserSettings qpSettings;
    if (this.escape) {
        qpSettings = new QueryParserSettings(org.apache.lucene.queryparser.classic.QueryParser.escape(this.queryString));
    } else {
        qpSettings = new QueryParserSettings(this.queryString);
    }
    // TreeMap keeps the resolved field set in a deterministic (sorted) order.
    Map<String, Float> resolvedFields = new TreeMap<>();
    // [all_fields] is mutually exclusive with explicit [default_field]/[fields].
    if ((useAllFields != null && useAllFields) && (fieldsAndWeights.size() != 0 || this.defaultField != null)) {
        throw addValidationError("cannot use [all_fields] parameter in conjunction with [default_field] or [fields]", null);
    }
    // - and no fields are specified in the request
    // All-fields mode: either requested explicitly, or implied because _all is the
    // default field but the _all mapping is disabled and nothing else was specified.
    if ((this.useAllFields != null && this.useAllFields) || (context.getMapperService().allEnabled() == false && "_all".equals(context.defaultField()) && this.defaultField == null && this.fieldsAndWeights.size() == 0)) {
        // Use the automatically determined expansion of all queryable fields
        resolvedFields = allQueryableDefaultFields(context);
        // Automatically set leniency to "true" if unset so mismatched fields don't cause exceptions
        qpSettings.lenient(lenient == null ? true : lenient);
    } else {
        qpSettings.defaultField(this.defaultField == null ? context.defaultField() : this.defaultField);
        // Expand wildcard field patterns against the index mappings; plain names pass through.
        for (Map.Entry<String, Float> fieldsEntry : fieldsAndWeights.entrySet()) {
            String fieldName = fieldsEntry.getKey();
            Float weight = fieldsEntry.getValue();
            if (Regex.isSimpleMatchPattern(fieldName)) {
                for (String resolvedFieldName : context.getMapperService().simpleMatchToIndexNames(fieldName)) {
                    resolvedFields.put(resolvedFieldName, weight);
                }
            } else {
                resolvedFields.put(fieldName, weight);
            }
        }
        qpSettings.lenient(lenient == null ? context.queryStringLenient() : lenient);
    }
    qpSettings.fieldsAndWeights(resolvedFields);
    qpSettings.defaultOperator(defaultOperator.toQueryParserOperator());
    // Analyzer resolution: an explicitly named analyzer is forced; otherwise the
    // mapper service default is used. Unknown names are an error.
    if (analyzer == null) {
        qpSettings.defaultAnalyzer(context.getMapperService().searchAnalyzer());
    } else {
        NamedAnalyzer namedAnalyzer = context.getIndexAnalyzers().get(analyzer);
        if (namedAnalyzer == null) {
            throw new QueryShardException(context, "[query_string] analyzer [" + analyzer + "] not found");
        }
        qpSettings.forceAnalyzer(namedAnalyzer);
    }
    // Quote analyzer: explicit name wins, else fall back to the forced analyzer,
    // else the mapper service's search_quote analyzer.
    if (quoteAnalyzer != null) {
        NamedAnalyzer namedAnalyzer = context.getIndexAnalyzers().get(quoteAnalyzer);
        if (namedAnalyzer == null) {
            throw new QueryShardException(context, "[query_string] quote_analyzer [" + quoteAnalyzer + "] not found");
        }
        qpSettings.forceQuoteAnalyzer(namedAnalyzer);
    } else if (analyzer != null) {
        qpSettings.forceQuoteAnalyzer(qpSettings.analyzer());
    } else {
        qpSettings.defaultQuoteAnalyzer(context.getMapperService().searchQuoteAnalyzer());
    }
    // Transfer the remaining builder knobs verbatim; null-able ones fall back to
    // index-level settings from the context.
    qpSettings.quoteFieldSuffix(quoteFieldSuffix);
    qpSettings.autoGeneratePhraseQueries(autoGeneratePhraseQueries);
    qpSettings.allowLeadingWildcard(allowLeadingWildcard == null ? context.queryStringAllowLeadingWildcard() : allowLeadingWildcard);
    qpSettings.analyzeWildcard(analyzeWildcard == null ? context.queryStringAnalyzeWildcard() : analyzeWildcard);
    qpSettings.enablePositionIncrements(enablePositionIncrements);
    qpSettings.fuzziness(fuzziness);
    qpSettings.fuzzyPrefixLength(fuzzyPrefixLength);
    qpSettings.fuzzyMaxExpansions(fuzzyMaxExpansions);
    qpSettings.fuzzyRewriteMethod(QueryParsers.parseRewriteMethod(this.fuzzyRewrite));
    qpSettings.phraseSlop(phraseSlop);
    qpSettings.useDisMax(useDisMax);
    qpSettings.tieBreaker(tieBreaker);
    qpSettings.rewriteMethod(QueryParsers.parseRewriteMethod(this.rewrite));
    qpSettings.timeZone(timeZone);
    qpSettings.maxDeterminizedStates(maxDeterminizedStates);
    qpSettings.splitOnWhitespace(splitOnWhitespace);
    MapperQueryParser queryParser = context.queryParser(qpSettings);
    Query query;
    try {
        query = queryParser.parse(queryString);
    } catch (org.apache.lucene.queryparser.classic.ParseException e) {
        throw new QueryShardException(context, "Failed to parse query [" + this.queryString + "]", e);
    }
    // A parse can legitimately yield no query (e.g. all terms removed by analysis).
    if (query == null) {
        return null;
    }
    //save the BoostQuery wrapped structure if present
    // Peel off nested BoostQuery wrappers so the fix-ups below apply to the core query.
    List<Float> boosts = new ArrayList<>();
    while (query instanceof BoostQuery) {
        BoostQuery boostQuery = (BoostQuery) query;
        boosts.add(boostQuery.getBoost());
        query = boostQuery.getQuery();
    }
    query = Queries.fixNegativeQueryIfNeeded(query);
    query = Queries.maybeApplyMinimumShouldMatch(query, this.minimumShouldMatch);
    //restore the previous BoostQuery wrapping
    // Re-wrap in reverse order so the original nesting is reproduced exactly.
    for (int i = boosts.size() - 1; i >= 0; i--) {
        query = new BoostQuery(query, boosts.get(i));
    }
    return query;
}
Also used : MapperQueryParser(org.apache.lucene.queryparser.classic.MapperQueryParser) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) Query(org.apache.lucene.search.Query) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ArrayList(java.util.ArrayList) QueryParserSettings(org.apache.lucene.queryparser.classic.QueryParserSettings) TreeMap(java.util.TreeMap) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 9 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

In the class TransportAnalyzeAction, the method detailAnalyze:

/**
 * Produces a per-component analysis breakdown for the _analyze API's "explain" mode.
 * For a custom analyzer, every char filter's intermediate text, the tokenizer's
 * output, and each token filter stage's output are reported separately; for any
 * other analyzer only the final token list is reported under the analyzer's name.
 */
private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
    DetailAnalyzeResponse detailResponse;
    // Requested attribute names are matched case-insensitively via lower-casing.
    final Set<String> includeAttributes = new HashSet<>();
    if (request.attributes() != null) {
        for (String attribute : request.attributes()) {
            includeAttributes.add(attribute.toLowerCase(Locale.ROOT));
        }
    }
    // Unwrap a NamedAnalyzer to see whether a CustomAnalyzer is underneath.
    CustomAnalyzer customAnalyzer = null;
    if (analyzer instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) analyzer;
    } else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
        customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
    }
    if (customAnalyzer != null) {
        // customAnalyzer = divide charfilter, tokenizer tokenfilters
        CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
        TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
        TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
        // charFiltersTexts[filter][text] holds the text as seen after each char filter;
        // tokenFiltersTokenListCreator accumulates tokens per filter stage across all texts.
        String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
        TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? tokenFilterFactories.length : 0];
        TokenListCreator tokenizerTokenListCreator = new TokenListCreator();
        for (int textIndex = 0; textIndex < request.text().length; textIndex++) {
            String charFilteredSource = request.text()[textIndex];
            Reader reader = new FastStringReader(charFilteredSource);
            if (charFilterFactories != null) {
                for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) {
                    // 'reader' is chained for the tokenizer below; a second reader is
                    // built from the same source just to capture the filtered text.
                    reader = charFilterFactories[charFilterIndex].create(reader);
                    Reader readerForWriteOut = new FastStringReader(charFilteredSource);
                    readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut);
                    charFilteredSource = writeCharStream(readerForWriteOut);
                    charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource;
                }
            }
            // analyzing only tokenizer
            Tokenizer tokenizer = tokenizerFactory.create();
            tokenizer.setReader(reader);
            tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes);
            // analyzing each tokenfilter
            if (tokenFilterFactories != null) {
                for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) {
                    if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) {
                        tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator();
                    }
                    // Re-run the whole chain up to and including this filter so each
                    // stage's output reflects all preceding components.
                    TokenStream stream = createStackedTokenStream(request.text()[textIndex], charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
                    tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, field, includeAttributes);
                }
            }
        }
        // Package the collected per-component results into the response structures.
        DetailAnalyzeResponse.CharFilteredText[] charFilteredLists = new DetailAnalyzeResponse.CharFilteredText[charFiltersTexts.length];
        if (charFilterFactories != null) {
            for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) {
                charFilteredLists[charFilterIndex] = new DetailAnalyzeResponse.CharFilteredText(charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]);
            }
        }
        DetailAnalyzeResponse.AnalyzeTokenList[] tokenFilterLists = new DetailAnalyzeResponse.AnalyzeTokenList[tokenFiltersTokenListCreator.length];
        if (tokenFilterFactories != null) {
            for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) {
                tokenFilterLists[tokenFilterIndex] = new DetailAnalyzeResponse.AnalyzeTokenList(tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
            }
        }
        detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
    } else {
        // Non-custom analyzer: report one combined token list under the analyzer's
        // registered name, or its class name when it is not a NamedAnalyzer.
        String name;
        if (analyzer instanceof NamedAnalyzer) {
            name = ((NamedAnalyzer) analyzer).name();
        } else {
            name = analyzer.getClass().getName();
        }
        TokenListCreator tokenListCreator = new TokenListCreator();
        for (String text : request.text()) {
            tokenListCreator.analyze(analyzer.tokenStream(field, text), analyzer, field, includeAttributes);
        }
        detailResponse = new DetailAnalyzeResponse(new DetailAnalyzeResponse.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
    }
    return detailResponse;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) FastStringReader(org.elasticsearch.common.io.FastStringReader) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) FastStringReader(org.elasticsearch.common.io.FastStringReader) Reader(java.io.Reader) Tokenizer(org.apache.lucene.analysis.Tokenizer) HashSet(java.util.HashSet) TokenizerFactory(org.elasticsearch.index.analysis.TokenizerFactory) CharFilterFactory(org.elasticsearch.index.analysis.CharFilterFactory) TokenFilterFactory(org.elasticsearch.index.analysis.TokenFilterFactory) CustomAnalyzer(org.elasticsearch.index.analysis.CustomAnalyzer)

Example 10 with NamedAnalyzer

use of org.elasticsearch.index.analysis.NamedAnalyzer in project elasticsearch by elastic.

In the class KeywordFieldMapper, the method parseCreateField:

/**
 * Extracts the keyword value for the current document, applies the configured
 * normalizer (which must emit exactly one token), and adds the indexed and/or
 * doc-values fields for it. Values that are null or longer than {@code ignoreAbove}
 * are silently skipped.
 */
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
    // Pull the raw value either from an externally supplied value or from the parser,
    // mapping an explicit JSON null to the configured null_value.
    String value;
    if (context.externalValueSet()) {
        value = context.externalValue().toString();
    } else {
        XContentParser parser = context.parser();
        value = parser.currentToken() == XContentParser.Token.VALUE_NULL
                ? fieldType().nullValueAsString()
                : parser.textOrNull();
    }
    if (value == null || value.length() > ignoreAbove) {
        return;
    }
    final NamedAnalyzer normalizer = fieldType().normalizer();
    if (normalizer != null) {
        // Run the normalizer and insist on a single-token result; anything else
        // indicates a misconfigured analysis chain.
        try (TokenStream ts = normalizer.tokenStream(name(), value)) {
            final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            if (!ts.incrementToken()) {
                throw new IllegalStateException("The normalization token stream is " + "expected to produce exactly 1 token, but got 0 for analyzer " + normalizer + " and input \"" + value + "\"");
            }
            final String normalized = termAtt.toString();
            if (ts.incrementToken()) {
                throw new IllegalStateException("The normalization token stream is " + "expected to produce exactly 1 token, but got 2+ for analyzer " + normalizer + " and input \"" + value + "\"");
            }
            ts.end();
            value = normalized;
        }
    }
    // Feed the (normalized) value into _all when this field participates in it.
    if (context.includeInAll(includeInAll, this)) {
        context.allEntries().addText(fieldType().name(), value, fieldType().boost());
    }
    // convert to utf8 only once before feeding postings/dv/stored fields
    final BytesRef binaryValue = new BytesRef(value);
    if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
        fields.add(new Field(fieldType().name(), binaryValue, fieldType()));
    }
    if (fieldType().hasDocValues()) {
        fields.add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
    }
}
Also used : SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) IndexableField(org.apache.lucene.index.IndexableField) Field(org.apache.lucene.document.Field) TypeParsers.parseField(org.elasticsearch.index.mapper.TypeParsers.parseField) TokenStream(org.apache.lucene.analysis.TokenStream) CharTermAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute) NamedAnalyzer(org.elasticsearch.index.analysis.NamedAnalyzer) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) XContentParser(org.elasticsearch.common.xcontent.XContentParser) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

NamedAnalyzer (org.elasticsearch.index.analysis.NamedAnalyzer)15 Analyzer (org.apache.lucene.analysis.Analyzer)4 TokenStream (org.apache.lucene.analysis.TokenStream)3 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)3 CompressedXContent (org.elasticsearch.common.compress.CompressedXContent)3 IndexSettings (org.elasticsearch.index.IndexSettings)3 HashSet (java.util.HashSet)2 Map (java.util.Map)2 Tokenizer (org.apache.lucene.analysis.Tokenizer)2 Field (org.apache.lucene.document.Field)2 Term (org.apache.lucene.index.Term)2 TermQuery (org.apache.lucene.search.TermQuery)2 CompletionAnalyzer (org.apache.lucene.search.suggest.document.CompletionAnalyzer)2 CustomAnalyzer (org.elasticsearch.index.analysis.CustomAnalyzer)2 IndexAnalyzers (org.elasticsearch.index.analysis.IndexAnalyzers)2 TokenFilterFactory (org.elasticsearch.index.analysis.TokenFilterFactory)2 Reader (java.io.Reader)1 AbstractMap (java.util.AbstractMap)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1