Search in sources :

Example 11 with SpanOrQuery

use of org.apache.lucene.queries.spans.SpanOrQuery in project OpenSearch by opensearch-project.

In the class TextFieldMapper, the method createPhrasePrefixQuery.

/**
 * Builds a phrase-prefix query from the tokens produced by {@code stream}.
 *
 * <p>Tokens whose position increment is zero are grouped with the preceding token at the
 * same position. The method returns one of three query shapes:
 * <ul>
 *   <li>the plain {@link MultiPhrasePrefixQuery} when {@code prefixField} is {@code null}, or
 *       when any term at the last position has a text length rejected by
 *       {@code usePrefixField};</li>
 *   <li>a {@link SynonymQuery} on {@code prefixField} when only one position was collected;</li>
 *   <li>otherwise a {@link SpanNearQuery} in which every position but the last matches
 *       against {@code field}, and the last position matches against {@code prefixField}
 *       wrapped in a {@link FieldMaskingSpanQuery} that reports {@code field}.</li>
 * </ul>
 *
 * <p>The stream is {@code reset()} here; this method does not call {@code end()} or
 * {@code close()} on it.
 *
 * @param stream         token stream to consume
 * @param field          field the phrase terms are created for
 * @param slop           slop applied to the phrase-prefix query and to the span-near query
 * @param maxExpansions  maximum expansions set on the phrase-prefix query
 * @param prefixField    name of the prefix field, or {@code null} to skip the prefix-field path
 * @param usePrefixField predicate tested with the text length of each term at the last position
 * @return the query built from the stream
 * @throws IOException if reading from the token stream fails
 */
public static Query createPhrasePrefixQuery(TokenStream stream, String field, int slop, int maxExpansions, String prefixField, IntPredicate usePrefixField) throws IOException {
    final MultiPhrasePrefixQuery phrasePrefix = new MultiPhrasePrefixQuery(field);
    phrasePrefix.setSlop(slop);
    phrasePrefix.setMaxExpansions(maxExpansions);

    // Terms collected for the position currently being read.
    final List<Term> pending = new ArrayList<>();
    final TermToBytesRefAttribute bytesAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute incrementAttr = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    int position = -1;
    while (stream.incrementToken()) {
        final int increment = incrementAttr.getPositionIncrement();
        if (increment != 0) {
            // A new position starts: flush whatever was gathered for the previous one.
            if (pending.isEmpty() == false) {
                phrasePrefix.add(pending.toArray(new Term[0]), position);
                pending.clear();
            }
            position += increment;
        }
        pending.add(new Term(field, bytesAttr.getBytesRef()));
    }
    // The terms of the final position are always added, even when the stream was empty.
    phrasePrefix.add(pending.toArray(new Term[0]), position);

    if (prefixField == null) {
        return phrasePrefix;
    }

    final Term[][] terms = phrasePrefix.getTerms();
    final int[] positions = phrasePrefix.getPositions();
    final int lastPos = terms.length - 1;

    // Every term at the last position must be accepted by the predicate, otherwise fall back.
    final boolean allAccepted = Arrays.stream(terms[lastPos]).allMatch(term -> usePrefixField.test(term.text().length()));
    if (allAccepted == false) {
        return phrasePrefix;
    }

    if (terms.length == 1) {
        // Single position: the terms are looked up directly in the prefix field.
        final SynonymQuery.Builder synonyms = new SynonymQuery.Builder(prefixField);
        for (Term term : terms[0]) {
            synonyms.addTerm(new Term(prefixField, term.bytes()));
        }
        return synonyms.build();
    }

    final SpanNearQuery.Builder near = new SpanNearQuery.Builder(field, true);
    near.setSlop(slop);
    int previousPos = -1;
    for (int i = 0; i < terms.length; i++) {
        final Term[] posTerms = terms[i];
        final int posInc = positions[i] - previousPos;
        previousPos = positions[i];
        if (posInc > 1) {
            // Positions skipped between two consecutive entries become a gap.
            near.addGap(posInc - 1);
        }

        final boolean last = i == lastPos;
        final SpanQuery[] alternatives = new SpanQuery[posTerms.length];
        for (int j = 0; j < posTerms.length; j++) {
            if (last) {
                // Last position: query the prefix field but mask it as the main field.
                alternatives[j] = new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixField, posTerms[j].bytes())), field);
            } else {
                alternatives[j] = new SpanTermQuery(posTerms[j]);
            }
        }
        // A lone term is added as-is; several terms at one position are OR-ed together.
        near.addClause(alternatives.length == 1 ? alternatives[0] : new SpanOrQuery(alternatives));
    }
    return near.build();
}
Also used : Query(org.apache.lucene.search.Query) SpanOrQuery(org.apache.lucene.queries.spans.SpanOrQuery) Arrays(java.util.Arrays) SimilarityProvider(org.opensearch.index.similarity.SimilarityProvider) SpanTermQuery(org.apache.lucene.queries.spans.SpanTermQuery) EdgeNGramTokenFilter(org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter) FieldType(org.apache.lucene.document.FieldType) ToXContent(org.opensearch.common.xcontent.ToXContent) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) Version(org.opensearch.Version) AnalyzerScope(org.opensearch.index.analysis.AnalyzerScope) IntervalMode(org.opensearch.index.query.IntervalMode) IntPredicate(java.util.function.IntPredicate) Operations(org.apache.lucene.util.automaton.Operations) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) PagedBytesIndexFieldData(org.opensearch.index.fielddata.plain.PagedBytesIndexFieldData) Map(java.util.Map) Lucene(org.opensearch.common.lucene.Lucene) IntervalsSource(org.apache.lucene.queries.intervals.IntervalsSource) Automata(org.apache.lucene.util.automaton.Automata) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) Iterators(org.opensearch.common.collect.Iterators) BytesRef(org.apache.lucene.util.BytesRef) Automaton(org.apache.lucene.util.automaton.Automaton) PrefixQuery(org.apache.lucene.search.PrefixQuery) SearchLookup(org.opensearch.search.lookup.SearchLookup) Objects(java.util.Objects) SpanMultiTermQueryWrapper(org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper) SpanQuery(org.apache.lucene.queries.spans.SpanQuery) List(java.util.List) QueryShardContext(org.opensearch.index.query.QueryShardContext) TokenFilter(org.apache.lucene.analysis.TokenFilter) FixedShingleFilter(org.apache.lucene.analysis.shingle.FixedShingleFilter) SpanNearQuery(org.apache.lucene.queries.spans.SpanNearQuery) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) 
AutomatonQuery(org.apache.lucene.search.AutomatonQuery) AutomatonQueries(org.opensearch.common.lucene.search.AutomatonQueries) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) IndexFieldData(org.opensearch.index.fielddata.IndexFieldData) LegacyESVersion(org.opensearch.LegacyESVersion) ParserContext(org.opensearch.index.mapper.Mapper.TypeParser.ParserContext) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) IntervalBuilder(org.opensearch.index.query.IntervalBuilder) BytesTermAttribute(org.apache.lucene.analysis.tokenattributes.BytesTermAttribute) CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) CoreValuesSourceType(org.opensearch.search.aggregations.support.CoreValuesSourceType) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) Iterator(java.util.Iterator) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) BooleanClause(org.apache.lucene.search.BooleanClause) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) TermQuery(org.apache.lucene.search.TermQuery) AnalyzerWrapper(org.apache.lucene.analysis.AnalyzerWrapper) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Field(org.apache.lucene.document.Field) FieldMaskingSpanQuery(org.apache.lucene.queries.spans.FieldMaskingSpanQuery) XContentMapValues(org.opensearch.common.xcontent.support.XContentMapValues) Intervals(org.apache.lucene.queries.intervals.Intervals) IndexOptions(org.apache.lucene.index.IndexOptions) Collections(java.util.Collections) SynonymQuery(org.apache.lucene.search.SynonymQuery) IntervalBuilder(org.opensearch.index.query.IntervalBuilder) 
XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.queries.spans.SpanOrQuery) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) SpanQuery(org.apache.lucene.queries.spans.SpanQuery) FieldMaskingSpanQuery(org.apache.lucene.queries.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.queries.spans.SpanTermQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) FieldMaskingSpanQuery(org.apache.lucene.queries.spans.FieldMaskingSpanQuery) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) SpanNearQuery(org.apache.lucene.queries.spans.SpanNearQuery)

Example 12 with SpanOrQuery

use of org.apache.lucene.queries.spans.SpanOrQuery in project OpenSearch by opensearch-project.

In the class SpanBooleanQueryRewriteWithMaxClause, the method rewrite.

/**
 * Rewrites {@code query} into a span query built from the terms it expands to.
 *
 * <p>The result is a {@link SpanMatchNoDocsQuery} when no term is found, the single
 * {@link SpanTermQuery} when exactly one term is found, and a {@link SpanOrQuery} over all
 * collected terms otherwise. Collection stops once {@code maxExpansions} distinct terms have
 * been gathered: with {@code hardLimit} set a {@link RuntimeException} is thrown, otherwise
 * the terms gathered so far are used.
 *
 * @param reader index reader whose leaves are scanned for matching terms
 * @param query  multi-term query to expand
 * @return the rewritten span query
 * @throws IOException if reading terms from the index fails
 */
@Override
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
    final MultiTermQuery.RewriteMethod delegate = new MultiTermQuery.RewriteMethod() {

        @Override
        public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
            final Collection<SpanQuery> expanded = collectTerms(reader, query);
            if (expanded.isEmpty()) {
                return new SpanMatchNoDocsQuery(query.getField(), "no expansion found for " + query.toString());
            }
            if (expanded.size() == 1) {
                return expanded.iterator().next();
            }
            return new SpanOrQuery(expanded.toArray(new SpanQuery[0]));
        }

        /** Gathers one span term query per distinct matching term across all leaves of the reader. */
        private Collection<SpanQuery> collectTerms(IndexReader reader, MultiTermQuery query) throws IOException {
            final Set<SpanQuery> collected = new HashSet<>();
            final IndexReaderContext topContext = reader.getContext();
            for (LeafReaderContext leaf : topContext.leaves()) {
                final Terms fieldTerms = leaf.reader().terms(query.getField());
                if (fieldTerms == null) {
                    // this leaf has no such field
                    continue;
                }
                final TermsEnum matches = getTermsEnum(query, fieldTerms, new AttributeSource());
                assert matches != null;
                if (matches == TermsEnum.EMPTY) {
                    continue;
                }
                for (BytesRef term = matches.next(); term != null; term = matches.next()) {
                    if (collected.size() >= maxExpansions) {
                        // Limit reached: either fail hard or settle for what was collected so far.
                        if (hardLimit == false) {
                            return collected;
                        }
                        throw new RuntimeException("[" + query.toString() + " ] " + "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]");
                    }
                    collected.add(new SpanTermQuery(new Term(query.getField(), term)));
                }
            }
            return collected;
        }
    };
    return (SpanQuery) delegate.rewrite(reader, query);
}
Also used : AttributeSource(org.apache.lucene.util.AttributeSource) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.queries.spans.SpanOrQuery) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) SpanQuery(org.apache.lucene.queries.spans.SpanQuery) TermsEnum(org.apache.lucene.index.TermsEnum) SpanTermQuery(org.apache.lucene.queries.spans.SpanTermQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanMatchNoDocsQuery(org.apache.lucene.queries.SpanMatchNoDocsQuery) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet)

Aggregations

SpanOrQuery (org.apache.lucene.queries.spans.SpanOrQuery)12 SpanTermQuery (org.apache.lucene.queries.spans.SpanTermQuery)10 Term (org.apache.lucene.index.Term)9 SpanNearQuery (org.apache.lucene.queries.spans.SpanNearQuery)8 SpanQuery (org.apache.lucene.queries.spans.SpanQuery)8 Query (org.apache.lucene.search.Query)7 BooleanQuery (org.apache.lucene.search.BooleanQuery)5 PhraseQuery (org.apache.lucene.search.PhraseQuery)5 SynonymQuery (org.apache.lucene.search.SynonymQuery)5 TermQuery (org.apache.lucene.search.TermQuery)5 BooleanClause (org.apache.lucene.search.BooleanClause)4 PrefixQuery (org.apache.lucene.search.PrefixQuery)4 MockSynonymAnalyzer (org.apache.lucene.tests.analysis.MockSynonymAnalyzer)4 MultiPhrasePrefixQuery (org.opensearch.common.lucene.search.MultiPhrasePrefixQuery)4 Analyzer (org.apache.lucene.analysis.Analyzer)3 MultiTermQuery (org.apache.lucene.search.MultiTermQuery)3 BytesRef (org.apache.lucene.util.BytesRef)3 XContentBuilder (org.opensearch.common.xcontent.XContentBuilder)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2