Search in sources :

Example 1 with MultiPhrasePrefixQuery

use of org.opensearch.common.lucene.search.MultiPhrasePrefixQuery in project OpenSearch by opensearch-project.

In the class CustomUnifiedHighlighter, the method rewriteCustomQuery.

/**
 * Rewrites a custom query into span queries that the unified highlighter supports.
 *
 * <p>Only {@link MultiPhrasePrefixQuery} is handled. Each position becomes one span clause:
 * a {@link SpanTermQuery} per term, or a prefix query wrapped in a
 * {@link SpanMultiTermQueryWrapper} at the final position; several terms at the same
 * position are combined with a {@link SpanOrQuery}.
 *
 * @param query the query to translate
 * @return a singleton collection holding the translated query, or {@code null} when
 *         {@code query} is not a {@link MultiPhrasePrefixQuery}
 */
private Collection<Query> rewriteCustomQuery(Query query) {
    if ((query instanceof MultiPhrasePrefixQuery) == false) {
        return null;
    }
    final MultiPhrasePrefixQuery phrasePrefix = (MultiPhrasePrefixQuery) query;
    final Term[][] termsByPosition = phrasePrefix.getTerms();
    final int[] positions = phrasePrefix.getPositions();
    final int clauseCount = positions.length;
    final int lastIndex = termsByPosition.length - 1;

    final SpanQuery[] clauses = new SpanQuery[clauseCount];
    for (int idx = 0; idx < clauseCount; idx++) {
        final Term[] alternatives = termsByPosition[idx];
        // only the terms of the final position are treated as prefixes
        final boolean prefixPosition = (idx == lastIndex);
        final SpanQuery[] spans = new SpanQuery[alternatives.length];
        int slot = 0;
        for (Term term : alternatives) {
            if (prefixPosition) {
                spans[slot] = new SpanMultiTermQueryWrapper<>(new PrefixQuery(term));
            } else {
                spans[slot] = new SpanTermQuery(term);
            }
            slot++;
        }
        clauses[idx] = spans.length > 1 ? new SpanOrQuery(spans) : spans[0];
    }

    if (clauseCount == 1) {
        // a single position needs no proximity wrapper
        return Collections.singletonList(clauses[0]);
    }

    // total of the position increments that exceed 1 between the first and last position
    int extraSlop = 0;
    if (clauseCount >= 2) {
        // positions are in increasing order; clamping at 0 is only a safeguard
        final int span = positions[clauseCount - 1] - positions[0];
        extraSlop = Math.max(0, span - clauseCount + 1);
    }

    final int slop = phrasePrefix.getSlop();
    // an original slop of 0 means the clauses must match in order
    final boolean ordered = (slop == 0);
    final SpanNearQuery near = new SpanNearQuery(clauses, slop + extraSlop, ordered);
    return Collections.singletonList(near);
}
Also used : PrefixQuery(org.apache.lucene.search.PrefixQuery) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery)

Example 2 with MultiPhrasePrefixQuery

use of org.opensearch.common.lucene.search.MultiPhrasePrefixQuery in project OpenSearch by opensearch-project.

In the class CustomFieldQuery, the method flatten.

/**
 * Flattens {@code sourceQuery} into {@code flatQueries}, first unwrapping or converting
 * the query types handled here and delegating everything else to the parent class.
 *
 * @param sourceQuery the query to flatten
 * @param reader      the reader passed to {@code rewrite} calls and to nested flattening
 * @param flatQueries the collection handed on to nested and parent {@code flatten} calls
 * @param boost       the boost accumulated so far; multiplied by the boost of any
 *                    {@link BoostQuery} that is unwrapped
 * @throws IOException declared by this method; propagated from the nested calls
 */
@Override
protected void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQueries, float boost) throws IOException {
    if (sourceQuery instanceof BoostQuery) {
        final BoostQuery boosted = (BoostQuery) sourceQuery;
        flatten(boosted.getQuery(), reader, flatQueries, boost * boosted.getBoost());
        return;
    }
    if (sourceQuery instanceof SpanTermQuery) {
        final Term spanTerm = ((SpanTermQuery) sourceQuery).getTerm();
        super.flatten(new TermQuery(spanTerm), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof ConstantScoreQuery) {
        final Query inner = ((ConstantScoreQuery) sourceQuery).getQuery();
        flatten(inner, reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof FunctionScoreQuery) {
        final Query inner = ((FunctionScoreQuery) sourceQuery).getSubQuery();
        flatten(inner, reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof MultiPhrasePrefixQuery) {
        flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof MultiPhraseQuery) {
        final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) sourceQuery;
        convertMultiPhraseQuery(
            0,
            new int[multiPhrase.getTermArrays().length],
            multiPhrase,
            multiPhrase.getTermArrays(),
            multiPhrase.getPositions(),
            reader,
            flatQueries
        );
        return;
    }
    if (sourceQuery instanceof BlendedTermQuery) {
        final BlendedTermQuery blended = (BlendedTermQuery) sourceQuery;
        flatten(blended.rewrite(reader), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof org.apache.lucene.queries.function.FunctionScoreQuery) {
        final org.apache.lucene.queries.function.FunctionScoreQuery luceneFunctionScore =
            (org.apache.lucene.queries.function.FunctionScoreQuery) sourceQuery;
        // flatten the wrapped query, keeping the current boost
        flatten(luceneFunctionScore.getWrappedQuery(), reader, flatQueries, boost);
        return;
    }
    if (sourceQuery instanceof SynonymQuery) {
        // SynonymQuery should be handled by the parent class directly.
        // This branch should be removed when https://issues.apache.org/jira/browse/LUCENE-7484 is merged.
        for (Term synonym : ((SynonymQuery) sourceQuery).getTerms()) {
            flatten(new TermQuery(synonym), reader, flatQueries, boost);
        }
        return;
    }
    if (sourceQuery instanceof OpenSearchToParentBlockJoinQuery) {
        final Query child = ((OpenSearchToParentBlockJoinQuery) sourceQuery).getChildQuery();
        if (child != null) {
            flatten(child, reader, flatQueries, boost);
        }
        return;
    }
    super.flatten(sourceQuery, reader, flatQueries, boost);
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) FunctionScoreQuery(org.opensearch.common.lucene.search.function.FunctionScoreQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BoostQuery(org.apache.lucene.search.BoostQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) OpenSearchToParentBlockJoinQuery(org.opensearch.index.search.OpenSearchToParentBlockJoinQuery) FunctionScoreQuery(org.opensearch.common.lucene.search.function.FunctionScoreQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) OpenSearchToParentBlockJoinQuery(org.opensearch.index.search.OpenSearchToParentBlockJoinQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BlendedTermQuery(org.apache.lucene.queries.BlendedTermQuery) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery)

Example 3 with MultiPhrasePrefixQuery

use of org.opensearch.common.lucene.search.MultiPhrasePrefixQuery in project OpenSearch by opensearch-project.

In the class TextFieldMapper, the method createPhrasePrefixQuery.

/**
 * Builds a phrase-prefix query from the tokens of {@code stream}.
 *
 * <p>Tokens are grouped by position (a position increment of 0 adds the token to the
 * current group) and added to a {@link MultiPhrasePrefixQuery}. That query is returned
 * as is when {@code prefixField} is {@code null} or when {@code usePrefixField} rejects
 * the text length of any term at the last position. Otherwise the last position is looked
 * up in {@code prefixField}: a single position yields a {@link SynonymQuery}, several
 * positions yield an in-order {@link SpanNearQuery} whose last clause is field-masked
 * back to {@code field}.
 *
 * @param stream         the token stream to consume; it is reset here
 * @param field          the field the terms are created for
 * @param slop           the slop applied to the resulting query
 * @param maxExpansions  the maximum expansions set on the {@link MultiPhrasePrefixQuery}
 * @param prefixField    the name of the prefix field, or {@code null} to skip it
 * @param usePrefixField predicate over a term's text length that decides whether the
 *                       prefix field may be used
 * @return the query built as described above
 * @throws IOException declared by this method; propagated from reading the token stream
 */
public static Query createPhrasePrefixQuery(TokenStream stream, String field, int slop, int maxExpansions, String prefixField, IntPredicate usePrefixField) throws IOException {
    final MultiPhrasePrefixQuery phrasePrefix = new MultiPhrasePrefixQuery(field);
    phrasePrefix.setSlop(slop);
    phrasePrefix.setMaxExpansions(maxExpansions);

    final List<Term> pending = new ArrayList<>();
    final TermToBytesRefAttribute bytesAttr = stream.getAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute incrementAttr = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    int currentPosition = -1;
    while (stream.incrementToken()) {
        final int increment = incrementAttr.getPositionIncrement();
        if (increment != 0) {
            // a new position starts: flush the terms collected for the previous one
            if (pending.isEmpty() == false) {
                phrasePrefix.add(pending.toArray(new Term[0]), currentPosition);
            }
            currentPosition += increment;
            pending.clear();
        }
        pending.add(new Term(field, bytesAttr.getBytesRef()));
    }
    // flush whatever was collected for the final position
    phrasePrefix.add(pending.toArray(new Term[0]), currentPosition);

    if (prefixField == null) {
        return phrasePrefix;
    }

    final Term[][] termsByPosition = phrasePrefix.getTerms();
    final int[] positions = phrasePrefix.getPositions();
    final int lastIndex = termsByPosition.length - 1;

    // every term at the last position must be accepted by the predicate
    final boolean prefixFieldUsable = Arrays.stream(termsByPosition[lastIndex])
        .allMatch(candidate -> usePrefixField.test(candidate.text().length()));
    if (prefixFieldUsable == false) {
        return phrasePrefix;
    }

    if (termsByPosition.length == 1) {
        final Term[] source = termsByPosition[0];
        final Term[] prefixTerms = new Term[source.length];
        for (int k = 0; k < source.length; k++) {
            prefixTerms[k] = new Term(prefixField, source[k].bytes());
        }
        return new SynonymQuery(prefixTerms);
    }

    final SpanNearQuery.Builder nearBuilder = new SpanNearQuery.Builder(field, true);
    nearBuilder.setSlop(slop);
    int previousPosition = -1;
    for (int idx = 0; idx < termsByPosition.length; idx++) {
        final int increment = positions[idx] - previousPosition;
        previousPosition = positions[idx];
        if (increment > 1) {
            nearBuilder.addGap(increment - 1);
        }

        final Term[] alternatives = termsByPosition[idx];
        final boolean lastClause = (idx == lastIndex);
        final SpanQuery[] spans = new SpanQuery[alternatives.length];
        for (int k = 0; k < alternatives.length; k++) {
            if (lastClause) {
                // the last position is searched in the prefix field, masked as the main field
                final SpanTermQuery prefixSpan = new SpanTermQuery(new Term(prefixField, alternatives[k].bytes()));
                spans[k] = new FieldMaskingSpanQuery(prefixSpan, field);
            } else {
                spans[k] = new SpanTermQuery(alternatives[k]);
            }
        }
        if (spans.length == 1) {
            nearBuilder.addClause(spans[0]);
        } else {
            nearBuilder.addClause(new SpanOrQuery(spans));
        }
    }
    return nearBuilder.build();
}
Also used : Query(org.apache.lucene.search.Query) Arrays(java.util.Arrays) SimilarityProvider(org.opensearch.index.similarity.SimilarityProvider) EdgeNGramTokenFilter(org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter) FieldType(org.apache.lucene.document.FieldType) ToXContent(org.opensearch.common.xcontent.ToXContent) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) Version(org.opensearch.Version) AnalyzerScope(org.opensearch.index.analysis.AnalyzerScope) IntervalMode(org.opensearch.index.query.IntervalMode) IntPredicate(java.util.function.IntPredicate) Operations(org.apache.lucene.util.automaton.Operations) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) PagedBytesIndexFieldData(org.opensearch.index.fielddata.plain.PagedBytesIndexFieldData) Map(java.util.Map) Lucene(org.opensearch.common.lucene.Lucene) IntervalsSource(org.apache.lucene.queries.intervals.IntervalsSource) Automata(org.apache.lucene.util.automaton.Automata) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) Iterators(org.opensearch.common.collect.Iterators) BytesRef(org.apache.lucene.util.BytesRef) Automaton(org.apache.lucene.util.automaton.Automaton) PrefixQuery(org.apache.lucene.search.PrefixQuery) SearchLookup(org.opensearch.search.lookup.SearchLookup) Objects(java.util.Objects) List(java.util.List) QueryShardContext(org.opensearch.index.query.QueryShardContext) TokenFilter(org.apache.lucene.analysis.TokenFilter) FixedShingleFilter(org.apache.lucene.analysis.shingle.FixedShingleFilter) IndexAnalyzers(org.opensearch.index.analysis.IndexAnalyzers) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) AutomatonQuery(org.apache.lucene.search.AutomatonQuery) AutomatonQueries(org.opensearch.common.lucene.search.AutomatonQueries) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) 
IndexFieldData(org.opensearch.index.fielddata.IndexFieldData) LegacyESVersion(org.opensearch.LegacyESVersion) ParserContext(org.opensearch.index.mapper.Mapper.TypeParser.ParserContext) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) IntervalBuilder(org.opensearch.index.query.IntervalBuilder) BytesTermAttribute(org.apache.lucene.analysis.tokenattributes.BytesTermAttribute) CachingTokenFilter(org.apache.lucene.analysis.CachingTokenFilter) CoreValuesSourceType(org.opensearch.search.aggregations.support.CoreValuesSourceType) SpanQuery(org.apache.lucene.search.spans.SpanQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) NamedAnalyzer(org.opensearch.index.analysis.NamedAnalyzer) TokenStream(org.apache.lucene.analysis.TokenStream) Iterator(java.util.Iterator) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) BooleanClause(org.apache.lucene.search.BooleanClause) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) TermQuery(org.apache.lucene.search.TermQuery) AnalyzerWrapper(org.apache.lucene.analysis.AnalyzerWrapper) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Field(org.apache.lucene.document.Field) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) XContentMapValues(org.opensearch.common.xcontent.support.XContentMapValues) Intervals(org.apache.lucene.queries.intervals.Intervals) IndexOptions(org.apache.lucene.index.IndexOptions) Collections(java.util.Collections) SynonymQuery(org.apache.lucene.search.SynonymQuery) IntervalBuilder(org.opensearch.index.query.IntervalBuilder) 
XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) PositionIncrementAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermToBytesRefAttribute(org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 4 with MultiPhrasePrefixQuery

use of org.opensearch.common.lucene.search.MultiPhrasePrefixQuery in project OpenSearch by opensearch-project.

In the class CustomUnifiedHighlighterTests, the method testMultiPhrasePrefixQuerySingleTerm.

/**
 * Highlights one document with a {@link MultiPhrasePrefixQuery} holding the single
 * term "bro" and expects "brown" to be the only highlighted word.
 */
public void testMultiPhrasePrefixQuerySingleTerm() throws Exception {
    final String fieldName = "text";
    final String[] documents = new String[] { "The quick brown fox." };
    final String[] expected = new String[] { "The quick <b>brown</b> fox." };

    final MultiPhrasePrefixQuery prefixQuery = new MultiPhrasePrefixQuery(fieldName);
    prefixQuery.add(new Term(fieldName, "bro"));

    final StandardAnalyzer analyzer = new StandardAnalyzer();
    final BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.ROOT);
    assertHighlightOneDoc(fieldName, documents, analyzer, prefixQuery, Locale.ROOT, sentenceBreaks, 0, expected);
}
Also used : StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) Term(org.apache.lucene.index.Term)

Example 5 with MultiPhrasePrefixQuery

use of org.opensearch.common.lucene.search.MultiPhrasePrefixQuery in project OpenSearch by opensearch-project.

In the class CustomUnifiedHighlighterTests, the method testMultiPhrasePrefixQuery.

/**
 * Highlights one document with a {@link MultiPhrasePrefixQuery} built from the terms
 * "quick", "brown" and "fo", and expects "quick", "brown" and "fox" to be highlighted.
 */
public void testMultiPhrasePrefixQuery() throws Exception {
    final String fieldName = "text";
    final String[] documents = new String[] { "The quick brown fox." };
    final String[] expected = new String[] { "The <b>quick</b> <b>brown</b> <b>fox</b>." };

    final MultiPhrasePrefixQuery prefixQuery = new MultiPhrasePrefixQuery(fieldName);
    // terms are added in phrase order; the last one is the prefix
    for (String text : new String[] { "quick", "brown", "fo" }) {
        prefixQuery.add(new Term(fieldName, text));
    }

    final StandardAnalyzer analyzer = new StandardAnalyzer();
    final BreakIterator sentenceBreaks = BreakIterator.getSentenceInstance(Locale.ROOT);
    assertHighlightOneDoc(fieldName, documents, analyzer, prefixQuery, Locale.ROOT, sentenceBreaks, 0, expected);
}
Also used : StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) MultiPhrasePrefixQuery(org.opensearch.common.lucene.search.MultiPhrasePrefixQuery) Term(org.apache.lucene.index.Term)

Aggregations

MultiPhrasePrefixQuery (org.opensearch.common.lucene.search.MultiPhrasePrefixQuery)7 Term (org.apache.lucene.index.Term)6 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)5 PhraseQuery (org.apache.lucene.search.PhraseQuery)4 Query (org.apache.lucene.search.Query)4 SynonymQuery (org.apache.lucene.search.SynonymQuery)4 TermQuery (org.apache.lucene.search.TermQuery)4 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)4 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)4 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)3 BooleanQuery (org.apache.lucene.search.BooleanQuery)3 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)3 SpanQuery (org.apache.lucene.search.spans.SpanQuery)3 IOException (java.io.IOException)2 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2 Map (java.util.Map)2 Analyzer (org.apache.lucene.analysis.Analyzer)2 MockSynonymAnalyzer (org.apache.lucene.analysis.MockSynonymAnalyzer)2 TokenStream (org.apache.lucene.analysis.TokenStream)2