Search in sources :

Example 1 with CommonTermsQuery

use of org.apache.lucene.queries.CommonTermsQuery in project elasticsearch by elastic.

the class QueryAnalyzerTests method testExtractQueryMetadata_commonTermsQuery.

public void testExtractQueryMetadata_commonTermsQuery() {
    CommonTermsQuery commonTermsQuery = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 100);
    commonTermsQuery.add(new Term("_field", "_term1"));
    commonTermsQuery.add(new Term("_field", "_term2"));
    Result result = analyze(commonTermsQuery);
    assertThat(result.verified, is(false));
    List<Term> terms = new ArrayList<>(result.terms);
    Collections.sort(terms);
    assertThat(terms.size(), equalTo(2));
    assertThat(terms.get(0).field(), equalTo("_field"));
    assertThat(terms.get(0).text(), equalTo("_term1"));
    assertThat(terms.get(1).field(), equalTo("_field"));
    assertThat(terms.get(1).text(), equalTo("_term2"));
}
Also used : ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) QueryAnalyzer.selectTermListWithTheLongestShortestTerm(org.elasticsearch.percolator.QueryAnalyzer.selectTermListWithTheLongestShortestTerm) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) Result(org.elasticsearch.percolator.QueryAnalyzer.Result)

Example 2 with CommonTermsQuery

use of org.apache.lucene.queries.CommonTermsQuery in project elasticsearch by elastic.

the class CustomUnifiedHighlighter method rewriteCustomQuery.

/**
     * Translate custom queries in queries that are supported by the unified highlighter.
     */
private Collection<Query> rewriteCustomQuery(Query query) {
    if (query instanceof MultiPhrasePrefixQuery) {
        MultiPhrasePrefixQuery mpq = (MultiPhrasePrefixQuery) query;
        Term[][] terms = mpq.getTerms();
        int[] positions = mpq.getPositions();
        SpanQuery[] positionSpanQueries = new SpanQuery[positions.length];
        int sizeMinus1 = terms.length - 1;
        for (int i = 0; i < positions.length; i++) {
            SpanQuery[] innerQueries = new SpanQuery[terms[i].length];
            for (int j = 0; j < terms[i].length; j++) {
                if (i == sizeMinus1) {
                    innerQueries[j] = new SpanMultiTermQueryWrapper(new PrefixQuery(terms[i][j]));
                } else {
                    innerQueries[j] = new SpanTermQuery(terms[i][j]);
                }
            }
            if (innerQueries.length > 1) {
                positionSpanQueries[i] = new SpanOrQuery(innerQueries);
            } else {
                positionSpanQueries[i] = innerQueries[0];
            }
        }
        // sum position increments beyond 1
        int positionGaps = 0;
        if (positions.length >= 2) {
            // positions are in increasing order.   max(0,...) is just a safeguard.
            positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
        }
        //if original slop is 0 then require inOrder
        boolean inorder = (mpq.getSlop() == 0);
        return Collections.singletonList(new SpanNearQuery(positionSpanQueries, mpq.getSlop() + positionGaps, inorder));
    } else if (query instanceof CommonTermsQuery) {
        CommonTermsQuery ctq = (CommonTermsQuery) query;
        List<Query> tqs = new ArrayList<>();
        for (Term term : ctq.getTerms()) {
            tqs.add(new TermQuery(term));
        }
        return tqs;
    } else if (query instanceof AllTermQuery) {
        AllTermQuery atq = (AllTermQuery) query;
        return Collections.singletonList(new TermQuery(atq.getTerm()));
    } else if (query instanceof FunctionScoreQuery) {
        return Collections.singletonList(((FunctionScoreQuery) query).getSubQuery());
    } else if (query instanceof FiltersFunctionScoreQuery) {
        return Collections.singletonList(((FiltersFunctionScoreQuery) query).getSubQuery());
    } else {
        return null;
    }
}
Also used : SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) AllTermQuery(org.elasticsearch.common.lucene.all.AllTermQuery) TermQuery(org.apache.lucene.search.TermQuery) SpanMultiTermQueryWrapper(org.apache.lucene.search.spans.SpanMultiTermQueryWrapper) FunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FunctionScoreQuery) FiltersFunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery) Term(org.apache.lucene.index.Term) AllTermQuery(org.elasticsearch.common.lucene.all.AllTermQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) FiltersFunctionScoreQuery(org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery) MultiPhrasePrefixQuery(org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) MultiPhrasePrefixQuery(org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery) ArrayList(java.util.ArrayList) List(java.util.List) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 3 with CommonTermsQuery

use of org.apache.lucene.queries.CommonTermsQuery in project lucene-solr by apache.

the class SynonymTokenizer method testHighlightingCommonTermsQuery.

public void testHighlightingCommonTermsQuery() throws Exception {
    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
    //stop-word
    query.add(new Term(FIELD_NAME, "this"));
    query.add(new Term(FIELD_NAME, "long"));
    query.add(new Term(FIELD_NAME, "very"));
    searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10, new Sort(SortField.FIELD_DOC, SortField.FIELD_SCORE));
    assertEquals(2, hits.totalHits);
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(scorer);
    final int docId0 = hits.scoreDocs[0].doc;
    Document doc = searcher.doc(docId0);
    String storedField = doc.get(FIELD_NAME);
    TokenStream stream = getAnyTokenStream(FIELD_NAME, docId0);
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
    highlighter.setTextFragmenter(fragmenter);
    String fragment = highlighter.getBestFragment(stream, storedField);
    assertEquals("Hello this is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
    final int docId1 = hits.scoreDocs[1].doc;
    doc = searcher.doc(docId1);
    storedField = doc.get(FIELD_NAME);
    stream = getAnyTokenStream(FIELD_NAME, docId1);
    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
    fragment = highlighter.getBestFragment(stream, storedField);
    assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Sort(org.apache.lucene.search.Sort) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery)

Example 4 with CommonTermsQuery

use of org.apache.lucene.queries.CommonTermsQuery in project lucene-solr by apache.

the class FastVectorHighlighterTest method testCommonTermsQueryHighlight.

public void testCommonTermsQueryHighlight() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)));
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    String[] texts = { "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", "JFK has been shot", "John Kennedy has been shot", "This text has a typo in referring to Keneddy", "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
    for (int i = 0; i < texts.length; i++) {
        Document doc = new Document();
        Field field = new Field("field", texts[i], type);
        doc.add(field);
        writer.addDocument(doc);
    }
    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
    query.add(new Term("field", "text"));
    query.add(new Term("field", "long"));
    query.add(new Term("field", "very"));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IndexReader reader = DirectoryReader.open(writer);
    IndexSearcher searcher = newSearcher(reader);
    TopDocs hits = searcher.search(query, 10);
    assertEquals(2, hits.totalHits);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
    String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
    assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);
    fieldQuery = highlighter.getFieldQuery(query, reader);
    bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[1].doc, "field", 1000, 1);
    assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);
    reader.close();
    writer.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) TopDocs(org.apache.lucene.search.TopDocs) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 5 with CommonTermsQuery

use of org.apache.lucene.queries.CommonTermsQuery in project lucene-solr by apache.

the class WeightedSpanTermExtractor method extract.

/**
   * Fills a <code>Map</code> with {@link WeightedSpanTerm}s using the terms from the supplied <code>Query</code>.
   * 
   * @param query
   *          Query to extract Terms from
   * @param terms
   *          Map to place created WeightedSpanTerms in
   * @throws IOException If there is a low-level I/O error
   */
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
    if (query instanceof BoostQuery) {
        BoostQuery boostQuery = (BoostQuery) query;
        extract(boostQuery.getQuery(), boost * boostQuery.getBoost(), terms);
    } else if (query instanceof BooleanQuery) {
        for (BooleanClause clause : (BooleanQuery) query) {
            if (!clause.isProhibited()) {
                extract(clause.getQuery(), boost, terms);
            }
        }
    } else if (query instanceof PhraseQuery) {
        PhraseQuery phraseQuery = ((PhraseQuery) query);
        Term[] phraseQueryTerms = phraseQuery.getTerms();
        if (phraseQueryTerms.length == 1) {
            extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
        } else {
            SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
            for (int i = 0; i < phraseQueryTerms.length; i++) {
                clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
            }
            // sum position increments beyond 1
            int positionGaps = 0;
            int[] positions = phraseQuery.getPositions();
            if (positions.length >= 2) {
                // positions are in increasing order.   max(0,...) is just a safeguard.
                positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
            }
            //if original slop is 0 then require inOrder
            boolean inorder = (phraseQuery.getSlop() == 0);
            SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
            extractWeightedSpanTerms(terms, sp, boost);
        }
    } else if (query instanceof TermQuery || query instanceof SynonymQuery) {
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof SpanQuery) {
        extractWeightedSpanTerms(terms, (SpanQuery) query, boost);
    } else if (query instanceof ConstantScoreQuery) {
        final Query q = ((ConstantScoreQuery) query).getQuery();
        if (q != null) {
            extract(q, boost, terms);
        }
    } else if (query instanceof CommonTermsQuery) {
        // specialized since rewriting would change the result query 
        // this query is TermContext sensitive.
        extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
        for (Query clause : ((DisjunctionMaxQuery) query)) {
            extract(clause, boost, terms);
        }
    } else if (query instanceof ToParentBlockJoinQuery) {
        extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
    } else if (query instanceof ToChildBlockJoinQuery) {
        extract(((ToChildBlockJoinQuery) query).getParentQuery(), boost, terms);
    } else if (query instanceof MultiPhraseQuery) {
        final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
        final Term[][] termArrays = mpq.getTermArrays();
        final int[] positions = mpq.getPositions();
        if (positions.length > 0) {
            int maxPosition = positions[positions.length - 1];
            for (int i = 0; i < positions.length - 1; ++i) {
                if (positions[i] > maxPosition) {
                    maxPosition = positions[i];
                }
            }
            @SuppressWarnings({ "unchecked", "rawtypes" }) final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
            int distinctPositions = 0;
            for (int i = 0; i < termArrays.length; ++i) {
                final Term[] termArray = termArrays[i];
                List<SpanQuery> disjuncts = disjunctLists[positions[i]];
                if (disjuncts == null) {
                    disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
                    ++distinctPositions;
                }
                for (Term aTermArray : termArray) {
                    disjuncts.add(new SpanTermQuery(aTermArray));
                }
            }
            int positionGaps = 0;
            int position = 0;
            final SpanQuery[] clauses = new SpanQuery[distinctPositions];
            for (List<SpanQuery> disjuncts : disjunctLists) {
                if (disjuncts != null) {
                    clauses[position++] = new SpanOrQuery(disjuncts.toArray(new SpanQuery[disjuncts.size()]));
                } else {
                    ++positionGaps;
                }
            }
            if (clauses.length == 1) {
                extractWeightedSpanTerms(terms, clauses[0], boost);
            } else {
                final int slop = mpq.getSlop();
                final boolean inorder = (slop == 0);
                SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
                extractWeightedSpanTerms(terms, sp, boost);
            }
        }
    } else if (query instanceof MatchAllDocsQuery) {
    //nothing
    } else if (query instanceof CustomScoreQuery) {
        extract(((CustomScoreQuery) query).getSubQuery(), boost, terms);
    } else if (isQueryUnsupported(query.getClass())) {
    // nothing
    } else {
        if (query instanceof MultiTermQuery && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
            return;
        }
        Query origQuery = query;
        final IndexReader reader = getLeafContext().reader();
        Query rewritten;
        if (query instanceof MultiTermQuery) {
            rewritten = MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(reader, (MultiTermQuery) query);
        } else {
            rewritten = origQuery.rewrite(reader);
        }
        if (rewritten != origQuery) {
            // only rewrite once and then flatten again - the rewritten query could have a special treatment
            // if this method is overwritten in a subclass or above in the next recursion
            extract(rewritten, boost, terms);
        } else {
            extractUnknownQuery(query, terms);
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) BoostQuery(org.apache.lucene.search.BoostQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BoostQuery(org.apache.lucene.search.BoostQuery) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) List(java.util.List) ArrayList(java.util.ArrayList) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) Term(org.apache.lucene.index.Term) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ToChildBlockJoinQuery(org.apache.lucene.search.join.ToChildBlockJoinQuery) FieldMaskingSpanQuery(org.apache.lucene.search.spans.FieldMaskingSpanQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) BooleanClause(org.apache.lucene.search.BooleanClause) ToParentBlockJoinQuery(org.apache.lucene.search.join.ToParentBlockJoinQuery) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Aggregations

Term (org.apache.lucene.index.Term)8 CommonTermsQuery (org.apache.lucene.queries.CommonTermsQuery)8 ArrayList (java.util.ArrayList)4 Document (org.apache.lucene.document.Document)4 SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery)4 SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery)4 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)4 IndexReader (org.apache.lucene.index.IndexReader)3 TermQuery (org.apache.lucene.search.TermQuery)3 TopDocs (org.apache.lucene.search.TopDocs)3 SpanQuery (org.apache.lucene.search.spans.SpanQuery)3 List (java.util.List)2 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)2 TokenStream (org.apache.lucene.analysis.TokenStream)2 IntPoint (org.apache.lucene.document.IntPoint)2 TextField (org.apache.lucene.document.TextField)2 CustomScoreQuery (org.apache.lucene.queries.CustomScoreQuery)2 BooleanQuery (org.apache.lucene.search.BooleanQuery)2 ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery)2 IndexSearcher (org.apache.lucene.search.IndexSearcher)2