Search in sources :

Example 86 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class FastVectorHighlighterTest method testPhraseHighlightTest.

// see LUCENE-4899
/**
 * Indexes one document with two term-vector-enabled fields ("long_term" and "no_long_term")
 * and checks the fragments {@code FastVectorHighlighter} returns for boolean term queries
 * and for sloppy phrase queries at fragment sizes 18 and 30.
 *
 * <p>The directory, writer and reader are held in try-with-resources so they are released
 * even when one of the assertions fails.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testPhraseHighlightTest() throws IOException {
    try (Directory dir = newDirectory();
            IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
        Document doc = new Document();
        // stored text field that also keeps term vectors with positions and offsets
        FieldType type = new FieldType(TextField.TYPE_STORED);
        type.setStoreTermVectorOffsets(true);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectors(true);
        type.freeze();
        Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type);
        Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
        doc.add(longTermField);
        doc.add(noLongTermField);
        writer.addDocument(doc);
        FastVectorHighlighter highlighter = new FastVectorHighlighter();
        try (IndexReader reader = DirectoryReader.open(writer)) {
            int docId = 0;
            String field = "no_long_term";
            {
                // three required term queries
                BooleanQuery.Builder query = new BooleanQuery.Builder();
                query.add(new TermQuery(new Term(field, "test")), Occur.MUST);
                query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
                String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
                // highlighted results are centered
                assertEquals(1, bestFragments.length);
                assertEquals("<b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }
            {
                // phrase query (slop 5) combined with two of its own terms as term queries
                BooleanQuery.Builder query = new BooleanQuery.Builder();
                PhraseQuery pq = new PhraseQuery(5, field, "test", "foo", "highlighed");
                query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.add(pq, Occur.MUST);
                query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
                String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
                // a fragment size of 18 yields no fragment for this query
                assertEquals(0, bestFragments.length);
                bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 30, 1);
                // a fragment size of 30 yields one fragment with all three terms highlighted
                assertEquals(1, bestFragments.length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }
            {
                // stand-alone phrase query with slop 3
                PhraseQuery query = new PhraseQuery(3, field, "test", "foo", "highlighed");
                FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
                String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
                // a fragment size of 18 yields no fragment for this query
                assertEquals(0, bestFragments.length);
                bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 30, 1);
                // a fragment size of 30 yields one fragment with all three terms highlighted
                assertEquals(1, bestFragments.length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }
            {
                // stand-alone phrase query with slop 30: still no fragment at size 18
                PhraseQuery query = new PhraseQuery(30, field, "test", "foo", "highlighed");
                FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
                String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
                assertEquals(0, bestFragments.length);
            }
            {
                // the same phrase query used both at the top level and inside a nested boolean query
                BooleanQuery.Builder query = new BooleanQuery.Builder();
                PhraseQuery pq = new PhraseQuery(5, field, "test", "foo", "highlighed");
                BooleanQuery.Builder inner = new BooleanQuery.Builder();
                inner.add(pq, Occur.MUST);
                inner.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.add(inner.build(), Occur.MUST);
                query.add(pq, Occur.MUST);
                query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
                String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
                assertEquals(0, bestFragments.length);
                bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 30, 1);
                // a fragment size of 30 yields one fragment with all three terms highlighted
                assertEquals(1, bestFragments.length);
                assertEquals("a <b>test</b> where <b>foo</b> is <b>highlighed</b> and", bestFragments[0]);
            }
            field = "long_term";
            {
                // term queries on the field that contains the very long word
                BooleanQuery.Builder query = new BooleanQuery.Builder();
                query.add(new TermQuery(new Term(field, "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
                query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
                query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
                FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);
                String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, field, 18, 1);
                // the single fragment consists of the long term only
                assertEquals(1, bestFragments.length);
                assertEquals("<b>thisisaverylongwordandmakessurethisfails</b>", bestFragments[0]);
            }
        }
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 87 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class FastVectorHighlighterTest method testPhrasesSpanningFieldValues.

/**
 * Indexes one document with seven values of the same field and checks that
 * {@code FastVectorHighlighter}, with discrete multi-value highlighting enabled, returns one
 * fragment per field value touched by a phrase or term match.
 *
 * <p>The directory, writer and reader are held in try-with-resources so they are released
 * even when an assertion fails. Fragment counts are asserted before individual fragments so
 * that a short result fails as an assertion mismatch, not an array index error.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testPhrasesSpanningFieldValues() throws IOException {
    // positionIncrementGap is 0 so the phrase is found across multiple field
    // values.
    try (Directory dir = newDirectory();
            IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
        // stored text field that also keeps term vectors with positions and offsets
        FieldType type = new FieldType(TextField.TYPE_STORED);
        type.setStoreTermVectorOffsets(true);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectors(true);
        type.freeze();
        Document doc = new Document();
        doc.add(new Field("field", "one two three five", type));
        doc.add(new Field("field", "two three four", type));
        doc.add(new Field("field", "five six five", type));
        doc.add(new Field("field", "six seven eight nine eight nine eight " + "nine eight nine eight nine eight nine", type));
        doc.add(new Field("field", "eight nine", type));
        doc.add(new Field("field", "ten eleven", type));
        doc.add(new Field("field", "twelve thirteen", type));
        writer.addDocument(doc);
        BaseFragListBuilder fragListBuilder = new SimpleFragListBuilder();
        BaseFragmentsBuilder fragmentsBuilder = new SimpleFragmentsBuilder();
        fragmentsBuilder.setDiscreteMultiValueHighlighting(true);
        FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, fragListBuilder, fragmentsBuilder);
        try (IndexReader reader = DirectoryReader.open(writer)) {
            int docId = 0;
            // Phrase that spans a field value
            Query q = new PhraseQuery("field", "four", "five");
            FieldQuery fieldQuery = highlighter.getFieldQuery(q, reader);
            String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
            assertEquals(2, bestFragments.length);
            assertEquals("two three <b>four</b>", bestFragments[0]);
            assertEquals("<b>five</b> six five", bestFragments[1]);
            // Phrase that ends at a field value
            q = new PhraseQuery("field", "three", "five");
            fieldQuery = highlighter.getFieldQuery(q, reader);
            bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
            assertEquals(1, bestFragments.length);
            assertEquals("one two <b>three five</b>", bestFragments[0]);
            // Phrase that spans across three values
            q = new PhraseQuery("field", "nine", "ten", "eleven", "twelve");
            fieldQuery = highlighter.getFieldQuery(q, reader);
            bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
            assertEquals(3, bestFragments.length);
            assertEquals("eight <b>nine</b>", bestFragments[0]);
            assertEquals("<b>ten eleven</b>", bestFragments[1]);
            assertEquals("<b>twelve</b> thirteen", bestFragments[2]);
            // Term query that appears in multiple values
            q = new TermQuery(new Term("field", "two"));
            fieldQuery = highlighter.getFieldQuery(q, reader);
            bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 1000, 1000);
            assertEquals(2, bestFragments.length);
            assertEquals("one <b>two</b> three five", bestFragments[0]);
            assertEquals("<b>two</b> three four", bestFragments[1]);
        }
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) CommonTermsQuery(org.apache.lucene.queries.CommonTermsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) CustomScoreQuery(org.apache.lucene.queries.CustomScoreQuery) TermQuery(org.apache.lucene.search.TermQuery) SynonymQuery(org.apache.lucene.search.SynonymQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) FieldType(org.apache.lucene.document.FieldType) StoredField(org.apache.lucene.document.StoredField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 88 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterStrictPhrases method testPreSpanQueryRewrite.

/**
 * Checks that a {@code UnifiedHighlighter} whose {@code preSpanQueryRewrite} unwraps
 * {@code MyQuery} highlights the terms of the wrapped query alongside a boosted phrase query
 * when strict phrase highlighting is switched on.
 *
 * @throws IOException if indexing or searching fails
 */
public void testPreSpanQueryRewrite() throws IOException {
    indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
    initReaderSearcherHighlighter();

    // Highlighter that exposes the query wrapped by MyQuery; every other query type yields null.
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected Collection<Query> preSpanQueryRewrite(Query query) {
            if (!(query instanceof MyQuery)) {
                return null;
            }
            MyQuery custom = (MyQuery) query;
            return Collections.singletonList(custom.wrapped);
        }
    };
    highlighter.setHighlightPhrasesStrictly(true);

    BooleanQuery.Builder totalBuilder = new BooleanQuery.Builder();

    // Boosted exact phrase "accord and satisfaction".
    PhraseQuery exactPhrase = new PhraseQuery("body", "accord", "and", "satisfaction");
    Query boostedPhrase = new BoostQuery(exactPhrase, 2.0f);

    // Three optional terms of which at least two must match, wrapped in MyQuery.
    BooleanQuery.Builder termsBuilder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(2);
    termsBuilder.add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD);
    termsBuilder.add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD);
    termsBuilder.add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD);
    Query anyTwoTerms = termsBuilder.build();
    Query wrappedTerms = new MyQuery(anyTwoTerms);

    totalBuilder.add(boostedPhrase, BooleanClause.Occur.SHOULD);
    totalBuilder.add(wrappedTerms, BooleanClause.Occur.SHOULD);
    Query combined = totalBuilder.build();

    TopDocs hits = searcher.search(combined, 10, Sort.INDEXORDER);
    assertEquals(1, hits.totalHits);

    String[] expected = { "There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary." };
    String[] actual = highlighter.highlight("body", combined, hits);
    assertArrayEquals(expected, actual);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) QueryBuilder(org.apache.lucene.util.QueryBuilder) Term(org.apache.lucene.index.Term) BoostQuery(org.apache.lucene.search.BoostQuery) TopDocs(org.apache.lucene.search.TopDocs) Collection(java.util.Collection)

Example 89 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterStrictPhrases method testMultiValued.

/**
 * Highlights a document whose "body" field has two values and checks three scenarios:
 * phrases that each sit inside one value, a phrase whose slop is one less than the
 * position increment gap (not highlighted across values), and a phrase whose slop equals
 * the gap (highlighted across values).
 *
 * @throws IOException if indexing or searching fails
 */
public void testMultiValued() throws IOException {
    indexWriter.addDocument(newDoc("one bravo three", "four bravo six"));
    initReaderSearcherHighlighter();

    // Scenario 1: two phrases, each within a single value, plus a prefix query.
    BooleanQuery.Builder withinValues = new BooleanQuery.Builder();
    withinValues.add(newPhraseQuery("body", "one bravo"), BooleanClause.Occur.MUST);
    withinValues.add(newPhraseQuery("body", "four bravo"), BooleanClause.Occur.MUST);
    withinValues.add(new PrefixQuery(new Term("body", "br")), BooleanClause.Occur.MUST);
    BooleanQuery withinValuesQuery = withinValues.build();
    TopDocs withinValuesHits = searcher.search(withinValuesQuery, 10, Sort.INDEXORDER);
    String[] withinValuesSnippets = highlighter.highlight("body", withinValuesQuery, withinValuesHits, 2);
    String[] withinValuesExpected = { "<b>one</b> <b>bravo</b> three... <b>four</b> <b>bravo</b> six" };
    assertArrayEquals(withinValuesExpected, withinValuesSnippets);

    // now test phraseQuery won't span across values
    assert indexAnalyzer.getPositionIncrementGap("body") > 0;

    // Scenario 2: slop is 1 too little; won't span
    PhraseQuery tooTight = newPhraseQuery("body", "three four");
    int slopJustShort = indexAnalyzer.getPositionIncrementGap("body") - 1;
    tooTight = setSlop(tooTight, slopJustShort);
    BooleanQuery.Builder notSpanning = new BooleanQuery.Builder();
    notSpanning.add(new TermQuery(new Term("body", "bravo")), BooleanClause.Occur.MUST);
    notSpanning.add(tooTight, BooleanClause.Occur.SHOULD);
    BooleanQuery notSpanningQuery = notSpanning.build();
    TopDocs notSpanningHits = searcher.search(notSpanningQuery, 10);
    String[] notSpanningSnippets = highlighter.highlight("body", notSpanningQuery, notSpanningHits, 2);
    assertEquals("one <b>bravo</b> three... four <b>bravo</b> six", notSpanningSnippets[0]);

    // Scenario 3: add just enough slop to cross the values
    PhraseQuery looseEnough = newPhraseQuery("body", "three four");
    int slopExactGap = indexAnalyzer.getPositionIncrementGap("body");
    looseEnough = setSlop(looseEnough, slopExactGap);
    BooleanQuery.Builder spanning = new BooleanQuery.Builder();
    spanning.add(new TermQuery(new Term("body", "bravo")), BooleanClause.Occur.MUST);
    // must match and it will
    spanning.add(looseEnough, BooleanClause.Occur.MUST);
    BooleanQuery spanningQuery = spanning.build();
    TopDocs spanningHits = searcher.search(spanningQuery, 10);
    assertEquals(1, spanningHits.totalHits);
    String[] spanningSnippets = highlighter.highlight("body", spanningQuery, spanningHits, 2);
    assertEquals("one <b>bravo</b> <b>three</b>... <b>four</b> <b>bravo</b> six", spanningSnippets[0]);
}
Also used : TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) QueryBuilder(org.apache.lucene.util.QueryBuilder) Term(org.apache.lucene.index.Term)

Example 90 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestQPHelper method testCJKPhrase.

/**
 * Checks that the quoted CJK input {@code "中国"}, parsed with {@code SimpleCJKAnalyzer},
 * equals a two-term {@code PhraseQuery} on "field".
 *
 * @throws Exception if query parsing fails
 */
public void testCJKPhrase() throws Exception {
    // the analyzer emits individual CJK chars as terms
    final SimpleCJKAnalyzer cjkAnalyzer = new SimpleCJKAnalyzer();

    // one phrase position per character
    final PhraseQuery twoCharPhrase = new PhraseQuery("field", "中", "国");

    assertEquals(twoCharPhrase, getQuery("\"中国\"", cjkAnalyzer));
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery)

Aggregations

PhraseQuery (org.apache.lucene.search.PhraseQuery): 105; Term (org.apache.lucene.index.Term): 56; TermQuery (org.apache.lucene.search.TermQuery): 43; BooleanQuery (org.apache.lucene.search.BooleanQuery): 39; MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery): 37; Document (org.apache.lucene.document.Document): 36; Query (org.apache.lucene.search.Query): 30; Directory (org.apache.lucene.store.Directory): 26; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 25; IndexSearcher (org.apache.lucene.search.IndexSearcher): 22; IndexReader (org.apache.lucene.index.IndexReader): 20; Field (org.apache.lucene.document.Field): 17; TextField (org.apache.lucene.document.TextField): 16; TopDocs (org.apache.lucene.search.TopDocs): 16; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 16; TokenStream (org.apache.lucene.analysis.TokenStream): 15; BoostQuery (org.apache.lucene.search.BoostQuery): 14; IndexWriter (org.apache.lucene.index.IndexWriter): 13; ArrayList (java.util.ArrayList): 11; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 10