Search in sources:

Example 76 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

In class TestQueryBuilder, method testCJKPhrase.

/**
 * Builds a phrase query from a two-character CJK string and checks that it
 * equals a {@code PhraseQuery} holding one term per character.
 */
public void testCJKPhrase() throws Exception {
    // the analyzer yields each individual CJK char as its own term
    final QueryBuilder queryBuilder = new QueryBuilder(new SimpleCJKAnalyzer());
    final PhraseQuery onePerChar = new PhraseQuery("field", "中", "国");
    assertEquals(onePerChar, queryBuilder.createPhraseQuery("field", "中国"));
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery)

Example 77 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

In class TestQueryBuilder, method testCJKSloppyPhrase.

/**
 * Same as the exact CJK phrase case, but passes a slop of 3 to the builder and
 * expects that slop to be carried by the resulting {@code PhraseQuery}.
 */
public void testCJKSloppyPhrase() throws Exception {
    // the analyzer yields each individual CJK char as its own term
    final QueryBuilder queryBuilder = new QueryBuilder(new SimpleCJKAnalyzer());
    final PhraseQuery sloppyOnePerChar = new PhraseQuery(3, "field", "中", "国");
    assertEquals(sloppyOnePerChar, queryBuilder.createPhraseQuery("field", "中国", 3));
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery)

Example 78 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

In class TestUnifiedHighlighter, method testBuddhism.

/**
 * Indexes one long document in the "body" field, searches it with the phrase
 * query (buddhist, origins), and checks that with strict phrase highlighting
 * disabled the single returned snippet contains both terms highlighted side
 * by side.
 *
 * <p>The writer and the reader are managed with try-with-resources so they are
 * released even when indexing, searching, or one of the assertions fails.
 */
public void testBuddhism() throws Exception {
    String text = "This eight-volume set brings together seminal papers in Buddhist studies from a vast "
        + "range of academic disciplines published over the last forty years. With a new introduction "
        + "by the editor, this collection is a unique and unrivalled research resource for both "
        + "student and scholar. Coverage includes: - Buddhist origins; early history of Buddhism in "
        + "South and Southeast Asia - early Buddhist Schools and Doctrinal History; Theravada Doctrine "
        + "- the Origins and nature of Mahayana Buddhism; some Mahayana religious topics - Abhidharma "
        + "and Madhyamaka - Yogacara, the Epistemological tradition, and Tathagatagarbha - Tantric "
        + "Buddhism (Including China and Japan); Buddhism in Nepal and Tibet - Buddhism in South and "
        + "Southeast Asia, and - Buddhism in China, East Asia, and Japan.";

    final IndexReader opened;
    // The writer is closed right after the reader is obtained, as before,
    // but now also when addDocument() or getReader() throws.
    try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer)) {
        Field body = new Field("body", text, fieldType);
        Document document = new Document();
        document.add(body);
        iw.addDocument(document);
        opened = iw.getReader();
    }

    // Guarantees ir.close() even if a search, highlight call, or assertion fails.
    try (IndexReader ir = opened) {
        IndexSearcher searcher = newSearcher(ir);
        PhraseQuery query = new PhraseQuery.Builder()
            .add(new Term("body", "buddhist"))
            .add(new Term("body", "origins"))
            .build();
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(1, topDocs.totalHits);

        UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
        // non-strict mode: phrase terms are highlighted without strict phrase matching
        highlighter.setHighlightPhrasesStrictly(false);
        String[] snippets = highlighter.highlight("body", query, topDocs, 2);
        assertEquals(1, snippets.length);
        assertTrue(snippets[0].contains("<b>Buddhist</b> <b>origins</b>"));
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PhraseQuery(org.apache.lucene.search.PhraseQuery) IndexReader(org.apache.lucene.index.IndexReader) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 79 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

In class TestUnifiedHighlighter, method testFieldMatcherPhraseQuery.

/**
 * Checks how phrase-query highlighting depends on the highlighter's field matcher.
 *
 * <p>A boolean query made of SHOULD phrase clauses over the "title", "category"
 * and "text" fields is highlighted against each of those three fields with:
 * <ul>
 *   <li>a highlighter whose field matcher accepts every query field,</li>
 *   <li>a highlighter using the default field matcher, and</li>
 *   <li>that same highlighter with an explicit matcher accepting a single
 *       query field, reset to {@code null} afterwards.</li>
 * </ul>
 * The exact expected snippet is asserted for every combination.
 *
 * <p>The reader is held in try-with-resources so it is closed even when one of
 * the assertions fails.
 */
public void testFieldMatcherPhraseQuery() throws Exception {
    try (IndexReader ir = indexSomeFields()) {
        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {

            @Override
            protected Predicate<String> getFieldMatcher(String field) {
                // requireFieldMatch=false
                return (qf) -> true;
            }
        };
        UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
        // One SHOULD clause per phrase; the int-first constructors carry a slop value.
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder()
            .add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery(2, "category", "this", "is", "the", "field"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery("text", "this", "is"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery("category", "this", "is"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery(1, "text", "you", "can", "put", "text"), BooleanClause.Occur.SHOULD);
        Query query = queryBuilder.build();
        // title
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
            // only accept clauses written against the "text" field
            highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
            snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
            // restore the default matcher for the next section
            highlighterFieldMatch.setFieldMatcher(null);
        }
        // text
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
            // only accept clauses written against the "title" field
            highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
            snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
            // restore the default matcher for the next section
            highlighterFieldMatch.setFieldMatcher(null);
        }
        // category
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
            // only accept clauses written against the "text" field
            highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
            snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
            // restore the default matcher
            highlighterFieldMatch.setFieldMatcher(null);
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Arrays(java.util.Arrays) ParametersFactory(com.carrotsearch.randomizedtesting.annotations.ParametersFactory) ScoreDoc(org.apache.lucene.search.ScoreDoc) SuppressCodecs(org.apache.lucene.util.LuceneTestCase.SuppressCodecs) FieldType(org.apache.lucene.document.FieldType) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) Document(org.apache.lucene.document.Document) Map(java.util.Map) Directory(org.apache.lucene.store.Directory) After(org.junit.After) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Before(org.junit.Before) TopDocs(org.apache.lucene.search.TopDocs) Predicate(java.util.function.Predicate) Sort(org.apache.lucene.search.Sort) PrefixQuery(org.apache.lucene.search.PrefixQuery) IOException(java.io.IOException) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) InputStreamReader(java.io.InputStreamReader) StandardCharsets(java.nio.charset.StandardCharsets) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BooleanClause(org.apache.lucene.search.BooleanClause) List(java.util.List) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BreakIterator(java.text.BreakIterator) Field(org.apache.lucene.document.Field) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) BufferedReader(java.io.BufferedReader) IndexOptions(org.apache.lucene.index.IndexOptions) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Collections(java.util.Collections) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) TopDocs(org.apache.lucene.search.TopDocs) BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) 
PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) IndexReader(org.apache.lucene.index.IndexReader)

Example 80 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

In class SynonymTokenizer, method testPosTermStdTerm.

// position sensitive query added after position insensitive query
/**
 * Runs a boolean query whose first SHOULD clause is a plain term query ("y")
 * and whose second SHOULD clause is the phrase query ("x", "y", "z"), then
 * highlights every hit and asserts on the running highlight counter.
 *
 * <p>NOTE(review): {@code query}, {@code hits}, {@code searcher} and
 * {@code numHighlights} are not declared here; presumably they are fields
 * populated by {@code doSearching} and by the formatter callback — confirm
 * against the enclosing class.
 */
public void testPosTermStdTerm() throws Exception {
    final BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    // term clause goes in first, phrase clause second
    clauses.add(new TermQuery(new Term(FIELD_NAME, "y")), Occur.SHOULD);
    clauses.add(new PhraseQuery(FIELD_NAME, "x", "y", "z"), Occur.SHOULD);
    doSearching(clauses.build());

    final int fragmentLimit = 2;
    final Highlighter highlighter = new Highlighter(this, new QueryScorer(query, FIELD_NAME));

    int hitIndex = 0;
    while (hitIndex < hits.totalHits) {
        final int docId = hits.scoreDocs[hitIndex].doc;
        final String storedText = searcher.doc(docId).get(FIELD_NAME);
        final TokenStream tokens = getAnyTokenStream(FIELD_NAME, docId);
        // a fresh 40-character fragmenter is installed for every hit
        highlighter.setTextFragmenter(new SimpleFragmenter(40));
        final String fragments = highlighter.getBestFragments(tokens, storedText, fragmentLimit, "...");
        if (VERBOSE) {
            System.out.println("\t" + fragments);
        }
        assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
        hitIndex++;
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint)

Aggregations

PhraseQuery (org.apache.lucene.search.PhraseQuery)105 Term (org.apache.lucene.index.Term)56 TermQuery (org.apache.lucene.search.TermQuery)43 BooleanQuery (org.apache.lucene.search.BooleanQuery)39 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)37 Document (org.apache.lucene.document.Document)36 Query (org.apache.lucene.search.Query)30 Directory (org.apache.lucene.store.Directory)26 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)25 IndexSearcher (org.apache.lucene.search.IndexSearcher)22 IndexReader (org.apache.lucene.index.IndexReader)20 Field (org.apache.lucene.document.Field)17 TextField (org.apache.lucene.document.TextField)16 TopDocs (org.apache.lucene.search.TopDocs)16 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)16 TokenStream (org.apache.lucene.analysis.TokenStream)15 BoostQuery (org.apache.lucene.search.BoostQuery)14 IndexWriter (org.apache.lucene.index.IndexWriter)13 ArrayList (java.util.ArrayList)11 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)10