Search in sources :

Example 46 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testOnePrefix.

/**
 * Checks that a single {@link PrefixQuery} is highlighted, also when it is wrapped in a
 * {@link BoostQuery}, and that a prefix clause on a different field yields no highlights.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testOnePrefix() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    // The same Document/Field instances are reused; only the field value changes per document.
    body.setStringValue("This is a test.");
    iw.addDocument(doc);
    body.setStringValue("Test a one sentence document.");
    iw.addDocument(doc);
    // try-with-resources so the reader is released even when an assertion below fails.
    try (IndexReader ir = iw.getReader()) {
        iw.close();
        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
        // wrap in a BoostQuery to also show we see inside it
        Query query = new BoostQuery(new PrefixQuery(new Term("body", "te")), 2.0f);
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        assertEquals(2, topDocs.totalHits);
        String[] snippets = highlighter.highlight("body", query, topDocs);
        assertEquals(2, snippets.length);
        assertEquals("This is a <b>test</b>.", snippets[0]);
        assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
        // wrong field: both documents still match via MatchAllDocsQuery, but the prefix clause
        // targets "bogus", so the "body" snippets must come back without any <b> markup
        BooleanQuery bq = new BooleanQuery.Builder()
            .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("bogus", "te")), BooleanClause.Occur.SHOULD)
            .build();
        topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
        assertEquals(2, topDocs.totalHits);
        snippets = highlighter.highlight("body", bq, topDocs);
        assertEquals(2, snippets.length);
        assertEquals("This is a test.", snippets[0]);
        assertEquals("Test a one sentence document.", snippets[1]);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 47 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighter method testFieldMatcherMultiTermQuery.

/**
 * Exercises field matching for multi-term queries (prefix and fuzzy) across the "title",
 * "text" and "category" fields. Each field is highlighted three ways: with a highlighter whose
 * field matcher accepts every query field, with a highlighter left at its default matcher, and
 * with an explicit matcher that only accepts query clauses of one other field.
 *
 * @throws Exception if indexing, searching or highlighting fails
 */
public void testFieldMatcherMultiTermQuery() throws Exception {
    // try-with-resources so the reader is released even when an assertion below fails.
    // NOTE(review): indexSomeFields() is defined elsewhere; the assertions below expect it to
    // index exactly one matching document with "title", "text" and "category" fields.
    try (IndexReader ir = indexSomeFields()) {
        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {

            @Override
            protected Predicate<String> getFieldMatcher(String field) {
                // requireFieldMatch=false
                return (qf) -> true;
            }
        };
        UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder()
            .add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("text", "fie")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("text", "thi")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("title", "thi")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("category", "thi")), BooleanClause.Occur.SHOULD)
            .add(new FuzzyQuery(new Term("category", "sime"), 1), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("category", "categ")), BooleanClause.Occur.SHOULD);
        Query query = queryBuilder.build();
        // title
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
            // only clauses written against "text" may highlight the title
            highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
            snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
            // undo the explicit matcher before the next section
            highlighterFieldMatch.setFieldMatcher(null);
        }
        // text
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
            // only clauses written against "title" may highlight the text
            highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
            snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
            // undo the explicit matcher before the next section
            highlighterFieldMatch.setFieldMatcher(null);
        }
        // category
        {
            TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
            assertEquals(1, topDocs.totalHits);
            String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
            snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
            // only clauses written against "title" may highlight the category
            highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
            snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
            assertEquals(1, snippets.length);
            assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
            // undo the explicit matcher
            highlighterFieldMatch.setFieldMatcher(null);
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) Arrays(java.util.Arrays) ParametersFactory(com.carrotsearch.randomizedtesting.annotations.ParametersFactory) ScoreDoc(org.apache.lucene.search.ScoreDoc) SuppressCodecs(org.apache.lucene.util.LuceneTestCase.SuppressCodecs) FieldType(org.apache.lucene.document.FieldType) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Term(org.apache.lucene.index.Term) PhraseQuery(org.apache.lucene.search.PhraseQuery) Document(org.apache.lucene.document.Document) Map(java.util.Map) Directory(org.apache.lucene.store.Directory) After(org.junit.After) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) Before(org.junit.Before) TopDocs(org.apache.lucene.search.TopDocs) Predicate(java.util.function.Predicate) Sort(org.apache.lucene.search.Sort) PrefixQuery(org.apache.lucene.search.PrefixQuery) IOException(java.io.IOException) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) InputStreamReader(java.io.InputStreamReader) StandardCharsets(java.nio.charset.StandardCharsets) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BooleanClause(org.apache.lucene.search.BooleanClause) List(java.util.List) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BreakIterator(java.text.BreakIterator) Field(org.apache.lucene.document.Field) LuceneTestCase(org.apache.lucene.util.LuceneTestCase) BufferedReader(java.io.BufferedReader) IndexOptions(org.apache.lucene.index.IndexOptions) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Collections(java.util.Collections) IndexReader(org.apache.lucene.index.IndexReader) IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) 
FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) TopDocs(org.apache.lucene.search.TopDocs) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) IndexReader(org.apache.lucene.index.IndexReader)

Example 48 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testRussianPrefixQuery.

// LUCENE-7717 bug, ordering of MTQ AutomatonQuery detection
/**
 * Indexes a single-character Cyrillic term and checks that a {@link PrefixQuery} on that same
 * character is highlighted.
 *
 * @throws IOException if indexing, searching or highlighting fails
 */
public void testRussianPrefixQuery() throws IOException {
    // The analyzer is Closeable and was previously never closed; scope it with
    // try-with-resources so it is released on every exit path.
    try (Analyzer analyzer = new StandardAnalyzer()) {
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
        String field = "title";
        Document doc = new Document();
        // Russian char; uses 2 UTF8 bytes
        doc.add(new Field(field, "я", fieldType));
        iw.addDocument(doc);
        // The reader is likewise closed even when the assertion below fails.
        try (IndexReader ir = iw.getReader()) {
            iw.close();
            IndexSearcher searcher = newSearcher(ir);
            Query query = new PrefixQuery(new Term(field, "я"));
            TopDocs topDocs = searcher.search(query, 1);
            UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
            String[] snippets = highlighter.highlight(field, query, topDocs);
            assertEquals("[<b>я</b>]", Arrays.toString(snippets));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) SpanFirstQuery(org.apache.lucene.search.spans.SpanFirstQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) SpanBoostQuery(org.apache.lucene.search.spans.SpanBoostQuery) SpanNotQuery(org.apache.lucene.search.spans.SpanNotQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) SpanOrQuery(org.apache.lucene.search.spans.SpanOrQuery) Term(org.apache.lucene.index.Term) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 49 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class TestUnifiedHighlighterMTQ method testTokenStreamIsClosed.

/**
 * Highlights with an analyzer whose tokenizer always throws from {@code incrementToken()},
 * then checks that a fresh token stream can still be obtained from that analyzer and reset.
 * When {@code fieldType} is {@code UHTestHelper.reanalysisType} the highlight call itself is
 * required to fail with the "EXPECTED" exception; otherwise either outcome is accepted.
 *
 * @throws Exception if anything other than the deliberately injected failure occurs
 */
public void testTokenStreamIsClosed() throws Exception {
    // note: test is a derivative of testWithMaxLen()
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);
    body.setStringValue("Alpha Bravo foo foo foo. Foo foo Alpha Bravo");
    if (random().nextBoolean()) {
        // sometimes add a 2nd value (maybe matters?)
        doc.add(new Field("body", "2nd value Alpha Bravo", fieldType));
    }
    iw.addDocument(doc);
    // use this buggy Analyzer at highlight time
    Analyzer buggyAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer buggyTokenizer = new Tokenizer() {

                @Override
                public boolean incrementToken() throws IOException {
                    throw new IOException("EXPECTED");
                }
            };
            return new TokenStreamComponents(buggyTokenizer);
        }
    };
    // try-with-resources so the reader is released even if an unexpected exception escapes.
    try (IndexReader ir = iw.getReader()) {
        iw.close();
        IndexSearcher searcher = newSearcher(ir);
        UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, buggyAnalyzer);
        highlighter.setHandleMultiTermQuery(true);
        if (rarely()) {
            // a little past first sentence
            highlighter.setMaxLength(25);
        }
        // Randomly pick a term clause, a prefix clause, or both; at least one is always added.
        boolean hasClauses = false;
        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
        if (random().nextBoolean()) {
            hasClauses = true;
            queryBuilder.add(new TermQuery(new Term("body", "alpha")), BooleanClause.Occur.MUST);
        }
        if (!hasClauses || random().nextBoolean()) {
            queryBuilder.add(new PrefixQuery(new Term("body", "bra")), BooleanClause.Occur.MUST);
        }
        BooleanQuery query = queryBuilder.build();
        TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
        try {
            // don't even care what the results are; just want to test exception behavior
            highlighter.highlight("body", query, topDocs, 2);
            if (fieldType == UHTestHelper.reanalysisType) {
                fail("Expecting EXPECTED IOException");
            }
        } catch (Exception e) {
            // getMessage() may be null; treat that as "not our injected failure" and rethrow the
            // original exception instead of masking it with a NullPointerException.
            String message = e.getMessage();
            if (message == null || !message.contains("EXPECTED")) {
                throw e;
            }
        }
    }
    try (TokenStream ts = buggyAnalyzer.tokenStream("body", "anything")) {
        // hopefully doesn't throw
        ts.reset();
        // don't call incrementToken; we know it's buggy ;-)
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TokenStream(org.apache.lucene.analysis.TokenStream) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) IOException(java.io.IOException) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) PrefixQuery(org.apache.lucene.search.PrefixQuery) IndexReader(org.apache.lucene.index.IndexReader) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 50 with PrefixQuery

use of org.apache.lucene.search.PrefixQuery in project lucene-solr by apache.

the class HighlighterTest method testGetBestFragmentsMultiTerm.

/**
 * Runs standard highlighting for a boolean query made of an optional term clause ("john") and
 * an optional prefix clause ("kenn") that uses the scoring boolean rewrite, and expects
 * exactly five highlights.
 */
public void testGetBestFragmentsMultiTerm() throws Exception {
    new TestHighlightRunner() {

        @Override
        public void run() throws Exception {
            numHighlights = 0;

            // Prefix clause, rewritten as a scoring boolean query.
            PrefixQuery kennPrefix = new PrefixQuery(new Term(FIELD_NAME, "kenn"));
            kennPrefix.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);

            // Term clause first, prefix clause second; both optional.
            BooleanQuery johnOrKenn = new BooleanQuery.Builder()
                .add(new TermQuery(new Term(FIELD_NAME, "john")), Occur.SHOULD)
                .add(kennPrefix, Occur.SHOULD)
                .build();

            doSearching(johnOrKenn);
            doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);

            String failureMessage = "Failed to find correct number of highlights " + numHighlights + " found";
            assertTrue(failureMessage, numHighlights == 5);
        }
    }.start();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) TermQuery(org.apache.lucene.search.TermQuery) TestHighlightRunner(org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner) PrefixQuery(org.apache.lucene.search.PrefixQuery) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term)

Aggregations

PrefixQuery (org.apache.lucene.search.PrefixQuery): 68, Term (org.apache.lucene.index.Term): 62, BooleanQuery (org.apache.lucene.search.BooleanQuery): 34, Query (org.apache.lucene.search.Query): 30, TermQuery (org.apache.lucene.search.TermQuery): 29, FuzzyQuery (org.apache.lucene.search.FuzzyQuery): 27, WildcardQuery (org.apache.lucene.search.WildcardQuery): 23, BoostQuery (org.apache.lucene.search.BoostQuery): 20, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 19, SpanNearQuery (org.apache.lucene.search.spans.SpanNearQuery): 15, Document (org.apache.lucene.document.Document): 14, IndexSearcher (org.apache.lucene.search.IndexSearcher): 14, MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 14, PhraseQuery (org.apache.lucene.search.PhraseQuery): 14, RegexpQuery (org.apache.lucene.search.RegexpQuery): 13, TopDocs (org.apache.lucene.search.TopDocs): 13, SpanOrQuery (org.apache.lucene.search.spans.SpanOrQuery): 12, IndexReader (org.apache.lucene.index.IndexReader): 11, TermRangeQuery (org.apache.lucene.search.TermRangeQuery): 11, Field (org.apache.lucene.document.Field): 10