Search in sources :

Example 66 with PhraseQuery

Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.

In class SynonymTokenizer, method testRepeatingTermsInMultBooleans.

// LUCENE-1752
// The same terms ("b", "c") occur in phrases under two different nested
// boolean clauses. Each clause is a SHOULD-disjunction of one phrase over
// fields f1 and f2, and both clauses are required (MUST). The text is
// highlighted for f1 only, and numHighlights is then expected to be 7.
public void testRepeatingTermsInMultBooleans() throws Exception {
    final String text = "x y z a b c d e f g b c g";
    final String firstField = "f1";
    final String secondField = "f2";

    // Clause one: the phrase "a b c d" in either field.
    BooleanQuery.Builder abcdClause = new BooleanQuery.Builder();
    abcdClause.add(new PhraseQuery(firstField, "a", "b", "c", "d"), Occur.SHOULD);
    abcdClause.add(new PhraseQuery(secondField, "a", "b", "c", "d"), Occur.SHOULD);

    // Clause two: the phrase "b c g" in either field.
    BooleanQuery.Builder bcgClause = new BooleanQuery.Builder();
    bcgClause.add(new PhraseQuery(firstField, "b", "c", "g"), Occur.SHOULD);
    bcgClause.add(new PhraseQuery(secondField, "b", "c", "g"), Occur.SHOULD);

    // Top-level query requires both clauses.
    BooleanQuery.Builder root = new BooleanQuery.Builder();
    root.add(abcdClause.build(), Occur.MUST);
    root.add(bcgClause.build(), Occur.MUST);

    // Score against f1 only, with multi-term query expansion switched off.
    QueryScorer queryScorer = new QueryScorer(root.build(), firstField);
    queryScorer.setExpandMultiTermQuery(false);

    // This test instance is passed to the Highlighter as its first argument.
    Highlighter highlighter = new Highlighter(this, queryScorer);
    Analyzer whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    highlighter.getBestFragment(whitespaceAnalyzer, firstField, text);

    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 7);
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockPayloadAnalyzer(org.apache.lucene.analysis.MockPayloadAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer)

Example 67 with PhraseQuery

Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.

In class SynonymTokenizer, method testHighlighterWithPhraseQuery.

// Builds a one-term PhraseQuery from raw term bytes ("uchu") on the
// "substring" field and hands it to assertHighlighting together with the
// input "Buchung" and the marked-up string "B<b>uchu</b>ng".
// NOTE(review): assertHighlighting is defined elsewhere; presumably it checks
// that highlighting the input yields the marked-up string - confirm.
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
    final String field = "substring";
    final BytesRef[] termBytes = { new BytesRef("uchu") };
    final PhraseQuery singleTermPhrase = new PhraseQuery(field, termBytes);
    final SimpleHTMLFormatter boldFormatter = new SimpleHTMLFormatter("<b>", "</b>");
    assertHighlighting(singleTermPhrase, boldFormatter, "Buchung", "B<b>uchu</b>ng", field);
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BytesRef(org.apache.lucene.util.BytesRef)

Example 68 with PhraseQuery

Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.

In class HighlighterPhraseTest, method testConcurrentSpan.

// Indexes one document whose field is fed by TokenStreamConcurrent with term
// vectors (positions and offsets) enabled, finds it with a span-near query
// for "fox" followed by "jumped", and checks that highlighting from the
// stored term vector produces the same fragment as highlighting from a fresh
// TokenStreamConcurrent.
public void testConcurrentSpan() throws IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox jumped";
    final Directory directory = newDirectory();
    try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)))) {
        final Document document = new Document();
        // Not stored, but with full term vectors so the token stream can be
        // rebuilt from the index later.
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.setStoreTermVectorOffsets(true);
        customType.setStoreTermVectorPositions(true);
        customType.setStoreTermVectors(true);
        document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
        indexWriter.addDocument(document);
    }
    // The reader is closed by try-with-resources before the finally block
    // closes the directory, so the directory is released even if closing the
    // reader fails.
    try (IndexReader indexReader = DirectoryReader.open(directory)) {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD, "fox")), new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
        final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
        // Record every matching document as a top-level doc id.
        indexSearcher.search(phraseQuery, new SimpleCollector() {

            // docBase of the leaf currently being collected.
            private int baseDoc;

            @Override
            public void collect(int i) {
                // i is leaf-relative; add the leaf's docBase.
                bitset.set(this.baseDoc + i);
            }

            @Override
            protected void doSetNextReader(LeafReaderContext context) throws IOException {
                this.baseDoc = context.docBase;
            }

            @Override
            public void setScorer(org.apache.lucene.search.Scorer scorer) {
                // Scores are not used.
            }

            @Override
            public boolean needsScores() {
                return false;
            }
        });
        assertEquals(1, bitset.cardinality());
        final int maxDoc = indexReader.maxDoc();
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), new QueryScorer(phraseQuery));
        // Walk every doc id and skip unset bits. A bound of "maxDoc - 1"
        // would make the loop body unreachable for a one-document index,
        // leaving the highlighting comparison untested.
        for (int docId = 0; docId < maxDoc; docId++) {
            if (!bitset.get(docId)) {
                continue;
            }
            assertEquals(0, docId);
            final TokenStream tokenStream = TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(docId), -1);
            assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT), highlighter.getBestFragment(tokenStream, TEXT));
        }
    } finally {
        directory.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TokenStream(org.apache.lucene.analysis.TokenStream) Query(org.apache.lucene.search.Query) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) SpanQuery(org.apache.lucene.search.spans.SpanQuery) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery) Document(org.apache.lucene.document.Document) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) SimpleCollector(org.apache.lucene.search.SimpleCollector) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SpanTermQuery(org.apache.lucene.search.spans.SpanTermQuery) FixedBitSet(org.apache.lucene.util.FixedBitSet) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Directory(org.apache.lucene.store.Directory) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) FieldType(org.apache.lucene.document.FieldType) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) SpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery)

Example 69 with PhraseQuery

Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.

In class HighlighterPhraseTest, method testStopWords.

// Shows the need to sum the increments in WeightedSpanTermExtractor: the
// analyzer is given an English stop set, and the phrase query places its
// three terms at positions 0, 3 and 7.
public void testStopWords() throws IOException, InvalidTokenOffsetsException {
    final MockAnalyzer analyzerWithStops = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    final String text = "the ab the the cd the the the ef the";
    final Directory dir = newDirectory();

    // Index the single test document.
    try (IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(analyzerWithStops))) {
        final Document doc = new Document();
        doc.add(newTextField(FIELD, text, Store.YES));
        writer.addDocument(doc);
    }

    try (IndexReader reader = DirectoryReader.open(dir)) {
        assertEquals(1, reader.numDocs());
        final IndexSearcher searcher = newSearcher(reader);

        // Equivalent of "ab the the cd the the the ef".
        final PhraseQuery.Builder gappedPhrase = new PhraseQuery.Builder();
        gappedPhrase.add(new Term(FIELD, "ab"), 0);
        gappedPhrase.add(new Term(FIELD, "cd"), 3);
        gappedPhrase.add(new Term(FIELD, "ef"), 7);
        final PhraseQuery phraseQuery = gappedPhrase.build();

        // The query must match the one indexed document.
        final TopDocs hits = searcher.search(phraseQuery, 100);
        assertEquals(1, hits.totalHits);

        // Up to 10 fragments are requested; exactly one is expected.
        final QueryScorer scorer = new QueryScorer(phraseQuery);
        final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), scorer);
        final String[] fragments = highlighter.getBestFragments(analyzerWithStops, FIELD, text, 10);
        assertEquals(1, fragments.length);
    } finally {
        dir.close();
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) IndexReader(org.apache.lucene.index.IndexReader) Directory(org.apache.lucene.store.Directory)

Example 70 with PhraseQuery

Use of org.apache.lucene.search.PhraseQuery in the Apache lucene-solr project.

In class SynonymTokenizer, method testHighlightingWithDefaultField.

// Checks highlighting in two cases: a query on FIELD_NAME highlighted against
// a differently named field, and a query on the field "text" highlighted
// against FIELD_NAME.
public void testHighlightingWithDefaultField() throws Exception {
    String s1 = "I call our world Flatland, not because we call it so,";

    // Verify that a query against the default field results in text being
    // highlighted regardless of the field name.
    PhraseQuery q = new PhraseQuery(3, FIELD_NAME, "world", "flatland");
    String expected = "I call our <B>world</B> <B>Flatland</B>, not because we call it so,";
    String observed = highlightField(q, "SOME_FIELD_NAME", s1);
    if (VERBOSE) {
        System.out.println("Expected: \"" + expected + "\"\n" + "Observed: \"" + observed + "\"");
    }
    assertEquals("Query in the default field results in text for *ANY* field being highlighted", expected, observed);

    // Verify that a query against a named field does not result in any
    // highlighting when the query field name differs from the name of the
    // field being highlighted, which in this example happens to be the
    // default field name.
    q = new PhraseQuery(3, "text", "world", "flatland");
    expected = s1;
    observed = highlightField(q, FIELD_NAME, s1);
    if (VERBOSE) {
        System.out.println("Expected: \"" + expected + "\"\n" + "Observed: \"" + observed + "\"");
    }
    // Assert on the values computed (and logged) above instead of running
    // the highlighter a second time.
    assertEquals("Query in a named field does not result in highlighting when that field isn't in the query", expected, observed);
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery)

Aggregations

PhraseQuery (org.apache.lucene.search.PhraseQuery): 105, Term (org.apache.lucene.index.Term): 56, TermQuery (org.apache.lucene.search.TermQuery): 43, BooleanQuery (org.apache.lucene.search.BooleanQuery): 39, MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery): 37, Document (org.apache.lucene.document.Document): 36, Query (org.apache.lucene.search.Query): 30, Directory (org.apache.lucene.store.Directory): 26, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 25, IndexSearcher (org.apache.lucene.search.IndexSearcher): 22, IndexReader (org.apache.lucene.index.IndexReader): 20, Field (org.apache.lucene.document.Field): 17, TextField (org.apache.lucene.document.TextField): 16, TopDocs (org.apache.lucene.search.TopDocs): 16, SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 16, TokenStream (org.apache.lucene.analysis.TokenStream): 15, BoostQuery (org.apache.lucene.search.BoostQuery): 14, IndexWriter (org.apache.lucene.index.IndexWriter): 13, ArrayList (java.util.ArrayList): 11, CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 10