Search in sources :

Example 31 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class SynonymTokenizer method testSimpleSpanFragmenter.

/**
 * Runs two phrase searches and highlights every hit using a
 * {@code SimpleSpanFragmenter} (fragment size 5 for the first search, 20 for
 * the second).
 *
 * <p>The method contains no assertions; the highlighted fragments are only
 * printed when {@code VERBOSE} is set, so it passes as long as searching and
 * highlighting complete without throwing.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testSimpleSpanFragmenter() throws Exception {
    final int maxNumFragmentsRequired = 2;

    // First phrase: terms placed at explicit positions 0, 2, 5 and 6.
    Builder builder = new PhraseQuery.Builder();
    builder.add(new Term(FIELD_NAME, "piece"), 0);
    builder.add(new Term(FIELD_NAME, "text"), 2);
    builder.add(new Term(FIELD_NAME, "very"), 5);
    builder.add(new Term(FIELD_NAME, "long"), 6);
    PhraseQuery phraseQuery = builder.build();
    doSearching(phraseQuery);

    // NOTE(review): `query` and `hits` are declared outside this method;
    // presumably doSearching(...) refreshes both - confirm.
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(this, scorer);
    for (int i = 0; i < hits.totalHits; i++) {
        final int docId = hits.scoreDocs[i].doc;
        final Document doc = searcher.doc(docId);
        String text = doc.get(FIELD_NAME);
        TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
        String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        if (VERBOSE) {
            System.out.println("\t" + result);
        }
    }

    // Second phrase. Search for the phrase that was just built: the earlier
    // code passed the stale `query` here, so "been shot" was never searched.
    phraseQuery = new PhraseQuery(FIELD_NAME, "been", "shot");
    doSearching(phraseQuery);

    scorer = new QueryScorer(query, FIELD_NAME);
    highlighter = new Highlighter(this, scorer);
    for (int i = 0; i < hits.totalHits; i++) {
        String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
        // This pass re-analyzes the stored text instead of using getAnyTokenStream.
        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
        highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
        String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        if (VERBOSE) {
            System.out.println("\t" + result);
        }
    }
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint)

Example 32 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class SynonymTokenizer method testSimpleQueryScorerPhraseHighlighting.

/**
 * Highlights the hits of three positional phrase queries and checks the
 * {@code numHighlights} counter after each one (expected 3, 4 and 4).
 *
 * <p>NOTE(review): {@code numHighlights} is declared outside this method;
 * presumably it is incremented while the {@code Highlighter} (constructed
 * with {@code this}) formats matches - confirm.
 *
 * @throws Exception if searching or highlighting fails
 */
public void testSimpleQueryScorerPhraseHighlighting() throws Exception {
    // Phrase 1: "very" "long" at positions 0 and 1, "contains" at 3.
    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.add(new Term(FIELD_NAME, "very"), 0);
    builder.add(new Term(FIELD_NAME, "long"), 1);
    builder.add(new Term(FIELD_NAME, "contains"), 3);
    highlightPhraseHitsWithSimpleFragmenter(builder.build());
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 3);

    // Phrase 2: four terms at positions 1, 3, 4 and 6.
    numHighlights = 0;
    builder = new PhraseQuery.Builder();
    builder.add(new Term(FIELD_NAME, "piece"), 1);
    builder.add(new Term(FIELD_NAME, "text"), 3);
    builder.add(new Term(FIELD_NAME, "refers"), 4);
    builder.add(new Term(FIELD_NAME, "kennedy"), 6);
    highlightPhraseHitsWithSimpleFragmenter(builder.build());
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);

    // Phrase 3: the same term repeated at positions 0, 4, 8 and 12.
    numHighlights = 0;
    builder = new PhraseQuery.Builder();
    builder.add(new Term(FIELD_NAME, "lets"), 0);
    builder.add(new Term(FIELD_NAME, "lets"), 4);
    builder.add(new Term(FIELD_NAME, "lets"), 8);
    builder.add(new Term(FIELD_NAME, "lets"), 12);
    highlightPhraseHitsWithSimpleFragmenter(builder.build());
    assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 4);
}

/**
 * Searches for {@code phraseQuery} and highlights every hit with a
 * {@code SimpleFragmenter(40)}, requesting at most two fragments joined by
 * {@code "..."}. Fragments are printed only when {@code VERBOSE} is set.
 *
 * @param phraseQuery the phrase to search for and highlight
 * @throws Exception if searching or highlighting fails
 */
private void highlightPhraseHitsWithSimpleFragmenter(PhraseQuery phraseQuery) throws Exception {
    doSearching(phraseQuery);
    final int maxNumFragmentsRequired = 2;
    // NOTE(review): `query` and `hits` are declared outside this method;
    // presumably doSearching(...) refreshes both - confirm.
    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
    Highlighter highlighter = new Highlighter(this, scorer);
    for (int i = 0; i < hits.totalHits; i++) {
        final int docId = hits.scoreDocs[i].doc;
        final Document doc = searcher.doc(docId);
        String text = doc.get(FIELD_NAME);
        TokenStream tokenStream = getAnyTokenStream(FIELD_NAME, docId);
        highlighter.setTextFragmenter(new SimpleFragmenter(40));
        String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
        if (VERBOSE) {
            System.out.println("\t" + result);
        }
    }
}
Also used : CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) Builder(org.apache.lucene.search.PhraseQuery.Builder) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint)

Example 33 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestMemoryIndexAgainstRAMDir method testSameFieldAddedMultipleTimes.

/**
 * Adds two values to the same {@code MemoryIndex} field and checks phrase
 * matching across them, first with the analyzer as constructed and then after
 * setting a random position-increment gap of 1 to 10.
 *
 * <p>Asserted behavior: the exact phrase "fox jumps" scores above 0.1
 * initially, scores 0 once the gap is set, and a phrase built with a leading
 * argument of 10 scores above 0.0001 again.
 *
 * @throws IOException propagated from the index and reader calls
 */
public void testSameFieldAddedMultipleTimes() throws IOException {
    final String fieldName = "field";
    final String firstValue = "the quick brown fox";
    final String secondValue = "jumps over the";

    MemoryIndex memoryIndex = randomMemoryIndex();
    MockAnalyzer tokenAnalyzer = new MockAnalyzer(random());
    memoryIndex.addField(fieldName, firstValue, tokenAnalyzer);
    memoryIndex.addField(fieldName, secondValue, tokenAnalyzer);

    LeafReader leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
    TestUtil.checkReader(leafReader);
    // Four tokens from the first value plus three from the second.
    assertEquals(7, leafReader.terms(fieldName).getSumTotalTermFreq());

    PhraseQuery exactPhrase = new PhraseQuery(fieldName, "fox", "jumps");
    assertTrue(memoryIndex.search(exactPhrase) > 0.1);

    // Rebuild the index with a gap between the two values of the field.
    memoryIndex.reset();
    int positionGap = 1 + random().nextInt(10);
    tokenAnalyzer.setPositionIncrementGap(positionGap);
    memoryIndex.addField(fieldName, firstValue, tokenAnalyzer);
    memoryIndex.addField(fieldName, secondValue, tokenAnalyzer);
    assertEquals(0, memoryIndex.search(exactPhrase), 0.00001f);

    // Same terms, built with a leading argument of 10.
    PhraseQuery sloppyPhrase = new PhraseQuery(10, fieldName, "fox", "jumps");
    String gapMessage = "posGap" + tokenAnalyzer.getPositionIncrementGap(fieldName);
    assertTrue(gapMessage, memoryIndex.search(sloppyPhrase) > 0.0001);

    TestUtil.checkReader(memoryIndex.createSearcher().getIndexReader());
}
Also used : MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PhraseQuery(org.apache.lucene.search.PhraseQuery)

Example 34 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestMemoryIndex method testBuildFromDocument.

/**
 * Builds a {@code MemoryIndex} from a {@code Document} holding two
 * {@code TextField} values under "field1" and one {@code StringField} under
 * "field2", with the analyzer's position-increment gap set to 100, then
 * checks which term and phrase queries produce a non-zero score.
 */
@Test
public void testBuildFromDocument() {
    Document document = new Document();
    document.add(new TextField("field1", "some text", Field.Store.NO));
    document.add(new TextField("field1", "some more text", Field.Store.NO));
    document.add(new StringField("field2", "untokenized text", Field.Store.NO));

    analyzer.setPositionIncrementGap(100);
    MemoryIndex memoryIndex = MemoryIndex.fromDocument(document, analyzer);

    // Term queries: "field2" only matches on its whole value.
    TermQuery field1Token = new TermQuery(new Term("field1", "text"));
    assertThat(memoryIndex.search(field1Token), not(0.0f));

    TermQuery field2Token = new TermQuery(new Term("field2", "text"));
    assertThat(memoryIndex.search(field2Token), is(0.0f));

    TermQuery field2WholeValue = new TermQuery(new Term("field2", "untokenized text"));
    assertThat(memoryIndex.search(field2WholeValue), not(0.0f));

    // Phrase queries on "field1": two orderings that match and one that does not.
    PhraseQuery someMoreText = new PhraseQuery("field1", "some", "more", "text");
    assertThat(memoryIndex.search(someMoreText), not(0.0f));

    PhraseQuery someText = new PhraseQuery("field1", "some", "text");
    assertThat(memoryIndex.search(someText), not(0.0f));

    PhraseQuery textSome = new PhraseQuery("field1", "text", "some");
    assertThat(memoryIndex.search(textSome), is(0.0f));
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) StringField(org.apache.lucene.document.StringField) TextField(org.apache.lucene.document.TextField) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) Test(org.junit.Test)

Example 35 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestQPHelper method testSimple.

/**
 * Checks a range of simple query strings via {@code assertQueryEquals} and
 * verifies the concrete {@code Query} type returned by {@code getQuery} for
 * boolean, term and phrase inputs.
 *
 * <p>NOTE(review): {@code assertQueryEquals} appears to take (query string,
 * analyzer or {@code null}, expected string form) - confirm against its
 * definition.
 *
 * @throws Exception if parsing or comparing a query fails
 */
public void testSimple() throws Exception {
    // Plain terms, sloppy phrases and non-ASCII input.
    assertQueryEquals("field=a", null, "a");
    assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
    assertQueryEquals("term term term", null, "term term term");
    // The u-umlaut is written as \u00fc so the literal survives any source-file
    // encoding; it had previously been corrupted to U+FFFD.
    assertQueryEquals("t\u00fcrm term term", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), "t\u00fcrm term term");
    assertQueryEquals("\u00fcmlaut", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), "\u00fcmlaut");
    // FIXME: change MockAnalyzer to not extend CharTokenizer for this test
    //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
    //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");

    // Boolean operators in their word and symbol spellings.
    assertQueryEquals("a AND b", null, "+a +b");
    assertQueryEquals("(a AND b)", null, "+a +b");
    assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");
    assertQueryEquals("a AND NOT b", null, "+a -b");
    assertQueryEquals("a AND -b", null, "+a -b");
    assertQueryEquals("a AND !b", null, "+a -b");
    assertQueryEquals("a && b", null, "+a +b");
    assertQueryEquals("a && ! b", null, "+a -b");
    assertQueryEquals("a OR b", null, "a b");
    assertQueryEquals("a || b", null, "a b");
    assertQueryEquals("a OR !b", null, "a -b");
    assertQueryEquals("a OR ! b", null, "a -b");
    assertQueryEquals("a OR -b", null, "a -b");
    assertQueryEquals("+term -term term", null, "+term -term term");
    assertQueryEquals("foo:term AND field:anotherTerm", null, "+foo:term +anotherterm");
    assertQueryEquals("term AND \"phrase phrase\"", null, "+term +\"phrase phrase\"");
    assertQueryEquals("\"hello there\"", null, "\"hello there\"");

    // Concrete query classes produced for each kind of input.
    assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
    assertTrue(getQuery("hello", null) instanceof TermQuery);
    assertTrue(getQuery("\"hello there\"", null) instanceof PhraseQuery);

    // Boosts.
    assertQueryEquals("germ term^2.0", null, "germ (term)^2.0");
    assertQueryEquals("(term)^2.0", null, "(term)^2.0");
    assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0");
    assertQueryEquals("term^2.0", null, "(term)^2.0");
    assertQueryEquals("term^2", null, "(term)^2.0");
    assertQueryEquals("\"germ term\"^2.0", null, "(\"germ term\")^2.0");
    assertQueryEquals("\"term germ\"^2", null, "(\"term germ\")^2.0");

    // Nested groups and fielded clauses.
    assertQueryEquals("(foo OR bar) AND (baz OR boo)", null, "+(foo bar) +(baz boo)");
    assertQueryEquals("((a OR b) AND NOT c) OR d", null, "(+(a b) -c) d");
    assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, "+(apple \"steve jobs\") -(foo bar baz)");
    assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, "+(title:dog title:cat) -author:\"bob dole\"");
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) TermQuery(org.apache.lucene.search.TermQuery) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery)

Aggregations

PhraseQuery (org.apache.lucene.search.PhraseQuery): 97; Term (org.apache.lucene.index.Term): 51; TermQuery (org.apache.lucene.search.TermQuery): 39; BooleanQuery (org.apache.lucene.search.BooleanQuery): 36; MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery): 36; Document (org.apache.lucene.document.Document): 34; Query (org.apache.lucene.search.Query): 26; Directory (org.apache.lucene.store.Directory): 26; MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 25; IndexSearcher (org.apache.lucene.search.IndexSearcher): 22; IndexReader (org.apache.lucene.index.IndexReader): 20; Field (org.apache.lucene.document.Field): 17; TextField (org.apache.lucene.document.TextField): 16; SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery): 16; TokenStream (org.apache.lucene.analysis.TokenStream): 15; TopDocs (org.apache.lucene.search.TopDocs): 14; IndexWriter (org.apache.lucene.index.IndexWriter): 13; BoostQuery (org.apache.lucene.search.BoostQuery): 13; ArrayList (java.util.ArrayList): 10; CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 10