Search in sources:

Example 91 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestQPHelper method testCJKBoostedPhrase.

/**
 * Checks that a quoted CJK string carrying a boost suffix is parsed into a
 * boosted phrase query whose terms are the individual CJK characters.
 */
public void testCJKBoostedPhrase() throws Exception {
    // the analyzer used below splits CJK text into one term per character
    final Query phrase = new PhraseQuery("field", "中", "国");
    final Query boostedPhrase = new BoostQuery(phrase, 0.5f);
    final Query parsed = getQuery("\"中国\"^0.5", new SimpleCJKAnalyzer());
    assertEquals(boostedPhrase, parsed);
}
Also used : Query(org.apache.lucene.search.Query) MatchNoDocsQuery(org.apache.lucene.search.MatchNoDocsQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) RegexpQuery(org.apache.lucene.search.RegexpQuery) MultiTermQuery(org.apache.lucene.search.MultiTermQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) WildcardQuery(org.apache.lucene.search.WildcardQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) TermRangeQuery(org.apache.lucene.search.TermRangeQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) MultiPhraseQuery(org.apache.lucene.search.MultiPhraseQuery) BoostQuery(org.apache.lucene.search.BoostQuery)

Example 92 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestIndexWriter method testStopwordsPosIncHole2.

// LUCENE-3849
/**
 * Indexes two values of the same field through an analyzer that chains two
 * stop filters, then checks that a phrase query with explicit term positions
 * ("just" at 0, "test" at 3) finds the document.
 */
public void testStopwordsPosIncHole2() throws Exception {
    Directory directory = newDirectory();
    final Automaton secondSet = Automata.makeString("foobar");
    // two stop filters stacked on one tokenizer: the English stop set first,
    // then a filter driven by the automaton built from "foobar"
    Analyzer doubleStopAnalyzer = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer();
            TokenStream englishFiltered = new MockTokenFilter(source, MockTokenFilter.ENGLISH_STOPSET);
            TokenStream fullyFiltered = new MockTokenFilter(englishFiltered, new CharacterRunAutomaton(secondSet));
            return new TokenStreamComponents(source, fullyFiltered);
        }
    };
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory, doubleStopAnalyzer);

    // one document, two values for the same "body" field
    Document document = new Document();
    for (String value : new String[] { "just a foobar", "test of gaps" }) {
        document.add(new TextField("body", value, Field.Store.NO));
    }
    writer.addDocument(document);

    // the reader must be obtained before the writer is closed
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);

    // body:"just ? ? test"
    PhraseQuery gappedPhrase = new PhraseQuery.Builder()
            .add(new Term("body", "just"), 0)
            .add(new Term("body", "test"), 3)
            .build();
    assertEquals(1, searcher.search(gappedPhrase, 5).totalHits);

    reader.close();
    directory.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Automaton(org.apache.lucene.util.automaton.Automaton) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) MockTokenFilter(org.apache.lucene.analysis.MockTokenFilter) PhraseQuery(org.apache.lucene.search.PhraseQuery) CharacterRunAutomaton(org.apache.lucene.util.automaton.CharacterRunAutomaton) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) Document(org.apache.lucene.document.Document) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TextField(org.apache.lucene.document.TextField) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) MMapDirectory(org.apache.lucene.store.MMapDirectory) Directory(org.apache.lucene.store.Directory) RAMDirectory(org.apache.lucene.store.RAMDirectory) FSDirectory(org.apache.lucene.store.FSDirectory) SimpleFSDirectory(org.apache.lucene.store.SimpleFSDirectory) NIOFSDirectory(org.apache.lucene.store.NIOFSDirectory)

Example 93 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project textdb by TextDB.

the class KeywordMatcherSourceOperator method buildPhraseQuery.

/**
 * Builds the Lucene query for the predicate's phrase over every attribute it names.
 *
 * <p>One clause is produced per attribute and all clauses are combined with
 * {@code SHOULD}:
 * <ul>
 *   <li>{@code STRING} attributes: a term query on the query string exactly as given;</li>
 *   <li>{@code TEXT} attributes with a single query token: a term query on the
 *       lower-cased query string;</li>
 *   <li>{@code TEXT} attributes otherwise: a phrase query in which every
 *       non-stop-word token keeps its original index as its position, so the
 *       skipped stop words leave gaps in the phrase.</li>
 * </ul>
 *
 * @return the combined boolean query
 * @throws DataFlowException if an attribute is neither {@code STRING} nor {@code TEXT}
 */
private Query buildPhraseQuery() throws DataFlowException {
    BooleanQuery.Builder booleanQueryBuilder = new BooleanQuery.Builder();
    for (String attributeName : this.predicate.getAttributeNames()) {
        AttributeType attributeType = this.inputSchema.getAttribute(attributeName).getAttributeType();
        if (attributeType == AttributeType.STRING) {
            // STRING attributes are matched against the query string as given (no case folding)
            Query termQuery = new TermQuery(new Term(attributeName, predicate.getQuery()));
            booleanQueryBuilder.add(termQuery, BooleanClause.Occur.SHOULD);
        } else if (attributeType == AttributeType.TEXT) {
            if (queryTokenList.size() == 1) {
                // NOTE(review): this uses the whole query string rather than the single
                // token in queryTokenList; confirm that is intended when the query also
                // contains stop words.
                // Locale.ROOT keeps the case folding independent of the JVM default locale.
                Query termQuery = new TermQuery(
                        new Term(attributeName, predicate.getQuery().toLowerCase(java.util.Locale.ROOT)));
                booleanQueryBuilder.add(termQuery, BooleanClause.Occur.SHOULD);
            } else {
                PhraseQuery.Builder phraseQueryBuilder = new PhraseQuery.Builder();
                for (int i = 0; i < queryTokensWithStopwords.size(); i++) {
                    // Normalise once and use the same form for the stop-word check and the
                    // term, so a capitalised stop word (e.g. "The") is skipped instead of
                    // being added to the phrase as "the".
                    String token = queryTokensWithStopwords.get(i).toLowerCase(java.util.Locale.ROOT);
                    if (!StandardAnalyzer.STOP_WORDS_SET.contains(token)) {
                        // position i (not a running counter) preserves the gap left by stop words
                        phraseQueryBuilder.add(new Term(attributeName, token), i);
                    }
                }
                PhraseQuery phraseQuery = phraseQueryBuilder.build();
                booleanQueryBuilder.add(phraseQuery, BooleanClause.Occur.SHOULD);
            }
        } else {
            // types other than TEXT and STRING: throw Exception for now
            throw new DataFlowException("KeywordPredicate: Fields other than STRING and TEXT are not supported yet");
        }
    }
    return booleanQueryBuilder.build();
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) AttributeType(edu.uci.ics.textdb.api.schema.AttributeType) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) Term(org.apache.lucene.index.Term)

Example 94 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestSimpleQueryParser method testPhraseWithSlop.

/** Exercises phrase parsing with valid, missing, malformed and negative slop suffixes. */
public void testPhraseWithSlop() throws Exception {
    // well-formed numeric slop is applied to the phrase
    assertEquals(new PhraseQuery(2, "field", "foo", "bar"), parse("\"foo bar\"~2"));
    assertEquals(new PhraseQuery(10, "field", "foo", "bar"), parse("\"foo bar\"~10"));

    // a missing, non-numeric or negative slop leaves a plain phrase
    PhraseQuery slopFree = new PhraseQuery("field", "foo", "bar");
    assertEquals("Ignore trailing tilde with no slop", slopFree, parse("\"foo bar\"~"));
    assertEquals("Ignore non-numeric trailing slop", slopFree, parse("\"foo bar\"~a"));
    assertEquals("Ignore non-numeric trailing slop", slopFree, parse("\"foo bar\"~1a"));
    assertEquals("Ignore negative trailing slop", slopFree, parse("\"foo bar\"~-1"));

    // a sloppy phrase followed by another term: both clauses are required
    BooleanQuery.Builder phraseAndTerm = new BooleanQuery.Builder();
    phraseAndTerm.add(new PhraseQuery(12, "field", "foo", "bar"), Occur.MUST);
    phraseAndTerm.add(new TermQuery(new Term("field", "baz")), Occur.MUST);
    assertEquals(phraseAndTerm.build(), parse("\"foo bar\"~12 baz"));
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) TermQuery(org.apache.lucene.search.TermQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term)

Example 95 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestSimpleQueryParser method testPhrase.

/** A quoted two-word string must parse to a phrase query over both words. */
public void testPhrase() throws Exception {
    assertEquals(new PhraseQuery("field", "foo", "bar"), parse("\"foo bar\""));
}
Also used : PhraseQuery(org.apache.lucene.search.PhraseQuery)

Aggregations

PhraseQuery (org.apache.lucene.search.PhraseQuery)105 Term (org.apache.lucene.index.Term)56 TermQuery (org.apache.lucene.search.TermQuery)43 BooleanQuery (org.apache.lucene.search.BooleanQuery)39 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)37 Document (org.apache.lucene.document.Document)36 Query (org.apache.lucene.search.Query)30 Directory (org.apache.lucene.store.Directory)26 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)25 IndexSearcher (org.apache.lucene.search.IndexSearcher)22 IndexReader (org.apache.lucene.index.IndexReader)20 Field (org.apache.lucene.document.Field)17 TextField (org.apache.lucene.document.TextField)16 TopDocs (org.apache.lucene.search.TopDocs)16 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)16 TokenStream (org.apache.lucene.analysis.TokenStream)15 BoostQuery (org.apache.lucene.search.BoostQuery)14 IndexWriter (org.apache.lucene.index.IndexWriter)13 ArrayList (java.util.ArrayList)11 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)10