Search in sources :

Example 41 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class SolrPluginUtilsTest method testDisjunctionMaxQueryParser.

@Test
public void testDisjunctionMaxQueryParser() throws Exception {
    Query out;
    String t;
    SolrQueryRequest req = req("df", "text");
    QParser qparser = QParser.getParser("hi", "dismax", req);
    DisjunctionMaxQueryParser qp = new SolrPluginUtils.DisjunctionMaxQueryParser(qparser, req.getParams().get("df"));
    qp.addAlias("hoss", 0.01f, SolrPluginUtils.parseFieldBoosts("title^2.0 title_stemmed name^1.2 subject^0.5"));
    qp.addAlias("test", 0.01f, SolrPluginUtils.parseFieldBoosts("text^2.0"));
    qp.addAlias("unused", 1.0f, SolrPluginUtils.parseFieldBoosts("subject^0.5 sind^1.5"));
    /* first some sanity tests that don't use aliasing at all */
    t = "XXXXXXXX";
    out = qp.parse(t);
    assertNotNull(t + " sanity test gave back null", out);
    assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
    assertEquals(t + " sanity test is wrong field", qp.getDefaultField(), ((TermQuery) out).getTerm().field());
    t = "subject:XXXXXXXX";
    out = qp.parse(t);
    assertNotNull(t + " sanity test gave back null", out);
    assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
    assertEquals(t + " sanity test is wrong field", "subject", ((TermQuery) out).getTerm().field());
    /* field has untokenzied type, so this should be a term anyway */
    t = "sind:\"simple phrase\"";
    out = qp.parse(t);
    assertNotNull(t + " sanity test gave back null", out);
    assertTrue(t + " sanity test isn't TermQuery: " + out.getClass(), out instanceof TermQuery);
    assertEquals(t + " sanity test is wrong field", "sind", ((TermQuery) out).getTerm().field());
    t = "subject:\"simple phrase\"";
    out = qp.parse(t);
    assertNotNull(t + " sanity test gave back null", out);
    assertTrue(t + " sanity test isn't PhraseQuery: " + out.getClass(), out instanceof PhraseQuery);
    assertEquals(t + " sanity test is wrong field", "subject", ((PhraseQuery) out).getTerms()[0].field());
    /* now some tests that use aliasing */
    /* basic usage of single "term" */
    t = "hoss:XXXXXXXX";
    out = qp.parse(t);
    assertNotNull(t + " was null", out);
    assertTrue(t + " wasn't a DMQ:" + out.getClass(), out instanceof DisjunctionMaxQuery);
    assertEquals(t + " wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) out).iterator()));
    /* odd case, but should still work, DMQ of one clause */
    t = "test:YYYYY";
    out = qp.parse(t);
    assertNotNull(t + " was null", out);
    assertTrue(t + " wasn't a DMQ:" + out.getClass(), out instanceof DisjunctionMaxQuery);
    assertEquals(t + " wrong number of clauses", 1, countItems(((DisjunctionMaxQuery) out).iterator()));
    /* basic usage of multiple "terms" */
    t = "hoss:XXXXXXXX test:YYYYY";
    out = qp.parse(t);
    assertNotNull(t + " was null", out);
    assertTrue(t + " wasn't a boolean:" + out.getClass(), out instanceof BooleanQuery);
    {
        BooleanQuery bq = (BooleanQuery) out;
        List<BooleanClause> clauses = new ArrayList<>(bq.clauses());
        assertEquals(t + " wrong number of clauses", 2, clauses.size());
        Query sub = clauses.get(0).getQuery();
        assertTrue(t + " first wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
        assertEquals(t + " first had wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) sub).iterator()));
        sub = clauses.get(1).getQuery();
        assertTrue(t + " second wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
        assertEquals(t + " second had wrong number of clauses", 1, countItems(((DisjunctionMaxQuery) sub).iterator()));
    }
    /* a phrase, and a term that is a stop word for some fields */
    t = "hoss:\"XXXXXX YYYYY\" hoss:the";
    out = qp.parse(t);
    assertNotNull(t + " was null", out);
    assertTrue(t + " wasn't a boolean:" + out.getClass(), out instanceof BooleanQuery);
    {
        BooleanQuery bq = (BooleanQuery) out;
        List<BooleanClause> clauses = new ArrayList<>(bq.clauses());
        assertEquals(t + " wrong number of clauses", 2, clauses.size());
        Query sub = clauses.get(0).getQuery();
        assertTrue(t + " first wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
        assertEquals(t + " first had wrong number of clauses", 4, countItems(((DisjunctionMaxQuery) sub).iterator()));
        sub = clauses.get(1).getQuery();
        assertTrue(t + " second wasn't a DMQ:" + sub.getClass(), sub instanceof DisjunctionMaxQuery);
        assertEquals(t + " second had wrong number of clauses (stop words)", 2, countItems(((DisjunctionMaxQuery) sub).iterator()));
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) DisjunctionMaxQuery(org.apache.lucene.search.DisjunctionMaxQuery) QParser(org.apache.solr.search.QParser) ArrayList(java.util.ArrayList) List(java.util.List) DisjunctionMaxQueryParser(org.apache.solr.util.SolrPluginUtils.DisjunctionMaxQueryParser) Test(org.junit.Test)

Example 42 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project jackrabbit by apache.

the class AbstractExcerpt method getQueryTerms.

private static void getQueryTerms(Query q, Set<Term[]> relevantTerms) {
    if (q instanceof BooleanQuery) {
        final BooleanQuery bq = (BooleanQuery) q;
        for (BooleanClause clause : bq.getClauses()) {
            getQueryTerms(clause.getQuery(), relevantTerms);
        }
        return;
    }
    //need to preserve insertion order
    Set<Term> extractedTerms = new LinkedHashSet<Term>();
    q.extractTerms(extractedTerms);
    Set<Term> filteredTerms = filterRelevantTerms(extractedTerms);
    if (!filteredTerms.isEmpty()) {
        if (q instanceof PhraseQuery) {
            // inline the terms, basically a 'must all' condition
            relevantTerms.add(filteredTerms.toArray(new Term[] {}));
        } else {
            // each possible term gets a new slot
            for (Term t : filteredTerms) {
                relevantTerms.add(new Term[] { t });
            }
        }
    }
}
Also used : BooleanClause(org.apache.lucene.search.BooleanClause) LinkedHashSet(java.util.LinkedHashSet) BooleanQuery(org.apache.lucene.search.BooleanQuery) PhraseQuery(org.apache.lucene.search.PhraseQuery) Term(org.apache.lucene.index.Term)

Example 43 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestAddIndexes method testWithPendingDeletes2.

public void testWithPendingDeletes2() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();
    setUpDirs(dir, aux);
    IndexWriter writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
    // docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
        doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
        doc.add(new IntPoint("doc", i));
        doc.add(new IntPoint("doc2d", i, i));
        doc.add(new NumericDocValuesField("dv", i));
        writer.updateDocument(new Term("id", "" + (i % 10)), doc);
    }
    writer.addIndexes(aux);
    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery("content", "bbb", "14");
    writer.deleteDocuments(q);
    writer.forceMerge(1);
    writer.commit();
    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, new Term("content", "aaa"), 1030);
    verifyTermDocs(dir, new Term("content", "bbb"), 9);
    writer.close();
    dir.close();
    aux.close();
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) PhraseQuery(org.apache.lucene.search.PhraseQuery) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Example 44 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestAddIndexes method testWithPendingDeletes.

public void testWithPendingDeletes() throws IOException {
    // main directory
    Directory dir = newDirectory();
    // auxiliary directory
    Directory aux = newDirectory();
    setUpDirs(dir, aux);
    IndexWriter writer = newWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
    writer.addIndexes(aux);
    // docs, so 10 pending deletes:
    for (int i = 0; i < 20; i++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + (i % 10), Field.Store.NO));
        doc.add(newTextField("content", "bbb " + i, Field.Store.NO));
        doc.add(new IntPoint("doc", i));
        doc.add(new IntPoint("doc2d", i, i));
        doc.add(new NumericDocValuesField("dv", i));
        writer.updateDocument(new Term("id", "" + (i % 10)), doc);
    }
    // Deletes one of the 10 added docs, leaving 9:
    PhraseQuery q = new PhraseQuery("content", "bbb", "14");
    writer.deleteDocuments(q);
    writer.forceMerge(1);
    writer.commit();
    verifyNumDocs(dir, 1039);
    verifyTermDocs(dir, new Term("content", "aaa"), 1030);
    verifyTermDocs(dir, new Term("content", "bbb"), 9);
    writer.close();
    dir.close();
    aux.close();
}
Also used : IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) PhraseQuery(org.apache.lucene.search.PhraseQuery) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Example 45 with PhraseQuery

use of org.apache.lucene.search.PhraseQuery in project lucene-solr by apache.

the class TestIndexWriterExceptions method testAddDocsNonAbortingException.

public void testAddDocsNonAbortingException() throws Exception {
    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    final int numDocs1 = random().nextInt(25);
    for (int docCount = 0; docCount < numDocs1; docCount++) {
        Document doc = new Document();
        doc.add(newTextField("content", "good content", Field.Store.NO));
        w.addDocument(doc);
    }
    final List<Document> docs = new ArrayList<>();
    for (int docCount = 0; docCount < 7; docCount++) {
        Document doc = new Document();
        docs.add(doc);
        doc.add(newStringField("id", docCount + "", Field.Store.NO));
        doc.add(newTextField("content", "silly content " + docCount, Field.Store.NO));
        if (docCount == 4) {
            Field f = newTextField("crash", "", Field.Store.NO);
            doc.add(f);
            MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            tokenizer.setReader(new StringReader("crash me on the 4th token"));
            // disable workflow checking as we forcefully close() in exceptional cases.
            tokenizer.setEnableChecks(false);
            f.setTokenStream(new CrashingFilter("crash", tokenizer));
        }
    }
    IOException expected = expectThrows(IOException.class, () -> {
        w.addDocuments(docs);
    });
    assertEquals(CRASH_FAIL_MESSAGE, expected.getMessage());
    final int numDocs2 = random().nextInt(25);
    for (int docCount = 0; docCount < numDocs2; docCount++) {
        Document doc = new Document();
        doc.add(newTextField("content", "good content", Field.Store.NO));
        w.addDocument(doc);
    }
    final IndexReader r = w.getReader();
    w.close();
    final IndexSearcher s = newSearcher(r);
    PhraseQuery pq = new PhraseQuery("content", "silly", "good");
    assertEquals(0, s.search(pq, 1).totalHits);
    pq = new PhraseQuery("content", "good", "content");
    assertEquals(numDocs1 + numDocs2, s.search(pq, 1).totalHits);
    r.close();
    dir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) PhraseQuery(org.apache.lucene.search.PhraseQuery) ArrayList(java.util.ArrayList) FakeIOException(org.apache.lucene.store.MockDirectoryWrapper.FakeIOException) IOException(java.io.IOException) Document(org.apache.lucene.document.Document) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) StringField(org.apache.lucene.document.StringField) SortedNumericDocValuesField(org.apache.lucene.document.SortedNumericDocValuesField) StoredField(org.apache.lucene.document.StoredField) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BinaryDocValuesField(org.apache.lucene.document.BinaryDocValuesField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) StringReader(java.io.StringReader) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory)

Aggregations

PhraseQuery (org.apache.lucene.search.PhraseQuery)97 Term (org.apache.lucene.index.Term)51 TermQuery (org.apache.lucene.search.TermQuery)39 BooleanQuery (org.apache.lucene.search.BooleanQuery)36 MultiPhraseQuery (org.apache.lucene.search.MultiPhraseQuery)36 Document (org.apache.lucene.document.Document)34 Query (org.apache.lucene.search.Query)26 Directory (org.apache.lucene.store.Directory)26 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)25 IndexSearcher (org.apache.lucene.search.IndexSearcher)22 IndexReader (org.apache.lucene.index.IndexReader)20 Field (org.apache.lucene.document.Field)17 TextField (org.apache.lucene.document.TextField)16 SpanTermQuery (org.apache.lucene.search.spans.SpanTermQuery)16 TokenStream (org.apache.lucene.analysis.TokenStream)15 TopDocs (org.apache.lucene.search.TopDocs)14 IndexWriter (org.apache.lucene.index.IndexWriter)13 BoostQuery (org.apache.lucene.search.BoostQuery)13 ArrayList (java.util.ArrayList)10 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)10