Search in sources :

Example 21 with Occur

Use of org.apache.lucene.search.BooleanClause.Occur in the project lucene-solr by Apache.

The method testRandomIndex of the class CommonTermsQueryTest.

/**
 * Builds a random single-segment index, collects the five terms with the highest and the five
 * terms with the lowest document frequency in the {@code "body"} field, and checks that a
 * {@code CommonTermsQuery} built from all ten terms returns exactly the same documents as a
 * plain {@code BooleanQuery} containing only the low-frequency terms.
 *
 * <p>The test is skipped via {@code assumeTrue} ("unlucky index") when the two frequency groups
 * are not separated by at least two documents.
 *
 * @throws IOException if indexing, reading or searching fails
 */
public void testRandomIndex() throws IOException {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
    createRandomIndex(atLeast(50), w, random().nextLong());
    w.forceMerge(1);
    DirectoryReader reader = w.getReader();
    LeafReader wrapper = getOnlyLeafReader(reader);
    String field = "body";
    Terms terms = wrapper.terms(field);
    // Ordering is inverted on purpose: top() is the entry with the LARGEST freq among those kept,
    // so replacing top() whenever a smaller docFreq shows up retains the lowest frequencies.
    PriorityQueue<TermAndFreq> lowFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {

        @Override
        protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
            return a.freq > b.freq;
        }
    };
    // Natural ordering: top() is the entry with the SMALLEST freq among those kept,
    // so replacing top() whenever a larger docFreq shows up retains the highest frequencies.
    PriorityQueue<TermAndFreq> highFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {

        @Override
        protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
            return a.freq < b.freq;
        }
    };
    try {
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            int docFreq = iterator.docFreq();
            if (highFreqQueue.size() < 5) {
                // Both queues fill up in lock-step. Each gets its own TermAndFreq instance (and
                // its own term copy) because the entries are mutated in place further below.
                highFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), docFreq));
                lowFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), docFreq));
            } else {
                if (highFreqQueue.top().freq < docFreq) {
                    // Overwrite the current top entry in place, then restore the heap order.
                    highFreqQueue.top().freq = docFreq;
                    highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
                    highFreqQueue.updateTop();
                }
                if (lowFreqQueue.top().freq > docFreq) {
                    lowFreqQueue.top().freq = docFreq;
                    lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
                    lowFreqQueue.updateTop();
                }
            }
        }
        // lowFreq: largest frequency in the low group; highFreq: smallest in the high group.
        int lowFreq = lowFreqQueue.top().freq;
        int highFreq = highFreqQueue.top().freq;
        // The cut-off passed to CommonTermsQuery below (highFreq - 1) must lie strictly above
        // every low-group frequency, otherwise the two groups cannot be told apart.
        assumeTrue("unlucky index", highFreq - 1 > lowFreq);
        List<TermAndFreq> highTerms = queueToList(highFreqQueue);
        List<TermAndFreq> lowTerms = queueToList(lowFreqQueue);
        IndexSearcher searcher = newSearcher(reader);
        Occur lowFreqOccur = randomOccur(random());
        BooleanQuery.Builder verifyQuery = new BooleanQuery.Builder();
        CommonTermsQuery cq = new CommonTermsQuery(randomOccur(random()), lowFreqOccur, highFreq - 1);
        // Low-frequency terms go into both queries; the reference query uses the same Occur.
        for (TermAndFreq termAndFreq : lowTerms) {
            cq.add(new Term(field, termAndFreq.term));
            verifyQuery.add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
        }
        // High-frequency terms are only added to the CommonTermsQuery.
        for (TermAndFreq termAndFreq : highTerms) {
            cq.add(new Term(field, termAndFreq.term));
        }
        TopDocs cqSearch = searcher.search(cq, reader.maxDoc());
        TopDocs verifySearch = searcher.search(verifyQuery.build(), reader.maxDoc());
        assertEquals(verifySearch.totalHits, cqSearch.totalHits);
        // Compare the hit sets document by document, ignoring order and score.
        Set<Integer> hits = new HashSet<>();
        for (ScoreDoc doc : verifySearch.scoreDocs) {
            hits.add(doc.doc);
        }
        for (ScoreDoc doc : cqSearch.scoreDocs) {
            assertTrue(hits.remove(doc.doc));
        }
        assertTrue(hits.isEmpty());
        /*
         * Need to force merge here since QueryUtils adds checks based
         * on leaf readers, which have different statistics than the
         * top-level reader if we have more than one segment. This could
         * result in a different query / results.
         */
        w.forceMerge(1);
        // try-with-resources: reader2 must be closed even when QueryUtils.check fails, so the
        // cleanup in the finally block below does not run with a reader still open.
        try (DirectoryReader reader2 = w.getReader()) {
            QueryUtils.check(random(), cq, newSearcher(reader2));
        }
    } finally {
        IOUtils.close(reader, w, dir, analyzer);
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) BooleanQuery(org.apache.lucene.search.BooleanQuery) Occur(org.apache.lucene.search.BooleanClause.Occur) TermsEnum(org.apache.lucene.index.TermsEnum) ScoreDoc(org.apache.lucene.search.ScoreDoc) TopDocs(org.apache.lucene.search.TopDocs) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory) HashSet(java.util.HashSet) TermQuery(org.apache.lucene.search.TermQuery) LeafReader(org.apache.lucene.index.LeafReader) DirectoryReader(org.apache.lucene.index.DirectoryReader) Terms(org.apache.lucene.index.Terms) Term(org.apache.lucene.index.Term) PriorityQueue(org.apache.lucene.util.PriorityQueue) BooleanClause(org.apache.lucene.search.BooleanClause) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Aggregations

Occur (org.apache.lucene.search.BooleanClause.Occur)21 Collection (java.util.Collection)10 EnumMap (java.util.EnumMap)10 Term (org.apache.lucene.index.Term)5 Query (org.apache.lucene.search.Query)5 ArrayList (java.util.ArrayList)4 BooleanQuery (org.apache.lucene.search.BooleanQuery)4 MatchNoDocsQuery (org.apache.lucene.search.MatchNoDocsQuery)3 TermQuery (org.apache.lucene.search.TermQuery)3 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)2 BooleanClause (org.apache.lucene.search.BooleanClause)2 IndexSearcher (org.apache.lucene.search.IndexSearcher)2 Directory (org.apache.lucene.store.Directory)2 StringReader (java.io.StringReader)1 HashSet (java.util.HashSet)1 Map (java.util.Map)1 SortedMap (java.util.SortedMap)1 TreeMap (java.util.TreeMap)1 Matcher (java.util.regex.Matcher)1 Analyzer (org.apache.lucene.analysis.Analyzer)1