Example usage of org.apache.lucene.search.BooleanClause.Occur in the Apache lucene-solr project: the testRandomIndex method of the CommonTermsQueryTest class.
/**
 * Builds a random index, collects the five lowest- and five highest-document-frequency
 * terms of the {@code body} field, and verifies that a {@code CommonTermsQuery} over
 * those terms matches exactly the same documents as an equivalent plain
 * {@code BooleanQuery} built from the low-frequency terms.
 *
 * @throws IOException if index creation or searching fails
 */
public void testRandomIndex() throws IOException {
  Directory dir = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random());
  analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
  createRandomIndex(atLeast(50), w, random().nextLong());
  // Single segment so that term statistics come from one leaf reader.
  w.forceMerge(1);
  DirectoryReader reader = w.getReader();
  LeafReader wrapper = getOnlyLeafReader(reader);
  String field = "body";
  Terms terms = wrapper.terms(field);
  // Keeps the 5 rarest terms: the comparator is inverted (a.freq > b.freq), so
  // top() is the MOST frequent of the kept entries and is evicted first.
  PriorityQueue<TermAndFreq> lowFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {
    @Override
    protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
      return a.freq > b.freq;
    }
  };
  // Keeps the 5 most frequent terms: top() is the LEAST frequent of the kept entries.
  PriorityQueue<TermAndFreq> highFreqQueue = new PriorityQueue<CommonTermsQueryTest.TermAndFreq>(5) {
    @Override
    protected boolean lessThan(TermAndFreq a, TermAndFreq b) {
      return a.freq < b.freq;
    }
  };
  try {
    TermsEnum iterator = terms.iterator();
    while (iterator.next() != null) {
      if (highFreqQueue.size() < 5) {
        // Seed both queues with the first 5 terms unconditionally.
        highFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
        lowFreqQueue.add(new TermAndFreq(BytesRef.deepCopyOf(iterator.term()), iterator.docFreq()));
      } else {
        // Replace the weakest kept entry in place, then restore heap order.
        if (highFreqQueue.top().freq < iterator.docFreq()) {
          highFreqQueue.top().freq = iterator.docFreq();
          highFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
          highFreqQueue.updateTop();
        }
        if (lowFreqQueue.top().freq > iterator.docFreq()) {
          lowFreqQueue.top().freq = iterator.docFreq();
          lowFreqQueue.top().term = BytesRef.deepCopyOf(iterator.term());
          lowFreqQueue.updateTop();
        }
      }
    }
    int lowFreq = lowFreqQueue.top().freq;
    int highFreq = highFreqQueue.top().freq;
    // Need a real gap between the cutoff (highFreq - 1) and the low-freq terms,
    // otherwise low and high groups overlap and the verification query is wrong.
    assumeTrue("unlucky index", highFreq - 1 > lowFreq);
    List<TermAndFreq> highTerms = queueToList(highFreqQueue);
    List<TermAndFreq> lowTerms = queueToList(lowFreqQueue);
    IndexSearcher searcher = newSearcher(reader);
    Occur lowFreqOccur = randomOccur(random());
    BooleanQuery.Builder verifyQuery = new BooleanQuery.Builder();
    CommonTermsQuery cq = new CommonTermsQuery(randomOccur(random()), lowFreqOccur, highFreq - 1);
    for (TermAndFreq termAndFreq : lowTerms) {
      cq.add(new Term(field, termAndFreq.term));
      // The reference query uses only low-freq terms: with cutoff highFreq - 1,
      // every high-freq term is above the cutoff and must not affect matching.
      verifyQuery.add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
    }
    for (TermAndFreq termAndFreq : highTerms) {
      cq.add(new Term(field, termAndFreq.term));
    }
    TopDocs cqSearch = searcher.search(cq, reader.maxDoc());
    TopDocs verifySearch = searcher.search(verifyQuery.build(), reader.maxDoc());
    assertEquals(verifySearch.totalHits, cqSearch.totalHits);
    // Compare the two result sets document-by-document, not just the counts.
    Set<Integer> hits = new HashSet<>();
    for (ScoreDoc doc : verifySearch.scoreDocs) {
      hits.add(doc.doc);
    }
    for (ScoreDoc doc : cqSearch.scoreDocs) {
      assertTrue(hits.remove(doc.doc));
    }
    assertTrue(hits.isEmpty());
    /*
     * need to force merge here since QueryUtils adds checks based
     * on leave readers which have different statistics than the top
     * level reader if we have more than one segment. This could
     * result in a different query / results.
     */
    w.forceMerge(1);
    DirectoryReader reader2 = w.getReader();
    try {
      QueryUtils.check(random(), cq, newSearcher(reader2));
    } finally {
      // Close reader2 even when QueryUtils.check fails; the outer finally
      // below only covers the first reader.
      reader2.close();
    }
  } finally {
    IOUtils.close(reader, w, dir, analyzer);
  }
}
Aggregations