Use of com.greplin.lucene.util.Intersection in the project greplin-lucene-utils by Cue.
Shown in the class PhraseFilter, method getDocIdSet.
/**
 * Computes the documents of {@code reader} that match this phrase filter.
 *
 * <p>Every sub-reader returned by {@code IndexReaders.gatherSubReaders} is
 * searched on its own. The doc ids found in a sub-reader are then shifted by
 * the summed {@code maxDoc()} of the sub-readers that precede it, so the
 * returned set is expressed in the doc id space of {@code reader}.
 *
 * @param reader the reader to filter
 * @return {@code DocIdSets.EMPTY} when nothing matches; a {@code FixedBitSet}
 *     when more than {@code reader.maxDoc() >> 5} documents match; otherwise a
 *     {@code SortedIntArrayDocIdSet} holding the matching ids
 * @throws IOException if reading from the index fails
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
  List<IndexReader> subReaders = IndexReaders.gatherSubReaders(reader);
  PhraseFilterMatchList[] results = new PhraseFilterMatchList[subReaders.size()];
  int matchCount = 0;
  int readerNumber = 0;
  for (IndexReader subReader : subReaders) {
    PhraseFilterMatchList matches = collectSubReaderMatches(reader, subReader);
    if (matches != null) {
      results[readerNumber] = matches;
      matchCount += matches.getCount();
    }
    readerNumber++;
  }

  // 2^5 = 32: a bit set over maxDoc documents needs roughly maxDoc / 32 ints,
  // so it is the more compact form once more than that many documents match.
  final int bitsPerIntPowerLogTwo = 5;
  final int maxDoc = reader.maxDoc();
  if (matchCount > maxDoc >> bitsPerIntPowerLogTwo) {
    return toBitSet(subReaders, results, maxDoc);
  } else if (matchCount == 0) {
    return DocIdSets.EMPTY;
  } else {
    return new SortedIntArrayDocIdSet(toSortedIntArray(subReaders, results, matchCount));
  }
}

/**
 * Collects the phrase matches inside a single sub-reader.
 *
 * @param reader the top-level reader, handed to the intersection provider
 * @param subReader the sub-reader whose term positions are scanned
 * @return the matches, with doc ids as reported by {@code subReader}'s term
 *     positions, or {@code null} when no term was processed or some term
 *     does not occur in {@code subReader}
 * @throws IOException if reading from the index fails
 */
private PhraseFilterMatchList collectSubReaderMatches(
    final IndexReader reader, final IndexReader subReader) throws IOException {
  // Iteration order is whatever TermWithFrequency's natural ordering defines
  // (presumably least frequent first, so the candidate list starts small).
  SortedSet<TermWithFrequency> termsOrderedByFrequency = Sets.newTreeSet();
  for (int i = 0; i < this.terms.length; i++) {
    Term t = this.terms[i];
    termsOrderedByFrequency.add(new TermWithFrequency(t, subReader.docFreq(t), i));
  }

  PhraseFilterMatchList matches = null;
  TermPositions termPositions = subReader.termPositions();
  try {
    for (TermWithFrequency term : termsOrderedByFrequency) {
      if (term.docFreq == 0) {
        // A term with no documents in this sub-reader leaves nothing to
        // intersect with, so report "no matches" regardless of the order in
        // which the terms are visited.
        return null;
      }
      termPositions.seek(term.term);
      if (matches == null) {
        // If this is the first term, collect all matches that intersect
        // with the provided initial document set.
        // NOTE(review): the provider receives the top-level reader while the
        // positions come from the sub-reader - confirm this is intended.
        Intersection intersection = this.intersectionProvider.get(reader);
        matches = new PhraseFilterMatchList(term.docFreq);
        while (intersection.advanceToNextIntersection(termPositions)) {
          int freq = termPositions.freq();
          PhraseFilterIntList list = new PhraseFilterIntList(freq);
          for (int i = 0; i < freq; i++) {
            // Store each position relative to the term's index in the phrase.
            list.add(termPositions.nextPosition() - term.offset);
          }
          matches.add(termPositions.doc(), list);
        }
      } else {
        // Otherwise, intersect with the existing matches.
        matches.intersect(termPositions, term.offset);
      }
      if (matches.getCount() == 0) {
        // Nothing left to narrow down; the remaining terms cannot add matches.
        break;
      }
    }
  } finally {
    termPositions.close();
  }
  return matches;
}

/**
 * Builds a bit set with one bit set per match, shifting each sub-reader's
 * doc ids by the summed {@code maxDoc()} of the sub-readers before it.
 */
private FixedBitSet toBitSet(final List<IndexReader> subReaders,
    final PhraseFilterMatchList[] results, final int maxDoc) {
  FixedBitSet bits = new FixedBitSet(maxDoc);
  int readerOffset = 0;
  for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
    PhraseFilterMatchList matches = results[readerIndex];
    if (matches != null) {
      int count = matches.getCount();
      int[] docIds = matches.getDocIds();
      for (int i = 0; i < count; i++) {
        bits.set(docIds[i] + readerOffset);
      }
    }
    readerOffset += subReaders.get(readerIndex).maxDoc();
  }
  return bits;
}

/**
 * Concatenates the matches of all sub-readers into one array, shifting each
 * sub-reader's doc ids by the summed {@code maxDoc()} of the sub-readers
 * before it.
 */
private int[] toSortedIntArray(final List<IndexReader> subReaders,
    final PhraseFilterMatchList[] results, final int matchCount) {
  int[] merged = new int[matchCount];
  int base = 0;
  int readerOffset = 0;
  for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
    PhraseFilterMatchList matches = results[readerIndex];
    if (matches != null) {
      int count = matches.getCount();
      int[] docIds = matches.getDocIds();
      for (int i = 0; i < count; i++) {
        merged[base + i] = docIds[i] + readerOffset;
      }
      base += count;
    }
    readerOffset += subReaders.get(readerIndex).maxDoc();
  }
  return merged;
}
Aggregations