use of org.apache.lucene.util.BitDocIdSet in project lucene-solr by apache.
the class LRUQueryCache method cacheIntoBitSet.
/**
 * Drains {@code scorer} into a {@link FixedBitSet} of size {@code maxDoc} and wraps the result
 * in a {@link BitDocIdSet} whose cost is the number of documents that were collected.
 *
 * @param scorer bulk scorer to run; it is invoked with {@code null} as its second argument
 * @param maxDoc number of bits allocated for the backing bit set
 * @return a doc id set backed by the populated bit set
 * @throws IOException if the scorer fails while collecting
 */
private static DocIdSet cacheIntoBitSet(BulkScorer scorer, int maxDoc) throws IOException {
  final FixedBitSet matches = new FixedBitSet(maxDoc);

  // Collector that flags every collected doc in `matches` and counts the collect() calls.
  final class CountingBitSetCollector implements LeafCollector {
    long collected;

    @Override
    public void setScorer(Scorer unused) throws IOException {
      // nothing to do: the scorer handed in here is never consulted
    }

    @Override
    public void collect(int doc) throws IOException {
      collected++;
      matches.set(doc);
    }
  }

  final CountingBitSetCollector collector = new CountingBitSetCollector();
  scorer.score(collector, null);
  return new BitDocIdSet(matches, collector.collected);
}
use of org.apache.lucene.util.BitDocIdSet in project lucene-solr by apache.
the class TestConjunctionDISI method testConjunctionApproximation.
// Checks the two-phase view of a conjunction: it must exist exactly when at least one clause
// exposes an approximation, and when it does, iterating it must yield the intersection of the
// expected sets.
public void testConjunctionApproximation() throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int clauseCount = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] expected = new FixedBitSet[clauseCount];
    final Scorer[] clauses = new Scorer[clauseCount];
    boolean sawTwoPhase = false;

    for (int slot = 0; slot < clauseCount; slot++) {
      final FixedBitSet candidates = randomSet(maxDoc);
      final boolean plainIterator = random().nextBoolean();
      if (plainIterator == false) {
        // scorer with approximation: `candidates` is the approximation, `confirmed` the matches
        final FixedBitSet confirmed = clearRandomBits(candidates);
        expected[slot] = confirmed;
        final TwoPhaseIterator twoPhase =
            approximation(new BitDocIdSet(candidates).iterator(), confirmed);
        clauses[slot] = scorer(twoPhase);
        sawTwoPhase = true;
      } else {
        // simple iterator: every candidate is a match
        expected[slot] = candidates;
        clauses[slot] =
            new ConstantScoreScorer(null, 0f, new BitDocIdSet(candidates).iterator());
      }
    }

    final DocIdSetIterator conjunction =
        ConjunctionDISI.intersectScorers(Arrays.asList(clauses));
    final TwoPhaseIterator unwrapped = TwoPhaseIterator.unwrap(conjunction);
    final boolean exposesTwoPhase = unwrapped != null;
    assertEquals(sawTwoPhase, exposesTwoPhase);
    if (sawTwoPhase) {
      assertEquals(
          intersect(expected),
          toBitSet(maxDoc, TwoPhaseIterator.asDocIdSetIterator(unwrapped)));
    }
  }
}
use of org.apache.lucene.util.BitDocIdSet in project lucene-solr by apache.
the class TestConjunctionDISI method testCollapseSubConjunctions.
/**
 * Builds a random list of scorers, repeatedly replaces random sub-sequences of that list with a
 * single nested conjunction, and checks that the conjunction over the resulting list still
 * matches the intersection of all expected sets.
 *
 * @param wrapWithScorer if {@code true} nested conjunctions are built as {@code ConjunctionScorer};
 *     otherwise as a {@code ConstantScoreScorer} over {@code ConjunctionDISI.intersectScorers}
 */
public void testCollapseSubConjunctions(boolean wrapWithScorer) throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int clauseCount = TestUtil.nextInt(random(), 5, 10);
    final FixedBitSet[] expected = new FixedBitSet[clauseCount];
    final List<Scorer> clauses = new LinkedList<>();

    for (int slot = 0; slot < clauseCount; slot++) {
      final FixedBitSet candidates = randomSet(maxDoc);
      final Scorer clause;
      if (random().nextBoolean()) {
        // simple iterator
        expected[slot] = candidates;
        clause = new ConstantScoreScorer(null, 0f, new BitDocIdSet(candidates).iterator());
      } else {
        // scorer with approximation
        final FixedBitSet confirmed = clearRandomBits(candidates);
        expected[slot] = confirmed;
        final TwoPhaseIterator twoPhase =
            approximation(new BitDocIdSet(candidates).iterator(), confirmed);
        clause = scorer(twoPhase);
      }
      clauses.add(clause);
    }

    // make some sub sequences into sub conjunctions
    final int collapseRounds = atLeast(3);
    int collapsed = 0;
    while (collapsed < collapseRounds && clauses.size() > 3) {
      final int from = TestUtil.nextInt(random(), 0, clauses.size() - 2);
      final int to = TestUtil.nextInt(random(), from + 2, clauses.size());
      final List<Scorer> slice = clauses.subList(from, to);
      final Scorer merged = wrapWithScorer
          ? new ConjunctionScorer(null, slice, Collections.emptyList())
          : new ConstantScoreScorer(null, 0f, ConjunctionDISI.intersectScorers(slice));

      // The merged scorer takes the first slot of the slice; the other (to - from - 1) slots go.
      clauses.set(from, merged);
      clauses.subList(from + 1, to).clear();
      collapsed++;
    }

    if (clauses.size() == 1) {
      // ConjunctionDISI needs two iterators
      clauses.add(new ConstantScoreScorer(null, 0f, DocIdSetIterator.all(maxDoc)));
    }

    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(clauses);
    assertEquals(intersect(expected), toBitSet(maxDoc, conjunction));
  }
}
use of org.apache.lucene.util.BitDocIdSet in project lucene-solr by apache.
the class TestConjunctionDISI method testConjunction.
// Checks that a conjunction over a random mix of clause kinds iterates exactly the
// intersection of the expected sets.
public void testConjunction() throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int clauseCount = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] expected = new FixedBitSet[clauseCount];
    final Scorer[] clauses = new Scorer[clauseCount];

    for (int slot = 0; slot < clauseCount; slot++) {
      final FixedBitSet candidates = randomSet(maxDoc);
      final int flavor = random().nextInt(3);
      if (flavor == 0) {
        // simple iterator
        expected[slot] = candidates;
        clauses[slot] = new ConstantScoreScorer(
            null, 0f, anonymizeIterator(new BitDocIdSet(candidates).iterator()));
      } else if (flavor == 1) {
        // bitSet iterator
        expected[slot] = candidates;
        clauses[slot] =
            new ConstantScoreScorer(null, 0f, new BitDocIdSet(candidates).iterator());
      } else {
        // scorer with approximation
        final FixedBitSet confirmed = clearRandomBits(candidates);
        expected[slot] = confirmed;
        final TwoPhaseIterator twoPhase =
            approximation(new BitDocIdSet(candidates).iterator(), confirmed);
        clauses[slot] = scorer(twoPhase);
      }
    }

    final DocIdSetIterator conjunction =
        ConjunctionDISI.intersectScorers(Arrays.asList(clauses));
    assertEquals(intersect(expected), toBitSet(maxDoc, conjunction));
  }
}
use of org.apache.lucene.util.BitDocIdSet in project lucene-solr by apache.
the class RandomSamplingFacetsCollector method createSample.
/**
 * Creates a sample of the given hits.
 *
 * <p>The hits are walked in iterator order and grouped into bins of
 * {@code (int) (1.0 / samplingRate)} consecutive hits; from each bin the hit at one randomly
 * chosen position is kept. A bin that is still open when the iterator is exhausted is carried
 * over to the next call through {@code leftoverBin} (hits still missing from the bin) and
 * {@code leftoverIndex} (offset of the hit to keep, or {@code NOT_CALCULATED} if the bin has
 * already contributed its hit).
 *
 * @param docs the hits to sample from
 * @return a new {@code MatchingDocs} for the same context containing only the sampled
 *     documents; {@code totalHits} is copied from {@code docs} unchanged
 * @throws RuntimeException wrapping any {@link IOException} raised while iterating the hits
 */
private MatchingDocs createSample(MatchingDocs docs) {
  int maxdoc = docs.context.reader().maxDoc();
  // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
  FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
  int binSize = (int) (1.0 / samplingRate);
  try {
    int counter = 0;
    int limit, randomIndex;
    if (leftoverBin != NOT_CALCULATED) {
      // resume the bin that the previous call left unfinished
      limit = leftoverBin;
      // either NOT_CALCULATED, which means we already sampled from that bin,
      // or the next document to sample
      randomIndex = leftoverIndex;
    } else {
      limit = binSize;
      randomIndex = random.nextInt(binSize);
    }
    final DocIdSetIterator it = docs.bits.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      if (counter == randomIndex) {
        sampleDocs.set(doc);
      }
      counter++;
      if (counter >= limit) {
        // bin is full: start a fresh one with a new random position
        counter = 0;
        limit = binSize;
        randomIndex = random.nextInt(binSize);
      }
    }
    if (counter == 0) {
      // we either exhausted the bin and the iterator at the same time, or
      // this segment had no results. in the latter case we might want to
      // carry leftover to the next segment as is, but that complicates the
      // code and doesn't seem so important.
      leftoverBin = leftoverIndex = NOT_CALCULATED;
    } else {
      leftoverBin = limit - counter;
      if (randomIndex >= counter) {
        // the document to sample has not been reached yet, so it falls into the
        // part of the bin that continues in the next segment. When randomIndex
        // equals counter it is the very next hit, i.e. offset 0; this case used
        // to fall through both branches and left a stale leftoverIndex behind.
        leftoverIndex = randomIndex - counter;
      } else {
        // we sampled a document from the bin, so just skip over remaining
        // documents in the bin in the next segment.
        leftoverIndex = NOT_CALCULATED;
      }
    }
    return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Aggregations