Examples with BitDocIdSet - org.apache.lucene.util.BitDocIdSet

Example 1 with BitDocIdSet

Use of org.apache.lucene.util.BitDocIdSet in the lucene-solr project by Apache.

From the class LRUQueryCache, method cacheIntoBitSet.

/**
 * Scores with the given {@code scorer} and records every collected doc ID in a
 * {@link FixedBitSet} created with {@code maxDoc} bits.
 *
 * @param scorer bulk scorer whose collected documents are recorded
 * @param maxDoc number of bits allocated for the backing bit set
 * @return a {@link BitDocIdSet} over the recorded bits, constructed with the number of
 *     {@code collect} calls as its second argument
 * @throws IOException if {@code scorer.score} throws it
 */
private static DocIdSet cacheIntoBitSet(BulkScorer scorer, int maxDoc) throws IOException {
    final FixedBitSet matches = new FixedBitSet(maxDoc);
    // One-element array so the anonymous collector below can mutate the counter.
    final long[] hitCount = new long[1];
    final LeafCollector recorder = new LeafCollector() {

        @Override
        public void setScorer(Scorer ignored) throws IOException {
            // Intentionally empty: only doc IDs are recorded.
        }

        @Override
        public void collect(int docId) throws IOException {
            hitCount[0]++;
            matches.set(docId);
        }
    };
    scorer.score(recorder, null);
    return new BitDocIdSet(matches, hitCount[0]);
}

Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) IOException(java.io.IOException)

Example 2 with BitDocIdSet

Use of org.apache.lucene.util.BitDocIdSet in the lucene-solr project by Apache.

From the class TestConjunctionDISI, method testConjunctionApproximation.

/**
 * Test that the conjunction approximation is correct.
 *
 * <p>Each round builds between 2 and 5 scorers over random bit sets: either a plain
 * {@link ConstantScoreScorer} over the set, or a scorer built from a
 * {@link TwoPhaseIterator} whose confirmed matches are the set with random bits cleared.
 * The conjunction must unwrap to a non-null {@link TwoPhaseIterator} exactly when at least
 * one input was built from an approximation, and in that case its matches must equal the
 * intersection of the expected sets.
 *
 * @throws IOException if iterating any of the scorers fails
 */
public void testConjunctionApproximation() throws IOException {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; ++round) {
        final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
        final int clauseCount = TestUtil.nextInt(random(), 2, 5);
        final FixedBitSet[] expected = new FixedBitSet[clauseCount];
        final Scorer[] clauses = new Scorer[clauseCount];
        boolean sawTwoPhase = false;
        for (int slot = 0; slot < clauses.length; ++slot) {
            final FixedBitSet candidates = randomSet(maxDoc);
            final boolean plainIterator = random().nextBoolean();
            if (plainIterator) {
                // simple iterator: every candidate is a match
                expected[slot] = candidates;
                final DocIdSetIterator plain = new BitDocIdSet(candidates).iterator();
                clauses[slot] = new ConstantScoreScorer(null, 0f, plain);
                continue;
            }
            // scorer with approximation: only the confirmed subset is expected to match
            final FixedBitSet confirmed = clearRandomBits(candidates);
            expected[slot] = confirmed;
            final DocIdSetIterator approxDocs = new BitDocIdSet(candidates).iterator();
            final TwoPhaseIterator twoPhase = approximation(approxDocs, confirmed);
            clauses[slot] = scorer(twoPhase);
            sawTwoPhase = true;
        }
        final DocIdSetIterator conjunction =
                ConjunctionDISI.intersectScorers(Arrays.asList(clauses));
        final TwoPhaseIterator unwrapped = TwoPhaseIterator.unwrap(conjunction);
        assertEquals(sawTwoPhase, unwrapped != null);
        if (!sawTwoPhase) {
            continue;
        }
        final DocIdSetIterator verified = TwoPhaseIterator.asDocIdSetIterator(unwrapped);
        assertEquals(intersect(expected), toBitSet(maxDoc, verified));
    }
}

Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 3 with BitDocIdSet

Use of org.apache.lucene.util.BitDocIdSet in the lucene-solr project by Apache.

From the class TestConjunctionDISI, method testCollapseSubConjunctions.

/**
 * Checks that a conjunction still matches the intersection of all original sets after
 * random contiguous runs of its scorers have been replaced by sub-conjunctions.
 *
 * <p>Each round creates between 5 and 10 scorers (plain or built from an approximation),
 * then repeatedly replaces a random sub-sequence of at least two scorers with a single
 * scorer over their conjunction, for as long as more than three scorers remain and the
 * iteration budget is not exhausted.
 *
 * @param wrapWithScorer when {@code true} each sub-conjunction is a
 *     {@link ConjunctionScorer}; otherwise it is a {@link ConstantScoreScorer} over
 *     {@code ConjunctionDISI.intersectScorers(...)}
 * @throws IOException if iterating any of the scorers fails
 */
public void testCollapseSubConjunctions(boolean wrapWithScorer) throws IOException {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; ++round) {
        final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
        final int clauseCount = TestUtil.nextInt(random(), 5, 10);
        final FixedBitSet[] expected = new FixedBitSet[clauseCount];
        final List<Scorer> scorers = new LinkedList<>();
        for (int slot = 0; slot < clauseCount; ++slot) {
            final FixedBitSet candidates = randomSet(maxDoc);
            if (random().nextBoolean()) {
                // simple iterator
                expected[slot] = candidates;
                final DocIdSetIterator plain = new BitDocIdSet(candidates).iterator();
                scorers.add(new ConstantScoreScorer(null, 0f, plain));
            } else {
                // scorer with approximation
                final FixedBitSet confirmed = clearRandomBits(candidates);
                expected[slot] = confirmed;
                final DocIdSetIterator approxDocs = new BitDocIdSet(candidates).iterator();
                final TwoPhaseIterator twoPhase = approximation(approxDocs, confirmed);
                scorers.add(scorer(twoPhase));
            }
        }

        // make some sub sequences into sub conjunctions
        final int collapseAttempts = atLeast(3);
        for (int attempt = 0; attempt < collapseAttempts && scorers.size() > 3; ++attempt) {
            final int from = TestUtil.nextInt(random(), 0, scorers.size() - 2);
            final int to = TestUtil.nextInt(random(), from + 2, scorers.size());
            final List<Scorer> run = scorers.subList(from, to);
            final Scorer collapsed = wrapWithScorer
                    ? new ConjunctionScorer(null, run, Collections.emptyList())
                    : new ConstantScoreScorer(null, 0f, ConjunctionDISI.intersectScorers(run));
            // The collapsed scorer takes the first slot of the run; drop the rest of the run.
            scorers.set(from, collapsed);
            final int surplus = to - from - 1;
            for (int dropped = 0; dropped < surplus; ++dropped) {
                scorers.remove(from + 1);
            }
        }

        if (scorers.size() == 1) {
            // ConjunctionDISI needs two iterators
            scorers.add(new ConstantScoreScorer(null, 0f, DocIdSetIterator.all(maxDoc)));
        }
        final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(scorers);
        assertEquals(intersect(expected), toBitSet(maxDoc, conjunction));
    }
}

Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) LinkedList(java.util.LinkedList)

Example 4 with BitDocIdSet

Use of org.apache.lucene.util.BitDocIdSet in the lucene-solr project by Apache.

From the class TestConjunctionDISI, method testConjunction.

/**
 * Test that the conjunction iterator is correct.
 *
 * <p>Each round builds between 2 and 5 scorers over random bit sets, choosing one of three
 * flavors per scorer (an iterator passed through {@code anonymizeIterator}, a plain
 * bit-set iterator, or a scorer built from an approximation), and asserts that the
 * conjunction of all scorers matches the intersection of the expected sets.
 *
 * @throws IOException if iterating any of the scorers fails
 */
public void testConjunction() throws IOException {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; ++round) {
        final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
        final int clauseCount = TestUtil.nextInt(random(), 2, 5);
        final FixedBitSet[] expected = new FixedBitSet[clauseCount];
        final Scorer[] clauses = new Scorer[clauseCount];
        for (int slot = 0; slot < clauses.length; ++slot) {
            final FixedBitSet candidates = randomSet(maxDoc);
            final int flavor = random().nextInt(3);
            if (flavor == 0) {
                // simple iterator
                expected[slot] = candidates;
                final DocIdSetIterator anonymized =
                        anonymizeIterator(new BitDocIdSet(candidates).iterator());
                clauses[slot] = new ConstantScoreScorer(null, 0f, anonymized);
            } else if (flavor == 1) {
                // bitSet iterator
                expected[slot] = candidates;
                final DocIdSetIterator plain = new BitDocIdSet(candidates).iterator();
                clauses[slot] = new ConstantScoreScorer(null, 0f, plain);
            } else {
                // scorer with approximation
                final FixedBitSet confirmed = clearRandomBits(candidates);
                expected[slot] = confirmed;
                final DocIdSetIterator approxDocs = new BitDocIdSet(candidates).iterator();
                final TwoPhaseIterator twoPhase = approximation(approxDocs, confirmed);
                clauses[slot] = scorer(twoPhase);
            }
        }
        final DocIdSetIterator conjunction =
                ConjunctionDISI.intersectScorers(Arrays.asList(clauses));
        assertEquals(intersect(expected), toBitSet(maxDoc, conjunction));
    }
}

Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 5 with BitDocIdSet

Use of org.apache.lucene.util.BitDocIdSet in the lucene-solr project by Apache.

From the class RandomSamplingFacetsCollector, method createSample.

/**
 * Creates a sample of the given hits.
 *
 * <p>Iterates over {@code docs.bits} in bins of {@code binSize} consecutive hits, where
 * {@code binSize} is {@code 1.0 / samplingRate} truncated to an int, and marks the hit
 * whose position within the bin equals a randomly chosen index. The fields
 * {@code leftoverBin} and {@code leftoverIndex} are read at the start and written at the
 * end, so a bin left unfinished by one call is continued by the next call.
 *
 * @param docs the hits to sample from
 * @return a new {@code MatchingDocs} with the same context and {@code totalHits} as
 *     {@code docs}, a {@link BitDocIdSet} over the sampled documents, and {@code null} as
 *     the fourth constructor argument
 * @throws RuntimeException wrapping any {@link IOException} thrown while iterating
 */
private MatchingDocs createSample(MatchingDocs docs) {
    int maxdoc = docs.context.reader().maxDoc();
    // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
    FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
    // The cast truncates toward zero.
    int binSize = (int) (1.0 / samplingRate);
    try {
        // Position of the current hit within the current bin.
        int counter = 0;
        int limit, randomIndex;
        if (leftoverBin != NOT_CALCULATED) {
            // Continue the bin that the previous call left unfinished.
            limit = leftoverBin;
            // either NOT_CALCULATED, which means we already sampled from that bin,
            // or the next document to sample
            randomIndex = leftoverIndex;
        } else {
            limit = binSize;
            randomIndex = random.nextInt(binSize);
        }
        final DocIdSetIterator it = docs.bits.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            if (counter == randomIndex) {
                sampleDocs.set(doc);
            }
            counter++;
            if (counter >= limit) {
                // Bin exhausted: start a fresh full-size bin with a new random index.
                counter = 0;
                limit = binSize;
                randomIndex = random.nextInt(binSize);
            }
        }
        if (counter == 0) {
            // we either exhausted the bin and the iterator at the same time, or
            // this segment had no results. in the latter case we might want to
            // carry leftover to the next segment as is, but that complicates the
            // code and doesn't seem so important.
            leftoverBin = leftoverIndex = NOT_CALCULATED;
        } else {
            // Remember how many hits of the current bin are still to be consumed.
            leftoverBin = limit - counter;
            // NOTE(review): when randomIndex == counter neither branch below runs, so
            // leftoverIndex keeps whatever value it had before this call - confirm
            // that this is intended.
            if (randomIndex > counter) {
                // the document to sample is in the next bin
                leftoverIndex = randomIndex - counter;
            } else if (randomIndex < counter) {
                // we sampled a document from the bin, so just skip over remaining
                // documents in the bin in the next segment.
                leftoverIndex = NOT_CALCULATED;
            }
        }
        return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) IOException(java.io.IOException) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Aggregations

BitDocIdSet (org.apache.lucene.util.BitDocIdSet): 11, FixedBitSet (org.apache.lucene.util.FixedBitSet): 10, DocIdSet (org.apache.lucene.search.DocIdSet): 4, IOException (java.io.IOException): 3, LeafReader (org.apache.lucene.index.LeafReader): 3, DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator): 3, LeafReaderContext (org.apache.lucene.index.LeafReaderContext): 2, Bits (org.apache.lucene.util.Bits): 2, LinkedList (java.util.LinkedList): 1, IndexReader (org.apache.lucene.index.IndexReader): 1, IndexReaderContext (org.apache.lucene.index.IndexReaderContext): 1, IndexSearcher (org.apache.lucene.search.IndexSearcher): 1, Scorer (org.apache.lucene.search.Scorer): 1, Weight (org.apache.lucene.search.Weight): 1, Cell (org.apache.lucene.spatial.prefix.tree.Cell): 1, CellIterator (org.apache.lucene.spatial.prefix.tree.CellIterator): 1, SpatialRelation (org.locationtech.spatial4j.shape.SpatialRelation): 1