Search in sources :

Example 66 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class SloppyPhraseScorer method termGroups.

/**
 * Map each term to the single group that contains it.
 *
 * <p>The keys of {@code tord} are laid out in an array in the map's iteration order, and every
 * bit set in {@code bb.get(i)} is used as an index into that array; the term found there is
 * recorded as belonging to group {@code i}. If the same ordinal is set in more than one bit
 * set, the group with the highest index wins because later {@code put} calls overwrite.
 *
 * @param tord terms keyed in ordinal order (assumes the bit positions in {@code bb} refer to
 *     positions in this map's key iteration order - TODO confirm against the caller)
 * @param bb one bit set per group; the list index is the group number
 * @return a map from term to the number of the group it belongs to
 */
private HashMap<Term, Integer> termGroups(LinkedHashMap<Term, Integer> tord, ArrayList<FixedBitSet> bb) throws IOException {
    final HashMap<Term, Integer> groupOfTerm = new HashMap<>();
    final Term[] termsByOrd = tord.keySet().toArray(new Term[0]);
    int groupNo = 0;
    for (FixedBitSet members : bb) {
        int ord = members.nextSetBit(0);
        while (ord != DocIdSetIterator.NO_MORE_DOCS) {
            groupOfTerm.put(termsByOrd[ord], groupNo);
            // never ask for a bit index at or beyond the end of the set
            final int next = ord + 1;
            if (next < members.length()) {
                ord = members.nextSetBit(next);
            } else {
                ord = DocIdSetIterator.NO_MORE_DOCS;
            }
        }
        groupNo++;
    }
    return groupOfTerm;
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) FixedBitSet(org.apache.lucene.util.FixedBitSet) Term(org.apache.lucene.index.Term)

Example 67 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class SloppyPhraseScorer method gatherRptGroups.

/**
 * Detect repetition groups. Done once - for first doc.
 *
 * <p>Every {@code PhrasePositions} placed in a group gets its {@code rptGroup} field set to the
 * index of that group in the returned list.
 *
 * <ul>
 *   <li>Without multi-term repeats, two entries are grouped when neither is already in a group,
 *       their {@code offset} values differ, and {@code tpPos} yields the same value for both.
 *   <li>With multi-term repeats, group ids come from {@code ppTermsBitSets},
 *       {@code unionTermGroups} and {@code termGroups}; an entry joins the group of each of its
 *       terms that is a key of {@code rptTerms}.
 * </ul>
 *
 * @param rptTerms the repeating terms; only its keys are consulted directly here
 * @return the repetition groups, indexed by group id
 */
private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term, Integer> rptTerms) throws IOException {
    // NOTE(review): repeatingPPs is deliberately called before hasMultiTermRpts is read, exactly
    // as in the previous version; it may update that flag - confirm before reordering.
    final PhrasePositions[] repeating = repeatingPPs(rptTerms);
    final ArrayList<ArrayList<PhrasePositions>> groups = new ArrayList<>();

    if (hasMultiTermRpts) {
        // more involved - has multi-terms: derive the groups from the term groups
        final ArrayList<HashSet<PhrasePositions>> buckets = new ArrayList<>();
        final ArrayList<FixedBitSet> termBits = ppTermsBitSets(repeating, rptTerms);
        unionTermGroups(termBits);
        final HashMap<Term, Integer> groupOfTerm = termGroups(rptTerms, termBits);
        // one bucket per distinct group id (presumably ids are dense from 0 - verify)
        final int groupCount = new HashSet<Integer>(groupOfTerm.values()).size();
        while (buckets.size() < groupCount) {
            buckets.add(new HashSet<PhrasePositions>());
        }
        for (PhrasePositions pp : repeating) {
            for (Term term : pp.terms) {
                if (!rptTerms.containsKey(term)) {
                    continue;
                }
                final int groupId = groupOfTerm.get(term);
                buckets.get(groupId).add(pp);
                assert pp.rptGroup == -1 || pp.rptGroup == groupId;
                pp.rptGroup = groupId;
            }
        }
        for (HashSet<PhrasePositions> bucket : buckets) {
            groups.add(new ArrayList<>(bucket));
        }
        return groups;
    }

    // simpler - no multi-terms - can base on positions in first doc
    for (int first = 0; first < repeating.length; first++) {
        final PhrasePositions anchor = repeating[first];
        if (anchor.rptGroup >= 0) {
            // already marked as a repetition
            continue;
        }
        final int anchorPos = tpPos(anchor);
        for (int other = first + 1; other < repeating.length; other++) {
            final PhrasePositions candidate = repeating[other];
            // A repetition must not be in a group yet, must come from a different offset in the
            // query (two PPs originally in the same offset are not a repetition), and must share
            // the anchor's tpPos value.
            final boolean repetition = candidate.rptGroup < 0
                && candidate.offset != anchor.offset
                && tpPos(candidate) == anchorPos;
            if (repetition) {
                if (anchor.rptGroup < 0) {
                    // first repetition found for this anchor: open a new group holding it
                    anchor.rptGroup = groups.size();
                    final ArrayList<PhrasePositions> members = new ArrayList<>(2);
                    members.add(anchor);
                    groups.add(members);
                }
                final int groupId = anchor.rptGroup;
                candidate.rptGroup = groupId;
                groups.get(groupId).add(candidate);
            }
        }
    }
    return groups;
}
Also used : ArrayList(java.util.ArrayList) Term(org.apache.lucene.index.Term) FixedBitSet(org.apache.lucene.util.FixedBitSet) HashSet(java.util.HashSet)

Example 68 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class LRUQueryCache method cacheIntoBitSet.

/**
 * Runs {@code scorer} and records every collected document in a bit set.
 *
 * <p>The number of {@code collect} calls is tracked and handed to the resulting
 * {@code BitDocIdSet} as its cost.
 *
 * @param scorer the bulk scorer to run; {@code null} is passed as its second argument
 *     (presumably meaning no accept-docs filter - confirm against the BulkScorer API)
 * @param maxDoc the size of the bit set to allocate
 * @return a {@code BitDocIdSet} wrapping the bits that were set and the number of collected docs
 * @throws IOException if scoring fails
 */
private static DocIdSet cacheIntoBitSet(BulkScorer scorer, int maxDoc) throws IOException {
    final FixedBitSet matches = new FixedBitSet(maxDoc);
    // single-element array so the anonymous class below can mutate the counter
    final long[] collected = new long[1];
    final LeafCollector recorder = new LeafCollector() {

        @Override
        public void setScorer(Scorer ignored) throws IOException {
            // nothing to do here
        }

        @Override
        public void collect(int doc) throws IOException {
            collected[0]++;
            matches.set(doc);
        }
    };
    scorer.score(recorder, null);
    return new BitDocIdSet(matches, collected[0]);
}
Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) IOException(java.io.IOException)

Example 69 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestConjunctionDISI method testConjunctionApproximation.

// Test that the conjunction approximation is correct.
// Each round builds 2 to 5 scorers over random bit sets; some are plain iterators and some are
// two-phase (an approximation plus a "confirmed" subset). The conjunction must expose a
// two-phase iterator exactly when at least one input had one, and that iterator's documents
// must equal the intersection of the per-scorer expected sets.
public void testConjunctionApproximation() throws IOException {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; ++round) {
        final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
        final int numIterators = TestUtil.nextInt(random(), 2, 5);
        final FixedBitSet[] expected = new FixedBitSet[numIterators];
        final Scorer[] scorers = new Scorer[numIterators];
        boolean anyTwoPhase = false;
        for (int slot = 0; slot < scorers.length; ++slot) {
            final FixedBitSet candidates = randomSet(maxDoc);
            final boolean simple = random().nextBoolean();
            if (!simple) {
                // scorer with approximation: only the confirmed subset really matches
                final FixedBitSet confirmed = clearRandomBits(candidates);
                expected[slot] = confirmed;
                final DocIdSetIterator candidateDocs = new BitDocIdSet(candidates).iterator();
                final TwoPhaseIterator approximation = approximation(candidateDocs, confirmed);
                scorers[slot] = scorer(approximation);
                anyTwoPhase = true;
            } else {
                // simple iterator: every candidate matches
                expected[slot] = candidates;
                final DocIdSetIterator candidateDocs = new BitDocIdSet(candidates).iterator();
                scorers[slot] = new ConstantScoreScorer(null, 0f, candidateDocs);
            }
        }
        final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(scorers));
        final TwoPhaseIterator twoPhase = TwoPhaseIterator.unwrap(conjunction);
        assertEquals(anyTwoPhase, twoPhase != null);
        if (!anyTwoPhase) {
            // nothing to compare: the conjunction exposes no two-phase view
            continue;
        }
        assertEquals(intersect(expected), toBitSet(maxDoc, TwoPhaseIterator.asDocIdSetIterator(twoPhase)));
    }
}
Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 70 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestConjunctionDISI method randomSet.

/**
 * Builds a random bit set of size {@code maxDoc}.
 *
 * <p>A maximum step between 1 and 10 is drawn first; the first set bit is at a random index
 * below that step, and each following set bit lies a random 1..step positions after the
 * previous one, until {@code maxDoc} is reached.
 *
 * @param maxDoc the size of the bit set; no bit at or beyond this index is set
 * @return the populated bit set
 */
private static FixedBitSet randomSet(int maxDoc) {
    final int maxStep = TestUtil.nextInt(random(), 1, 10);
    final FixedBitSet bits = new FixedBitSet(maxDoc);
    int doc = random().nextInt(maxStep);
    while (doc < maxDoc) {
        bits.set(doc);
        doc += TestUtil.nextInt(random(), 1, maxStep);
    }
    return bits;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet)

Aggregations

FixedBitSet (org.apache.lucene.util.FixedBitSet)162 Term (org.apache.lucene.index.Term)27 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)26 Directory (org.apache.lucene.store.Directory)25 BytesRef (org.apache.lucene.util.BytesRef)22 IOException (java.io.IOException)19 Document (org.apache.lucene.document.Document)17 ArrayList (java.util.ArrayList)15 Query (org.apache.lucene.search.Query)15 NumericDocValues (org.apache.lucene.index.NumericDocValues)14 BitDocIdSet (org.apache.lucene.util.BitDocIdSet)13 Bits (org.apache.lucene.util.Bits)13 LeafReader (org.apache.lucene.index.LeafReader)12 IndexSearcher (org.apache.lucene.search.IndexSearcher)12 TermQuery (org.apache.lucene.search.TermQuery)12 IndexReader (org.apache.lucene.index.IndexReader)11 HashSet (java.util.HashSet)10 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)10 DocIterator (org.apache.solr.search.DocIterator)10 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)9