use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class SloppyPhraseScorer method termGroups.
/**
 * Builds a lookup from each term to the number of the group that holds it.
 *
 * <p>Terms are addressed by ordinal: the ordinal of a term is its index in the
 * iteration order of {@code tord}'s key set. The group number is the index of a
 * bit set inside {@code bb}; every set bit in that bit set is read as a term ordinal.
 * If an ordinal is set in more than one bit set, the highest group number is kept.
 *
 * @param tord terms whose key order defines the ordinal-to-term mapping
 * @param bb one bit set of term ordinals per group
 * @return map from term to group number
 */
private HashMap<Term, Integer> termGroups(LinkedHashMap<Term, Integer> tord, ArrayList<FixedBitSet> bb) throws IOException {
  final Term[] termByOrd = tord.keySet().toArray(new Term[0]);
  final HashMap<Term, Integer> groupOfTerm = new HashMap<>();
  int groupNo = 0;
  for (FixedBitSet members : bb) {
    int ord = members.nextSetBit(0);
    while (ord != DocIdSetIterator.NO_MORE_DOCS) {
      groupOfTerm.put(termByOrd[ord], groupNo);
      // nextSetBit must not be asked to start beyond the last bit, so stop explicitly there
      final int from = ord + 1;
      if (from >= members.length()) {
        ord = DocIdSetIterator.NO_MORE_DOCS;
      } else {
        ord = members.nextSetBit(from);
      }
    }
    groupNo++;
  }
  return groupOfTerm;
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class SloppyPhraseScorer method gatherRptGroups.
/**
 * Detects repetition groups among the repeating phrase positions. Done once - for first doc.
 *
 * <p>Every phrase position that ends up in a group gets its {@code rptGroup} field set
 * to the index of that group in the returned list.
 *
 * @param rptTerms the repeating terms, passed on to the helpers that select and group positions
 * @return one list of phrase positions per repetition group
 */
private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term, Integer> rptTerms) throws IOException {
  final PhrasePositions[] repeating = repeatingPPs(rptTerms);
  final ArrayList<ArrayList<PhrasePositions>> groups = new ArrayList<>();

  if (hasMultiTermRpts) {
    // more involved - has multi-terms: group through the terms the positions share
    final ArrayList<HashSet<PhrasePositions>> membersByGroup = new ArrayList<>();
    final ArrayList<FixedBitSet> termBits = ppTermsBitSets(repeating, rptTerms);
    unionTermGroups(termBits);
    final HashMap<Term, Integer> groupOfTerm = termGroups(rptTerms, termBits);
    final HashSet<Integer> groupIds = new HashSet<>(groupOfTerm.values());
    final int groupCount = groupIds.size();
    while (membersByGroup.size() < groupCount) {
      membersByGroup.add(new HashSet<PhrasePositions>());
    }
    for (PhrasePositions candidate : repeating) {
      for (Term term : candidate.terms) {
        if (!rptTerms.containsKey(term)) {
          continue;
        }
        final int groupId = groupOfTerm.get(term);
        membersByGroup.get(groupId).add(candidate);
        // a position must never be pulled into two different groups
        assert candidate.rptGroup == -1 || candidate.rptGroup == groupId;
        candidate.rptGroup = groupId;
      }
    }
    for (HashSet<PhrasePositions> members : membersByGroup) {
      groups.add(new ArrayList<>(members));
    }
    return groups;
  }

  // simpler - no multi-terms - can base on positions in first doc
  final int count = repeating.length;
  for (int first = 0; first < count; first++) {
    final PhrasePositions head = repeating[first];
    if (head.rptGroup < 0) { // otherwise already marked as a repetition
      final int headPos = tpPos(head);
      for (int second = first + 1; second < count; second++) {
        final PhrasePositions other = repeating[second];
        // A repetition needs: not yet grouped, a different original query offset,
        // and the same tpPos as the head.
        final boolean repetition =
            other.rptGroup < 0 && other.offset != head.offset && tpPos(other) == headPos;
        if (repetition) {
          if (head.rptGroup < 0) {
            // first repetition found for head: open a new group that starts with head
            head.rptGroup = groups.size();
            final ArrayList<PhrasePositions> opened = new ArrayList<>(2);
            opened.add(head);
            groups.add(opened);
          }
          final int groupId = head.rptGroup;
          other.rptGroup = groupId;
          groups.get(groupId).add(other);
        }
      }
    }
  }
  return groups;
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class LRUQueryCache method cacheIntoBitSet.
/**
 * Runs {@code scorer} to completion, recording every collected doc in a
 * {@link FixedBitSet} of size {@code maxDoc}.
 *
 * @param scorer bulk scorer whose collected documents are recorded
 * @param maxDoc number of bits to allocate in the bit set
 * @return a {@link BitDocIdSet} over the recorded bits, with the number of
 *         {@code collect} calls passed as its second constructor argument
 */
private static DocIdSet cacheIntoBitSet(BulkScorer scorer, int maxDoc) throws IOException {
  final FixedBitSet collected = new FixedBitSet(maxDoc);

  // Collector that marks each doc it sees and counts how many times it was called.
  final class MarkingCollector implements LeafCollector {
    long hits;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      // scores are not needed, only the doc ids
    }

    @Override
    public void collect(int doc) throws IOException {
      hits++;
      collected.set(doc);
    }
  }

  final MarkingCollector marker = new MarkingCollector();
  scorer.score(marker, null);
  return new BitDocIdSet(collected, marker.hits);
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestConjunctionDISI method testConjunctionApproximation.
// Checks the approximation exposed by a conjunction: it must exist exactly when at
// least one sub-scorer has an approximation, and iterating it with confirmation must
// give the intersection of the confirmed sets.
public void testConjunctionApproximation() throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; ++round) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int scorerCount = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] expected = new FixedBitSet[scorerCount];
    final Scorer[] scorers = new Scorer[scorerCount];
    boolean anyTwoPhase = false;

    for (int slot = 0; slot < scorers.length; ++slot) {
      final FixedBitSet candidates = randomSet(maxDoc);
      final boolean plain = random().nextBoolean();
      if (!plain) {
        // scorer with approximation: candidates are approximate matches, a subset is confirmed
        final FixedBitSet confirmed = clearRandomBits(candidates);
        expected[slot] = confirmed;
        final DocIdSetIterator approximate = new BitDocIdSet(candidates).iterator();
        final TwoPhaseIterator twoPhase = approximation(approximate, confirmed);
        scorers[slot] = scorer(twoPhase);
        anyTwoPhase = true;
      } else {
        // simple iterator: every candidate is a match
        expected[slot] = candidates;
        final DocIdSetIterator exact = new BitDocIdSet(candidates).iterator();
        scorers[slot] = new ConstantScoreScorer(null, 0f, exact);
      }
    }

    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(scorers));
    final TwoPhaseIterator unwrapped = TwoPhaseIterator.unwrap(conjunction);
    assertEquals(anyTwoPhase, unwrapped != null);
    if (anyTwoPhase) {
      final DocIdSetIterator confirmedDocs = TwoPhaseIterator.asDocIdSetIterator(unwrapped);
      assertEquals(intersect(expected), toBitSet(maxDoc, confirmedDocs));
    }
  }
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestConjunctionDISI method randomSet.
/**
 * Creates a bit set of size {@code maxDoc} with randomly spaced bits set.
 *
 * <p>A maximum step between 1 and 10 is drawn first. The first set bit is drawn below
 * that step, and each following gap is drawn between 1 and the step.
 *
 * @param maxDoc size of the bit set; only docs below it are set
 * @return the populated bit set
 */
private static FixedBitSet randomSet(int maxDoc) {
  final int maxGap = TestUtil.nextInt(random(), 1, 10);
  final FixedBitSet result = new FixedBitSet(maxDoc);
  int next = random().nextInt(maxGap);
  while (next < maxDoc) {
    result.set(next);
    next += TestUtil.nextInt(random(), 1, maxGap);
  }
  return result;
}
Aggregations