use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class SortingLeafReader method getNormValues.
/**
 * Returns the norms of {@code field} with document IDs remapped through {@code docMap},
 * or {@code null} when the underlying reader {@code in} has no norms for that field.
 *
 * <p>The remapped values are built at most once per field: they are stored in
 * {@code cachedNorms} (guarded by its monitor) and each call wraps the cached entry
 * in a new {@code SortingNumericDocValues}.
 *
 * @param field name of the field whose norms are requested
 * @return remapped norms, or {@code null} if {@code in} returns none
 * @throws IOException if reading the underlying norms fails
 */
@Override
public NumericDocValues getNormValues(String field) throws IOException {
  final NumericDocValues source = in.getNormValues(field);
  if (source == null) {
    return null;
  }
  CachedNumericDVs cached;
  synchronized (cachedNorms) {
    cached = cachedNorms.get(field);
    if (cached == null) {
      // First request for this field: walk the old iterator once and scatter
      // each value to the slot of its new document ID.
      final FixedBitSet hasValue = new FixedBitSet(maxDoc());
      final long[] remapped = new long[maxDoc()];
      for (int oldDoc = source.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = source.nextDoc()) {
        final int newDoc = docMap.oldToNew(oldDoc);
        hasValue.set(newDoc);
        remapped[newDoc] = source.longValue();
      }
      cached = new CachedNumericDVs(remapped, hasValue);
      cachedNorms.put(field, cached);
    }
  }
  return new SortingNumericDocValues(cached);
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestBooleanOr method testBooleanScorerMax.
/**
 * Scores a two-clause SHOULD query over a single-segment index in consecutive,
 * randomly sized doc-ID windows. The collector asserts that no collected document
 * lies at or beyond the end of the window scored so far, and the test finally
 * checks that every indexed document was collected.
 */
public void testBooleanScorerMax() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));

  // Every document carries the same single term, so the query below matches all of them.
  final int numDocs = atLeast(10000);
  int added = 0;
  while (added < numDocs) {
    Document doc = new Document();
    doc.add(newField("field", "a", TextField.TYPE_NOT_STORED));
    writer.addDocument(doc);
    added++;
  }

  writer.forceMerge(1);
  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);

  // Two identical SHOULD clauses on the same term.
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  for (int clause = 0; clause < 2; clause++) {
    builder.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
  }

  Weight weight = searcher.createNormalizedWeight(builder.build(), true);

  assertEquals(1, searcher.getIndexReader().leaves().size());
  BulkScorer bulkScorer = weight.bulkScorer(searcher.getIndexReader().leaves().get(0));

  final FixedBitSet collected = new FixedBitSet(numDocs);
  // Exclusive upper bound of the window most recently handed to the scorer.
  final AtomicInteger upTo = new AtomicInteger();
  LeafCollector collector = new SimpleCollector() {

    @Override
    public void collect(int doc) {
      assertTrue("collected doc=" + doc + " beyond max=" + upTo, doc < upTo.intValue());
      collected.set(doc);
    }

    @Override
    public boolean needsScores() {
      return false;
    }
  };

  // Advance through the doc-ID space in steps of 1..1000 documents until it is covered.
  for (int windowStart = upTo.intValue(); windowStart < numDocs; windowStart = upTo.intValue()) {
    final int step = TestUtil.nextInt(random(), 1, 1000);
    final int windowEnd = upTo.addAndGet(step);
    bulkScorer.score(collector, null, windowStart, windowEnd);
  }

  assertEquals(numDocs, collected.cardinality());
  reader.close();
  dir.close();
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class TestConjunctionDISI method testConjunction.
// Verifies the conjunction iterator: for random bit sets wrapped in a random mix of
// scorer flavours, intersecting the scorers must yield exactly the intersection of the sets.
public void testConjunction() throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; ++round) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int numScorers = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] expectedSets = new FixedBitSet[numScorers];
    final Scorer[] scorers = new Scorer[numScorers];

    for (int slot = 0; slot < scorers.length; ++slot) {
      final FixedBitSet bits = randomSet(maxDoc);
      final int flavour = random().nextInt(3);
      if (flavour == 0) {
        // plain iterator, hidden behind anonymizeIterator
        expectedSets[slot] = bits;
        iteratorsPlaceholder:
        scorers[slot] = new ConstantScoreScorer(null, 0f, anonymizeIterator(new BitDocIdSet(bits).iterator()));
      } else if (flavour == 1) {
        // iterator obtained directly from the bit set
        expectedSets[slot] = bits;
        scorers[slot] = new ConstantScoreScorer(null, 0f, new BitDocIdSet(bits).iterator());
      } else {
        // two-phase scorer: the full set is the approximation, and the expected
        // matches are the set returned by clearRandomBits
        final FixedBitSet confirmed = clearRandomBits(bits);
        expectedSets[slot] = confirmed;
        final TwoPhaseIterator twoPhase = approximation(new BitDocIdSet(bits).iterator(), confirmed);
        scorers[slot] = scorer(twoPhase);
      }
    }

    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(scorers));
    assertEquals(intersect(expectedSets), toBitSet(maxDoc, conjunction));
  }
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class CheckIndex method checkTermRanges.
/**
 * Makes an effort to visit "fake" (e.g. auto-prefix) terms by running term range
 * intersections over intervals of terms. The target interval starts at {@code numTerms}
 * and shrinks by a factor of 0.75 per pass until it drops below 10; within a pass the
 * range boundaries slide forward by a quarter of the interval. This is not guaranteed to
 * hit all non-real terms (doing that in general is non-trivial), but it should hit many of
 * them and validate their postings against the postings for the real terms.
 *
 * <p>Each pass also verifies that the terms enum returns terms in strictly increasing order.
 *
 * @param field    name of the field being checked
 * @param maxDoc   size used for the two scratch bit sets and passed on to {@code checkSingleTermRange}
 * @param terms    terms of the field
 * @param numTerms initial interval size, in terms
 * @throws IOException if reading terms fails
 * @throws RuntimeException if two consecutive terms are not in strictly increasing order
 */
private static void checkTermRanges(String field, int maxDoc, Terms terms, long numTerms) throws IOException {
  // Scratch bit sets, allocated once and handed to every checkSingleTermRange call.
  final FixedBitSet normalDocs = new FixedBitSet(maxDoc);
  final FixedBitSet intersectDocs = new FixedBitSet(maxDoc);

  // "interval" is the number of terms we target per range at the current level.
  for (double interval = numTerms; interval >= 10.0; interval *= .75) {
    // A fresh enum per level locates the min/max term of each sliding, overlapping range.
    final TermsEnum termsEnum = terms.iterator();
    long seen = 0;
    final Deque<BytesRef> bounds = new LinkedList<>();
    long lastBoundAt = Long.MIN_VALUE;
    BytesRefBuilder previous = null;

    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      // Record a new boundary every quarter interval; once five are queued, test the
      // range from the oldest (which is dropped) to the newest.
      if (seen >= lastBoundAt + interval / 4) {
        bounds.add(BytesRef.deepCopyOf(term));
        lastBoundAt = seen;
        if (bounds.size() == 5) {
          final BytesRef minTerm = bounds.removeFirst();
          final BytesRef maxTerm = bounds.getLast();
          checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
        }
      }
      seen++;

      // Ordering check against the previously returned term.
      if (previous == null) {
        previous = new BytesRefBuilder();
      } else if (previous.get().compareTo(term) >= 0) {
        throw new RuntimeException("terms out of order: lastTerm=" + previous.get() + " term=" + term);
      }
      previous.copyBytes(term);
    }

    // Final range of the level: from the oldest pending boundary to the last term seen.
    if (previous != null && !bounds.isEmpty()) {
      final BytesRef minTerm = bounds.removeFirst();
      final BytesRef maxTerm = previous.get();
      checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
    }
  }
}
use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.
the class CheckIndex method checkSortedDocValues.
/**
 * Validates a {@code SortedDocValues} instance: the iterator must start unpositioned
 * (docID -1), every per-document ord must lie in {@code [0, valueCount - 1]}, the largest
 * ord seen must equal {@code valueCount - 1}, every ord must be used by at least one
 * document, and the values looked up by ord must be strictly increasing.
 *
 * @param fieldName field name, used only in error messages
 * @param maxDoc    not used by this method
 * @param dv        doc values to validate; its iterator is consumed
 * @throws IOException if reading the doc values fails
 * @throws RuntimeException on the first violated check
 */
private static void checkSortedDocValues(String fieldName, int maxDoc, SortedDocValues dv) throws IOException {
  if (dv.docID() != -1) {
    throw new RuntimeException("sorted dv iterator for field: " + fieldName + " should start at docID=-1, but got " + dv.docID());
  }

  final int maxOrd = dv.getValueCount() - 1;
  final FixedBitSet usedOrds = new FixedBitSet(dv.getValueCount());
  int highestSeen = -1;

  // Pass 1: walk every document that has a value and validate its ord.
  while (dv.nextDoc() != NO_MORE_DOCS) {
    final int ord = dv.ordValue();
    if (ord == -1) {
      throw new RuntimeException("dv for field: " + fieldName + " has -1 ord");
    }
    if (ord < -1 || ord > maxOrd) {
      throw new RuntimeException("ord out of bounds: " + ord);
    }
    highestSeen = Math.max(highestSeen, ord);
    usedOrds.set(ord);
  }

  if (maxOrd != highestSeen) {
    throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + highestSeen);
  }
  if (usedOrds.cardinality() != dv.getValueCount()) {
    throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + usedOrds.cardinality());
  }

  // Pass 2: values must be strictly increasing in ord order.
  BytesRef previous = null;
  for (int ord = 0; ord <= maxOrd; ord++) {
    final BytesRef term = dv.lookupOrd(ord);
    // Return value is ignored. NOTE(review): presumably isValid() throws on a malformed BytesRef - confirm.
    term.isValid();
    if (previous != null && term.compareTo(previous) <= 0) {
      throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + previous + " >=" + term);
    }
    // Copy the value before the next lookupOrd call.
    previous = BytesRef.deepCopyOf(term);
  }
}
Aggregations