Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class BaseDocValuesFormatTestCase, method doTestRandomAdvance.
/**
 * Indexes a random mix of documents that either carry a value from {@code fieldCreator}
 * or are deliberately left without one ("missing"), then runs random interleavings of
 * nextDoc() and advance() over the field's doc-values iterator and asserts the iterator
 * never lands on a missing document.
 *
 * @param fieldCreator supplies the indexed field values and a matching iterator over the reader
 * @throws IOException on index read/write failure
 */
private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter w = new RandomIndexWriter(random(), directory, conf);
int numChunks = atLeast(10);
int id = 0;
// ids of documents indexed WITHOUT a value for the field under test
Set<Integer> missingSet = new HashSet<>();
for (int i = 0; i < numChunks; i++) {
// change sparseness for each chunk
double sparseChance = random().nextDouble();
int docCount = atLeast(1000);
for (int j = 0; j < docCount; j++) {
Document doc = new Document();
doc.add(new StoredField("id", id));
if (random().nextDouble() > sparseChance) {
doc.add(fieldCreator.next());
} else {
missingSet.add(id);
}
id++;
w.addDocument(doc);
}
}
if (random().nextBoolean()) {
// exercise both the multi-segment and the single-segment code paths
w.forceMerge(1);
}
// Now search the index:
IndexReader r = w.getReader();
// translate the stored-id based missingSet into a docID-keyed bitset
// (docIDs may not equal ids after merging)
BitSet missing = new FixedBitSet(r.maxDoc());
for (int docID = 0; docID < r.maxDoc(); docID++) {
Document doc = r.document(docID);
if (missingSet.contains(doc.getField("id").numericValue())) {
missing.set(docID);
}
}
for (int iter = 0; iter < 100; iter++) {
DocIdSetIterator values = fieldCreator.iterator(r);
// a fresh iterator must start positioned before the first doc
assertEquals(-1, values.docID());
while (true) {
int docID;
if (random().nextBoolean()) {
docID = values.nextDoc();
} else {
int range;
if (random().nextInt(10) == 7) {
// big jump
range = r.maxDoc() - values.docID();
} else {
// small jump
range = 25;
}
int inc = TestUtil.nextInt(random(), 1, range);
docID = values.advance(values.docID() + inc);
}
if (docID == NO_MORE_DOCS) {
break;
}
// the iterator must only visit docs that actually have a value
assertFalse(missing.get(docID));
}
}
IOUtils.close(r, w, directory);
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UniqueSinglevaluedSlotAcc, method collect.
@Override
public void collect(int doc, int slotNum) throws IOException {
  // Lazily advance the doc-values cursor; it may already be at or past doc.
  if (subDv.docID() < doc) {
    subDv.advance(doc);
  }
  if (subDv.docID() != doc) {
    return; // this doc has no value for the field
  }
  final int segOrd = subDv.ordValue();
  // map the segment-local ordinal to a global ordinal when a mapping exists
  final int globalOrd = (toGlobal != null) ? (int) toGlobal.get(segOrd) : segOrd;
  FixedBitSet slotBits = arr[slotNum];
  if (slotBits == null) {
    slotBits = new FixedBitSet(nTerms);
    arr[slotNum] = slotBits;
  }
  slotBits.set(globalOrd);
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UniqueSlotAcc, method calcCounts.
// we only calculate all the counts when sorting by count
public void calcCounts() {
  final int numSlots = arr.length;
  counts = new int[numSlots];
  for (int slot = 0; slot < numSlots; slot++) {
    final FixedBitSet slotBits = arr[slot];
    // a slot with no bitset collected nothing; its count stays 0
    if (slotBits != null) {
      counts[slot] = slotBits.cardinality();
    }
  }
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UnInvertedField, method getCounts.
/**
 * Accumulates per-term counts for the base doc set into {@code counts}.
 * "Big" terms (those in {@code bigTerms}) are counted via direct set
 * intersections; all remaining terms are decoded per-doc from the
 * un-inverted structure ({@code index} / {@code tnums}).
 */
private void getCounts(FacetFieldProcessorByArrayUIF processor, CountSlotAcc counts) throws IOException {
DocSet docs = processor.fcontext.base;
int baseSize = docs.size();
int maxDoc = searcher.maxDoc();
// what about allBuckets?
if (baseSize < processor.effectiveMincount) {
return;
}
final int[] index = this.index;
// If the base set covers more than half the index, iterate its complement
// instead and convert the counts back at the end (see the final doNegative block).
boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 && docs instanceof BitDocSet;
if (doNegative) {
FixedBitSet bs = ((BitDocSet) docs).getBits().clone();
bs.flip(0, maxDoc);
// TODO: when iterator across negative elements is available, use that
// instead of creating a new bitset and inverting.
docs = new BitDocSet(bs, maxDoc - baseSize);
// simply negating will mean that we have deleted docs in the set.
// that should be OK, as their entries in our table should be empty.
}
// For the biggest terms, do straight set intersections
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorting by index order
counts.incrementCount(tt.termNum, searcher.numDocs(tt.termQuery, docs));
}
if (termInstances > 0) {
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
int code = index[doc];
if ((code & 0xff) == 1) {
// low byte == 1: this doc's term list is stored externally in tnums;
// the upper 24 bits of code give the byte offset into that array.
int pos = code >>> 8;
int whichArray = (doc >>> 16) & 0xff;
byte[] arr = tnums[whichArray];
int tnum = 0;
for (; ; ) {
// decode one variable-length delta: 7 bits per byte,
// a set high bit means another byte follows
int delta = 0;
for (; ; ) {
byte b = arr[pos++];
delta = (delta << 7) | (b & 0x7f);
if ((b & 0x80) == 0)
break;
}
// a zero delta terminates this doc's term list
if (delta == 0)
break;
tnum += delta - TNUM_OFFSET;
counts.incrementCount(tnum, 1);
}
} else {
// term deltas are packed inline into the 4 bytes of code itself,
// same 7-bits-plus-continuation-bit scheme, consumed a byte at a time
int tnum = 0;
int delta = 0;
for (; ; ) {
delta = (delta << 7) | (code & 0x7f);
if ((code & 0x80) == 0) {
if (delta == 0)
break;
tnum += delta - TNUM_OFFSET;
counts.incrementCount(tnum, 1);
delta = 0;
}
code >>>= 8;
}
}
}
}
if (doNegative) {
// counts currently holds complement counts; real = maxTermCounts - complement,
// so adding (maxTermCounts - 2*complement) yields the real count.
for (int i = 0; i < numTermsInField; i++) {
// counts[i] = maxTermCounts[i] - counts[i];
counts.incrementCount(i, maxTermCounts[i] - counts.getCount(i) * 2);
}
}
/*** TODO - future optimization to handle allBuckets
if (processor.allBucketsSlot >= 0) {
int all = 0; // overflow potential
for (int i=0; i<numTermsInField; i++) {
all += counts.getCount(i);
}
counts.incrementCount(processor.allBucketsSlot, all);
}
***/
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UniqueMultivaluedSlotAcc, method collect.
@Override
public void collect(int doc, int slotNum) throws IOException {
  FixedBitSet slotBits = arr[slotNum];
  if (slotBits == null) {
    slotBits = new FixedBitSet(nTerms);
    arr[slotNum] = slotBits;
  }
  // publish the target bitset; the term callbacks below record into this field
  bits = slotBits;
  final int globalDoc = doc + currentDocBase;
  // this will call back to our Callback.call(int termNum)
  docToTerm.getBigTerms(globalDoc, this);
  docToTerm.getSmallTerms(globalDoc, this);
}
Aggregations