Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class BaseDocValuesFormatTestCase, method doTestRandomAdvance.
/**
 * Indexes a random mix of documents that either carry a value from {@code fieldCreator}
 * or are deliberately left without one ("missing"), then runs random interleavings of
 * nextDoc() and advance() over the field's doc-values iterator and asserts the iterator
 * never lands on a missing document.
 *
 * @param fieldCreator supplies the indexed field values and a matching iterator over the reader
 * @throws IOException on index read/write failure
 */
private void doTestRandomAdvance(FieldCreator fieldCreator) throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Directory directory = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(analyzer);
conf.setMergePolicy(newLogMergePolicy());
RandomIndexWriter w = new RandomIndexWriter(random(), directory, conf);
int numChunks = atLeast(10);
int id = 0;
// ids of documents indexed WITHOUT a value for the field under test
Set<Integer> missingSet = new HashSet<>();
for (int i = 0; i < numChunks; i++) {
// change sparseness for each chunk
double sparseChance = random().nextDouble();
int docCount = atLeast(1000);
for (int j = 0; j < docCount; j++) {
Document doc = new Document();
doc.add(new StoredField("id", id));
if (random().nextDouble() > sparseChance) {
doc.add(fieldCreator.next());
} else {
missingSet.add(id);
}
id++;
w.addDocument(doc);
}
}
if (random().nextBoolean()) {
// exercise both the multi-segment and the single-segment code paths
w.forceMerge(1);
}
// Now search the index:
IndexReader r = w.getReader();
// translate the stored-id based missingSet into a docID-keyed bitset
// (docIDs may not equal ids after merging)
BitSet missing = new FixedBitSet(r.maxDoc());
for (int docID = 0; docID < r.maxDoc(); docID++) {
Document doc = r.document(docID);
if (missingSet.contains(doc.getField("id").numericValue())) {
missing.set(docID);
}
}
for (int iter = 0; iter < 100; iter++) {
DocIdSetIterator values = fieldCreator.iterator(r);
// a fresh iterator must start positioned before the first doc
assertEquals(-1, values.docID());
while (true) {
int docID;
if (random().nextBoolean()) {
docID = values.nextDoc();
} else {
int range;
if (random().nextInt(10) == 7) {
// big jump
range = r.maxDoc() - values.docID();
} else {
// small jump
range = 25;
}
int inc = TestUtil.nextInt(random(), 1, range);
docID = values.advance(values.docID() + inc);
}
if (docID == NO_MORE_DOCS) {
break;
}
// the iterator must only visit docs that actually have a value
assertFalse(missing.get(docID));
}
}
IOUtils.close(r, w, directory);
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UniqueSinglevaluedSlotAcc, method collect.
@Override
public void collect(int doc, int slotNum) throws IOException {
  // Lazily advance the doc-values cursor; it may already be at or past doc.
  if (subDv.docID() < doc) {
    subDv.advance(doc);
  }
  if (subDv.docID() != doc) {
    return; // this doc has no value for the field
  }
  final int segOrd = subDv.ordValue();
  // map the segment-local ordinal to a global ordinal when a mapping exists
  final int globalOrd = (toGlobal != null) ? (int) toGlobal.get(segOrd) : segOrd;
  FixedBitSet slotBits = arr[slotNum];
  if (slotBits == null) {
    slotBits = new FixedBitSet(nTerms);
    arr[slotNum] = slotBits;
  }
  slotBits.set(globalOrd);
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UniqueSlotAcc, method calcCounts.
// we only calculate all the counts when sorting by count
public void calcCounts() {
  final int numSlots = arr.length;
  counts = new int[numSlots];
  for (int slot = 0; slot < numSlots; slot++) {
    final FixedBitSet slotBits = arr[slot];
    // a slot with no bitset collected nothing; its count stays 0
    if (slotBits != null) {
      counts[slot] = slotBits.cardinality();
    }
  }
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UnInvertedField, method getCounts.
/**
 * Accumulates per-term counts for the base doc set into {@code counts}.
 * "Big" terms (those in {@code bigTerms}) are counted via direct set
 * intersections; all remaining terms are decoded per-doc from the
 * un-inverted structure ({@code index} / {@code tnums}).
 */
private void getCounts(FacetFieldProcessorByArrayUIF processor, CountSlotAcc counts) throws IOException {
DocSet docs = processor.fcontext.base;
int baseSize = docs.size();
int maxDoc = searcher.maxDoc();
// what about allBuckets?
if (baseSize < processor.effectiveMincount) {
return;
}
final int[] index = this.index;
// If the base set covers more than half the index, iterate its complement
// instead and convert the counts back at the end (see the final doNegative block).
boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 && docs instanceof BitDocSet;
if (doNegative) {
FixedBitSet bs = ((BitDocSet) docs).getBits().clone();
bs.flip(0, maxDoc);
// TODO: when iterator across negative elements is available, use that
// instead of creating a new bitset and inverting.
docs = new BitDocSet(bs, maxDoc - baseSize);
// simply negating will mean that we have deleted docs in the set.
// that should be OK, as their entries in our table should be empty.
}
// For the biggest terms, do straight set intersections
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorting by index order
counts.incrementCount(tt.termNum, searcher.numDocs(tt.termQuery, docs));
}
if (termInstances > 0) {
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
int code = index[doc];
if ((code & 0xff) == 1) {
// low byte == 1: this doc's term list is stored externally in tnums;
// the upper 24 bits of code give the byte offset into that array.
int pos = code >>> 8;
int whichArray = (doc >>> 16) & 0xff;
byte[] arr = tnums[whichArray];
int tnum = 0;
for (; ; ) {
// decode one variable-length delta: 7 bits per byte,
// a set high bit means another byte follows
int delta = 0;
for (; ; ) {
byte b = arr[pos++];
delta = (delta << 7) | (b & 0x7f);
if ((b & 0x80) == 0)
break;
}
// a zero delta terminates this doc's term list
if (delta == 0)
break;
tnum += delta - TNUM_OFFSET;
counts.incrementCount(tnum, 1);
}
} else {
// term deltas are packed inline into the 4 bytes of code itself,
// same 7-bits-plus-continuation-bit scheme, consumed a byte at a time
int tnum = 0;
int delta = 0;
for (; ; ) {
delta = (delta << 7) | (code & 0x7f);
if ((code & 0x80) == 0) {
if (delta == 0)
break;
tnum += delta - TNUM_OFFSET;
counts.incrementCount(tnum, 1);
delta = 0;
}
code >>>= 8;
}
}
}
}
if (doNegative) {
// counts currently holds complement counts; real = maxTermCounts - complement,
// so adding (maxTermCounts - 2*complement) yields the real count.
for (int i = 0; i < numTermsInField; i++) {
// counts[i] = maxTermCounts[i] - counts[i];
counts.incrementCount(i, maxTermCounts[i] - counts.getCount(i) * 2);
}
}
/*** TODO - future optimization to handle allBuckets
if (processor.allBucketsSlot >= 0) {
int all = 0; // overflow potential
for (int i=0; i<numTermsInField; i++) {
all += counts.getCount(i);
}
counts.incrementCount(processor.allBucketsSlot, all);
}
***/
}
Use of org.apache.lucene.util.FixedBitSet in the Apache lucene-solr project.
From class UniqueMultivaluedSlotAcc, method collect.
@Override
public void collect(int doc, int slotNum) throws IOException {
  FixedBitSet slotBits = arr[slotNum];
  if (slotBits == null) {
    slotBits = new FixedBitSet(nTerms);
    arr[slotNum] = slotBits;
  }
  // publish the target bitset; the term callbacks below record into this field
  bits = slotBits;
  final int globalDoc = doc + currentDocBase;
  // this will call back to our Callback.call(int termNum)
  docToTerm.getBigTerms(globalDoc, this);
  docToTerm.getSmallTerms(globalDoc, this);
}
Aggregations