Search in sources :

Example 46 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class SortingLeafReader method getNormValues.

/**
 * Returns the norms of {@code field} re-addressed through {@code docMap}, or {@code null} when the
 * wrapped reader exposes no norms for that field.
 *
 * <p>On a cache miss the old norms are iterated once, each value is stored at its new doc ID, and
 * the result is put into {@code cachedNorms}. Lookup and build both run while holding the monitor
 * of {@code cachedNorms}.
 */
@Override
public NumericDocValues getNormValues(String field) throws IOException {
    final NumericDocValues oldNorms = in.getNormValues(field);
    if (oldNorms == null) {
        return null;
    }
    CachedNumericDVs remapped;
    synchronized (cachedNorms) {
        remapped = cachedNorms.get(field);
        if (remapped == null) {
            // Tracks which new doc IDs actually carry a norm value.
            final FixedBitSet presence = new FixedBitSet(maxDoc());
            final long[] remappedValues = new long[maxDoc()];
            for (int oldDoc = oldNorms.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = oldNorms.nextDoc()) {
                final int newDoc = docMap.oldToNew(oldDoc);
                presence.set(newDoc);
                remappedValues[newDoc] = oldNorms.longValue();
            }
            remapped = new CachedNumericDVs(remappedValues, presence);
            cachedNorms.put(field, remapped);
        }
    }
    return new SortingNumericDocValues(remapped);
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 47 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestBooleanOr method testBooleanScorerMax.

/**
 * Indexes identical one-term documents, merges to a single leaf, then bulk-scores a two-clause
 * SHOULD query in randomly sized doc-ID windows. Asserts that no hit is collected at or beyond the
 * current window's upper bound and that the number of distinct hits equals the document count.
 */
public void testBooleanScorerMax() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())));
    final int docCount = atLeast(10000);
    for (int added = 0; added < docCount; added++) {
        Document doc = new Document();
        doc.add(newField("field", "a", TextField.TYPE_NOT_STORED));
        writer.addDocument(doc);
    }
    writer.forceMerge(1);
    IndexReader reader = writer.getReader();
    writer.close();
    IndexSearcher searcher = newSearcher(reader);

    // Two identical optional clauses on the same term.
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (int clause = 0; clause < 2; clause++) {
        builder.add(new TermQuery(new Term("field", "a")), BooleanClause.Occur.SHOULD);
    }
    Weight weight = searcher.createNormalizedWeight(builder.build(), true);
    assertEquals(1, searcher.getIndexReader().leaves().size());
    BulkScorer scorer = weight.bulkScorer(searcher.getIndexReader().leaves().get(0));

    final FixedBitSet hits = new FixedBitSet(docCount);
    // Exclusive upper bound of the window currently being scored; read by the collector.
    final AtomicInteger upperBound = new AtomicInteger();
    LeafCollector collector = new SimpleCollector() {

        @Override
        public boolean needsScores() {
            return false;
        }

        @Override
        public void collect(int doc) {
            assertTrue("collected doc=" + doc + " beyond max=" + upperBound, doc < upperBound.intValue());
            hits.set(doc);
        }
    };

    // Each pass scores [min, max), where max advances by a random increment in [1, 1000].
    for (int min = upperBound.intValue(); min < docCount; min = upperBound.intValue()) {
        final int max = upperBound.addAndGet(TestUtil.nextInt(random(), 1, 1000));
        scorer.score(collector, null, min, max);
    }
    assertEquals(docCount, hits.cardinality());
    reader.close();
    dir.close();
}
Also used : Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) FixedBitSet(org.apache.lucene.util.FixedBitSet) IndexReader(org.apache.lucene.index.IndexReader) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Directory(org.apache.lucene.store.Directory)

Example 48 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class TestConjunctionDISI method testConjunction.

// Checks the conjunction iterator: for random bit sets wrapped in randomly chosen scorer
// flavours, the docs produced by the conjunction must equal the intersection of the sets.
public void testConjunction() throws IOException {
    final int rounds = atLeast(100);
    for (int round = 0; round < rounds; ++round) {
        final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
        final int scorerCount = TestUtil.nextInt(random(), 2, 5);
        final FixedBitSet[] expectedSets = new FixedBitSet[scorerCount];
        final Scorer[] scorers = new Scorer[scorerCount];
        for (int slot = 0; slot < scorers.length; ++slot) {
            final FixedBitSet bits = randomSet(maxDoc);
            final int flavour = random().nextInt(3);
            if (flavour == 0) {
                // plain iterator, passed through anonymizeIterator
                expectedSets[slot] = bits;
                scorers[slot] = new ConstantScoreScorer(null, 0f, anonymizeIterator(new BitDocIdSet(bits).iterator()));
            } else if (flavour == 1) {
                // iterator handed over directly from the bit set
                expectedSets[slot] = bits;
                scorers[slot] = new ConstantScoreScorer(null, 0f, new BitDocIdSet(bits).iterator());
            } else {
                // scorer with approximation: the expected matches are the confirmed subset
                final FixedBitSet confirmed = clearRandomBits(bits);
                expectedSets[slot] = confirmed;
                final TwoPhaseIterator twoPhase = approximation(new BitDocIdSet(bits).iterator(), confirmed);
                scorers[slot] = scorer(twoPhase);
            }
        }
        final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(scorers));
        assertEquals(intersect(expectedSets), toBitSet(maxDoc, conjunction));
    }
}
Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet)

Example 49 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class CheckIndex method checkTermRanges.

/**
 * Makes an effort to visit "fake" (e.g. auto-prefix) terms by running term range checks over an
 * initially wide interval of terms, at different boundaries, and then gradually shrinking the
 * interval. This is not guaranteed to hit all non-real terms, but it should hit many of them.
 *
 * <p>Each level walks the whole terms enum, records a boundary term every quarter interval, and
 * checks the range spanned by the five most recent boundaries. The walk also verifies that terms
 * arrive in strictly increasing order. The interval starts at {@code numTerms} and is multiplied
 * by 0.75 per level until it drops below 10.
 *
 * @throws RuntimeException if two consecutive terms are not in strictly increasing order
 */
private static void checkTermRanges(String field, int maxDoc, Terms terms, long numTerms) throws IOException {
    // Scratch bit sets passed to every single-range check.
    FixedBitSet normalDocs = new FixedBitSet(maxDoc);
    FixedBitSet intersectDocs = new FixedBitSet(maxDoc);
    // "interval" is how many terms we target per range at the current level.
    for (double interval = numTerms; interval >= 10.0; interval *= .75) {
        // Walk the enum to locate min/max terms of each sliding, overlapping range at this level.
        TermsEnum termsEnum = terms.iterator();
        long termsSeen = 0;
        long lastBoundIndex = Long.MIN_VALUE;
        Deque<BytesRef> bounds = new LinkedList<>();
        BytesRefBuilder previous = null;
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            if (termsSeen >= lastBoundIndex + interval / 4) {
                bounds.add(BytesRef.deepCopyOf(term));
                lastBoundIndex = termsSeen;
                if (bounds.size() == 5) {
                    BytesRef minTerm = bounds.removeFirst();
                    BytesRef maxTerm = bounds.getLast();
                    checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
                }
            }
            termsSeen++;
            if (previous == null) {
                previous = new BytesRefBuilder();
            } else if (previous.get().compareTo(term) >= 0) {
                throw new RuntimeException("terms out of order: lastTerm=" + previous.get() + " term=" + term);
            }
            previous.copyBytes(term);
        }
        // Trailing range: from the oldest pending boundary up to the very last term seen.
        if (previous != null && !bounds.isEmpty()) {
            BytesRef minTerm = bounds.removeFirst();
            BytesRef maxTerm = previous.get();
            checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
        }
    }
}
Also used : BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) FixedBitSet(org.apache.lucene.util.FixedBitSet) LinkedList(java.util.LinkedList) BytesRef(org.apache.lucene.util.BytesRef)

Example 50 with FixedBitSet

use of org.apache.lucene.util.FixedBitSet in project lucene-solr by apache.

the class CheckIndex method checkSortedDocValues.

/**
 * Validates a sorted doc-values iterator for {@code fieldName}.
 *
 * <p>Checks, in order: the iterator starts at docID -1; every ord returned while iterating lies in
 * {@code [0, valueCount)}; the highest ord seen equals {@code valueCount - 1}; every ord is used by
 * at least one document; and the terms returned by {@code lookupOrd} are strictly increasing.
 *
 * @throws RuntimeException on the first violated check
 */
private static void checkSortedDocValues(String fieldName, int maxDoc, SortedDocValues dv) throws IOException {
    if (dv.docID() != -1) {
        throw new RuntimeException("sorted dv iterator for field: " + fieldName + " should start at docID=-1, but got " + dv.docID());
    }
    final int maxOrd = dv.getValueCount() - 1;
    final FixedBitSet usedOrds = new FixedBitSet(dv.getValueCount());
    int highestSeen = -1;
    for (int doc = dv.nextDoc(); doc != NO_MORE_DOCS; doc = dv.nextDoc()) {
        final int ord = dv.ordValue();
        if (ord == -1) {
            throw new RuntimeException("dv for field: " + fieldName + " has -1 ord");
        }
        if (ord < -1 || ord > maxOrd) {
            throw new RuntimeException("ord out of bounds: " + ord);
        }
        if (ord > highestSeen) {
            highestSeen = ord;
        }
        usedOrds.set(ord);
    }
    if (maxOrd != highestSeen) {
        throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + highestSeen);
    }
    if (usedOrds.cardinality() != dv.getValueCount()) {
        throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + usedOrds.cardinality());
    }
    // Dictionary terms must be strictly increasing by ord.
    BytesRef previous = null;
    for (int ord = 0; ord <= maxOrd; ord++) {
        final BytesRef term = dv.lookupOrd(ord);
        // Return value deliberately unused, as in the original code.
        term.isValid();
        if (previous != null && term.compareTo(previous) <= 0) {
            throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + previous + " >=" + term);
        }
        previous = BytesRef.deepCopyOf(term);
    }
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

FixedBitSet (org.apache.lucene.util.FixedBitSet) 162, Term (org.apache.lucene.index.Term) 27, LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 26, Directory (org.apache.lucene.store.Directory) 25, BytesRef (org.apache.lucene.util.BytesRef) 22, IOException (java.io.IOException) 19, Document (org.apache.lucene.document.Document) 17, ArrayList (java.util.ArrayList) 15, Query (org.apache.lucene.search.Query) 15, NumericDocValues (org.apache.lucene.index.NumericDocValues) 14, BitDocIdSet (org.apache.lucene.util.BitDocIdSet) 13, Bits (org.apache.lucene.util.Bits) 13, LeafReader (org.apache.lucene.index.LeafReader) 12, IndexSearcher (org.apache.lucene.search.IndexSearcher) 12, TermQuery (org.apache.lucene.search.TermQuery) 12, IndexReader (org.apache.lucene.index.IndexReader) 11, HashSet (java.util.HashSet) 10, NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField) 10, DocIterator (org.apache.solr.search.DocIterator) 10, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 9