Search in sources :

Example 16 with DocIdSetIterator

use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.

the class RandomSamplingFacetsCollector method createSample.

/** Create a sampled of the given hits. */
private MatchingDocs createSample(MatchingDocs docs) {
    int maxdoc = docs.context.reader().maxDoc();
    // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
    FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
    int binSize = (int) (1.0 / samplingRate);
    try {
        int counter = 0;
        int limit, randomIndex;
        if (leftoverBin != NOT_CALCULATED) {
            limit = leftoverBin;
            // either NOT_CALCULATED, which means we already sampled from that bin,
            // or the next document to sample
            randomIndex = leftoverIndex;
        } else {
            limit = binSize;
            randomIndex = random.nextInt(binSize);
        }
        final DocIdSetIterator it = docs.bits.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            if (counter == randomIndex) {
                sampleDocs.set(doc);
            }
            counter++;
            if (counter >= limit) {
                counter = 0;
                limit = binSize;
                randomIndex = random.nextInt(binSize);
            }
        }
        if (counter == 0) {
            // we either exhausted the bin and the iterator at the same time, or
            // this segment had no results. in the latter case we might want to
            // carry leftover to the next segment as is, but that complicates the
            // code and doesn't seem so important.
            leftoverBin = leftoverIndex = NOT_CALCULATED;
        } else {
            leftoverBin = limit - counter;
            if (randomIndex > counter) {
                // the document to sample is in the next bin
                leftoverIndex = randomIndex - counter;
            } else if (randomIndex < counter) {
                // we sampled a document from the bin, so just skip over remaining
                // documents in the bin in the next segment.
                leftoverIndex = NOT_CALCULATED;
            }
        }
        return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : BitDocIdSet(org.apache.lucene.util.BitDocIdSet) FixedBitSet(org.apache.lucene.util.FixedBitSet) IOException(java.io.IOException) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 17 with DocIdSetIterator

use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.

the class DoubleRangeFacetCounts method count.

private void count(DoubleValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
    DoubleRange[] ranges = (DoubleRange[]) this.ranges;
    LongRange[] longRanges = new LongRange[ranges.length];
    for (int i = 0; i < ranges.length; i++) {
        DoubleRange range = ranges[i];
        longRanges[i] = new LongRange(range.label, NumericUtils.doubleToSortableLong(range.min), true, NumericUtils.doubleToSortableLong(range.max), true);
    }
    LongRangeCounter counter = new LongRangeCounter(longRanges);
    int missingCount = 0;
    for (MatchingDocs hits : matchingDocs) {
        DoubleValues fv = valueSource.getValues(hits.context, null);
        totCount += hits.totalHits;
        final DocIdSetIterator fastMatchDocs;
        if (fastMatchQuery != null) {
            final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
            final IndexSearcher searcher = new IndexSearcher(topLevelContext);
            searcher.setQueryCache(null);
            final Weight fastMatchWeight = searcher.createNormalizedWeight(fastMatchQuery, false);
            Scorer s = fastMatchWeight.scorer(hits.context);
            if (s == null) {
                continue;
            }
            fastMatchDocs = s.iterator();
        } else {
            fastMatchDocs = null;
        }
        DocIdSetIterator docs = hits.bits.iterator();
        for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
            if (fastMatchDocs != null) {
                int fastMatchDoc = fastMatchDocs.docID();
                if (fastMatchDoc < doc) {
                    fastMatchDoc = fastMatchDocs.advance(doc);
                }
                if (doc != fastMatchDoc) {
                    doc = docs.advance(fastMatchDoc);
                    continue;
                }
            }
            // Skip missing docs:
            if (fv.advanceExact(doc)) {
                counter.add(NumericUtils.doubleToSortableLong(fv.doubleValue()));
            } else {
                missingCount++;
            }
            doc = docs.nextDoc();
        }
    }
    missingCount += counter.fillCounts(counts);
    totCount -= missingCount;
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) MatchingDocs(org.apache.lucene.facet.FacetsCollector.MatchingDocs) DoubleValues(org.apache.lucene.search.DoubleValues) Scorer(org.apache.lucene.search.Scorer) IndexReaderContext(org.apache.lucene.index.IndexReaderContext) Weight(org.apache.lucene.search.Weight) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 18 with DocIdSetIterator

use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.

the class TaxonomyFacetCounts method count.

private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    IntsRef scratch = new IntsRef();
    for (MatchingDocs hits : matchingDocs) {
        OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
        DocIdSetIterator docs = hits.bits.iterator();
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            ords.get(doc, scratch);
            for (int i = 0; i < scratch.length; i++) {
                values[scratch.ints[scratch.offset + i]]++;
            }
        }
    }
    rollup();
}
Also used : MatchingDocs(org.apache.lucene.facet.FacetsCollector.MatchingDocs) IntsRef(org.apache.lucene.util.IntsRef) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator)

Example 19 with DocIdSetIterator

use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.

the class TestUnifiedHighlighter method testCustomFieldValueSource.

public void testCustomFieldValueSource() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
    Field body = new Field("body", text, fieldType);
    doc.add(body);
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        @Override
        protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
            assert fields.length == 1;
            assert docIter.cost() == 1;
            docIter.nextDoc();
            return Collections.singletonList(new CharSequence[] { text });
        }

        @Override
        protected BreakIterator getBreakIterator(String field) {
            return new WholeBreakIterator();
        }
    };
    Query query = new TermQuery(new Term("body", "test"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
    ir.close();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PhraseQuery(org.apache.lucene.search.PhraseQuery) PrefixQuery(org.apache.lucene.search.PrefixQuery) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) IndexReader(org.apache.lucene.index.IndexReader) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter)

Example 20 with DocIdSetIterator

use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.

the class FunctionMatchQuery method createWeight.

@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
            DoubleValues values = source.getValues(context, null);
            DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
            TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {

                @Override
                public boolean matches() throws IOException {
                    return values.advanceExact(approximation.docID()) && filter.test(values.doubleValue());
                }

                @Override
                public float matchCost() {
                    // TODO maybe DoubleValuesSource should have a matchCost?
                    return 100;
                }
            };
            return new ConstantScoreScorer(this, score(), twoPhase);
        }
    };
}
Also used : TwoPhaseIterator(org.apache.lucene.search.TwoPhaseIterator) ConstantScoreScorer(org.apache.lucene.search.ConstantScoreScorer) DoubleValues(org.apache.lucene.search.DoubleValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) ConstantScoreWeight(org.apache.lucene.search.ConstantScoreWeight)

Aggregations

DocIdSetIterator (org.apache.lucene.search.DocIdSetIterator)68 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)28 Scorer (org.apache.lucene.search.Scorer)15 DocIdSet (org.apache.lucene.search.DocIdSet)14 Weight (org.apache.lucene.search.Weight)12 ConstantScoreScorer (org.apache.lucene.search.ConstantScoreScorer)10 BitSet (org.apache.lucene.util.BitSet)10 Bits (org.apache.lucene.util.Bits)10 BytesRef (org.apache.lucene.util.BytesRef)10 IOException (java.io.IOException)9 MatchingDocs (org.apache.lucene.facet.FacetsCollector.MatchingDocs)8 IndexSearcher (org.apache.lucene.search.IndexSearcher)8 SortedDocValues (org.apache.lucene.index.SortedDocValues)7 ConstantScoreWeight (org.apache.lucene.search.ConstantScoreWeight)7 TwoPhaseIterator (org.apache.lucene.search.TwoPhaseIterator)7 SortedSetDocValues (org.apache.lucene.index.SortedSetDocValues)6 Query (org.apache.lucene.search.Query)6 Document (org.apache.lucene.document.Document)5 IndexReader (org.apache.lucene.index.IndexReader)5 LeafReader (org.apache.lucene.index.LeafReader)5