/**
 * Creates a sample of the given hits for one segment.
 *
 * <p>Hits are walked in bins of {@code (int) (1.0 / samplingRate)} documents, and one random
 * position is drawn per bin. A bin that is still open when the iterator is exhausted is
 * carried over to the next call through the {@code leftoverBin} and {@code leftoverIndex}
 * fields, which this method both reads and updates.
 *
 * <p>NOTE(review): this listing looks truncated -- closing braces are missing and the body of
 * the {@code counter == randomIndex} check is empty, so nothing visible marks a sampled
 * document or increments {@code counter}. Restore the missing lines from the original file
 * before compiling.
 *
 * @param docs the matching documents of one segment
 * @return a new {@code MatchingDocs} for the same context, backed by {@code sampleDocs} and
 *     carrying the original {@code totalHits}; the last constructor argument is {@code null}
 * @throws RuntimeException wrapping any {@code IOException} raised inside the try block
 */
private MatchingDocs createSample(MatchingDocs docs) {
    // The bit set is sized to the segment's maxDoc so any doc id of this segment can be set.
    int maxdoc = docs.context.reader().maxDoc();
    // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
    FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
    // Number of hits per bin; the cast truncates toward zero.
    int binSize = (int) (1.0 / samplingRate);
    try {
        // counter: position inside the current bin; limit: size of the current bin;
        // randomIndex: position inside the current bin that is selected for sampling.
        int counter = 0;
        int limit, randomIndex;
        if (leftoverBin != NOT_CALCULATED) {
            // Resume the bin left open by the previous call.
            limit = leftoverBin;
            // either NOT_CALCULATED, which means we already sampled from that bin,
            // or the next document to sample
            randomIndex = leftoverIndex;
        } else {
            // Nothing carried over: start a fresh, full-size bin.
            limit = binSize;
            randomIndex = random.nextInt(binSize);
        final DocIdSetIterator it = docs.bits.iterator();
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
            // NOTE(review): the body of this check is not visible; presumably it records
            // `doc` in sampleDocs, followed by an increment of `counter` -- confirm against
            // the original file.
            if (counter == randomIndex) {
            if (counter >= limit) {
                // Current bin is exhausted: start a new full-size bin with a new random pick.
                counter = 0;
                limit = binSize;
                randomIndex = random.nextInt(binSize);
        if (counter == 0) {
            // we either exhausted the bin and the iterator at the same time, or
            // this segment had no results. in the latter case we might want to
            // carry leftover to the next segment as is, but that complicates the
            // code and doesn't seem so important.
            leftoverBin = leftoverIndex = NOT_CALCULATED;
        } else {
            // Remember how many positions of the open bin remain for the next call.
            leftoverBin = limit - counter;
            // NOTE(review): when randomIndex == counter neither branch below runs, so
            // leftoverIndex keeps whatever value it had before -- confirm this is intended.
            if (randomIndex > counter) {
                // the document to sample is in the next bin
                leftoverIndex = randomIndex - counter;
            } else if (randomIndex < counter) {
                // we sampled a document from the bin, so just skip over remaining
                // documents in the bin in the next segment.
                leftoverIndex = NOT_CALCULATED;
        return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
    } catch (IOException e) {
        // Rethrow unchecked, keeping the original exception as the cause.
        throw new RuntimeException(e);
/**
 * Counts, for every configured range, the matching documents whose value from
 * {@code valueSource} falls into that range.
 *
 * <p>The {@code DoubleRange}s in {@code this.ranges} are converted to {@code LongRange}s via
 * {@code NumericUtils.doubleToSortableLong} and handed to a {@code LongRangeCounter}. When
 * {@code fastMatchQuery} is non-null, a scorer for it is created per segment and the hit
 * iterator is aligned with it. The method adds each segment's {@code totalHits} to
 * {@code totCount} and finally subtracts {@code missingCount} from it.
 *
 * <p>NOTE(review): this listing looks truncated -- closing braces are missing and the bodies
 * of {@code if (s == null)} and of both {@code fv.advanceExact(doc)} branches are empty.
 * Restore the missing lines from the original file before compiling.
 *
 * @param valueSource source of the per-document double values
 * @param matchingDocs per-segment hits to count
 * @throws IOException declared by this method; presumably propagated from the index reads
 */
private void count(DoubleValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
    DoubleRange[] ranges = (DoubleRange[]) this.ranges;
    // Translate each double range into a long range over the sortable-long encoding.
    LongRange[] longRanges = new LongRange[ranges.length];
    for (int i = 0; i < ranges.length; i++) {
        DoubleRange range = ranges[i];
        // Both boolean arguments are true; presumably they mark min and max as inclusive --
        // verify against the LongRange constructor.
        longRanges[i] = new LongRange(range.label, NumericUtils.doubleToSortableLong(range.min), true, NumericUtils.doubleToSortableLong(range.max), true);
    LongRangeCounter counter = new LongRangeCounter(longRanges);
    int missingCount = 0;
    for (MatchingDocs hits : matchingDocs) {
        DoubleValues fv = valueSource.getValues(hits.context, null);
        // Start from all hits of the segment; missingCount is subtracted at the end.
        totCount += hits.totalHits;
        final DocIdSetIterator fastMatchDocs;
        if (fastMatchQuery != null) {
            // Build the weight against the top-level reader, then score only this segment.
            final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
            final IndexSearcher searcher = new IndexSearcher(topLevelContext);
            final Weight fastMatchWeight = searcher.createNormalizedWeight(fastMatchQuery, false);
            Scorer s = fastMatchWeight.scorer(hits.context);
            // NOTE(review): the body of this null check is not visible; presumably it skips
            // the segment, since the next line dereferences `s` -- confirm.
            if (s == null) {
            fastMatchDocs = s.iterator();
        } else {
            fastMatchDocs = null;
        DocIdSetIterator docs = hits.bits.iterator();
        // The for-header has no update clause; `doc` is advanced inside the loop body.
        for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
            if (fastMatchDocs != null) {
                // Bring the fast-match iterator up to the current hit.
                int fastMatchDoc = fastMatchDocs.docID();
                if (fastMatchDoc < doc) {
                    fastMatchDoc = fastMatchDocs.advance(doc);
                // The two iterators disagree: move the hit iterator to the fast-match doc.
                if (doc != fastMatchDoc) {
                    doc = docs.advance(fastMatchDoc);
            // Skip missing docs:
            // NOTE(review): both branch bodies are not visible; presumably the value is fed
            // to `counter` when present and `missingCount` is incremented otherwise -- confirm.
            if (fv.advanceExact(doc)) {
            } else {
            doc = docs.nextDoc();
    // fillCounts writes into `counts`; its return value is added to missingCount.
    missingCount += counter.fillCounts(counts);
    totCount -= missingCount;
/**
 * Increments {@code values[ord]} once for every ordinal read for every matching document.
 *
 * <p>NOTE(review): this listing looks truncated -- the closing braces are missing, and any
 * statements that followed the loops are not visible. Restore the missing lines from the
 * original file before compiling.
 *
 * @param matchingDocs per-segment hits to count
 * @throws IOException declared by this method; presumably propagated from the ordinals reads
 */
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    // One scratch buffer is reused for every document's ordinals.
    IntsRef scratch = new IntsRef();
    for (MatchingDocs hits : matchingDocs) {
        // Per-segment reader for the ordinals of this segment's context.
        OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
        DocIdSetIterator docs = hits.bits.iterator();
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // Load this document's ordinals into scratch.
            ords.get(doc, scratch);
            // The ordinals occupy scratch.ints[offset .. offset + length).
            for (int i = 0; i < scratch.length; i++) {
                values[scratch.ints[scratch.offset + i]]++;
/**
 * Tests a {@code UnifiedHighlighter} whose field text comes from a custom
 * {@code loadFieldValues} implementation that returns a fixed string, and whose break
 * iterator is a {@code WholeBreakIterator}. Expects a single snippet with every occurrence
 * of "test" wrapped in {@code <b>} tags.
 *
 * <p>NOTE(review): this listing looks truncated -- closing braces are missing, no visible
 * line adds {@code body} to {@code doc} or {@code doc} to the writer, and the
 * {@code TopDocs topDocs} assignment has lost its right-hand side. Restore the missing lines
 * from the original file before compiling.
 */
public void testCustomFieldValueSource() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    final String text = "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.";
    Field body = new Field("body", text, fieldType);
    IndexReader ir = iw.getReader();
    IndexSearcher searcher = newSearcher(ir);
    // Anonymous subclass that supplies the field text and the break iterator itself.
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

        // Returns the fixed test text as the value of the single requested field.
        protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold) throws IOException {
            assert fields.length == 1;
            assert docIter.cost() == 1;
            return Collections.singletonList(new CharSequence[] { text });

        // Uses a WholeBreakIterator for every field; the expected snippet below is the
        // complete text.
        protected BreakIterator getBreakIterator(String field) {
            return new WholeBreakIterator();
    Query query = new TermQuery(new Term("body", "test"));
    // NOTE(review): the next line is malformed as shown; presumably it was a search call on
    // `searcher` taking `query` as its first argument -- confirm against the original file.
    TopDocs topDocs =, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
/**
 * Creates a constant-score weight whose per-segment scorer matches the documents for which
 * {@code source} has a value and {@code filter} accepts that value.
 *
 * <p>NOTE(review): this listing looks truncated -- closing braces are missing, and the blank
 * lines before the inner methods may have held annotations. Restore the missing lines from
 * the original file before compiling.
 *
 * @param searcher not used by the visible code
 * @param needsScores not used by the visible code
 * @param boost passed to the {@code ConstantScoreWeight} constructor
 * @return the anonymous {@code ConstantScoreWeight}
 * @throws IOException declared by this method; no visible statement outside the inner
 *     methods throws it
 */
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

        // Builds the scorer for one segment.
        public Scorer scorer(LeafReaderContext context) throws IOException {
            DoubleValues values = source.getValues(context, null);
            // Approximation over doc ids up to the segment's maxDoc; the actual check is
            // done in matches().
            DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
            TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {

                // True when the current doc has a value and the filter accepts it; the
                // filter is only evaluated if advanceExact returns true.
                public boolean matches() throws IOException {
                    return values.advanceExact(approximation.docID()) && filter.test(values.doubleValue());

                // Fixed cost estimate for matches().
                public float matchCost() {
                    // TODO maybe DoubleValuesSource should have a matchCost?
                    return 100;
            return new ConstantScoreScorer(this, score(), twoPhase);
// Also used: TwoPhaseIterator, ConstantScoreScorer, DoubleValues, LeafReaderContext (org.apache.lucene.index.LeafReaderContext), DocIdSetIterator, ConstantScoreWeight


