Use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.
From the class RandomSamplingFacetsCollector, the method createSample:
/** Create a sample of the given hits. */
private MatchingDocs createSample(MatchingDocs docs) {
  int maxdoc = docs.context.reader().maxDoc();
  // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
  FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
  int binSize = (int) (1.0 / samplingRate);
  try {
    int counter = 0;
    int limit, randomIndex;
    if (leftoverBin != NOT_CALCULATED) {
      limit = leftoverBin;
      // either NOT_CALCULATED, which means we already sampled from that bin,
      // or the next document to sample
      randomIndex = leftoverIndex;
    } else {
      limit = binSize;
      randomIndex = random.nextInt(binSize);
    }
    final DocIdSetIterator it = docs.bits.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      if (counter == randomIndex) {
        sampleDocs.set(doc);
      }
      counter++;
      if (counter >= limit) {
        counter = 0;
        limit = binSize;
        randomIndex = random.nextInt(binSize);
      }
    }
    if (counter == 0) {
      // we either exhausted the bin and the iterator at the same time, or
      // this segment had no results. in the latter case we might want to
      // carry the leftover to the next segment as is, but that complicates the
      // code and doesn't seem so important.
      leftoverBin = leftoverIndex = NOT_CALCULATED;
    } else {
      leftoverBin = limit - counter;
      if (randomIndex > counter) {
        // the document to sample is in the next bin
        leftoverIndex = randomIndex - counter;
      } else if (randomIndex < counter) {
        // we sampled a document from the bin, so just skip over the remaining
        // documents in the bin in the next segment.
        leftoverIndex = NOT_CALCULATED;
      }
    }
    return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
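Stripped of the cross-segment leftover bookkeeping, the loop above reduces to a simple pattern: walk the DocIdSetIterator and keep one randomly chosen document per bin of 1/samplingRate hits. The sketch below shows just that core idea; the class name, the SAMPLING_RATE constant, and the fixed seed are illustrative assumptions, not part of the Lucene API.

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;

public class BinSamplingSketch {
  private static final double SAMPLING_RATE = 0.1; // keep roughly 1 in 10 hits (assumed value)
  private final Random random = new Random(42);    // fixed seed for reproducibility (assumed)

  /** Returns a bit set holding one randomly chosen doc id per bin of hits. */
  FixedBitSet sample(DocIdSetIterator hits, int maxDoc) throws IOException {
    FixedBitSet sampled = new FixedBitSet(maxDoc);
    int binSize = (int) (1.0 / SAMPLING_RATE);
    int counter = 0;
    int randomIndex = random.nextInt(binSize); // position of this bin's pick
    for (int doc = hits.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = hits.nextDoc()) {
      if (counter == randomIndex) {
        sampled.set(doc); // the single sampled document for this bin
      }
      if (++counter == binSize) { // bin exhausted: open a fresh one
        counter = 0;
        randomIndex = random.nextInt(binSize);
      }
    }
    return sampled;
  }
}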
Use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.
From the class DoubleRangeFacetCounts, the method count:
private void count(DoubleValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
  DoubleRange[] ranges = (DoubleRange[]) this.ranges;
  LongRange[] longRanges = new LongRange[ranges.length];
  for (int i = 0; i < ranges.length; i++) {
    DoubleRange range = ranges[i];
    longRanges[i] = new LongRange(range.label,
        NumericUtils.doubleToSortableLong(range.min), true,
        NumericUtils.doubleToSortableLong(range.max), true);
  }
  LongRangeCounter counter = new LongRangeCounter(longRanges);
  int missingCount = 0;
  for (MatchingDocs hits : matchingDocs) {
    DoubleValues fv = valueSource.getValues(hits.context, null);
    totCount += hits.totalHits;
    final DocIdSetIterator fastMatchDocs;
    if (fastMatchQuery != null) {
      final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
      final IndexSearcher searcher = new IndexSearcher(topLevelContext);
      searcher.setQueryCache(null);
      final Weight fastMatchWeight = searcher.createNormalizedWeight(fastMatchQuery, false);
      Scorer s = fastMatchWeight.scorer(hits.context);
      if (s == null) {
        continue;
      }
      fastMatchDocs = s.iterator();
    } else {
      fastMatchDocs = null;
    }
    DocIdSetIterator docs = hits.bits.iterator();
    for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
      if (fastMatchDocs != null) {
        int fastMatchDoc = fastMatchDocs.docID();
        if (fastMatchDoc < doc) {
          fastMatchDoc = fastMatchDocs.advance(doc);
        }
        if (doc != fastMatchDoc) {
          doc = docs.advance(fastMatchDoc);
          continue;
        }
      }
      // Skip missing docs:
      if (fv.advanceExact(doc)) {
        counter.add(NumericUtils.doubleToSortableLong(fv.doubleValue()));
      } else {
        missingCount++;
      }
      doc = docs.nextDoc();
    }
  }
  missingCount += counter.fillCounts(counts);
  totCount -= missingCount;
}
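The inner loop is the classic leapfrog conjunction: the hits iterator leads, fastMatchDocs confirms each candidate, and whichever iterator is behind jumps ahead via advance(). Below is a minimal, self-contained sketch of that pattern using only the DocIdSetIterator contract; the method name visitCommonDocs and the IntConsumer callback are illustrative, not Lucene API.

import java.io.IOException;
import java.util.function.IntConsumer;

import org.apache.lucene.search.DocIdSetIterator;

public class LeapfrogSketch {
  /** Calls onMatch for every doc id present in both iterators. */
  static void visitCommonDocs(DocIdSetIterator lead, DocIdSetIterator filter,
                              IntConsumer onMatch) throws IOException {
    int doc = lead.nextDoc();
    while (doc != DocIdSetIterator.NO_MORE_DOCS) {
      int filterDoc = filter.docID();
      if (filterDoc < doc) {
        filterDoc = filter.advance(doc); // catch the filter up to the lead
      }
      if (filterDoc != doc) {
        doc = lead.advance(filterDoc);   // jump the lead to the filter's position
        continue;
      }
      onMatch.accept(doc); // both iterators agree: doc matches
      doc = lead.nextDoc();
    }
  }
}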
Use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.
From the class TaxonomyFacetCounts, the method count:
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
  IntsRef scratch = new IntsRef();
  for (MatchingDocs hits : matchingDocs) {
    OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
    DocIdSetIterator docs = hits.bits.iterator();
    int doc;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      ords.get(doc, scratch);
      for (int i = 0; i < scratch.length; i++) {
        values[scratch.ints[scratch.offset + i]]++;
      }
    }
  }
  rollup();
}
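For context, a counting Facets implementation like this is typically driven from a FacetsCollector. The following usage sketch is hedged: the searcher, taxoReader, and config variables are assumed to be in scope, and the dimension name "Author" is purely illustrative.

FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
Facets facets = new TaxonomyFacetCounts(new DocValuesOrdinalsReader(), taxoReader, config, fc);
FacetResult result = facets.getTopChildren(10, "Author"); // top 10 values of an assumed "Author" dimension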
Use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.
From the class TestUnifiedHighlighter, the method testCustomFieldValueSource:
public void testCustomFieldValueSource() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Document doc = new Document();
  final String text = "This is a test. Just highlighting from postings. This is also a much sillier test. Feel free to test test test test test test test.";
  Field body = new Field("body", text, fieldType);
  doc.add(body);
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter,
                                                   int cacheCharsThreshold) throws IOException {
      assert fields.length == 1;
      assert docIter.cost() == 1;
      docIter.nextDoc();
      return Collections.singletonList(new CharSequence[] { text });
    }

    @Override
    protected BreakIterator getBreakIterator(String field) {
      return new WholeBreakIterator();
    }
  };
  Query query = new TermQuery(new Term("body", "test"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits);
  String[] snippets = highlighter.highlight("body", query, topDocs, 2);
  assertEquals(1, snippets.length);
  assertEquals("This is a <b>test</b>. Just highlighting from postings. This is also a much sillier <b>test</b>. Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);
  ir.close();
}
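The override above short-circuits field loading for a single known document. A more general override might fetch text from an external store instead of Lucene stored fields. The hedged fragment below assumes a hypothetical externalStore helper and the usual java.util imports, and follows the same contract as the test: one CharSequence[] per document visited by docIter, with one entry per requested field.

@Override
protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter,
                                               int cacheCharsThreshold) throws IOException {
  List<CharSequence[]> values = new ArrayList<>();
  for (int doc = docIter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docIter.nextDoc()) {
    CharSequence[] perField = new CharSequence[fields.length];
    for (int i = 0; i < fields.length; i++) {
      perField[i] = externalStore.get(doc, fields[i]); // hypothetical external lookup by doc id
    }
    values.add(perField);
  }
  return values;
}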
Use of org.apache.lucene.search.DocIdSetIterator in project lucene-solr by apache.
From the class FunctionMatchQuery, the method createWeight:
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      DoubleValues values = source.getValues(context, null);
      DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
      TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
        @Override
        public boolean matches() throws IOException {
          return values.advanceExact(approximation.docID()) && filter.test(values.doubleValue());
        }

        @Override
        public float matchCost() {
          // TODO maybe DoubleValuesSource should have a matchCost?
          return 100;
        }
      };
      return new ConstantScoreScorer(this, score(), twoPhase);
    }
  };
}
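On the consuming side, a two-phase scorer like this can be unwrapped back into a plain DocIdSetIterator with TwoPhaseIterator.asDocIdSetIterator, which runs matches() behind the scenes so the caller only sees confirmed docs. A hedged sketch, assuming weight and context are in scope:

Scorer scorer = weight.scorer(context);
if (scorer != null) {
  TwoPhaseIterator twoPhase = scorer.twoPhaseIterator(); // null if the scorer has no approximation
  DocIdSetIterator it = twoPhase != null
      ? TwoPhaseIterator.asDocIdSetIterator(twoPhase)
      : scorer.iterator();
  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    // doc passed both the approximation and the matches() check
  }
}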