Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The class TestDocSet, method doFilterTest:
public void doFilterTest(IndexReader reader) throws IOException {
  IndexReaderContext topLevelContext = reader.getContext();
  FixedBitSet bs = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc() + 1));
  DocSet a = new BitDocSet(bs);
  DocSet b = getIntDocSet(bs);

  Filter fa = a.getTopFilter();
  Filter fb = b.getTopFilter();

  /* top-level filters are no longer supported
  // test top-level
  DocIdSet da = fa.getDocIdSet(topLevelContext);
  DocIdSet db = fb.getDocIdSet(topLevelContext);
  doTestIteratorEqual(da, db);
  ***/

  DocIdSet da;
  DocIdSet db;
  List<LeafReaderContext> leaves = topLevelContext.leaves();

  // first test in-sequence sub readers
  for (LeafReaderContext readerContext : leaves) {
    da = fa.getDocIdSet(readerContext, null);
    db = fb.getDocIdSet(readerContext, null);
    doTestIteratorEqual(da, db);
  }

  int nReaders = leaves.size();
  // now test out-of-sequence sub readers
  for (int i = 0; i < nReaders; i++) {
    LeafReaderContext readerContext = leaves.get(rand.nextInt(nReaders));
    da = fa.getDocIdSet(readerContext, null);
    db = fb.getDocIdSet(readerContext, null);
    doTestIteratorEqual(da, db);
  }
}
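The comparison is done by a doTestIteratorEqual helper. A minimal sketch of what such a helper can look like (the actual one in TestDocSet may differ; JUnit's assertEquals/assertTrue are assumed statically imported):

private void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
  DocIdSetIterator ia = a.iterator();
  DocIdSetIterator ib = b.iterator();
  // an empty DocIdSet is allowed to return a null iterator
  if (ia == null || ib == null) {
    assertTrue(ia == null && ib == null);
    return;
  }
  // walk both iterators in lockstep and require identical doc ids
  int doc = ia.nextDoc();
  assertEquals(doc, ib.nextDoc());
  while (doc != DocIdSetIterator.NO_MORE_DOCS) {
    doc = ia.nextDoc();
    assertEquals(doc, ib.nextDoc());
  }
}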
Use of org.apache.lucene.search.DocIdSet in project greplin-lucene-utils by Cue.
The class HasFieldFilterTest, method testBasics:
@Test
public void testBasics() throws Exception {
  Filter field1 = new HasFieldFilter("field1");
  Filter field2 = new HasFieldFilter("field2");

  DocIdSet hasField1 = field1.getDocIdSet(this.reader);
  Assert.assertTrue(hasField1.isCacheable());
  DocIdSet hasField2 = field2.getDocIdSet(this.reader);
  Assert.assertTrue(hasField2.isCacheable());

  assertDocIds(hasField1, true, false);
  assertDocIds(hasField2, true, true);
}
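assertDocIds checks, per consecutive doc id starting at 0, whether the set contains that document. A hypothetical version of the helper (the real one in HasFieldFilterTest may differ; java.util.HashSet and java.util.Set assumed imported):

// Hypothetical sketch: drain the iterator into a set, then compare each expected flag.
private static void assertDocIds(DocIdSet docIds, boolean... expected) throws IOException {
  Set<Integer> matches = new HashSet<Integer>();
  DocIdSetIterator it = docIds.iterator();
  if (it != null) { // a null iterator means an empty set
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      matches.add(doc);
    }
  }
  for (int doc = 0; doc < expected.length; doc++) {
    Assert.assertEquals("doc " + doc, expected[doc], matches.contains(doc));
  }
}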
Use of org.apache.lucene.search.DocIdSet in project elasticsearch by elastic.
The class TermsSliceQuery, method createWeight:
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
  return new ConstantScoreWeight(this) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      // collect the doc ids that belong to this slice within this leaf
      final DocIdSet disi = build(context.reader());
      final DocIdSetIterator leafIt = disi.iterator();
      return new ConstantScoreScorer(this, score(), leafIt);
    }
  };
}
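The interesting work happens in build(reader). A hedged sketch of the idea, hashing each term and keeping postings of terms that fall into this slice; the actual TermsSliceQuery logic, hash function, and the members field, id (slice index), and max (slice count) are assumptions here:

private DocIdSet build(LeafReader reader) throws IOException {
  final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
  final Terms terms = reader.terms(field);
  if (terms == null) {
    return builder.build(); // field absent in this segment -> empty set
  }
  final TermsEnum te = terms.iterator();
  PostingsEnum postings = null;
  for (BytesRef term = te.next(); term != null; term = te.next()) {
    if (Math.floorMod(term.hashCode(), max) == id) { // term belongs to this slice
      postings = te.postings(postings, PostingsEnum.NONE);
      builder.add(postings);
    }
  }
  return builder.build();
}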
Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The class FacetFieldProcessorByArrayDV, method collectDocs:
@Override
protected void collectDocs() throws IOException {
  int domainSize = fcontext.base.size();
  if (nTerms <= 0 || domainSize < effectiveMincount) {
    // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  boolean countOnly = collectAcc == null && allBucketsAcc == null;
  boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account the number of values in multi-valued fields. This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones.
  // One test: on a 5M doc index, faceting on a single-valued field with almost 1M unique values, the crossover point where
  // global counting became slower than per-segment counting was a domain of 658k docs. At that point, the top 10 buckets
  // had 6-7 matches each. This was for heap docvalues produced by UninvertingReader.
  // Since those values were randomly distributed, let's round our domain multiplier up to account for less-random real-world data.
  long domainMultiplier = multiValuedField ? 4L : 2L;
  // +3 to increase test coverage with small tests
  boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);
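  // Worked example with the numbers from the note above (illustration only, not a
  // new measurement): domainSize = 658,000 docs, single-valued field so
  // domainMultiplier = 2, and roughly 1,000,000 unique values; then
  // 658,000 * 2 = 1,316,000 > 1,000,003, so manyHitsPerBucket is true and the
  // per-segment path is preferred near that measured crossover point.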
  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
  // then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings.
  // FUTURE: there are probably some other non-"countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used)
  boolean canDoPerSeg = countOnly && fullRange;
  boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

  // internal - override the perSeg heuristic
  if (freq.perSeg != null) {
    accumSeg = canDoPerSeg && freq.perSeg;
  }

  final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
  Filter filter = fcontext.base.getTopFilter();

  for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
    LeafReaderContext subCtx = leaves.get(subIdx);
    setNextReaderFirstPhase(subCtx);

    // Solr DocSets already exclude any deleted docs
    DocIdSet dis = filter.getDocIdSet(subCtx, null);
    DocIdSetIterator disi = dis.iterator();

    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
      if (multiDv == null) {
        multiDv = DocValues.emptySortedSet();
      }
      // this will be null if this is not a wrapped single-valued docvalues
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      singleDv = subCtx.reader().getSortedDocValues(sf.getName());
      if (singleDv == null) {
        singleDv = DocValues.emptySorted();
      }
    }

    LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(singleDv, disi, toGlobal);
        } else {
          collectDocs(singleDv, disi, toGlobal);
        }
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(multiDv, disi, toGlobal);
        } else {
          collectDocs(multiDv, disi, toGlobal);
        }
      }
    }
  }

  // better GC
  reuse = null;
}
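To make the per-segment trade-off concrete, here is a hedged sketch of what a countOnly collectPerSeg can look like for the single-valued case. The real method in FacetFieldProcessorByArrayDV differs in its details, and countAcc.incrementCount(slot, count) is assumed from the surrounding processor:

// Count segment-local ords first, then pay one seg->global ord mapping per unique
// value instead of one per matching document.
private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi,
                           LongValues toGlobal) throws IOException {
  final int[] segCounts = new int[singleDv.getValueCount()];
  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    if (singleDv.advanceExact(doc)) { // doc has a value in this segment
      segCounts[singleDv.ordValue()]++; // cheap: no ord mapping per hit
    }
  }
  for (int segOrd = 0; segOrd < segCounts.length; segOrd++) {
    final int count = segCounts[segOrd];
    if (count > 0) {
      final int globalOrd = toGlobal == null ? segOrd : (int) toGlobal.get(segOrd);
      countAcc.incrementCount(globalOrd, count);
    }
  }
}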
Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The class AbstractPrefixTreeQuery, method createWeight:
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      DocIdSet docSet = getDocIdSet(context);
      if (docSet == null) {
        return null; // no matches in this segment
      }
      DocIdSetIterator disi = docSet.iterator();
      if (disi == null) {
        return null; // DocIdSet.iterator() may be null for an empty set
      }
      return new ConstantScoreScorer(this, score(), disi);
    }
  };
}
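Both null checks matter: getDocIdSet may return null when a segment has nothing indexed for the field, and the DocIdSet.iterator() contract allows a null iterator for an empty set. A hedged sketch of a getDocIdSet shape that exercises the first check; this is a stand-in that collects all postings of the field, not the real prefix-tree traversal, and fieldName is an assumed member:

protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
  final Terms terms = context.reader().terms(fieldName);
  if (terms == null) {
    return null; // nothing indexed for this field here -> scorer() returns null
  }
  final DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
  final TermsEnum termsEnum = terms.iterator();
  PostingsEnum postings = null;
  while (termsEnum.next() != null) { // the real code seeks only cells matching the shape
    postings = termsEnum.postings(postings, PostingsEnum.NONE);
    builder.add(postings);
  }
  return builder.build();
}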