Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The method getTopFilter in the class SortedIntDocSet.
@Override
public Filter getTopFilter() {
  return new Filter() {
    int lastEndIdx = 0;

    @Override
    public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) {
      LeafReader reader = context.reader();
      // all Solr DocSets that are used as filters only include live docs
      final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
      final int base = context.docBase;
      final int maxDoc = reader.maxDoc();
      // one past the max doc in this segment.
      final int max = base + maxDoc;

      int sidx = Math.max(0, lastEndIdx);
      if (sidx > 0 && docs[sidx - 1] >= base) {
        // oops, the lastEndIdx isn't correct... we must have been used
        // in a multi-threaded context, or the index readers are being
        // used out-of-order. start at 0.
        sidx = 0;
      }
      if (sidx < docs.length && docs[sidx] < base) {
        // if docs[sidx] is < base, we need to seek to find the real start.
        sidx = findIndex(docs, base, sidx, docs.length - 1);
      }
      final int startIdx = sidx;

      // Largest possible end index is limited to the start index
      // plus the number of docs contained in the segment. Subtract 1 since
      // the end index is inclusive.
      int eidx = Math.min(docs.length, startIdx + maxDoc) - 1;
      // find the real end
      eidx = findIndex(docs, max, startIdx, eidx) - 1;
      final int endIdx = eidx;
      lastEndIdx = endIdx;

      return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            int idx = startIdx;
            int adjustedDoc = -1;

            @Override
            public int docID() {
              return adjustedDoc;
            }

            @Override
            public int nextDoc() {
              return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (docs[idx++] - base);
            }

            @Override
            public int advance(int target) {
              if (idx > endIdx || target == NO_MORE_DOCS) return adjustedDoc = NO_MORE_DOCS;
              target += base;

              // probe next
              int rawDoc = docs[idx++];
              if (rawDoc >= target) return adjustedDoc = rawDoc - base;

              int high = endIdx;
              // binary search
              while (idx <= high) {
                int mid = (idx + high) >>> 1;
                rawDoc = docs[mid];
                if (rawDoc < target) {
                  idx = mid + 1;
                } else if (rawDoc > target) {
                  high = mid - 1;
                } else {
                  idx = mid + 1;
                  return adjustedDoc = rawDoc - base;
                }
              }

              // low is on the insertion point...
              if (idx <= endIdx) {
                return adjustedDoc = docs[idx++] - base;
              } else {
                return adjustedDoc = NO_MORE_DOCS;
              }
            }

            @Override
            public long cost() {
              return docs.length;
            }
          };
        }

        @Override
        public long ramBytesUsed() {
          return RamUsageEstimator.sizeOf(docs);
        }

        @Override
        public Bits bits() {
          // random access is expensive for this set
          return null;
        }
      }, acceptDocs2);
    }

    @Override
    public String toString(String field) {
      return "SortedIntDocSetTopFilter";
    }

    // Equivalence should/could be based on docs here? How did it work previously?
    @Override
    public boolean equals(Object other) {
      return other == this;
    }

    @Override
    public int hashCode() {
      return System.identityHashCode(this);
    }
  };
}
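For context, a minimal sketch of how this top-level filter might be consumed, assuming a SortedIntDocSet built from sorted global doc ids and an already-open IndexReader named reader (both illustrative, not part of the snippet above). The returned iterator yields segment-local ids, so docBase must be added back to recover global ids:

SortedIntDocSet docSet = new SortedIntDocSet(new int[] { 2, 5, 9 }); // sorted global doc ids
Filter topFilter = docSet.getTopFilter();
for (LeafReaderContext leaf : reader.leaves()) {
  DocIdSet set = topFilter.getDocIdSet(leaf, leaf.reader().getLiveDocs());
  DocIdSetIterator it = set.iterator();
  for (int d = it.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = it.nextDoc()) {
    int globalId = leaf.docBase + d; // iterator is segment-local
    System.out.println(globalId);
  }
}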
Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The method compute in the class PrefixTreeFacetCounter.
/** Lower-level per-leaf segment method. */
public static void compute(final PrefixTreeStrategy strategy, final LeafReaderContext context,
    final Bits acceptDocs, final Shape queryShape, final int facetLevel,
    final FacetVisitor facetVisitor) throws IOException {
  if (acceptDocs != null && acceptDocs.length() != context.reader().maxDoc()) {
    throw new IllegalArgumentException(
        "acceptDocs bits length " + acceptDocs.length() + " != leaf maxdoc " + context.reader().maxDoc());
  }
  final SpatialPrefixTree tree = strategy.getGrid();

  // scanLevel is an optimization knob of AbstractVisitingPrefixTreeFilter. It's unlikely
  // another scanLevel would be much faster and it tends to be a risky knob (can help a little, can hurt a ton).
  // TODO use RPT's configured scan level? Do we know better here? Hard to say.
  final int scanLevel = tree.getMaxLevels();

  // AbstractVisitingPrefixTreeFilter is a Lucene Filter. We don't need a filter; we use it for its great
  // prefix-tree traversal code. TODO consider refactoring if/when it makes sense (more use cases than this)
  new AbstractVisitingPrefixTreeQuery(queryShape, strategy.getFieldName(), tree, facetLevel, scanLevel) {

    @Override
    public String toString(String field) {
      return "anonPrefixTreeQuery"; // un-used
    }

    @Override
    public DocIdSet getDocIdSet(LeafReaderContext contexts) throws IOException {
      assert facetLevel == super.detailLevel; // same thing, FYI. (constant)
      return new VisitorTemplate(context) {

        @Override
        protected void start() throws IOException {
          facetVisitor.startOfSegment();
        }

        @Override
        protected DocIdSet finish() throws IOException {
          return null; // unused
        }

        @Override
        protected boolean visitPrefix(Cell cell) throws IOException {
          // At facetLevel...
          if (cell.getLevel() == facetLevel) {
            // Count docs. We're not a leaf but we treat it as such at facet level.
            visitLeaf(cell);
            return false; // don't descend further; this is enough detail
          }

          // TODO this opt should move to VisitorTemplate (which contains an optimization TODO to this effect)
          if (cell.getLevel() == facetLevel - 1 || termsEnum.docFreq() == 1) {
            if (!hasDocsAtThisTerm()) {
              return false;
            }
          }
          return true;
        }

        @Override
        protected void visitLeaf(Cell cell) throws IOException {
          final int count = countDocsAtThisTerm();
          if (count > 0) {
            facetVisitor.visit(cell, count);
          }
        }

        private int countDocsAtThisTerm() throws IOException {
          if (acceptDocs == null) {
            return termsEnum.docFreq();
          }
          int count = 0;
          postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
          while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            if (acceptDocs.get(postingsEnum.docID()) == false) {
              continue;
            }
            count++;
          }
          return count;
        }

        private boolean hasDocsAtThisTerm() throws IOException {
          if (acceptDocs == null) {
            return true;
          }
          postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
          int nextDoc = postingsEnum.nextDoc();
          while (nextDoc != DocIdSetIterator.NO_MORE_DOCS && acceptDocs.get(nextDoc) == false) {
            nextDoc = postingsEnum.nextDoc();
          }
          return nextDoc != DocIdSetIterator.NO_MORE_DOCS;
        }
      }.getDocIdSet();
    }
  }.getDocIdSet(context);
}
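A hedged usage sketch: invoking compute for a single leaf segment with an anonymous FacetVisitor. The strategy, leafContext, and queryShape variables and the facet level of 4 are illustrative assumptions, not taken from the snippet above:

final int facetLevel = 4; // illustrative detail level
PrefixTreeFacetCounter.compute(strategy, leafContext, leafContext.reader().getLiveDocs(),
    queryShape, facetLevel, new PrefixTreeFacetCounter.FacetVisitor() {
      @Override
      public void visit(Cell cell, int count) {
        // called once per cell at facetLevel that has matching docs
        System.out.println(cell.getShape() + " -> " + count);
      }
    });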
Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The method getBitSet in the class QueryBitSetProducer.
@Override
public BitSet getBitSet(LeafReaderContext context) throws IOException {
  final LeafReader reader = context.reader();
  final IndexReader.CacheHelper cacheHelper = reader.getCoreCacheHelper();
  DocIdSet docIdSet = null;
  if (cacheHelper != null) {
    docIdSet = cache.get(cacheHelper.getKey());
  }
  if (docIdSet == null) {
    final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
    final IndexSearcher searcher = new IndexSearcher(topLevelContext);
    searcher.setQueryCache(null);
    final Weight weight = searcher.createNormalizedWeight(query, false);
    final Scorer s = weight.scorer(context);
    if (s == null) {
      docIdSet = DocIdSet.EMPTY;
    } else {
      docIdSet = new BitDocIdSet(BitSet.of(s.iterator(), context.reader().maxDoc()));
    }
    if (cacheHelper != null) {
      cache.put(cacheHelper.getKey(), docIdSet);
    }
  }
  return docIdSet == DocIdSet.EMPTY ? null : ((BitDocIdSet) docIdSet).bits();
}
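QueryBitSetProducer is most commonly used to identify parent documents for block joins. A minimal sketch, under the assumption that parent documents are marked with a docType:parent term (the field name and queries are illustrative; ScoreMode here is org.apache.lucene.search.join.ScoreMode):

BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "parent")));
Query childQuery = new TermQuery(new Term("color", "red"));
// matches parent docs whose child block contains a "color:red" child
Query joinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.None);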
Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The method testNullIteratorFilteredDocIdSet in the class TestFilteredDocIdSet.
public void testNullIteratorFilteredDocIdSet() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newStringField("c", "val", Field.Store.NO));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  writer.close();

  // First verify the document is searchable.
  IndexSearcher searcher = newSearcher(reader);
  Assert.assertEquals(1, searcher.search(new MatchAllDocsQuery(), 10).totalHits);

  // Now search w/ a Filter which returns a null DocIdSet
  Filter f = new Filter() {
    @Override
    public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) {
      final DocIdSet innerNullIteratorSet = new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() {
          return null;
        }

        @Override
        public long ramBytesUsed() {
          return 0L;
        }
      };
      return new FilteredDocIdSet(innerNullIteratorSet) {
        @Override
        protected boolean match(int docid) {
          return true;
        }
      };
    }

    @Override
    public String toString(String field) {
      return "nullDocIdSetFilter";
    }

    @Override
    public boolean equals(Object other) {
      return other == this;
    }

    @Override
    public int hashCode() {
      return System.identityHashCode(this);
    }
  };

  Query filtered = new BooleanQuery.Builder()
      .add(new MatchAllDocsQuery(), Occur.MUST)
      .add(f, Occur.FILTER)
      .build();
  Assert.assertEquals(0, searcher.search(filtered, 10).totalHits);
  reader.close();
  dir.close();
}
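The contract under test: FilteredDocIdSet.iterator() returns null when the wrapped set's iterator() is null, and the search path treats a null iterator as matching no documents rather than throwing, hence the expected hit count of 0.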
Use of org.apache.lucene.search.DocIdSet in project lucene-solr by apache.
The method testFilteredDocIdSet in the class TestFilteredDocIdSet.
public void testFilteredDocIdSet() throws Exception {
  final int maxdoc = 10;
  final DocIdSet innerSet = new DocIdSet() {
    @Override
    public long ramBytesUsed() {
      return 0L;
    }

    @Override
    public DocIdSetIterator iterator() {
      return new DocIdSetIterator() {
        int docid = -1;

        @Override
        public int docID() {
          return docid;
        }

        @Override
        public int nextDoc() {
          docid++;
          return docid < maxdoc ? docid : (docid = NO_MORE_DOCS);
        }

        @Override
        public int advance(int target) throws IOException {
          return slowAdvance(target);
        }

        @Override
        public long cost() {
          return 1;
        }
      };
    }
  };

  DocIdSet filteredSet = new FilteredDocIdSet(innerSet) {
    @Override
    protected boolean match(int docid) {
      // validate only even docids
      return docid % 2 == 0;
    }
  };

  DocIdSetIterator iter = filteredSet.iterator();
  ArrayList<Integer> list = new ArrayList<>();
  int doc = iter.advance(3);
  if (doc != DocIdSetIterator.NO_MORE_DOCS) {
    list.add(Integer.valueOf(doc));
    while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      list.add(Integer.valueOf(doc));
    }
  }

  int[] docs = new int[list.size()];
  int c = 0;
  Iterator<Integer> intIter = list.iterator();
  while (intIter.hasNext()) {
    docs[c++] = intIter.next().intValue();
  }

  int[] answer = new int[] { 4, 6, 8 };
  boolean same = Arrays.equals(answer, docs);
  if (!same) {
    System.out.println("answer: " + Arrays.toString(answer));
    System.out.println("gotten: " + Arrays.toString(docs));
    fail();
  }
}
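For reference, the expected { 4, 6, 8 } follows directly from the setup: match() keeps only even ids among 0..9, advance(3) lands on the first matching id >= 3 (that is, 4), and nextDoc() then yields 6 and 8 before the inner iterator exhausts at maxdoc.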