Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache: class SolrIndexSearcher, method getDocSet.
/**
* Returns the set of document ids matching all queries. This method is cache-aware and attempts to retrieve the
* answer from the cache if possible. If the answer was not cached, it may have been inserted into the cache as a
* result of this call. This method can handle negative queries.
* <p>
* The DocSet returned should <b>not</b> be modified.
*/
public DocSet getDocSet(List<Query> queries) throws IOException {
  if (queries != null) {
    for (Query q : queries) {
      if (q instanceof ScoreFilter) {
        return getDocSetScore(queries);
      }
    }
  }
  ProcessedFilter pf = getProcessedFilter(null, queries);
  if (pf.answer != null)
    return pf.answer;
  DocSetCollector setCollector = new DocSetCollector(maxDoc());
  Collector collector = setCollector;
  if (pf.postFilter != null) {
    pf.postFilter.setLastDelegate(collector);
    collector = pf.postFilter;
  }
  for (final LeafReaderContext leaf : leafContexts) {
    final LeafReader reader = leaf.reader();
    Bits liveDocs = reader.getLiveDocs();
    DocIdSet idSet = null;
    if (pf.filter != null) {
      idSet = pf.filter.getDocIdSet(leaf, liveDocs);
      if (idSet == null)
        continue;
    }
    DocIdSetIterator idIter = null;
    if (idSet != null) {
      idIter = idSet.iterator();
      if (idIter == null)
        continue;
      // no need to check liveDocs
      if (!pf.hasDeletedDocs)
        liveDocs = null;
    }
    final LeafCollector leafCollector = collector.getLeafCollector(leaf);
    int max = reader.maxDoc();
    if (idIter == null) {
      for (int docid = 0; docid < max; docid++) {
        if (liveDocs != null && !liveDocs.get(docid))
          continue;
        leafCollector.collect(docid);
      }
    } else {
      if (liveDocs != null) {
        for (int docid = -1; (docid = idIter.advance(docid + 1)) < max; ) {
          if (liveDocs.get(docid))
            leafCollector.collect(docid);
        }
      } else {
        for (int docid = -1; (docid = idIter.advance(docid + 1)) < max; ) {
          leafCollector.collect(docid);
        }
      }
    }
  }
  if (collector instanceof DelegatingCollector) {
    ((DelegatingCollector) collector).finish();
  }
  return DocSetUtil.getDocSet(setCollector, this);
}
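For orientation, here is a minimal, hypothetical sketch of calling getDocSet from outside the class; the helper name, filter fields, and values are assumptions, not taken from the snippet above.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;

// Hypothetical helper: count the live documents matching all filters at once.
static int countMatching(SolrIndexSearcher searcher) throws IOException {
  List<Query> filters = Arrays.<Query>asList(
      new TermQuery(new Term("inStock", "true")),     // hypothetical field/value
      new TermQuery(new Term("category", "books")));  // hypothetical field/value
  DocSet matching = searcher.getDocSet(filters);      // cache-aware; may populate the filter cache
  return matching.size();                             // the returned DocSet must not be modified
}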
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache: class SolrIndexSearcher, method lookupId.
/**
 * Look up the docid by the unique key field, returning the id <i>within</i> the leaf reader in the low 32 bits
 * and the index of the leaf reader in the high 32 bits. Returns -1 if not found.
 *
 * @lucene.internal
 */
public long lookupId(BytesRef idBytes) throws IOException {
  String field = schema.getUniqueKeyField().getName();
  for (int i = 0, c = leafContexts.size(); i < c; i++) {
    final LeafReaderContext leaf = leafContexts.get(i);
    final LeafReader reader = leaf.reader();
    final Terms terms = reader.terms(field);
    if (terms == null)
      continue;
    TermsEnum te = terms.iterator();
    if (te.seekExact(idBytes)) {
      PostingsEnum docs = te.postings(null, PostingsEnum.NONE);
      docs = BitsFilteredPostingsEnum.wrap(docs, reader.getLiveDocs());
      int id = docs.nextDoc();
      if (id == DocIdSetIterator.NO_MORE_DOCS)
        continue;
      assert docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
      return (((long) i) << 32) | id;
    }
  }
  return -1;
}
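Because the packed long is easy to mis-decode, here is a hedged sketch of unpacking the result; the helper name is hypothetical, and it assumes the order of searcher.getIndexReader().leaves() matches the leafContexts iterated above.

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.search.SolrIndexSearcher;

// Hypothetical caller: convert the packed result back into a top-level docid.
static int resolveGlobalDocId(SolrIndexSearcher searcher, BytesRef idBytes) throws IOException {
  long packed = searcher.lookupId(idBytes);
  if (packed < 0) {
    return -1;                               // unique key not found
  }
  int leafIndex = (int) (packed >>> 32);     // index of the leaf reader (high 32 bits)
  int localDocId = (int) packed;             // docid within that leaf (low 32 bits)
  List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
  return leaves.get(leafIndex).docBase + localDocId;  // rebase to a top-level docid
}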
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache: class SortedIntDocSet, method getTopFilter.
@Override
public Filter getTopFilter() {
  return new Filter() {
    int lastEndIdx = 0;

    @Override
    public DocIdSet getDocIdSet(final LeafReaderContext context, final Bits acceptDocs) {
      LeafReader reader = context.reader();
      // all Solr DocSets that are used as filters only include live docs
      final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
      final int base = context.docBase;
      final int maxDoc = reader.maxDoc();
      // one past the max doc in this segment.
      final int max = base + maxDoc;
      int sidx = Math.max(0, lastEndIdx);
      if (sidx > 0 && docs[sidx - 1] >= base) {
        // oops, the lastEndIdx isn't correct... we must have been used
        // in a multi-threaded context, or the indexreaders are being
        // used out-of-order. start at 0.
        sidx = 0;
      }
      if (sidx < docs.length && docs[sidx] < base) {
        // if docs[sidx] is < base, we need to seek to find the real start.
        sidx = findIndex(docs, base, sidx, docs.length - 1);
      }
      final int startIdx = sidx;
      // Largest possible end index is limited to the start index
      // plus the number of docs contained in the segment. Subtract 1 since
      // the end index is inclusive.
      int eidx = Math.min(docs.length, startIdx + maxDoc) - 1;
      // find the real end
      eidx = findIndex(docs, max, startIdx, eidx) - 1;
      final int endIdx = eidx;
      lastEndIdx = endIdx;
      return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            int idx = startIdx;
            int adjustedDoc = -1;

            @Override
            public int docID() {
              return adjustedDoc;
            }

            @Override
            public int nextDoc() {
              return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (docs[idx++] - base);
            }

            @Override
            public int advance(int target) {
              if (idx > endIdx || target == NO_MORE_DOCS)
                return adjustedDoc = NO_MORE_DOCS;
              target += base;
              // probe next
              int rawDoc = docs[idx++];
              if (rawDoc >= target)
                return adjustedDoc = rawDoc - base;
              int high = endIdx;
              // binary search
              while (idx <= high) {
                int mid = (idx + high) >>> 1;
                rawDoc = docs[mid];
                if (rawDoc < target) {
                  idx = mid + 1;
                } else if (rawDoc > target) {
                  high = mid - 1;
                } else {
                  idx = mid + 1;
                  return adjustedDoc = rawDoc - base;
                }
              }
              // low is on the insertion point...
              if (idx <= endIdx) {
                return adjustedDoc = docs[idx++] - base;
              } else {
                return adjustedDoc = NO_MORE_DOCS;
              }
            }

            @Override
            public long cost() {
              return docs.length;
            }
          };
        }

        @Override
        public long ramBytesUsed() {
          return RamUsageEstimator.sizeOf(docs);
        }

        @Override
        public Bits bits() {
          // random access is expensive for this set
          return null;
        }
      }, acceptDocs2);
    }

    @Override
    public String toString(String field) {
      return "SortedIntDocSetTopFilter";
    }

    // Equivalence should/could be based on docs here? How did it work previously?
    @Override
    public boolean equals(Object other) {
      return other == this;
    }

    @Override
    public int hashCode() {
      return System.identityHashCode(this);
    }
  };
}
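To make the per-segment windowing above easier to follow, here is a standalone illustration (not Solr code) of clipping a sorted array of top-level doc ids to one segment and rebasing them to segment-local ids; the helper name and the use of Arrays.binarySearch in place of findIndex are assumptions.

import java.util.Arrays;

// Illustrative only: mirrors the [base, base + maxDoc) window used in getTopFilter().
static int[] segmentLocalIds(int[] sortedGlobalDocs, int docBase, int maxDoc) {
  int max = docBase + maxDoc;                          // one past the last doc in this segment
  int start = Arrays.binarySearch(sortedGlobalDocs, docBase);
  if (start < 0) start = -start - 1;                   // insertion point when docBase is absent
  int end = Arrays.binarySearch(sortedGlobalDocs, max);
  if (end < 0) end = -end - 1;                         // first index at or past 'max'
  int[] local = new int[end - start];
  for (int i = start; i < end; i++) {
    local[i - start] = sortedGlobalDocs[i] - docBase;  // convert global id to segment-local id
  }
  return local;
}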
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache: class PhraseHelper, method getTermToSpans.
/**
 * Collect a list of pre-positioned {@link Spans} for each term, given a reader that has just one document.
 * It returns no mapping for query terms that occur in a position-insensitive way and therefore don't
 * need to be filtered.
 */
Map<BytesRef, Spans> getTermToSpans(LeafReader leafReader, int doc) throws IOException {
  if (spanQueries.isEmpty()) {
    return Collections.emptyMap();
  }
  final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName);
  // for each SpanQuery, collect the member spans into a map.
  Map<BytesRef, Spans> result = new HashMap<>();
  for (SpanQuery spanQuery : spanQueries) {
    getTermToSpans(spanQuery, filteredReader.getContext(), doc, result);
  }
  return result;
}
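A hedged sketch of how the returned map might be consumed; it assumes the Spans are already positioned on the single document of interest, as the javadoc above describes, and the helper name and printing are purely illustrative.

import java.io.IOException;
import java.util.Map;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;

// Illustrative only: walk the pre-positioned Spans for each term and read its positions.
static void dumpPositions(Map<BytesRef, Spans> termToSpans) throws IOException {
  for (Map.Entry<BytesRef, Spans> entry : termToSpans.entrySet()) {
    Spans spans = entry.getValue();
    while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
      System.out.println(entry.getKey().utf8ToString()
          + " [" + spans.startPosition() + ", " + spans.endPosition() + ")");
    }
  }
}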
Use of org.apache.lucene.index.LeafReader in project lucene-solr by apache: class PostingsOffsetStrategy, method getOffsetsEnums.
@Override
public List<OffsetsEnum> getOffsetsEnums(IndexReader reader, int docId, String content) throws IOException {
  final LeafReader leafReader;
  if (reader instanceof LeafReader) {
    leafReader = (LeafReader) reader;
  } else {
    List<LeafReaderContext> leaves = reader.leaves();
    LeafReaderContext leafReaderContext = leaves.get(ReaderUtil.subIndex(docId, leaves));
    leafReader = leafReaderContext.reader();
    // adjust 'doc' to be within this leaf reader
    docId -= leafReaderContext.docBase;
  }
  return createOffsetsEnumsFromReader(leafReader, docId);
}
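The docBase adjustment in the else branch is a common pattern when starting from a composite reader; a minimal standalone restatement with a hypothetical helper name might look like this.

import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;

// Hypothetical helper: resolve a top-level docid to its leaf reader and leaf-local docid.
static LeafReader resolveLeaf(IndexReader reader, int docId, int[] localDocIdOut) {
  List<LeafReaderContext> leaves = reader.leaves();
  LeafReaderContext ctx = leaves.get(ReaderUtil.subIndex(docId, leaves));
  localDocIdOut[0] = docId - ctx.docBase;  // rebase the global docid into this leaf
  return ctx.reader();
}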