Use of org.apache.lucene.index.TermDocs in project jackrabbit by apache.
Class TermDocsCache, method termDocs.
/**
 * Provides the {@link TermDocs} for a term, answering from the cache where possible.
 * <p>
 * A {@code null} term, or a term whose field is not the field handled by this
 * cache, is passed straight to the underlying reader. For terms of the handled
 * field an access counter is kept per term text. While that counter is below 10
 * the reader is queried directly; afterwards the matching documents are collected
 * once into a bit set that backs all later requests for the same text. Texts that
 * turned out to match no document are remembered and answered with
 * {@link EmptyTermDocs#INSTANCE}.
 *
 * @param t the term.
 * @return the term docs for the given term.
 * @throws IOException if an error occurs while reading from the index.
 */
public TermDocs termDocs(final Term t) throws IOException {
    // identity comparison on the field name is kept as in the original
    // (presumably field names are interned -- confirm)
    final boolean handledByCache = t != null && t.field() == field;
    if (!handledByCache) {
        return reader.termDocs(t);
    }

    final String value = t.text();
    if (unknownValues.get(value) != null) {
        log.debug("EmptyTermDocs({},{})", field, value);
        return EmptyTermDocs.INSTANCE;
    }

    // look up the bookkeeping entry for this text, creating it on first access
    CacheEntry slot;
    synchronized (cache) {
        slot = cache.get(value);
        if (slot != null) {
            slot.numAccessed++;
        } else {
            if (cache.size() >= CACHE_SIZE) {
                // Cache is full. Take the access count found in the middle of the
                // sorted entries as cut-off: entries at or below it are dropped,
                // the counters of the remaining ones are reduced to their square root.
                CacheEntry[] ranked = cache.values().toArray(new CacheEntry[cache.size()]);
                Arrays.sort(ranked);
                int cutOff = ranked[CACHE_SIZE / 2].numAccessed;
                Iterator<Map.Entry<String, CacheEntry>> walker = cache.entrySet().iterator();
                while (walker.hasNext()) {
                    CacheEntry candidate = walker.next().getValue();
                    if (candidate.numAccessed > cutOff) {
                        candidate.numAccessed = (int) Math.sqrt(candidate.numAccessed);
                    } else {
                        walker.remove();
                    }
                }
            }
            slot = new CacheEntry();
            cache.put(value, slot);
        }
    }

    // below this access count the term is not worth caching; go to the reader
    if (slot.numAccessed < 10) {
        if (log.isDebugEnabled()) {
            log.debug("#{} TermDocs({},{})", new Object[] { slot.numAccessed, field, value });
        }
        return reader.termDocs(t);
    }

    if (slot.bits == null) {
        // first cached request: gather all matching document numbers;
        // the bit set is only allocated once a first match is seen
        BitSet collected = null;
        TermDocs source = reader.termDocs(t);
        try {
            while (source.next()) {
                if (collected == null) {
                    collected = new BitSet(reader.maxDoc());
                }
                collected.set(source.doc());
            }
        } finally {
            source.close();
        }
        if (collected != null) {
            slot.bits = collected;
        }
    }

    if (slot.bits != null) {
        if (log.isDebugEnabled()) {
            log.debug("CachedTermDocs({},{},{}/{})", new Object[] { field, value, slot.bits.cardinality(), reader.maxDoc() });
        }
        return new CachedTermDocs(slot.bits);
    }

    // nothing matched: remember the text so later calls can answer immediately
    unknownValues.put(value, value);
    return EmptyTermDocs.INSTANCE;
}
Use of org.apache.lucene.index.TermDocs in project jackrabbit by apache.
Class MatchAllScorer, method calculateDocFilter.
/**
 * Initializes {@code docFilter} with the documents that carry at least one
 * term in {@link FieldNames#PROPERTIES} starting with the named-value prefix
 * built from this scorer's field name.
 * <p>
 * Computed bit sets are stored per reader and per field name in the given
 * query cache, so the index is only scanned when no bit set has been stored
 * for this reader and field yet.
 *
 * @param cache the per-query cache used to look up and store the bit sets.
 * @throws IOException if an error occurs while reading from
 *                     the search index.
 */
@SuppressWarnings({ "unchecked" })
private void calculateDocFilter(PerQueryCache cache) throws IOException {
    // one map of field name -> bit set per reader
    Map<String, BitSet> filtersByField = (Map<String, BitSet>) cache.get(MatchAllScorer.class, reader);
    if (filtersByField == null) {
        filtersByField = new HashMap<String, BitSet>();
        cache.put(MatchAllScorer.class, reader, filtersByField);
    }

    docFilter = filtersByField.get(field);
    if (docFilter == null) {
        // nothing cached for this field: scan the index
        docFilter = new BitSet(reader.maxDoc());
        // an empty value yields the prefix shared by all values of the field
        String prefix = FieldNames.createNamedValue(field, "");
        TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, prefix));
        try {
            TermDocs docs = reader.termDocs();
            try {
                // walk the enumeration while it stays inside the prefix range
                for (Term current = terms.term();
                        current != null
                            && current.field() == FieldNames.PROPERTIES
                            && current.text().startsWith(prefix);
                        current = terms.term()) {
                    docs.seek(terms);
                    while (docs.next()) {
                        docFilter.set(docs.doc());
                    }
                    terms.next();
                }
            } finally {
                docs.close();
            }
        } finally {
            terms.close();
        }
        // make the result available to later calls with the same cache
        filtersByField.put(field, docFilter);
    }
}
Use of org.apache.lucene.index.TermDocs in project greplin-lucene-utils by Cue.
Class PrefixFilter, method getDocIdSet.
/**
 * Collects every document that contains at least one term of this filter's
 * field whose text starts with this filter's prefix.
 * <p>
 * The term enumeration is opened at the term built from the field and the
 * prefix and is walked until it is exhausted, leaves the field, or reaches a
 * term that no longer starts with the prefix.
 * <p>
 * Each resource is released in its own {@code finally} block, so the term
 * enumeration is closed even when opening or closing the {@link TermDocs}
 * fails.
 *
 * @param reader the index reader to collect matching documents from.
 * @return a bit set, sized to {@code reader.maxDoc()}, with one bit set per
 *         matching document.
 * @throws IOException if an error occurs while reading from the index.
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    final FixedBitSet result = new FixedBitSet(reader.maxDoc());
    final TermEnum te = reader.terms(new Term(this.field, this.prefix));
    try {
        // opened inside the outer try so that a failure here still closes te
        final TermDocs td = reader.termDocs();
        try {
            do {
                final Term term = te.term();
                // noinspection StringEquality
                if (term == null || this.field != term.field() || !term.text().startsWith(this.prefix)) {
                    break;
                }
                td.seek(term);
                while (td.next()) {
                    result.set(td.doc());
                }
            } while (te.next());
        } finally {
            td.close();
        }
    } finally {
        te.close();
    }
    return result;
}
Use of org.apache.lucene.index.TermDocs in project greplin-lucene-utils by Cue.
Class TermsFilter, method getDocIdSet.
/**
 * Collects every document that contains at least one of this filter's terms.
 *
 * @param reader the index reader to collect matching documents from.
 * @return a bit set, sized to {@code reader.maxDoc()}, with one bit set per
 *         matching document.
 * @throws IOException if an error occurs while reading from the index.
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    final FixedBitSet matches = new FixedBitSet(reader.maxDoc());
    // a single TermDocs is re-positioned for each term instead of opening one per term
    final TermDocs postings = reader.termDocs();
    try {
        for (final Term wanted : this.terms) {
            postings.seek(wanted);
            while (postings.next()) {
                final int docId = postings.doc();
                matches.set(docId);
            }
        }
    } finally {
        postings.close();
    }
    return matches;
}
Use of org.apache.lucene.index.TermDocs in project greplin-lucene-utils by Cue.
Class FilteredMultiReader, method termDocs.
/**
 * Returns a {@link TermDocs} positioned on the given term.
 * <p>
 * The instance is obtained from the no-argument {@code termDocs()} of this
 * reader and then moved to the term with {@code seek}.
 *
 * @param term the term to position the returned {@link TermDocs} on.
 * @return the {@link TermDocs} after seeking to {@code term}.
 * @throws IOException if an error occurs while reading from the index.
 */
@Override
public TermDocs termDocs(final Term term) throws IOException {
    final TermDocs positioned = termDocs();
    positioned.seek(term);
    return positioned;
}
Aggregations