Search in sources :

Example 6 with TermDocs

use of org.apache.lucene.index.TermDocs in project jackrabbit by apache.

the class TermDocsCache method termDocs.

/**
 * Returns the {@link TermDocs} for the given term.
 * <p>
 * Terms of the cached field are tracked in an access-counting cache. Terms
 * with fewer than 10 recorded accesses are served directly from the
 * underlying reader; for the others the matching document numbers are
 * collected once into a {@link BitSet} that backs every later call. Terms
 * found to match no document are remembered in <code>unknownValues</code>
 * and answered with {@link EmptyTermDocs#INSTANCE}.
 *
 * @param t the term.
 * @return the term docs for the given term.
 * @throws IOException if an error occurs while reading from the index.
 */
public TermDocs termDocs(final Term t) throws IOException {
    // Null terms and terms of any other field bypass the cache entirely.
    // NOTE(review): the field names are compared by reference, not with
    // equals() -- presumably both strings are interned; confirm before
    // changing this.
    if (t == null || t.field() != field) {
        return reader.termDocs(t);
    }
    String text = t.text();
    // Fast path for values already known to match no document.
    if (unknownValues.get(text) != null) {
        log.debug("EmptyTermDocs({},{})", field, text);
        return EmptyTermDocs.INSTANCE;
    }
    // maintain cache
    CacheEntry entry;
    synchronized (cache) {
        entry = cache.get(text);
        if (entry == null) {
            // check space
            if (cache.size() >= CACHE_SIZE) {
                // prune half of them and adjust the rest
                CacheEntry[] entries = cache.values().toArray(new CacheEntry[cache.size()]);
                // NOTE(review): relies on the natural ordering of CacheEntry,
                // presumably by numAccessed -- confirm against
                // CacheEntry.compareTo.
                Arrays.sort(entries);
                // The access count of the entry in the middle of the sorted
                // array is the cut-off for eviction.
                int threshold = entries[CACHE_SIZE / 2].numAccessed;
                for (Iterator<Map.Entry<String, CacheEntry>> it = cache.entrySet().iterator(); it.hasNext(); ) {
                    Map.Entry<String, CacheEntry> e = it.next();
                    if (e.getValue().numAccessed <= threshold) {
                        // prune
                        it.remove();
                    } else {
                        // adjust: shrink the counter of a surviving entry to
                        // its integer square root
                        CacheEntry ce = e.getValue();
                        ce.numAccessed = (int) Math.sqrt(ce.numAccessed);
                    }
                }
            }
            // First request for this text: start tracking it.
            entry = new CacheEntry();
            cache.put(text, entry);
        } else {
            entry.numAccessed++;
        }
    }
    // NOTE(review): from here on the entry's fields are read and written
    // without holding the cache lock; confirm that this is intended.
    //
    // Terms with fewer than 10 recorded accesses are not materialized into a
    // BitSet; this avoids caching term docs that are read only irregularly.
    if (entry.numAccessed < 10) {
        if (log.isDebugEnabled()) {
            log.debug("#{} TermDocs({},{})", new Object[] { entry.numAccessed, field, text });
        }
        return reader.termDocs(t);
    }
    if (entry.bits == null) {
        // collect bits
        // The BitSet is allocated lazily, so it stays null when the term
        // matches no document at all.
        BitSet bits = null;
        TermDocs tDocs = reader.termDocs(t);
        try {
            while (tDocs.next()) {
                if (bits == null) {
                    bits = new BitSet(reader.maxDoc());
                }
                bits.set(tDocs.doc());
            }
        } finally {
            tDocs.close();
        }
        if (bits != null) {
            entry.bits = bits;
        }
    }
    if (entry.bits == null) {
        // none collected: remember the value so that later calls take the
        // fast path at the top of this method
        unknownValues.put(text, text);
        return EmptyTermDocs.INSTANCE;
    } else {
        if (log.isDebugEnabled()) {
            log.debug("CachedTermDocs({},{},{}/{})", new Object[] { field, text, entry.bits.cardinality(), reader.maxDoc() });
        }
        // Serve the result from the collected bits.
        return new CachedTermDocs(entry.bits);
    }
}
Also used : TermDocs(org.apache.lucene.index.TermDocs) BitSet(java.util.BitSet) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 7 with TermDocs

use of org.apache.lucene.index.TermDocs in project jackrabbit by apache.

the class MatchAllScorer method calculateDocFilter.

/**
 * Initializes <code>docFilter</code> with one bit set for every document
 * that has at least one term in {@link FieldNames#PROPERTIES} whose text
 * starts with the named-value prefix built from this scorer's field.
 * <p>
 * The result is stored in the given per-query cache, keyed by this class
 * and the reader and then by field name, and is reused when already present.
 *
 * @param cache the per-query cache used to look up and store the filter.
 * @throws IOException if an error occurs while reading from
 *                     the search index.
 */
@SuppressWarnings({ "unchecked" })
private void calculateDocFilter(PerQueryCache cache) throws IOException {
    // Look up (or lazily create) the field -> filter map for this reader.
    Map<String, BitSet> filtersByField = (Map<String, BitSet>) cache.get(MatchAllScorer.class, reader);
    if (filtersByField == null) {
        filtersByField = new HashMap<String, BitSet>();
        cache.put(MatchAllScorer.class, reader, filtersByField);
    }

    docFilter = filtersByField.get(field);
    if (docFilter == null) {
        // Nothing cached for this field yet: compute the filter from scratch.
        docFilter = new BitSet(reader.maxDoc());

        // Every term that starts with this prefix counts as a match.
        final String prefix = FieldNames.createNamedValue(field, "");
        final TermEnum termEnum = reader.terms(new Term(FieldNames.PROPERTIES, prefix));
        try {
            final TermDocs postings = reader.termDocs();
            try {
                Term current = termEnum.term();
                // The enumeration starts at the prefix; stop as soon as it
                // runs out, leaves the field or leaves the prefix range.
                while (current != null
                        && current.field() == FieldNames.PROPERTIES
                        && current.text().startsWith(prefix)) {
                    postings.seek(termEnum);
                    while (postings.next()) {
                        docFilter.set(postings.doc());
                    }
                    termEnum.next();
                    current = termEnum.term();
                }
            } finally {
                postings.close();
            }
        } finally {
            termEnum.close();
        }

        // Publish the freshly computed filter for later scorers.
        filtersByField.put(field, docFilter);
    }
}
Also used : TermDocs(org.apache.lucene.index.TermDocs) BitSet(java.util.BitSet) Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum) Map(java.util.Map) HashMap(java.util.HashMap)

Example 8 with TermDocs

use of org.apache.lucene.index.TermDocs in project greplin-lucene-utils by Cue.

the class PrefixFilter method getDocIdSet.

/**
 * Collects every document that contains at least one term of this filter's
 * field whose text starts with this filter's prefix.
 *
 * @param reader the index reader to collect matching documents from.
 * @return a bit set, sized to <code>reader.maxDoc()</code>, with one bit set
 *         per matching document.
 * @throws IOException if an error occurs while reading from the index.
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    FixedBitSet result = new FixedBitSet(reader.maxDoc());
    TermEnum te = reader.terms(new Term(this.field, this.prefix));
    // Each resource gets its own try/finally so that the term enumeration is
    // released even when opening or closing the term docs fails.
    try {
        TermDocs td = reader.termDocs();
        try {
            do {
                Term term = te.term();
                // Field names are compared by reference on purpose; this
                // presumably relies on both strings being interned.
                // noinspection StringEquality
                if (term == null || this.field != term.field() || !term.text().startsWith(this.prefix)) {
                    // Enumeration exhausted, or we moved past the prefix range.
                    break;
                }
                td.seek(term);
                while (td.next()) {
                    result.set(td.doc());
                }
            } while (te.next());
        } finally {
            td.close();
        }
    } finally {
        te.close();
    }
    return result;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) TermDocs(org.apache.lucene.index.TermDocs) Term(org.apache.lucene.index.Term) TermEnum(org.apache.lucene.index.TermEnum)

Example 9 with TermDocs

use of org.apache.lucene.index.TermDocs in project greplin-lucene-utils by Cue.

the class TermsFilter method getDocIdSet.

/**
 * Collects every document that contains at least one of this filter's terms.
 *
 * @param reader the index reader to collect matching documents from.
 * @return a bit set, sized to <code>reader.maxDoc()</code>, with one bit set
 *         per matching document.
 * @throws IOException if an error occurs while reading from the index.
 */
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    final FixedBitSet matches = new FixedBitSet(reader.maxDoc());
    // A single TermDocs instance is re-positioned for each term in turn.
    final TermDocs postings = reader.termDocs();
    try {
        for (Term wanted : this.terms) {
            postings.seek(wanted);
            while (postings.next()) {
                final int docId = postings.doc();
                matches.set(docId);
            }
        }
    } finally {
        postings.close();
    }
    return matches;
}
Also used : FixedBitSet(org.apache.lucene.util.FixedBitSet) TermDocs(org.apache.lucene.index.TermDocs) Term(org.apache.lucene.index.Term)

Example 10 with TermDocs

use of org.apache.lucene.index.TermDocs in project greplin-lucene-utils by Cue.

the class FilteredMultiReader method termDocs.

/**
 * Returns a {@link TermDocs} obtained from {@link #termDocs()} and
 * positioned on the given term.
 *
 * @param term the term to seek to.
 * @return the term docs, already positioned on <code>term</code>.
 * @throws IOException if an error occurs while reading from the index.
 */
@Override
public TermDocs termDocs(final Term term) throws IOException {
    final TermDocs positioned = termDocs();
    positioned.seek(term);
    return positioned;
}
Also used : TermDocs(org.apache.lucene.index.TermDocs) HackMultiTermDocs(org.apache.lucene.index.HackMultiTermDocs)

Aggregations

TermDocs (org.apache.lucene.index.TermDocs)17 Term (org.apache.lucene.index.Term)13 TermEnum (org.apache.lucene.index.TermEnum)5 Document (org.apache.lucene.document.Document)4 BitSet (java.util.BitSet)3 RepositoryException (javax.jcr.RepositoryException)3 IOException (java.io.IOException)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 NodeId (org.apache.jackrabbit.core.id.NodeId)2 IndexReader (org.apache.lucene.index.IndexReader)2 FixedBitSet (org.apache.lucene.util.FixedBitSet)2 MalformedURLException (java.net.MalformedURLException)1 LinkedHashMap (java.util.LinkedHashMap)1 WeakHashMap (java.util.WeakHashMap)1 Node (javax.jcr.Node)1 Session (javax.jcr.Session)1 InvalidQueryException (javax.jcr.query.InvalidQueryException)1 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)1 FileSystemException (org.apache.jackrabbit.core.fs.FileSystemException)1