Search in sources :

Example 11 with DocSet

Use of DocSet in project lucene-solr by apache.

the class StatsInfo method process.

public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doStats)
    Map<String, StatsValues> statsValues = new LinkedHashMap<>();
    for (StatsField statsField : rb._statsInfo.getStatsFields()) {
        DocSet docs = statsField.computeBaseDocSet();
        statsValues.put(statsField.getOutputKey(), statsField.computeLocalStatsValues(docs));
    rb.rsp.add("stats", convertToResponse(statsValues));
Also used : DocSet (org.apache.solr.search.DocSet), LinkedHashMap (java.util.LinkedHashMap)

Example 12 with DocSet

Use of DocSet in project lucene-solr by apache.

the class FacetFieldProcessorByEnumTermsStream method _nextBucket.

private SimpleOrderedMap<Object> _nextBucket() throws IOException {
    DocSet termSet = null;
    try {
        while (term != null) {
            if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) {
            int df = termsEnum.docFreq();
            if (df < effectiveMincount) {
                term =;
            if (termSet != null) {
                // termSet.decref(); // OFF-HEAP
                termSet = null;
            int c = 0;
            if (hasSubFacets || df >= minDfFilterCache) {
                if (deState == null) {
                    deState = new SolrIndexSearcher.DocsEnumState();
                    deState.fieldName = sf.getName();
                    deState.liveDocs = fcontext.searcher.getSlowAtomicReader().getLiveDocs();
                    deState.termsEnum = termsEnum;
                    deState.postingsEnum = postingsEnum;
                    deState.minSetSizeCached = minDfFilterCache;
                if (hasSubFacets || !countOnly) {
                    DocSet termsAll = fcontext.searcher.getDocSet(deState);
                    termSet = docs.intersection(termsAll);
                    // termsAll.decref(); // OFF-HEAP
                    c = termSet.size();
                } else {
                    c = fcontext.searcher.numDocs(docs, deState);
                postingsEnum = deState.postingsEnum;
                if (!countOnly) {
                    collect(termSet, 0);
            } else {
                // We don't need the docset here (meaning no sub-facets).
                // if countOnly, then we are calculating some other stats...
                // lazy convert to fastForRandomSet
                if (fastForRandomSet == null) {
                    fastForRandomSet = docs;
                    if (docs instanceof SortedIntDocSet) {
                        // OFF-HEAP todo: also check for native version
                        SortedIntDocSet sset = (SortedIntDocSet) docs;
                        fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
                // iterate over TermDocs to calculate the intersection
                postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
                if (postingsEnum instanceof MultiPostingsEnum) {
                    MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
                    int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
                    for (int subindex = 0; subindex < numSubs; subindex++) {
                        MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
                        if (sub.postingsEnum == null)
                        int base = sub.slice.start;
                        int docid;
                        if (countOnly) {
                            while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid + base))
                        } else {
                            while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid + base)) {
                                    collect(docid, 0);
                } else {
                    int docid;
                    if (countOnly) {
                        while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (fastForRandomSet.exists(docid))
                    } else {
                        while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (fastForRandomSet.exists(docid)) {
                                collect(docid, 0);
            if (c < effectiveMincount) {
                term =;
            // handle offset and limit
            if (bucketsToSkip > 0) {
                term =;
            if (freq.limit >= 0 && ++bucketsReturned > freq.limit) {
                return null;
            // set count in case other stats depend on it
            countAcc.incrementCount(0, c);
            // OK, we have a good bucket to return... first get bucket value before moving to next term
            Object bucketVal = sf.getType().toObject(sf, term);
            TermQuery bucketQuery = hasSubFacets ? new TermQuery(new Term(freq.field, term)) : null;
            term =;
            SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
            bucket.add("val", bucketVal);
            addStats(bucket, 0);
            if (hasSubFacets) {
                processSubs(bucket, bucketQuery, termSet, false, null);
            return bucket;
    } finally {
        if (termSet != null) {
            // termSet.decref();  // OFF-HEAP
            termSet = null;
    // end of the iteration
    return null;
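Also used : SortedIntDocSet (org.apache.solr.search.SortedIntDocSet), HashDocSet (org.apache.solr.search.HashDocSet), TermQuery (org.apache.lucene.search.TermQuery), SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher), Term (org.apache.lucene.index.Term), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap), MultiPostingsEnum (org.apache.lucene.index.MultiPostingsEnum), HashDocSet (org.apache.solr.search.HashDocSet), SortedIntDocSet (org.apache.solr.search.SortedIntDocSet), DocSet (org.apache.solr.search.DocSet)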

Example 13 with DocSet

Use of DocSet in project lucene-solr by apache.

the class FacetFieldProcessor method calculateNumBuckets.

private void calculateNumBuckets(SimpleOrderedMap<Object> target) throws IOException {
    DocSet domain = fcontext.base;
    if (freq.prefix != null) {
        Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
        domain = fcontext.searcher.getDocSet(prefixFilter, domain);
    HLLAgg agg = new HLLAgg(freq.field);
    SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1);
    acc.collect(domain, 0);
    acc.key = "numBuckets";
    acc.setValues(target, 0);
Also used : Query (org.apache.lucene.search.Query), DocSet (org.apache.solr.search.DocSet)

Example 14 with DocSet

Use of DocSet in project lucene-solr by apache.

the class FacetProcessor method getFieldMissing.

static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
    SchemaField sf = searcher.getSchema().getField(fieldName);
    DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
    DocSet answer = docs.andNot(hasVal);
    // hasVal.decref(); // OFF-HEAP
    return answer;
Also used : SchemaField (org.apache.solr.schema.SchemaField), BitDocSet (org.apache.solr.search.BitDocSet), DocSet (org.apache.solr.search.DocSet)

Example 15 with DocSet

Use of DocSet in project lucene-solr by apache.

the class UnInvertedField method visitTerm.

protected void visitTerm(TermsEnum te, int termNum) throws IOException {
    if (termNum >= maxTermCounts.length) {
        // resize by doubling - for very large number of unique terms, expanding
        // by 4K and resultant GC will dominate uninvert times.  Resize at end if material
        int[] newMaxTermCounts = new int[Math.min(Integer.MAX_VALUE - 16, maxTermCounts.length * 2)];
        System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
        maxTermCounts = newMaxTermCounts;
    final BytesRef term = te.term();
    if (te.docFreq() > maxTermDocFreq) {
        // this makes a deep copy of the term bytes
        Term t = new Term(field, term);
        TopTerm topTerm = new TopTerm();
        topTerm.term = t.bytes();
        topTerm.termNum = termNum;
        topTerm.termQuery = new TermQuery(t);
        bigTerms.put(topTerm.termNum, topTerm);
        if (deState == null) {
            deState = new SolrIndexSearcher.DocsEnumState();
            deState.fieldName = field;
            deState.liveDocs = searcher.getSlowAtomicReader().getLiveDocs();
            // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
            deState.termsEnum = te;
            deState.postingsEnum = postingsEnum;
            deState.minSetSizeCached = maxTermDocFreq;
        postingsEnum = deState.postingsEnum;
        DocSet set = searcher.getDocSet(deState);
        maxTermCounts[termNum] = set.size();
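Also used : TermQuery (org.apache.lucene.search.TermQuery), Term (org.apache.lucene.index.Term), SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher), BytesRef (org.apache.lucene.util.BytesRef), BitDocSet (org.apache.solr.search.BitDocSet), DocSet (org.apache.solr.search.DocSet)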


DocSet (org.apache.solr.search.DocSet), BitDocSet (org.apache.solr.search.BitDocSet), Query (org.apache.lucene.search.Query), HashDocSet (org.apache.solr.search.HashDocSet), SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher), SortedIntDocSet (org.apache.solr.search.SortedIntDocSet), SimpleOrderedMap (org.apache.solr.common.util.SimpleOrderedMap) 5, FieldType (org.apache.solr.schema.FieldType) 5, ArrayList (java.util.ArrayList) 4, BytesRef (org.apache.lucene.util.BytesRef) 4, NamedList (org.apache.solr.common.util.NamedList) 4, SchemaField (org.apache.solr.schema.SchemaField) 4, IdentityHashMap (java.util.IdentityHashMap) 3, Map (java.util.Map) 3, LeafReaderContext (org.apache.lucene.index.LeafReaderContext) 3, Term (org.apache.lucene.index.Term) 3, SolrException (org.apache.solr.common.SolrException) 3, SolrParams (org.apache.solr.common.params.SolrParams) 3, QParser (org.apache.solr.search.QParser), SyntaxError (org.apache.solr.search.SyntaxError)